├── README.md ├── YOLOX-pytorch-camera ├── .gitignore ├── 2007_train.txt ├── 2007_val.txt ├── get_map.py ├── logs ├── nets │ ├── __init__.py │ ├── attention.py │ ├── darknet.py │ ├── varifocalloss.py │ ├── yolo.py │ └── yolo_training.py ├── predict_one_point _new.py ├── predict_one_point.py ├── predict_three_point.py ├── requirements.txt ├── stereo │ ├── 3code.py │ ├── NCC.py │ ├── dianyuntu.py │ ├── dianyuntu_yolo.py │ ├── new_BM.py │ ├── result │ │ └── left │ │ │ ├── ['bottle']_3.bmp │ │ │ └── ['bottle']_4.bmp │ ├── stereo_config.py │ ├── stereoconfig_040_2-原代码.py │ ├── stereoconfig_040_2.py │ └── yolo │ │ ├── xiaozheng_hou.jpg │ │ ├── xiaozheng_hou1.jpg │ │ ├── xiaozheng_qian.jpg │ │ └── xiaozheng_qian1.jpg ├── summary.py ├── train.py ├── utils │ ├── __init__.py │ ├── callbacks.py │ ├── dataloader.py │ ├── utils.py │ ├── utils_bbox.py │ ├── utils_fit.py │ └── utils_map.py ├── utils_coco │ ├── coco_annotation.py │ └── get_map_coco.py ├── voc_annotation.py └── yolo.py ├── result_1.bmp ├── result_2.bmp ├── result_3.bmp └── result_4.bmp
/README.md:
--------------------------------------------------------------------------------
1 | # Camera-YOLOX
2 | A YOLOX project that can measure distance with a binocular (stereo) camera.
3 | The YOLOX code is ported from Mr. Bubbliiiing; a binocular ranging program has been added on top of it.
4 | The main principle is as follows: with the binocular camera connected to the computer, the YOLOX code detects targets through the camera, while the left and right images read from it are used to compute the disparity between the two views.
5 | When a target is detected, YOLOX produces a prediction box; the disparity at the center point of the box is then used to recover the three-dimensional world coordinates of the detected object.
6 | ## Preparing the stereo-ranging code
7 | In the stereo folder of this project, you need to edit the binocular camera parameters in stereo_config.py yourself. (Just fill your own data into the existing code; the camera parameters can be obtained with MATLAB's built-in stereo calibration app.) Once the parameters are set and the network is trained, you can **use** predict.py to perform detection and ranging at the same time.
8 | ## Usage
9 | The stereo-ranging code lives mainly in predict.py. Because the prediction file has been modified, you can use predict_one_point.py for detection plus 3D ranging of the target's center point, or predict_three_point.py for detection plus 3D ranging of three points (top, middle, bottom) on the target.
10 | ## Training
11 | Training of the YOLOX network is exactly the same as in Mr. Bubbliiiing's code, so simply follow the YOLOX training tutorial in his repository.
12 | (Note: create a model_data folder for your own task, holding the pretrained weights and the voc_classes or coco_classes label file, and a logs folder for the weight files produced during training.) This is my first public upload of my own project, so there are a few rough edges.
13 | ## Training results
14 | First, the existing limitations: the stereo matching algorithm is the SGBM implementation in OpenCV; with parameter fine-tuning and an effective search for the final localization point, usable localization accuracy can certainly be achieved. My experiment was a small one rather than a large project: detecting and localizing water bottles on the ground. Overall the error was within 10 cm, and it is smaller when no other distracting objects are present. There is still plenty of room for improvement, for example replacing the stereo matching algorithm. The most critical part is obtaining the key point. Some readers may not know what this key point is, so to explain: localization first requires the detector's prediction box; the left and right cameras then compute disparity, and a point has to be found inside the prediction box that, after several coordinate transformations, best represents the true distance from the target to us. Obtaining this key point is therefore crucial; it is implemented in the code, and interested readers can study it in depth.
15 |
--------------------------------------------------------------------------------
/YOLOX-pytorch-camera/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore map, miou, datasets 2 | map_out/ 3 | miou_out/ 4 | VOCdevkit/ 5 | datasets/ 6 | Medical_Datasets/ 7 | lfw/ 8 | logs/ 9 | model_data/ 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | pip-wheel-metadata/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | 40 | # PyInstaller 41 | # Usually these files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .nox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | *.py,cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # PEP 582; used by e.g.
github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/2007_train.txt: -------------------------------------------------------------------------------- 1 | G:\pycharm\study_code\LYD_Experience_YOLOX_V5\yolox_gaijin_new\VOCdevkit/VOC2007/JPEGImages/1.jpg 245,136,276,177,1 325,163,366,204,1 272,175,288,196,1 2 | G:\pycharm\study_code\LYD_Experience_YOLOX_V5\yolox_gaijin_new\VOCdevkit/VOC2007/JPEGImages/2.jpg 338,107,379,137,1 350,147,373,159,1 403,152,445,189,1 3 | G:\pycharm\study_code\LYD_Experience_YOLOX_V5\yolox_gaijin_new\VOCdevkit/VOC2007/JPEGImages/3.jpg 348,109,378,140,1 401,158,444,188,1 355,149,370,159,1 4 | G:\pycharm\study_code\LYD_Experience_YOLOX_V5\yolox_gaijin_new\VOCdevkit/VOC2007/JPEGImages/6.jpg 330,165,360,191,1 347,196,366,209,1 395,192,431,214,1 5 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/2007_val.txt: -------------------------------------------------------------------------------- 1 | G:\pycharm\study_code\LYD_Experience_YOLOX_V5\yolox_gaijin_new\VOCdevkit/VOC2007/JPEGImages/4.jpg 323,170,353,194,1 329,203,346,211,1 372,212,419,231,1 2 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/get_map.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xml.etree.ElementTree as ET 3 | 4 | from PIL import Image 5 | from tqdm import tqdm 6 | 7 | from yolo import YOLO 8 | from utils.utils import get_classes 9 | from utils.utils_map import get_coco_map, get_map 10 | 11 | if __name__ == "__main__": 12 | ''' 13 | Recall和Precision不像AP是一个面积的概念,在门限值不同时,网络的Recall和Precision值是不同的。 14 | map计算结果中的Recall和Precision代表的是当预测时,门限置信度为0.5时,所对应的Recall和Precision值。 15 | 此处获得的./map_out/detection-results/里面的txt的框的数量会比直接predict多一些,这是因为这里的门限低, 16 | 目的是为了计算不同门限条件下的Recall和Precision值,从而实现map的计算。 17 | ''' 18 | #------------------------------------------------------------------------------------------------------------------# 19 | # map_mode用于指定该文件运行时计算的内容 20 | # map_mode为0代表整个map计算流程,包括获得预测结果、获得真实框、计算VOC_map。 21 | # map_mode为1代表仅仅获得预测结果。 22 | # map_mode为2代表仅仅获得真实框。 23 | # map_mode为3代表仅仅计算VOC_map。 24 | # map_mode为4代表利用COCO工具箱计算当前数据集的0.50:0.95map。需要获得预测结果、获得真实框后并安装pycocotools才行 25 | #-------------------------------------------------------------------------------------------------------------------# 26 | map_mode = 0 27 | #-------------------------------------------------------# 28 | # 此处的classes_path用于指定需要测量VOC_map的类别 29 | # 一般情况下与训练和预测所用的classes_path一致即可 30 | #-------------------------------------------------------# 31 | classes_path = 'model_data/voc_classes.txt' 32 | #-------------------------------------------------------# 33 | # MINOVERLAP用于指定想要获得的mAP0.x 34 | # 比如计算mAP0.75,可以设定MINOVERLAP = 0.75。 35 | #-------------------------------------------------------# 36 | MINOVERLAP = 0.5 37 | 
#-------------------------------------------------------# 38 | # map_vis用于指定是否开启VOC_map计算的可视化 39 | #-------------------------------------------------------# 40 | map_vis = False 41 | #-------------------------------------------------------# 42 | # 指向VOC数据集所在的文件夹 43 | # 默认指向根目录下的VOC数据集 44 | #-------------------------------------------------------# 45 | VOCdevkit_path = 'VOCdevkit' 46 | #-------------------------------------------------------# 47 | # 结果输出的文件夹,默认为map_out 48 | #-------------------------------------------------------# 49 | map_out_path = 'map_out' 50 | 51 | image_ids = open(os.path.join(VOCdevkit_path, "VOC2007/ImageSets/Main/test.txt")).read().strip().split() 52 | 53 | if not os.path.exists(map_out_path): 54 | os.makedirs(map_out_path) 55 | if not os.path.exists(os.path.join(map_out_path, 'ground-truth')): 56 | os.makedirs(os.path.join(map_out_path, 'ground-truth')) 57 | if not os.path.exists(os.path.join(map_out_path, 'detection-results')): 58 | os.makedirs(os.path.join(map_out_path, 'detection-results')) 59 | if not os.path.exists(os.path.join(map_out_path, 'images-optional')): 60 | os.makedirs(os.path.join(map_out_path, 'images-optional')) 61 | 62 | class_names, _ = get_classes(classes_path) 63 | 64 | if map_mode == 0 or map_mode == 1: 65 | print("Load model.") 66 | yolo = YOLO(confidence = 0.001, nms_iou = 0.65) 67 | print("Load model done.") 68 | 69 | print("Get predict result.") 70 | for image_id in tqdm(image_ids): 71 | image_path = os.path.join(VOCdevkit_path, "VOC2007/JPEGImages/"+image_id+".jpg") 72 | image = Image.open(image_path) 73 | if map_vis: 74 | image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg")) 75 | yolo.get_map_txt(image_id, image, class_names, map_out_path) 76 | print("Get predict result done.") 77 | 78 | if map_mode == 0 or map_mode == 2: 79 | print("Get ground truth result.") 80 | for image_id in tqdm(image_ids): 81 | with open(os.path.join(map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f: 82 | root = ET.parse(os.path.join(VOCdevkit_path, "VOC2007/Annotations/"+image_id+".xml")).getroot() 83 | for obj in root.findall('object'): 84 | difficult_flag = False 85 | if obj.find('difficult')!=None: 86 | difficult = obj.find('difficult').text 87 | if int(difficult)==1: 88 | difficult_flag = True 89 | obj_name = obj.find('name').text 90 | if obj_name not in class_names: 91 | continue 92 | bndbox = obj.find('bndbox') 93 | left = bndbox.find('xmin').text 94 | top = bndbox.find('ymin').text 95 | right = bndbox.find('xmax').text 96 | bottom = bndbox.find('ymax').text 97 | 98 | if difficult_flag: 99 | new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom)) 100 | else: 101 | new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom)) 102 | print("Get ground truth result done.") 103 | 104 | if map_mode == 0 or map_mode == 3: 105 | print("Get map.") 106 | get_map(MINOVERLAP, True, path = map_out_path) 107 | print("Get map done.") 108 | 109 | if map_mode == 4: 110 | print("Get map.") 111 | get_coco_map(class_names = class_names, path = map_out_path) 112 | print("Get map done.") -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/logs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/nets/__init__.py: -------------------------------------------------------------------------------- 1 | # 
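The README above explains how the disparity at the center of a YOLOX prediction box is turned into a distance. The sketch below illustrates only that idea; it is not code from this repository, and the names estimate_box_center_depth, focal_px and baseline_m are placeholders for values that would come from the stereo calibration stored in stereo/stereo_config.py. It uses OpenCV's SGBM matcher, which is the algorithm the README mentions.

```python
# Illustrative sketch (not repository code): depth of a detected box's center
# from a rectified stereo pair, using OpenCV's SGBM matcher.
import cv2
import numpy as np

def estimate_box_center_depth(left_gray, right_gray, box, focal_px, baseline_m):
    # box = (x1, y1, x2, y2) in rectified left-image pixel coordinates;
    # focal_px and baseline_m come from stereo calibration (placeholders here).
    sgbm = cv2.StereoSGBM_create(minDisparity=0, numDisparities=64, blockSize=9)
    disparity = sgbm.compute(left_gray, right_gray).astype(np.float32) / 16.0
    cx = int((box[0] + box[2]) / 2)
    cy = int((box[1] + box[3]) / 2)
    d = disparity[cy, cx]
    if d <= 0:                        # no valid match at that pixel
        return None
    return focal_px * baseline_m / d  # Z = f * B / d, in the units of baseline_m
```

The repository's predict_one_point.py performs the full version of this step, recovering all three world coordinates of the center point rather than the depth alone.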
-------------------------------------------------------------------------------- /YOLOX-pytorch-camera/nets/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | 5 | class se_block(nn.Module): 6 | def __init__(self, channel, ratio=16): 7 | super(se_block, self).__init__() 8 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 9 | self.fc = nn.Sequential( 10 | nn.Linear(channel, channel // ratio, bias=False), 11 | nn.ReLU(inplace=True), 12 | nn.Linear(channel // ratio, channel, bias=False), 13 | nn.Sigmoid() 14 | ) 15 | 16 | def forward(self, x): 17 | b, c, _, _ = x.size() 18 | y = self.avg_pool(x).view(b, c) 19 | y = self.fc(y).view(b, c, 1, 1) 20 | return x * y 21 | 22 | class ChannelAttention(nn.Module): 23 | def __init__(self, in_planes, ratio=8): 24 | super(ChannelAttention, self).__init__() 25 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 26 | self.max_pool = nn.AdaptiveMaxPool2d(1) 27 | 28 | # 利用1x1卷积代替全连接 29 | self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False) 30 | self.relu1 = nn.ReLU() 31 | self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False) 32 | 33 | self.sigmoid = nn.Sigmoid() 34 | 35 | def forward(self, x): 36 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 37 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 38 | out = avg_out + max_out 39 | return self.sigmoid(out) 40 | 41 | class SpatialAttention(nn.Module): 42 | def __init__(self, kernel_size=7): 43 | super(SpatialAttention, self).__init__() 44 | 45 | assert kernel_size in (3, 7), 'kernel size must be 3 or 7' 46 | padding = 3 if kernel_size == 7 else 1 47 | self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) 48 | self.sigmoid = nn.Sigmoid() 49 | 50 | def forward(self, x): 51 | avg_out = torch.mean(x, dim=1, keepdim=True) 52 | max_out, _ = torch.max(x, dim=1, keepdim=True) 53 | x = torch.cat([avg_out, max_out], dim=1) 54 | x = self.conv1(x) 55 | return self.sigmoid(x) 56 | 57 | class cbam_block(nn.Module): 58 | def __init__(self, channel, ratio=8, kernel_size=7): 59 | super(cbam_block, self).__init__() 60 | self.channelattention = ChannelAttention(channel, ratio=ratio) 61 | self.spatialattention = SpatialAttention(kernel_size=kernel_size) 62 | 63 | def forward(self, x): 64 | print("cbam注意力已运行") 65 | x = x*self.channelattention(x) 66 | x = x*self.spatialattention(x) 67 | return x 68 | 69 | class eca_block(nn.Module): 70 | def __init__(self, channel, b=1, gamma=2): 71 | super(eca_block, self).__init__() 72 | kernel_size = int(abs((math.log(channel, 2) + b) / gamma)) 73 | kernel_size = kernel_size if kernel_size % 2 else kernel_size + 1 74 | 75 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 76 | self.conv = nn.Conv1d(1, 1, kernel_size=kernel_size, padding=(kernel_size - 1) // 2, bias=False) 77 | self.sigmoid = nn.Sigmoid() 78 | 79 | def forward(self, x): 80 | #print("ECA注意力已经运行") 81 | y = self.avg_pool(x) 82 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 83 | y = self.sigmoid(y) 84 | return x * y.expand_as(x) 85 | 86 | class eca_layer(nn.Module): 87 | """Constructs a ECA module. 
88 | Args: 89 | channel: Number of channels of the input feature map 90 | k_size: Adaptive selection of kernel size 91 | """ 92 | def __init__(self, channel, k_size=3): 93 | super(eca_layer, self).__init__() 94 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 95 | self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False) 96 | self.sigmoid = nn.Sigmoid() 97 | 98 | def forward(self, x): 99 | # x: input features with shape [b, c, h, w] 100 | b, c, h, w = x.size() 101 | # feature descriptor on the global spatial information 102 | y = self.avg_pool(x) 103 | # Two different branches of ECA module 104 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 105 | # Multi-scale information fusion 106 | y = self.sigmoid(y) 107 | return x * y.expand_as(x) 108 | 109 | #----------------------------- 坐标注意力 --------------------------- 110 | #--------------------------------------------------------------------- 111 | import torch 112 | import torch.nn as nn 113 | import math 114 | import torch.nn.functional as F 115 | 116 | class h_sigmoid(nn.Module): 117 | def __init__(self, inplace=True): 118 | super(h_sigmoid, self).__init__() 119 | self.relu = nn.ReLU6(inplace=inplace) 120 | 121 | def forward(self, x): 122 | return self.relu(x + 3) / 6 123 | class h_swish(nn.Module): 124 | def __init__(self, inplace=True): 125 | super(h_swish, self).__init__() 126 | self.sigmoid = h_sigmoid(inplace=inplace) 127 | 128 | def forward(self, x): 129 | return x * self.sigmoid(x) 130 | class CoordAtt(nn.Module): 131 | def __init__(self, inp): 132 | super(CoordAtt, self).__init__() 133 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 134 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 135 | 136 | reduction = 32 137 | 138 | mip = max(8, inp // reduction) 139 | 140 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 141 | self.bn1 = nn.BatchNorm2d(mip) 142 | self.act = h_swish() 143 | 144 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 145 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 146 | def forward(self, x): 147 | identity = x 148 | #print("坐标注意力已运行") 149 | n, c, h, w = x.size() 150 | x_h = self.pool_h(x) 151 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 152 | 153 | y = torch.cat([x_h, x_w], dim=2) 154 | y = self.conv1(y) 155 | y = self.bn1(y) 156 | y = self.act(y) 157 | 158 | x_h, x_w = torch.split(y, [h, w], dim=2) 159 | x_w = x_w.permute(0, 1, 3, 2) 160 | 161 | a_h = self.conv_h(x_h).sigmoid() 162 | a_w = self.conv_w(x_w).sigmoid() 163 | 164 | out = identity * a_w * a_h 165 | return out -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/nets/darknet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | import torch 6 | from torch import nn 7 | 8 | class SiLU(nn.Module): 9 | @staticmethod 10 | def forward(x): 11 | return x * torch.sigmoid(x) 12 | 13 | def get_activation(name="silu", inplace=True): 14 | if name == "silu": 15 | module = SiLU() 16 | elif name == "relu": 17 | module = nn.ReLU(inplace=inplace) 18 | elif name == "lrelu": 19 | module = nn.LeakyReLU(0.1, inplace=inplace) 20 | else: 21 | raise AttributeError("Unsupported act type: {}".format(name)) 22 | return module 23 | 24 | class Focus(nn.Module): 25 | def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="silu"): 26 | super().__init__() 27 | self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride, act=act) 28 | 29 | def forward(self, x): 30 | patch_top_left = x[..., ::2, ::2] 31 | patch_bot_left = x[..., 1::2, ::2] 32 | patch_top_right = x[..., ::2, 1::2] 33 | patch_bot_right = x[..., 1::2, 1::2] 34 | x = torch.cat((patch_top_left, patch_bot_left, patch_top_right, patch_bot_right,), dim=1,) 35 | return self.conv(x) 36 | 37 | class BaseConv(nn.Module): 38 | def __init__(self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu"): 39 | super().__init__() 40 | pad = (ksize - 1) // 2 41 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=ksize, stride=stride, padding=pad, groups=groups, bias=bias) 42 | self.bn = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0.03) 43 | self.act = get_activation(act, inplace=True) 44 | 45 | def forward(self, x): 46 | return self.act(self.bn(self.conv(x))) 47 | 48 | def fuseforward(self, x): 49 | return self.act(self.conv(x)) 50 | #------------------------ 加入的Inception结构 -------------------------------- 51 | class Inception_BaseConv(nn.Module): 52 | def __init__(self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu"): 53 | super().__init__() 54 | pad = (ksize - 1) // 2 55 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=ksize, stride=stride, padding=pad, groups=groups, bias=bias) 56 | self.conv1_3 = nn.Conv2d(out_channels, out_channels, kernel_size=(1, 3), padding=(0, 1)) 57 | self.conv3_1 = nn.Conv2d(out_channels, out_channels, kernel_size=(3, 1), padding=(1, 0)) 58 | self.bn = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0.03) 59 | self.act = get_activation(act, inplace=True) 60 | #----------------------------------------------------------------------- 61 | class DWConv(nn.Module): 62 | def __init__(self, in_channels, out_channels, ksize, stride=1, act="silu"): 63 | super().__init__() 64 | self.dconv = BaseConv(in_channels, in_channels, ksize=ksize, stride=stride, groups=in_channels, act=act,) 65 | self.pconv = BaseConv(in_channels, out_channels, ksize=1, stride=1, groups=1, act=act) 66 | 67 | def forward(self, x): 68 | x = self.dconv(x) 69 | return self.pconv(x) 70 | 71 | class SPPBottleneck(nn.Module): 72 | def __init__(self, in_channels, out_channels, kernel_sizes=(5, 9, 13), activation="silu"): 73 | super().__init__() 74 | hidden_channels = in_channels // 2 75 | self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=activation) 76 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) for ks in kernel_sizes]) 77 | conv2_channels = hidden_channels * (len(kernel_sizes) + 1) 78 | self.conv2 = BaseConv(conv2_channels, out_channels, 1, stride=1, act=activation) 79 | 80 | def forward(self, x): 81 | x = self.conv1(x) 82 | x = torch.cat([x] + [m(x) for m in self.m], dim=1) 83 | x = self.conv2(x) 84 | return x 85 | 86 | 
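# ---------------------------------------------------------------------------
# Note: Inception_BaseConv above defines its layers (conv, conv1_3, conv3_1,
# bn, act) but no forward(), so as written the module cannot be called; the
# stem that would use it in nets/yolo.py is commented out. A minimal sketch of
# a plausible forward pass -- an assumption, not part of the original code --
# would be:
#
#     def forward(self, x):
#         x = self.conv(x)
#         x = self.conv1_3(x) + self.conv3_1(x)   # fuse the 1x3 and 3x1 branches
#         return self.act(self.bn(x))
# ---------------------------------------------------------------------------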
#--------------------------------------------------# 87 | # 残差结构的构建,小的残差结构 88 | #--------------------------------------------------# 89 | class Bottleneck(nn.Module): 90 | # Standard bottleneck 91 | def __init__(self, in_channels, out_channels, shortcut=True, expansion=0.5, depthwise=False, act="silu",): 92 | super().__init__() 93 | hidden_channels = int(out_channels * expansion) 94 | Conv = DWConv if depthwise else BaseConv 95 | #--------------------------------------------------# 96 | # 利用1x1卷积进行通道数的缩减。缩减率一般是50% 97 | #--------------------------------------------------# 98 | self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) 99 | #--------------------------------------------------# 100 | # 利用3x3卷积进行通道数的拓张。并且完成特征提取 101 | #--------------------------------------------------# 102 | self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1, act=act) 103 | self.use_add = shortcut and in_channels == out_channels 104 | 105 | def forward(self, x): 106 | y = self.conv2(self.conv1(x)) 107 | if self.use_add: 108 | y = y + x 109 | return y 110 | 111 | class CSPLayer(nn.Module): 112 | def __init__(self, in_channels, out_channels, n=1, shortcut=True, expansion=0.5, depthwise=False, act="silu",): 113 | # ch_in, ch_out, number, shortcut, groups, expansion 114 | super().__init__() 115 | hidden_channels = int(out_channels * expansion) 116 | #--------------------------------------------------# 117 | # 主干部分的初次卷积 118 | #--------------------------------------------------# 119 | self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) 120 | #--------------------------------------------------# 121 | # 大的残差边部分的初次卷积 122 | #--------------------------------------------------# 123 | self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) 124 | #-----------------------------------------------# 125 | # 对堆叠的结果进行卷积的处理 126 | #-----------------------------------------------# 127 | self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1, act=act) 128 | 129 | #--------------------------------------------------# 130 | # 根据循环的次数构建上述Bottleneck残差结构 131 | #--------------------------------------------------# 132 | module_list = [Bottleneck(hidden_channels, hidden_channels, shortcut, 1.0, depthwise, act=act) for _ in range(n)] 133 | self.m = nn.Sequential(*module_list) 134 | 135 | def forward(self, x): 136 | #-------------------------------# 137 | # x_1是主干部分 138 | #-------------------------------# 139 | x_1 = self.conv1(x) 140 | #-------------------------------# 141 | # x_2是大的残差边部分 142 | #-------------------------------# 143 | x_2 = self.conv2(x) 144 | 145 | #-----------------------------------------------# 146 | # 主干部分利用残差结构堆叠继续进行特征提取 147 | #-----------------------------------------------# 148 | x_1 = self.m(x_1) 149 | #-----------------------------------------------# 150 | # 主干部分和大的残差边部分进行堆叠 151 | #-----------------------------------------------# 152 | x = torch.cat((x_1, x_2), dim=1) 153 | #-----------------------------------------------# 154 | # 对堆叠的结果进行卷积的处理 155 | #-----------------------------------------------# 156 | return self.conv3(x) 157 | 158 | class CSPDarknet(nn.Module): 159 | def __init__(self, dep_mul, wid_mul, out_features=("dark3", "dark4", "dark5"), depthwise=False, act="silu",): 160 | super().__init__() 161 | assert out_features, "please provide output features of Darknet" 162 | self.out_features = out_features 163 | Conv = DWConv if depthwise else BaseConv 164 | 165 | #-----------------------------------------------# 166 | # 输入图片是640, 640, 
3 167 | # 初始的基本通道是64 168 | #-----------------------------------------------# 169 | base_channels = int(wid_mul * 64) # 64 170 | base_depth = max(round(dep_mul * 3), 1) # 3 171 | 172 | #-----------------------------------------------# 173 | # 利用focus网络结构进行特征提取 174 | # 640, 640, 3 -> 320, 320, 12 -> 320, 320, 64 175 | #-----------------------------------------------# 176 | self.stem = Focus(3, base_channels, ksize=3, act=act) 177 | 178 | #-----------------------------------------------# 179 | # 完成卷积之后,320, 320, 64 -> 160, 160, 128 180 | # 完成CSPlayer之后,160, 160, 128 -> 160, 160, 128 181 | #-----------------------------------------------# 182 | self.dark2 = nn.Sequential( 183 | Conv(base_channels, base_channels * 2, 3, 2, act=act), 184 | CSPLayer(base_channels * 2, base_channels * 2, n=base_depth, depthwise=depthwise, act=act), 185 | ) 186 | 187 | #-----------------------------------------------# 188 | # 完成卷积之后,160, 160, 128 -> 80, 80, 256 189 | # 完成CSPlayer之后,80, 80, 256 -> 80, 80, 256 190 | #-----------------------------------------------# 191 | self.dark3 = nn.Sequential( 192 | Conv(base_channels * 2, base_channels * 4, 3, 2, act=act), 193 | CSPLayer(base_channels * 4, base_channels * 4, n=base_depth * 3, depthwise=depthwise, act=act), 194 | ) 195 | 196 | #-----------------------------------------------# 197 | # 完成卷积之后,80, 80, 256 -> 40, 40, 512 198 | # 完成CSPlayer之后,40, 40, 512 -> 40, 40, 512 199 | #-----------------------------------------------# 200 | self.dark4 = nn.Sequential( 201 | Conv(base_channels * 4, base_channels * 8, 3, 2, act=act), 202 | CSPLayer(base_channels * 8, base_channels * 8, n=base_depth * 3, depthwise=depthwise, act=act), 203 | ) 204 | 205 | #-----------------------------------------------# 206 | # 完成卷积之后,40, 40, 512 -> 20, 20, 1024 207 | # 完成SPP之后,20, 20, 1024 -> 20, 20, 1024 208 | # 完成CSPlayer之后,20, 20, 1024 -> 20, 20, 1024 209 | #-----------------------------------------------# 210 | self.dark5 = nn.Sequential( 211 | Conv(base_channels * 8, base_channels * 16, 3, 2, act=act), 212 | SPPBottleneck(base_channels * 16, base_channels * 16, activation=act), 213 | CSPLayer(base_channels * 16, base_channels * 16, n=base_depth, shortcut=False, depthwise=depthwise, act=act), 214 | ) 215 | 216 | def forward(self, x): 217 | outputs = {} 218 | x = self.stem(x) 219 | outputs["stem"] = x 220 | x = self.dark2(x) 221 | outputs["dark2"] = x 222 | #-----------------------------------------------# 223 | # dark3的输出为80, 80, 256,是一个有效特征层 224 | #-----------------------------------------------# 225 | x = self.dark3(x) 226 | outputs["dark3"] = x 227 | #-----------------------------------------------# 228 | # dark4的输出为40, 40, 512,是一个有效特征层 229 | #-----------------------------------------------# 230 | x = self.dark4(x) 231 | outputs["dark4"] = x 232 | #-----------------------------------------------# 233 | # dark5的输出为20, 20, 1024,是一个有效特征层 234 | #-----------------------------------------------# 235 | x = self.dark5(x) 236 | outputs["dark5"] = x 237 | return {k: v for k, v in outputs.items() if k in self.out_features} 238 | 239 | 240 | if __name__ == '__main__': 241 | print(CSPDarknet(1, 1)) -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/nets/varifocalloss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def reduce_loss(loss, reduction): 6 | """Reduce loss as specified. 7 | Args: 8 | loss (Tensor): Elementwise loss tensor. 
9 | reduction (str): Options are "none", "mean" and "sum". 10 | Return: 11 | Tensor: Reduced loss tensor. 12 | """ 13 | reduction_enum = F._Reduction.get_enum(reduction) 14 | # none: 0, elementwise_mean:1, sum: 2 15 | if reduction_enum == 0: 16 | return loss 17 | elif reduction_enum == 1: 18 | return loss.mean() 19 | elif reduction_enum == 2: 20 | return loss.sum() 21 | 22 | 23 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 24 | """Apply element-wise weight and reduce loss. 25 | Args: 26 | loss (Tensor): Element-wise loss. 27 | weight (Tensor): Element-wise weights. 28 | reduction (str): Same as built-in losses of PyTorch. 29 | avg_factor (float): Avarage factor when computing the mean of losses. 30 | Returns: 31 | Tensor: Processed loss values. 32 | """ 33 | # if weight is specified, apply element-wise weight 34 | if weight is not None: 35 | loss = loss * weight 36 | 37 | # if avg_factor is not specified, just reduce the loss 38 | if avg_factor is None: 39 | loss = reduce_loss(loss, reduction) 40 | else: 41 | # if reduction is mean, then average the loss by avg_factor 42 | if reduction == 'mean': 43 | loss = loss.sum() / avg_factor 44 | # if reduction is 'none', then do nothing, otherwise raise an error 45 | elif reduction != 'none': 46 | raise ValueError('avg_factor can not be used with reduction="sum"') 47 | return loss 48 | 49 | 50 | def varifocal_loss(pred, 51 | target, 52 | weight=None, 53 | alpha=0.75, 54 | gamma=2.0, 55 | iou_weighted=True, 56 | reduction='mean', 57 | avg_factor=None): 58 | """`Varifocal Loss `_ 59 | Args: 60 | pred (torch.Tensor): The prediction with shape (N, C), C is the 61 | number of classes 62 | target (torch.Tensor): The learning target of the iou-aware 63 | classification score with shape (N, C), C is the number of classes. 64 | weight (torch.Tensor, optional): The weight of loss for each 65 | prediction. Defaults to None. 66 | alpha (float, optional): A balance factor for the negative part of 67 | Varifocal Loss, which is different from the alpha of Focal Loss. 68 | Defaults to 0.75. 69 | gamma (float, optional): The gamma for calculating the modulating 70 | factor. Defaults to 2.0. 71 | iou_weighted (bool, optional): Whether to weight the loss of the 72 | positive example with the iou target. Defaults to True. 73 | reduction (str, optional): The method used to reduce the loss into 74 | a scalar. Defaults to 'mean'. Options are "none", "mean" and 75 | "sum". 76 | avg_factor (int, optional): Average factor that is used to average 77 | the loss. Defaults to None. 
78 | """ 79 | # pred and target should be of the same size 80 | assert pred.size() == target.size() 81 | pred_sigmoid = pred.sigmoid() 82 | target = target.type_as(pred) 83 | if iou_weighted: 84 | focal_weight = target * (target > 0.0).float() + \ 85 | alpha * (pred_sigmoid - target).abs().pow(gamma) * \ 86 | (target <= 0.0).float() 87 | else: 88 | focal_weight = (target > 0.0).float() + \ 89 | alpha * (pred_sigmoid - target).abs().pow(gamma) * \ 90 | (target <= 0.0).float() 91 | loss = F.binary_cross_entropy_with_logits( 92 | pred, target, reduction='none') * focal_weight 93 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 94 | return loss 95 | 96 | 97 | class VarifocalLoss(nn.Module): 98 | 99 | def __init__(self, 100 | use_sigmoid=True, 101 | alpha=0.75, 102 | gamma=2.0, 103 | iou_weighted=True, 104 | reduction='mean', 105 | loss_weight=1.0): 106 | """`Varifocal Loss `_ 107 | Args: 108 | use_sigmoid (bool, optional): Whether the prediction is 109 | used for sigmoid or softmax. Defaults to True. 110 | alpha (float, optional): A balance factor for the negative part of 111 | Varifocal Loss, which is different from the alpha of Focal 112 | Loss. Defaults to 0.75. 113 | gamma (float, optional): The gamma for calculating the modulating 114 | factor. Defaults to 2.0. 115 | iou_weighted (bool, optional): Whether to weight the loss of the 116 | positive examples with the iou target. Defaults to True. 117 | reduction (str, optional): The method used to reduce the loss into 118 | a scalar. Defaults to 'mean'. Options are "none", "mean" and 119 | "sum". 120 | loss_weight (float, optional): Weight of loss. Defaults to 1.0. 121 | """ 122 | super(VarifocalLoss, self).__init__() 123 | assert use_sigmoid is True, \ 124 | 'Only sigmoid varifocal loss supported now.' 125 | assert alpha >= 0.0 126 | self.use_sigmoid = use_sigmoid 127 | self.alpha = alpha 128 | self.gamma = gamma 129 | self.iou_weighted = iou_weighted 130 | self.reduction = reduction 131 | self.loss_weight = loss_weight 132 | 133 | def forward(self, 134 | pred, 135 | target, 136 | weight=None, 137 | avg_factor=None, 138 | reduction_override=None): 139 | """Forward function. 140 | Args: 141 | pred (torch.Tensor): The prediction. 142 | target (torch.Tensor): The learning target of the prediction. 143 | weight (torch.Tensor, optional): The weight of loss for each 144 | prediction. Defaults to None. 145 | avg_factor (int, optional): Average factor that is used to average 146 | the loss. Defaults to None. 147 | reduction_override (str, optional): The reduction method used to 148 | override the original reduction method of the loss. 149 | Options are "none", "mean" and "sum". 150 | Returns: 151 | torch.Tensor: The calculated loss 152 | """ 153 | assert reduction_override in (None, 'none', 'mean', 'sum') 154 | reduction = ( 155 | reduction_override if reduction_override else self.reduction) 156 | if self.use_sigmoid: 157 | loss_cls = self.loss_weight * varifocal_loss( 158 | pred, 159 | target, 160 | weight, 161 | alpha=self.alpha, 162 | gamma=self.gamma, 163 | iou_weighted=self.iou_weighted, 164 | reduction=reduction, 165 | avg_factor=avg_factor) 166 | else: 167 | raise NotImplementedError 168 | return loss_cls 169 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/nets/yolo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .darknet import BaseConv, CSPDarknet, CSPLayer, DWConv ,Inception_BaseConv 9 | from nets.attention import cbam_block, eca_block, se_block, CoordAtt 10 | attention_block = [se_block, cbam_block, eca_block, CoordAtt] 11 | 12 | class YOLOXHead(nn.Module): 13 | def __init__(self, num_classes, width = 1.0, in_channels = [256, 512, 1024], act = "silu", depthwise = False,): 14 | super().__init__() 15 | Conv = DWConv if depthwise else BaseConv 16 | #------------------INCEPTION 结构--------------------- 17 | Conv_I = Inception_BaseConv 18 | #—————————————————————————————————————————————— 19 | self.cls_convs = nn.ModuleList() 20 | self.reg_convs = nn.ModuleList() 21 | self.cls_preds = nn.ModuleList() 22 | self.reg_preds = nn.ModuleList() 23 | self.obj_preds = nn.ModuleList() 24 | self.stems = nn.ModuleList() 25 | 26 | for i in range(len(in_channels)): 27 | #-------------------inception结构------------------ 28 | #self.stems.append(Inception_BaseConv(in_channels = int(in_channels[i] * width), out_channels = int(256 * width), ksize = 1, stride = 1, act = act)) 29 | #源码 30 | self.stems.append(BaseConv(in_channels = int(in_channels[i] * width), out_channels = int(256 * width), ksize = 1, stride = 1, act = act)) 31 | #—————————————————————————————————————————————————————————————————————————————————————————— 32 | self.cls_convs.append(nn.Sequential(*[ 33 | Conv(in_channels = int(256 * width), out_channels = int(256 * width), ksize = 3, stride = 1, act = act), 34 | Conv(in_channels = int(256 * width), out_channels = int(256 * width), ksize = 3, stride = 1, act = act), 35 | ])) 36 | self.cls_preds.append( 37 | nn.Conv2d(in_channels = int(256 * width), out_channels = num_classes, kernel_size = 1, stride = 1, padding = 0) 38 | ) 39 | 40 | 41 | self.reg_convs.append(nn.Sequential(*[ 42 | Conv(in_channels = int(256 * width), out_channels = int(256 * width), ksize = 3, stride = 1, act = act), 43 | Conv(in_channels = int(256 * width), out_channels = int(256 * width), ksize = 3, stride = 1, act = act) 44 | ])) 45 | self.reg_preds.append( 46 | nn.Conv2d(in_channels = int(256 * width), out_channels = 4, kernel_size = 1, stride = 1, padding = 0) 47 | ) 48 | self.obj_preds.append( 49 | nn.Conv2d(in_channels = int(256 * width), out_channels = 1, kernel_size = 1, stride = 1, padding = 0) 50 | ) 51 | 52 | def forward(self, inputs): 53 | #---------------------------------------------------# 54 | # inputs输入 55 | # P3_out 80, 80, 256 56 | # P4_out 40, 40, 512 57 | # P5_out 20, 20, 1024 58 | #---------------------------------------------------# 59 | outputs = [] 60 | for k, x in enumerate(inputs): 61 | #---------------------------------------------------# 62 | # 利用1x1卷积进行通道整合 63 | #---------------------------------------------------# 64 | x = self.stems[k](x) 65 | #---------------------------------------------------# 66 | # 利用两个卷积标准化激活函数来进行特征提取 67 | #---------------------------------------------------# 68 | cls_feat = self.cls_convs[k](x) 69 | #---------------------------------------------------# 70 | # 判断特征点所属的种类 71 | # 80, 80, num_classes 72 | # 40, 40, num_classes 73 | # 20, 20, num_classes 74 | #---------------------------------------------------# 75 | cls_output = self.cls_preds[k](cls_feat) 76 | 77 | #---------------------------------------------------# 78 | # 利用两个卷积标准化激活函数来进行特征提取 79 | #---------------------------------------------------# 80 | reg_feat = self.reg_convs[k](x) 81 | #---------------------------------------------------# 82 | # 特征点的回归系数 83 | # reg_pred 
80, 80, 4 84 | # reg_pred 40, 40, 4 85 | # reg_pred 20, 20, 4 86 | #---------------------------------------------------# 87 | reg_output = self.reg_preds[k](reg_feat) 88 | #---------------------------------------------------# 89 | # 判断特征点是否有对应的物体 90 | # obj_pred 80, 80, 1 91 | # obj_pred 40, 40, 1 92 | # obj_pred 20, 20, 1 93 | #---------------------------------------------------# 94 | obj_output = self.obj_preds[k](reg_feat) 95 | 96 | output = torch.cat([reg_output, obj_output, cls_output], 1) 97 | outputs.append(output) 98 | return outputs 99 | 100 | class YOLOPAFPN(nn.Module): 101 | def __init__(self, depth = 1.0, width = 1.0, in_features = ("dark3", "dark4", "dark5"), in_channels = [256, 512, 1024], depthwise = False, act = "silu",attention=0): 102 | super().__init__() 103 | Conv = DWConv if depthwise else BaseConv 104 | self.backbone = CSPDarknet(depth, width, depthwise = depthwise, act = act) 105 | self.in_features = in_features 106 | self.upsample = nn.Upsample(scale_factor=2, mode="nearest") 107 | #——————————————————————————————加入注意力———————————————————————————————————————————— 108 | if attention >= 5: 109 | raise AssertionError("zyl must be less than or equal to 3 (0, 1, 2, 3).") 110 | #----------添加注意力机制------------ 111 | self.attention = attention 112 | #attention_block = [se_block, cbam_block, eca_block,坐标注意力] 113 | if 1 <= self.attention and self.attention <= 4: 114 | # nano 模型的宽度(通道数)为L模型(256,512,1024)的1/4,(64,128,256) 115 | # tiny 模型的宽度(通道数)为L模型(256,512,1024)的0.375,(96,192,384) 116 | # s 模型的宽度(通道数)为L模型(256,512,1024)的1/2,(128,256,512) 117 | self.feat1_att = attention_block[self.attention - 1](128) 118 | self.feat2_att = attention_block[self.attention - 1](256) 119 | self.feat3_att = attention_block[self.attention - 1](512) 120 | #————————————————————————————————————原模型—————————————————————————————————————————————— 121 | #-------------------------------------------# 122 | # 20, 20, 1024 -> 20, 20, 512 123 | #-------------------------------------------# 124 | self.lateral_conv0 = BaseConv(int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act) 125 | # -------------------------------------------# 126 | # 80, 80, 256 -> 40, 40, 256 127 | # -------------------------------------------# 128 | self.bu_conv2 = Conv(int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act) 129 | 130 | #————————————————————————————————————Bifpn—————————————————————————————————— 131 | # # p3_out进行1*1卷积:80,256----80,512 132 | # self.lateral_conv0 = BaseConv(int(in_channels[0] * width), int(in_channels[1] * width), 1, 1, act=act) 133 | # # p3和p4融合后进行1*1卷积:40,40,512,----40,40,1024 134 | # self.lateral_conv1 = BaseConv(int(in_channels[1] * width), int(in_channels[2] * width), 1, 1, act=act) 135 | # # 自下而上过程 p5_out与p4_zhong结合 20,1024 ---20,512 136 | # self.lateral_conv2 = BaseConv(int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act) 137 | # # --其中self.bu_conv2 需要修改下面 138 | # self.bu_conv2 = Conv(int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act) 139 | # self.bu_conv3 = Conv(int(in_channels[2] * width), int(in_channels[2] * width), 3, 2, act=act) 140 | # # 2048--->1024 141 | # self.p5_out = CSPLayer( 142 | # int(2 * in_channels[2] * width), 143 | # int(in_channels[2] * width), 144 | # round(3 * depth), 145 | # False, 146 | # depthwise=depthwise, 147 | # act=act, 148 | # ) 149 | #—————————————————————————————————————————————————————————————————————————————————————————————— 150 | #-------------------------------------------# 151 | # 40, 40, 
1024 -> 40, 40, 512 152 | #-------------------------------------------# 153 | self.C3_p4 = CSPLayer( 154 | int(2 * in_channels[1] * width), 155 | int(in_channels[1] * width), 156 | round(3 * depth), 157 | False, 158 | depthwise = depthwise, 159 | act = act, 160 | ) 161 | 162 | #-------------------------------------------# 163 | # 40, 40, 512 -> 40, 40, 256 164 | #-------------------------------------------# 165 | self.reduce_conv1 = BaseConv(int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act) 166 | #-------------------------------------------# 167 | # 80, 80, 512 -> 80, 80, 256 168 | #-------------------------------------------# 169 | self.C3_p3 = CSPLayer( 170 | int(2 * in_channels[0] * width), 171 | int(in_channels[0] * width), 172 | round(3 * depth), 173 | False, 174 | depthwise = depthwise, 175 | act = act, 176 | ) 177 | 178 | # #-------------------------------------------# 179 | # # 80, 80, 256 -> 40, 40, 256 180 | # #-------------------------------------------# 181 | # self.bu_conv2 = Conv(int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act) 182 | # #-------------------------------------------# 183 | # 40, 40, 256 -> 40, 40, 512 184 | #-------------------------------------------# 185 | self.C3_n3 = CSPLayer( 186 | int(2 * in_channels[0] * width), 187 | int(in_channels[1] * width), 188 | round(3 * depth), 189 | False, 190 | depthwise = depthwise, 191 | act = act, 192 | ) 193 | 194 | #-------------------------------------------# 195 | # 40, 40, 512 -> 20, 20, 512 196 | #-------------------------------------------# 197 | self.bu_conv1 = Conv(int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act) 198 | #-------------------------------------------# 199 | # 20, 20, 1024 -> 20, 20, 1024 200 | #-------------------------------------------# 201 | self.C3_n4 = CSPLayer( 202 | int(2 * in_channels[1] * width), 203 | int(in_channels[2] * width), 204 | round(3 * depth), 205 | False, 206 | depthwise = depthwise, 207 | act = act, 208 | ) 209 | 210 | def forward(self, input): 211 | out_features = self.backbone.forward(input) 212 | [feat1, feat2, feat3] = [out_features[f] for f in self.in_features] 213 | #————————————————————注意力—————————————————————————— 214 | if 1 <= self.attention and self.attention <= 4: 215 | feat1 = self.feat1_att(feat1) 216 | feat2 = self.feat2_att(feat2) 217 | feat3 = self.feat3_att(feat3) 218 | #----------------------------------------------------- 219 | #—————————————————————————————————————————————————————————————————— 220 | #-----------------------Bifpn-------------------------------------- 221 | # # 80,256--->80,512 222 | # p3 = self.lateral_conv0(feat1) 223 | # # 80,512--->40,512(下采样) 224 | # p3_p4 = self.bu_conv2(p3) 225 | # # 40,512 + 40,512 --->40,512 226 | # p4_zhong = torch.cat([p3_p4, feat2], 1) 227 | # # 40,1024---40,512 228 | # p4_zhong = self.C3_p4(p4_zhong) 229 | # # 40,512 --->40,1024 230 | # p4_zhong_conv = self.lateral_conv1(p4_zhong) 231 | # # 40,1024 --->20,1024 232 | # p4_p5 = self.bu_conv3(p4_zhong_conv) 233 | # # 20,1024 + 20,1024 --->20,1024 234 | # p5_zhong = torch.cat([p4_p5, feat3], 1) 235 | # 236 | # P5_out = self.p5_out(p5_zhong) 237 | # 238 | # # 20,1024 --->20,512 239 | # p5_p4 = self.lateral_conv2(P5_out) 240 | # # 20,512--->40,512 241 | # p5_p4_shang = self.upsample(p5_p4) 242 | # # 40,512 (((((可以做文章))))) 243 | # # 40,512 + 40,512 ---->40,512 244 | # p4_zhong_feat1 = torch.cat([p4_zhong, feat2], 1) 245 | # # 40,1024---40,512 246 | # p4_zhong_feat1 = self.C3_p4(p4_zhong_feat1) 247 | # 248 
| # P4_out = p5_p4_shang + p4_zhong_feat1 249 | # 250 | # # 40,512--->40,256 251 | # p4_c1 = self.reduce_conv1(P4_out) 252 | # # 40,256--->80,256 253 | # p4_p3_shang = self.upsample(p4_c1) 254 | # # 80,256 + 80,256 --->80,256 255 | # p3_zhong = torch.cat([p4_p3_shang, feat1], 1) 256 | # 257 | # P3_out = self.C3_p3(p3_zhong) 258 | #----------------------------END--------------------------- 259 | 260 | #-------------------------------------------# 261 | # 20, 20, 1024 -> 20, 20, 512 262 | #-------------------------------------------# 263 | P5 = self.lateral_conv0(feat3) 264 | #-------------------------------------------# 265 | # 20, 20, 512 -> 40, 40, 512 266 | #-------------------------------------------# 267 | P5_upsample = self.upsample(P5) 268 | #-------------------------------------------# 269 | # 40, 40, 512 + 40, 40, 512 -> 40, 40, 1024 270 | #-------------------------------------------# 271 | P5_upsample = torch.cat([P5_upsample, feat2], 1) 272 | #-------------------------------------------# 273 | # 40, 40, 1024 -> 40, 40, 512 274 | #-------------------------------------------# 275 | P5_upsample = self.C3_p4(P5_upsample) 276 | 277 | #-------------------------------------------# 278 | # 40, 40, 512 -> 40, 40, 256 279 | #-------------------------------------------# 280 | P4 = self.reduce_conv1(P5_upsample) 281 | #-------------------------------------------# 282 | # 40, 40, 256 -> 80, 80, 256 283 | #-------------------------------------------# 284 | P4_upsample = self.upsample(P4) 285 | #-------------------------------------------# 286 | # 80, 80, 256 + 80, 80, 256 -> 80, 80, 512 287 | #-------------------------------------------# 288 | P4_upsample = torch.cat([P4_upsample, feat1], 1) 289 | #-------------------------------------------# 290 | # 80, 80, 512 -> 80, 80, 256 291 | #-------------------------------------------# 292 | P3_out = self.C3_p3(P4_upsample) 293 | 294 | #-------------------------------------------# 295 | # 80, 80, 256 -> 40, 40, 256 296 | #-------------------------------------------# 297 | P3_downsample = self.bu_conv2(P3_out) 298 | #-------------------------------------------# 299 | # 40, 40, 256 + 40, 40, 256 -> 40, 40, 512 300 | #-------------------------------------------# 301 | P3_downsample = torch.cat([P3_downsample, P4], 1) 302 | #-------------------------------------------# 303 | # 40, 40, 256 -> 40, 40, 512 304 | #-------------------------------------------# 305 | P4_out = self.C3_n3(P3_downsample) 306 | 307 | #-------------------------------------------# 308 | # 40, 40, 512 -> 20, 20, 512 309 | #-------------------------------------------# 310 | P4_downsample = self.bu_conv1(P4_out) 311 | #-------------------------------------------# 312 | # 20, 20, 512 + 20, 20, 512 -> 20, 20, 1024 313 | #-------------------------------------------# 314 | P4_downsample = torch.cat([P4_downsample, P5], 1) 315 | #-------------------------------------------# 316 | # 20, 20, 1024 -> 20, 20, 1024 317 | #-------------------------------------------# 318 | P5_out = self.C3_n4(P4_downsample) 319 | 320 | return (P3_out, P4_out, P5_out) 321 | 322 | class YoloBody(nn.Module): 323 | def __init__(self, num_classes, phi): 324 | super().__init__() 325 | depth_dict = {'nano': 0.33, 'tiny': 0.33, 's' : 0.33, 'm' : 0.67, 'l' : 1.00, 'x' : 1.33,} 326 | width_dict = {'nano': 0.25, 'tiny': 0.375, 's' : 0.50, 'm' : 0.75, 'l' : 1.00, 'x' : 1.25,} 327 | depth, width = depth_dict[phi], width_dict[phi] 328 | depthwise = True if phi == 'nano' else False 329 | 
#—————————————————————————————————————————————————————————————————— 330 | # ----------------------添加的注意力----------------------------- 331 | # attention = 0--->不使用注意力机制 332 | # attention = 1--->[se_block] 333 | # attention = 2--->[cbam_block] 334 | # attention = 3--->[eca_block] 335 | # attention = 4--->[CoordAtt] 336 | #—————————————————————————————————————————————————————————————————— 337 | self.backbone = YOLOPAFPN(depth, width, depthwise=depthwise,attention=0) 338 | self.head = YOLOXHead(num_classes, width, depthwise=depthwise) 339 | 340 | def forward(self, x): 341 | fpn_outs = self.backbone.forward(x) 342 | outputs = self.head.forward(fpn_outs) 343 | return outputs 344 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/nets/yolo_training.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import math 6 | from functools import partial 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from nets.varifocalloss import VarifocalLoss 12 | 13 | class IOUloss(nn.Module): 14 | def __init__(self, reduction="none", loss_type="iou"): 15 | super(IOUloss, self).__init__() 16 | self.reduction = reduction 17 | self.loss_type = loss_type 18 | 19 | def forward(self, pred, target): 20 | assert pred.shape[0] == target.shape[0] 21 | 22 | pred = pred.view(-1, 4) 23 | target = target.view(-1, 4) 24 | tl = torch.max( 25 | (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 26 | ) 27 | br = torch.min( 28 | (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 29 | ) 30 | 31 | area_p = torch.prod(pred[:, 2:], 1) 32 | area_g = torch.prod(target[:, 2:], 1) 33 | 34 | en = (tl < br).type(tl.type()).prod(dim=1) 35 | area_i = torch.prod(br - tl, 1) * en 36 | area_u = area_p + area_g - area_i 37 | iou = (area_i) / (area_u + 1e-16) 38 | 39 | if self.loss_type == "iou": 40 | loss = 1 - iou ** 2 41 | elif self.loss_type == "giou": 42 | c_tl = torch.min( 43 | (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 44 | ) 45 | c_br = torch.max( 46 | (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 47 | ) 48 | area_c = torch.prod(c_br - c_tl, 1) 49 | giou = iou - (area_c - area_u) / area_c.clamp(1e-16) 50 | loss = 1 - giou.clamp(min=-1.0, max=1.0) 51 | 52 | if self.reduction == "mean": 53 | loss = loss.mean() 54 | elif self.reduction == "sum": 55 | loss = loss.sum() 56 | 57 | return loss 58 | #------focalloss--------------- 59 | class FocalLoss(nn.Module): 60 | def __init__(self): 61 | super(FocalLoss, self).__init__() 62 | def forward(self, pred, gt): 63 | pos_inds = gt.eq(1).float() 64 | neg_inds = gt.eq(0).float() 65 | pos_loss = torch.log(pred + 1e-5) * torch.pow(1 - pred, 2) * pos_inds * 0.75 66 | neg_loss = torch.log(1 - pred + 1e-5) * torch.pow(pred, 2) * neg_inds * 0.25 67 | loss = -(pos_loss + neg_loss) 68 | return loss 69 | #--------------------------------------- 70 | class YOLOLoss(nn.Module): 71 | def __init__(self, num_classes, strides=[8, 16, 32]): 72 | super().__init__() 73 | self.num_classes = num_classes 74 | self.strides = strides 75 | #---------------varifocalloss---------------------- 76 | self.varifocal_loss = VarifocalLoss(reduction='none') 77 | #---------------focalloss------------------------- 78 | #self.focal_loss = FocalLoss 79 | #---------------bceloss---------------------------- 80 | 
self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction="none") 81 | self.iou_loss = IOUloss(reduction="none") 82 | self.grids = [torch.zeros(1)] * len(strides) 83 | 84 | def forward(self, inputs, labels=None): 85 | outputs = [] 86 | x_shifts = [] 87 | y_shifts = [] 88 | expanded_strides = [] 89 | 90 | #-----------------------------------------------# 91 | # inputs [[batch_size, num_classes + 5, 20, 20] 92 | # [batch_size, num_classes + 5, 40, 40] 93 | # [batch_size, num_classes + 5, 80, 80]] 94 | # outputs [[batch_size, 400, num_classes + 5] 95 | # [batch_size, 1600, num_classes + 5] 96 | # [batch_size, 6400, num_classes + 5]] 97 | # x_shifts [[batch_size, 400] 98 | # [batch_size, 1600] 99 | # [batch_size, 6400]] 100 | #-----------------------------------------------# 101 | for k, (stride, output) in enumerate(zip(self.strides, inputs)): 102 | output, grid = self.get_output_and_grid(output, k, stride) 103 | x_shifts.append(grid[:, :, 0]) 104 | y_shifts.append(grid[:, :, 1]) 105 | expanded_strides.append(torch.ones_like(grid[:, :, 0]) * stride) 106 | outputs.append(output) 107 | 108 | return self.get_losses(x_shifts, y_shifts, expanded_strides, labels, torch.cat(outputs, 1)) 109 | 110 | def get_output_and_grid(self, output, k, stride): 111 | grid = self.grids[k] 112 | hsize, wsize = output.shape[-2:] 113 | if grid.shape[2:4] != output.shape[2:4]: 114 | yv, xv = torch.meshgrid([torch.arange(hsize), torch.arange(wsize)]) 115 | grid = torch.stack((xv, yv), 2).view(1, hsize, wsize, 2).type(output.type()) 116 | self.grids[k] = grid 117 | grid = grid.view(1, -1, 2) 118 | 119 | output = output.flatten(start_dim=2).permute(0, 2, 1) 120 | output[..., :2] = (output[..., :2] + grid) * stride 121 | output[..., 2:4] = torch.exp(output[..., 2:4]) * stride 122 | return output, grid 123 | 124 | def get_losses(self, x_shifts, y_shifts, expanded_strides, labels, outputs): 125 | #-----------------------------------------------# 126 | # [batch, n_anchors_all, 4] 127 | #-----------------------------------------------# 128 | bbox_preds = outputs[:, :, :4] 129 | #-----------------------------------------------# 130 | # [batch, n_anchors_all, 1] 131 | #-----------------------------------------------# 132 | obj_preds = outputs[:, :, 4:5] 133 | #-----------------------------------------------# 134 | # [batch, n_anchors_all, n_cls] 135 | #-----------------------------------------------# 136 | cls_preds = outputs[:, :, 5:] 137 | 138 | total_num_anchors = outputs.shape[1] 139 | #-----------------------------------------------# 140 | # x_shifts [1, n_anchors_all] 141 | # y_shifts [1, n_anchors_all] 142 | # expanded_strides [1, n_anchors_all] 143 | #-----------------------------------------------# 144 | x_shifts = torch.cat(x_shifts, 1) 145 | y_shifts = torch.cat(y_shifts, 1) 146 | expanded_strides = torch.cat(expanded_strides, 1) 147 | 148 | cls_targets = [] 149 | reg_targets = [] 150 | obj_targets = [] 151 | fg_masks = [] 152 | 153 | num_fg = 0.0 154 | for batch_idx in range(outputs.shape[0]): 155 | num_gt = len(labels[batch_idx]) 156 | if num_gt == 0: 157 | cls_target = outputs.new_zeros((0, self.num_classes)) 158 | reg_target = outputs.new_zeros((0, 4)) 159 | obj_target = outputs.new_zeros((total_num_anchors, 1)) 160 | fg_mask = outputs.new_zeros(total_num_anchors).bool() 161 | else: 162 | #-----------------------------------------------# 163 | # gt_bboxes_per_image [num_gt, num_classes] 164 | # gt_classes [num_gt] 165 | # bboxes_preds_per_image [n_anchors_all, 4] 166 | # cls_preds_per_image [n_anchors_all, 
num_classes] 167 | # obj_preds_per_image [n_anchors_all, 1] 168 | #-----------------------------------------------# 169 | gt_bboxes_per_image = labels[batch_idx][..., :4] 170 | gt_classes = labels[batch_idx][..., 4] 171 | bboxes_preds_per_image = bbox_preds[batch_idx] 172 | cls_preds_per_image = cls_preds[batch_idx] 173 | obj_preds_per_image = obj_preds[batch_idx] 174 | 175 | gt_matched_classes, fg_mask, pred_ious_this_matching, matched_gt_inds, num_fg_img = self.get_assignments( 176 | num_gt, total_num_anchors, gt_bboxes_per_image, gt_classes, bboxes_preds_per_image, cls_preds_per_image, obj_preds_per_image, 177 | expanded_strides, x_shifts, y_shifts, 178 | ) 179 | torch.cuda.empty_cache() 180 | num_fg += num_fg_img 181 | cls_target = F.one_hot(gt_matched_classes.to(torch.int64), self.num_classes).float() * pred_ious_this_matching.unsqueeze(-1) 182 | obj_target = fg_mask.unsqueeze(-1) 183 | reg_target = gt_bboxes_per_image[matched_gt_inds] 184 | cls_targets.append(cls_target) 185 | reg_targets.append(reg_target) 186 | obj_targets.append(obj_target.type(cls_target.type())) 187 | fg_masks.append(fg_mask) 188 | 189 | cls_targets = torch.cat(cls_targets, 0) 190 | reg_targets = torch.cat(reg_targets, 0) 191 | obj_targets = torch.cat(obj_targets, 0) 192 | fg_masks = torch.cat(fg_masks, 0) 193 | 194 | num_fg = max(num_fg, 1) 195 | loss_iou = (self.iou_loss(bbox_preds.view(-1, 4)[fg_masks], reg_targets)).sum() 196 | # ——————————————改进focal_loss 197 | # loss_obj = (self.focal_loss(obj_preds.sigmoid().view(-1, 1), obj_targets)).sum() 198 | # ———————————————改进variFocal_loss 199 | #loss_obj = (self.varifocal_loss(obj_preds.sigmoid().view(-1, 1), obj_targets)).sum() 200 | # ————————————————原始bceloss 201 | loss_obj = (self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets)).sum() 202 | loss_cls = (self.bcewithlog_loss(cls_preds.view(-1, self.num_classes)[fg_masks], cls_targets)).sum() 203 | reg_weight = 5.0 204 | loss = reg_weight * loss_iou + loss_obj + loss_cls 205 | 206 | return loss / num_fg 207 | 208 | @torch.no_grad() 209 | def get_assignments(self, num_gt, total_num_anchors, gt_bboxes_per_image, gt_classes, bboxes_preds_per_image, cls_preds_per_image, obj_preds_per_image, expanded_strides, x_shifts, y_shifts): 210 | #-------------------------------------------------------# 211 | # fg_mask [n_anchors_all] 212 | # is_in_boxes_and_center [num_gt, len(fg_mask)] 213 | #-------------------------------------------------------# 214 | fg_mask, is_in_boxes_and_center = self.get_in_boxes_info(gt_bboxes_per_image, expanded_strides, x_shifts, y_shifts, total_num_anchors, num_gt) 215 | 216 | #-------------------------------------------------------# 217 | # fg_mask [n_anchors_all] 218 | # bboxes_preds_per_image [fg_mask, 4] 219 | # cls_preds_ [fg_mask, num_classes] 220 | # obj_preds_ [fg_mask, 1] 221 | #-------------------------------------------------------# 222 | bboxes_preds_per_image = bboxes_preds_per_image[fg_mask] 223 | cls_preds_ = cls_preds_per_image[fg_mask] 224 | obj_preds_ = obj_preds_per_image[fg_mask] 225 | num_in_boxes_anchor = bboxes_preds_per_image.shape[0] 226 | 227 | #-------------------------------------------------------# 228 | # pair_wise_ious [num_gt, fg_mask] 229 | #-------------------------------------------------------# 230 | pair_wise_ious = self.bboxes_iou(gt_bboxes_per_image, bboxes_preds_per_image, False) 231 | pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8) 232 | 233 | #-------------------------------------------------------# 234 | # cls_preds_ [num_gt, fg_mask, 
num_classes] 235 | # gt_cls_per_image [num_gt, fg_mask, num_classes] 236 | #-------------------------------------------------------# 237 | cls_preds_ = cls_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() * obj_preds_.unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() 238 | gt_cls_per_image = F.one_hot(gt_classes.to(torch.int64), self.num_classes).float().unsqueeze(1).repeat(1, num_in_boxes_anchor, 1) 239 | pair_wise_cls_loss = F.binary_cross_entropy(cls_preds_.sqrt_(), gt_cls_per_image, reduction="none").sum(-1) 240 | del cls_preds_ 241 | 242 | cost = pair_wise_cls_loss + 3.0 * pair_wise_ious_loss + 100000.0 * (~is_in_boxes_and_center).float() 243 | 244 | num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds = self.dynamic_k_matching(cost, pair_wise_ious, gt_classes, num_gt, fg_mask) 245 | del pair_wise_cls_loss, cost, pair_wise_ious, pair_wise_ious_loss 246 | return gt_matched_classes, fg_mask, pred_ious_this_matching, matched_gt_inds, num_fg 247 | 248 | def bboxes_iou(self, bboxes_a, bboxes_b, xyxy=True): 249 | if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4: 250 | raise IndexError 251 | 252 | if xyxy: 253 | tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2]) 254 | br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:]) 255 | area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) 256 | area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) 257 | else: 258 | tl = torch.max( 259 | (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2), 260 | (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2), 261 | ) 262 | br = torch.min( 263 | (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2), 264 | (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2), 265 | ) 266 | 267 | area_a = torch.prod(bboxes_a[:, 2:], 1) 268 | area_b = torch.prod(bboxes_b[:, 2:], 1) 269 | en = (tl < br).type(tl.type()).prod(dim=2) 270 | area_i = torch.prod(br - tl, 2) * en 271 | return area_i / (area_a[:, None] + area_b - area_i) 272 | 273 | def get_in_boxes_info(self, gt_bboxes_per_image, expanded_strides, x_shifts, y_shifts, total_num_anchors, num_gt, center_radius = 2.5): 274 | #-------------------------------------------------------# 275 | # expanded_strides_per_image [n_anchors_all] 276 | # x_centers_per_image [num_gt, n_anchors_all] 277 | # x_centers_per_image [num_gt, n_anchors_all] 278 | #-------------------------------------------------------# 279 | expanded_strides_per_image = expanded_strides[0] 280 | x_centers_per_image = ((x_shifts[0] + 0.5) * expanded_strides_per_image).unsqueeze(0).repeat(num_gt, 1) 281 | y_centers_per_image = ((y_shifts[0] + 0.5) * expanded_strides_per_image).unsqueeze(0).repeat(num_gt, 1) 282 | 283 | #-------------------------------------------------------# 284 | # gt_bboxes_per_image_x [num_gt, n_anchors_all] 285 | #-------------------------------------------------------# 286 | gt_bboxes_per_image_l = (gt_bboxes_per_image[:, 0] - 0.5 * gt_bboxes_per_image[:, 2]).unsqueeze(1).repeat(1, total_num_anchors) 287 | gt_bboxes_per_image_r = (gt_bboxes_per_image[:, 0] + 0.5 * gt_bboxes_per_image[:, 2]).unsqueeze(1).repeat(1, total_num_anchors) 288 | gt_bboxes_per_image_t = (gt_bboxes_per_image[:, 1] - 0.5 * gt_bboxes_per_image[:, 3]).unsqueeze(1).repeat(1, total_num_anchors) 289 | gt_bboxes_per_image_b = (gt_bboxes_per_image[:, 1] + 0.5 * gt_bboxes_per_image[:, 3]).unsqueeze(1).repeat(1, total_num_anchors) 290 | 291 | #-------------------------------------------------------# 292 | # bbox_deltas [num_gt, n_anchors_all, 4] 293 | #-------------------------------------------------------# 294 | b_l = 
x_centers_per_image - gt_bboxes_per_image_l 295 | b_r = gt_bboxes_per_image_r - x_centers_per_image 296 | b_t = y_centers_per_image - gt_bboxes_per_image_t 297 | b_b = gt_bboxes_per_image_b - y_centers_per_image 298 | bbox_deltas = torch.stack([b_l, b_t, b_r, b_b], 2) 299 | 300 | #-------------------------------------------------------# 301 | # is_in_boxes [num_gt, n_anchors_all] 302 | # is_in_boxes_all [n_anchors_all] 303 | #-------------------------------------------------------# 304 | is_in_boxes = bbox_deltas.min(dim=-1).values > 0.0 305 | is_in_boxes_all = is_in_boxes.sum(dim=0) > 0 306 | 307 | gt_bboxes_per_image_l = (gt_bboxes_per_image[:, 0]).unsqueeze(1).repeat(1, total_num_anchors) - center_radius * expanded_strides_per_image.unsqueeze(0) 308 | gt_bboxes_per_image_r = (gt_bboxes_per_image[:, 0]).unsqueeze(1).repeat(1, total_num_anchors) + center_radius * expanded_strides_per_image.unsqueeze(0) 309 | gt_bboxes_per_image_t = (gt_bboxes_per_image[:, 1]).unsqueeze(1).repeat(1, total_num_anchors) - center_radius * expanded_strides_per_image.unsqueeze(0) 310 | gt_bboxes_per_image_b = (gt_bboxes_per_image[:, 1]).unsqueeze(1).repeat(1, total_num_anchors) + center_radius * expanded_strides_per_image.unsqueeze(0) 311 | 312 | #-------------------------------------------------------# 313 | # center_deltas [num_gt, n_anchors_all, 4] 314 | #-------------------------------------------------------# 315 | c_l = x_centers_per_image - gt_bboxes_per_image_l 316 | c_r = gt_bboxes_per_image_r - x_centers_per_image 317 | c_t = y_centers_per_image - gt_bboxes_per_image_t 318 | c_b = gt_bboxes_per_image_b - y_centers_per_image 319 | center_deltas = torch.stack([c_l, c_t, c_r, c_b], 2) 320 | 321 | #-------------------------------------------------------# 322 | # is_in_centers [num_gt, n_anchors_all] 323 | # is_in_centers_all [n_anchors_all] 324 | #-------------------------------------------------------# 325 | is_in_centers = center_deltas.min(dim=-1).values > 0.0 326 | is_in_centers_all = is_in_centers.sum(dim=0) > 0 327 | 328 | #-------------------------------------------------------# 329 | # is_in_boxes_anchor [n_anchors_all] 330 | # is_in_boxes_and_center [num_gt, is_in_boxes_anchor] 331 | #-------------------------------------------------------# 332 | is_in_boxes_anchor = is_in_boxes_all | is_in_centers_all 333 | is_in_boxes_and_center = is_in_boxes[:, is_in_boxes_anchor] & is_in_centers[:, is_in_boxes_anchor] 334 | return is_in_boxes_anchor, is_in_boxes_and_center 335 | 336 | def dynamic_k_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask): 337 | #-------------------------------------------------------# 338 | # cost [num_gt, fg_mask] 339 | # pair_wise_ious [num_gt, fg_mask] 340 | # gt_classes [num_gt] 341 | # fg_mask [n_anchors_all] 342 | # matching_matrix [num_gt, fg_mask] 343 | #-------------------------------------------------------# 344 | matching_matrix = torch.zeros_like(cost) 345 | 346 | #------------------------------------------------------------# 347 | # 选取iou最大的n_candidate_k个点 348 | # 然后求和,判断应该有多少点用于该框预测 349 | # topk_ious [num_gt, n_candidate_k] 350 | # dynamic_ks [num_gt] 351 | # matching_matrix [num_gt, fg_mask] 352 | #------------------------------------------------------------# 353 | n_candidate_k = min(10, pair_wise_ious.size(1)) 354 | topk_ious, _ = torch.topk(pair_wise_ious, n_candidate_k, dim=1) 355 | dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1) 356 | 357 | for gt_idx in range(num_gt): 358 | #------------------------------------------------------------# 
359 | # 给每个真实框选取最小的动态k个点 360 | #------------------------------------------------------------# 361 | _, pos_idx = torch.topk(cost[gt_idx], k=dynamic_ks[gt_idx].item(), largest=False) 362 | matching_matrix[gt_idx][pos_idx] = 1.0 363 | del topk_ious, dynamic_ks, pos_idx 364 | 365 | #------------------------------------------------------------# 366 | # anchor_matching_gt [fg_mask] 367 | #------------------------------------------------------------# 368 | anchor_matching_gt = matching_matrix.sum(0) 369 | if (anchor_matching_gt > 1).sum() > 0: 370 | #------------------------------------------------------------# 371 | # 当某一个特征点指向多个真实框的时候 372 | # 选取cost最小的真实框。 373 | #------------------------------------------------------------# 374 | _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], dim=0) 375 | matching_matrix[:, anchor_matching_gt > 1] *= 0.0 376 | matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1.0 377 | #------------------------------------------------------------# 378 | # fg_mask_inboxes [fg_mask] 379 | # num_fg为正样本的特征点个数 380 | #------------------------------------------------------------# 381 | fg_mask_inboxes = matching_matrix.sum(0) > 0.0 382 | num_fg = fg_mask_inboxes.sum().item() 383 | 384 | #------------------------------------------------------------# 385 | # 对fg_mask进行更新 386 | #------------------------------------------------------------# 387 | fg_mask[fg_mask.clone()] = fg_mask_inboxes 388 | 389 | #------------------------------------------------------------# 390 | # 获得特征点对应的物品种类 391 | #------------------------------------------------------------# 392 | matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0) 393 | gt_matched_classes = gt_classes[matched_gt_inds] 394 | 395 | pred_ious_this_matching = (matching_matrix * pair_wise_ious).sum(0)[fg_mask_inboxes] 396 | return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds 397 | 398 | def weights_init(net, init_type='normal', init_gain = 0.02): 399 | def init_func(m): 400 | classname = m.__class__.__name__ 401 | if hasattr(m, 'weight') and classname.find('Conv') != -1: 402 | if init_type == 'normal': 403 | torch.nn.init.normal_(m.weight.data, 0.0, init_gain) 404 | elif init_type == 'xavier': 405 | torch.nn.init.xavier_normal_(m.weight.data, gain=init_gain) 406 | elif init_type == 'kaiming': 407 | torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') 408 | elif init_type == 'orthogonal': 409 | torch.nn.init.orthogonal_(m.weight.data, gain=init_gain) 410 | else: 411 | raise NotImplementedError('initialization method [%s] is not implemented' % init_type) 412 | elif classname.find('BatchNorm2d') != -1: 413 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02) 414 | torch.nn.init.constant_(m.bias.data, 0.0) 415 | print('initialize network with %s type' % init_type) 416 | net.apply(init_func) 417 | 418 | def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.1, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.3, step_num = 10): 419 | def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters): 420 | if iters <= warmup_total_iters: 421 | # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start 422 | lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start 423 | elif iters >= total_iters - no_aug_iter: 424 | lr = min_lr 425 | else: 426 | lr = min_lr + 0.5 * (lr - min_lr) * ( 427 | 1.0 + math.cos(math.pi* (iters - warmup_total_iters) / (total_iters - warmup_total_iters 
- no_aug_iter)) 428 | ) 429 | return lr 430 | 431 | def step_lr(lr, decay_rate, step_size, iters): 432 | if step_size < 1: 433 | raise ValueError("step_size must above 1.") 434 | n = iters // step_size 435 | out_lr = lr * decay_rate ** n 436 | return out_lr 437 | 438 | if lr_decay_type == "cos": 439 | warmup_total_iters = min(max(warmup_iters_ratio * total_iters, 1), 3) 440 | warmup_lr_start = max(warmup_lr_ratio * lr, 1e-6) 441 | no_aug_iter = min(max(no_aug_iter_ratio * total_iters, 1), 15) 442 | func = partial(yolox_warm_cos_lr ,lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter) 443 | else: 444 | decay_rate = (min_lr / lr) ** (1 / (step_num - 1)) 445 | step_size = total_iters / step_num 446 | func = partial(step_lr, lr, decay_rate, step_size) 447 | 448 | return func 449 | 450 | def set_optimizer_lr(optimizer, lr_scheduler_func, epoch): 451 | lr = lr_scheduler_func(epoch) 452 | for param_group in optimizer.param_groups: 453 | param_group['lr'] = lr 454 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/predict_three_point.py: -------------------------------------------------------------------------------- 1 | #-----------------------------------------------------------------------# 2 | # predict.py将单张图片预测、摄像头检测、FPS测试和目录遍历检测等功能 3 | # 整合到了一个py文件中,通过指定mode进行模式的修改。 4 | #-----------------------------------------------------------------------# 5 | import time 6 | import cv2 7 | import numpy as np 8 | from PIL import Image 9 | from yolo import YOLO 10 | #**********************双目测距调用的函数******************************* 11 | from stereo import stereoconfig_040_2 12 | from stereo.dianyuntu_yolo import preprocess, undistortion, getRectifyTransform, draw_line, rectifyImage,\ 13 | stereoMatchSGBM, hw3ToN3, DepthColor2Cloud, view_cloud,stereoMatchBM 14 | 15 | if __name__ == "__main__": 16 | yolo = YOLO() 17 | 18 | mode = "video" 19 | 20 | #---------------------双目匹配算法参数设置---------------- 21 | # -----------------窗口可调参数设置-------------- 22 | cv2.namedWindow("set") 23 | cv2.createTrackbar("num", "set", 0, 20, lambda x: None) 24 | cv2.createTrackbar("blockSize", "set", 1, 20, lambda x: None) 25 | # -----------------------end---------------------------- 26 | 27 | crop = False 28 | #----------------------------------------------------------------------------------------------------------# 29 | # video_path用于指定视频的路径,当video_path=0时表示检测摄像头 30 | #----------------------------------------------------------------------------------------------------------# 31 | video_path = 0 32 | video_save_path = "" 33 | video_fps = 25.0 34 | #-------------------------------------------------------------------------# 35 | # test_interval用于指定测量fps的时候,图片检测的次数 36 | # 理论上test_interval越大,fps越准确。 37 | #-------------------------------------------------------------------------# 38 | test_interval = 100 39 | #-------------------------------------------------------------------------# 40 | # dir_origin_path指定了用于检测的图片的文件夹路径 41 | # dir_save_path指定了检测完图片的保存路径 42 | # dir_origin_path和dir_save_path仅在mode='dir_predict'时有效 43 | #-------------------------------------------------------------------------# 44 | dir_origin_path = "img/" 45 | dir_save_path = "img_out/" 46 | 47 | if mode == "predict": 48 | while True: 49 | img = input('Input image filename:') 50 | try: 51 | image = Image.open(img) 52 | except: 53 | print('Open Error! 
Try again!') 54 | continue 55 | else: 56 | r_image = yolo.detect_image(image, crop = crop) 57 | r_image.show() 58 | 59 | elif mode == "video": 60 | #capture = cv2.VideoCapture(video_path) 61 | #-----------------------双目调用------------------------- 62 | i = 0 63 | capture = cv2.VideoCapture(0) 64 | # -----使用建东的双目相机 65 | capture1 = cv2.VideoCapture(1) 66 | #------------------------------------------------------ 67 | if video_save_path!="": 68 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 69 | size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))) 70 | out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size) 71 | 72 | ref, frame = capture.read() 73 | if not ref: 74 | raise ValueError("未能正确读取摄像头(视频),请注意是否正确安装摄像头(是否正确填写视频路径)。") 75 | #-------------------------------画图--------------------------- 76 | def cat2images(limg, rimg): 77 | HEIGHT = limg.shape[0] 78 | WIDTH = limg.shape[1] 79 | imgcat = np.zeros((HEIGHT, WIDTH * 2 + 20, 3)) 80 | imgcat[:, :WIDTH, :] = limg 81 | imgcat[:, -WIDTH:, :] = rimg 82 | for i in range(int(HEIGHT / 32)): 83 | imgcat[i * 32, :, :] = 255 84 | return imgcat 85 | #-------------------------------------------------------------- 86 | fps = 0.0 87 | while(True): 88 | t1 = time.time() 89 | # 读取某一帧 90 | ref, frame = capture.read() 91 | ref1, frame1 = capture1.read() 92 | # 格式转变,BGRtoRGB 93 | # ----------------------------------------测试代码------------------------------------------------------- 94 | # 1280 480 left[0:480, 0:640] 95 | # 2560 720 left[0:720, 0:1280] 96 | # 1000 320 left[0:320,0:500] 97 | left_img = frame # ----测试 98 | left_frame = cv2.cvtColor(left_img, cv2.COLOR_BGR2RGB) 99 | left_frame = Image.fromarray(np.uint8(left_frame)) 100 | 101 | right_frame = frame1 102 | right_frame = cv2.cvtColor(right_frame, cv2.COLOR_BGR2RGB) 103 | right_frame = Image.fromarray(np.uint8(right_frame)) 104 | # -------------------------------------------------------------------------------------------------------- 105 | frame_left = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 106 | frame_right = cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB) 107 | # 转变成Image 108 | frame_left = Image.fromarray(np.uint8(frame_left)) 109 | frame_right = Image.fromarray(np.uint8(frame_right)) 110 | # 进行检测 111 | # ------------原代码 (以注释)------- 检测输出目标物的信息,包括类别和置信度分数 112 | # label,frame,xy = np.array(yolo.detect_image(frame)) 113 | frame_left = np.array(frame_left) 114 | frame_right = np.array(frame_right) 115 | 116 | # RGBtoBGR满足opencv显示格式 117 | frame_left = cv2.cvtColor(frame_left, cv2.COLOR_RGB2BGR) 118 | frame_right = cv2.cvtColor(frame_right, cv2.COLOR_RGB2BGR) 119 | # ------------------------------------------------------------------------------------------------------------------------ 120 | # 121 | # 目标检测 + 双目视觉三维定位和目标测距 122 | # 123 | # ------------------------------------------------------------------------------------------------------------------------ 124 | # ------------ SGBM算法实现 ------------ 125 | # 添加一个是否有标签的判断(如果有标签,则出现了要检测的目标物) 126 | #if label: 127 | height_0, width_0 = frame_left.shape[0:2] # height_0 = 480 width_0 = 1280 128 | # cv2.imwrite('./stereo/result/111.bmp',frame) 129 | print("height_0:{},width_0:{}".format(height_0, width_0)) 130 | iml = frame_left[0:int(height_0), 0:int(width_0)] 131 | imr = frame_right[0:int(height_0), 0:int(width_0)] # iml 和 imr 都是(480,640,3)的图像 132 | # print("***************************{}".format(iml.shape)) 133 | # print("***************************{}".format(imr.shape)) 134 | height, width = iml.shape[0:2] # 左相机图片的高和宽 135 | 
config = stereoconfig_040_2.stereoCamera() # 读取相机的参数 136 | # 获取用于畸变校正和立体校正的映射矩阵以及用于计算像素空间坐标的重投影矩阵 137 | map1x, map1y, map2x, map2y, Q = getRectifyTransform(height, width, config) 138 | # 畸变校正和立体校正 139 | iml_rectified, imr_rectified = rectifyImage(iml, imr, map1x, map1y, map2x, map2y) 140 | # --------------- 将立体校正的图像保存 ------------- 141 | imgcat_out = cat2images(iml_rectified, imr_rectified) 142 | cv2.imwrite('./NO_USE_CODE/imgcat_out.jpg', imgcat_out) 143 | 144 | # 消除畸变 145 | iml = undistortion(iml, config.cam_matrix_left, config.distortion_l) 146 | imr = undistortion(imr, config.cam_matrix_right, config.distortion_r) 147 | # 立体校正 148 | iml_ , imr_ = rectifyImage(iml, imr, map1x, map1y, map2x, map2y) 149 | #---灰度处理 150 | iml_rectified_l, imr_rectified_r = preprocess(iml_, imr_) # 预处理,一般可以削弱光照不均的影响,不做也可以 151 | # SGBM立体匹配算法----stereoMatchSGBM视差计算(得到的左视差和右视差) 152 | disp, _ = stereoMatchSGBM(iml_rectified_l, imr_rectified_r, False) 153 | # -----BM立体匹配算法 154 | # disp = stereoMatchBM(iml_rectified_l,imr_rectified_r) 155 | # 计算像素点的3D坐标(左相机坐标系下)****** 156 | points_3d = cv2.reprojectImageTo3D(disp, Q) 157 | print("points_3d的维度是:{}".format(points_3d.shape)) 158 | # ----------------------------------目标检测代码-------------------------------------------------- 159 | # --------------------------------------------------------------------------------------------- 160 | label, frame, xy = np.array(yolo.detect_image(left_frame)) 161 | print("xy:{}".format(xy)) 162 | # label, frame,xy,list_top,list_left,list_bottom,list_right = np.array(yolo.detect_image(left_frame)) 163 | # xywh.append(list_top) 164 | # xywh.append(list_left) 165 | # xywh.append(list_bottom) 166 | # xywh.append(list_right) 167 | # print("xywh:{}".format(xywh)) 168 | # ———————————————————————————————————————————————————————————————————— 169 | frame = np.array(frame) 170 | # xy = np.array(xy) 171 | # #为了调用,初始化为0一些变量,这样在后面也就可以直接对变量进行使用 172 | x = 0 173 | y = 0 174 | x1 = 0 175 | y1 = 0 176 | x2 = 0 177 | y2 = 0 178 | # dis = 0 179 | x_left = 0 180 | x_top = 0 181 | x_right = 0 182 | x_bottom = 0 183 | # ———————————————————————————————————————————————————————————————————————————————————————————————————————————————————————— 184 | # ----------------是否有标签进行判断(有标签即代表检测到物体)--------------------------------------------------------------------------- 185 | # xy = [top,left,bottom,right] 186 | if xy: 187 | list = np.array(xy).reshape(-1, 4) # n行4列 188 | print("list:{}".format(list)) 189 | label = np.array(label).reshape(-1, 1) # n行1列 label包含:标签和置信度 190 | print("label:{}".format(label)) 191 | # print("label_one:{}".format(label)) 192 | # 循环n行(次) 193 | for temp_list in list: 194 | 195 | y_temp = 0 196 | x_temp = 0 197 | idx = 0 198 | for values in temp_list: 199 | 200 | idx += 1 201 | if idx == 1: 202 | x_top = values 203 | y_temp += values 204 | if idx == 2: 205 | x_left = values 206 | x_temp += values 207 | 208 | if idx == 3: 209 | x_bottom = values 210 | y_temp += values 211 | if idx == 4: 212 | x_right = values 213 | x_temp += values 214 | # print("values:{}".format(values[0]])) 215 | # print(f"values: -> {values}") 216 | print(f"修正前x的值为: {x_temp // 2}") 217 | print(f"修正前y的值为: {y_temp // 2}") 218 | x = (x_temp // 2) 219 | y = (y_temp // 2) 220 | #print("x_top:{}".format(x_top)) 221 | #--------------------------边缘点----想在这加一个判断------- 222 | if (x_right - x_left) > (x_bottom - x_top): 223 | x1 = x_left + 1 224 | y1 = (x_bottom + x_top) / 2 225 | x2 = x_right - 5 226 | y2 = (x_bottom + x_top) / 2 227 | else: 228 | x1 = (x_left + x_right) / 2 229 | y1 = x_top 
+ 2 230 | x2 = (x_left + x_right) / 2 231 | y2 = x_bottom - 2 232 | 233 | 234 | count = 0 235 | # while((points_3d[int(y), int(x), 2] < 0) | (points_3d[int(y), int(x), 2] > 2500)): 236 | # 对x和y的值加一个限制,以免x和y的值不在图像像素内 237 | # 1280 480 (bool((x < 640) & (y < 480)) 238 | # 2560 720 (bool((x < 1280) & (y < 720)) 239 | # 1000 320 (bool((x < 500) & (y < 320)) 240 | 241 | while (bool((x_left < x < x_right) & (x_top < y < x_bottom))): # out of index 242 | count += 1 243 | x += count 244 | if (x >= x_right): # 640 1280 500 245 | x = x_right - 2 # 638 1278 498 246 | break 247 | if (x <= x_left): 248 | x = x_left + 1 249 | break 250 | if (0 < points_3d[int(y), int(x), 2] < 2500): 251 | break 252 | y += count 253 | if (y >= x_bottom): # 480 720 320 254 | y = x_bottom - 2 # 478 718 318 255 | break 256 | # if (y <= x_top): 257 | # y = x_top + 2 258 | if (0 < points_3d[int(y), int(x), 2] < 2500): 259 | break 260 | count += 1 261 | x -= count 262 | if (0 < points_3d[int(y), int(x), 2] < 2500): 263 | break 264 | y -= count 265 | if (0 < points_3d[int(y), int(x), 2] < 2500): 266 | break 267 | if (x >= x_right): # 640 1280 500 268 | x = x_right - 2 # 638 1278 498 269 | print("x is out of index!") 270 | if (y >= x_bottom): # 480 720 320 271 | y = x_bottom - 2 # 478 718 318 272 | print("y is out of index!") 273 | 274 | 275 | # -------------------------各参数依次是:图片,添加的文字,左下角坐标,字体,字体大小,颜色,字体粗细其中字体可以选择 276 | # frame = frame.copy() 277 | # text_cxy = "*" 278 | # frame = cv2.putText(frame, text_cxy, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) 279 | # ------------------------------------------------------------------------------------------ 280 | # 输出某个点的三维坐标 281 | print('修正后-点 (%d, %d) 的三维坐标 (x:%.1fcm, y:%.1fcm, z:%.1fcm)' % (int(x), int(y), 282 | points_3d[int(y), int(x), 0] / 10, 283 | points_3d[int(y), int(x), 1] / 10, 284 | points_3d[int(y), int(x), 2] / 10)) 285 | 286 | dis = ((points_3d[int(y), int(x), 0] ** 2 + points_3d[int(y), int(x), 1] ** 2 + points_3d[ 287 | int(y), int(x), 2] ** 2) ** 0.5) / 10 288 | print('修正后-点 (%d, %d) 的 %s 距离左摄像头的相对距离为 %0.1f cm' % (x, y, label, dis)) 289 | # ------------------- 对x,y中心点做修改 --------------------- 290 | dis = ((points_3d[int(y1), int(x1), 0] ** 2 + points_3d[int(y1), int(x1), 1] ** 2 + points_3d[ 291 | int(y1), int(x1), 2] ** 2) ** 0.5) / 10 292 | dis1 = ((points_3d[int(y2), int(x2), 0] ** 2 + points_3d[int(y2), int(x2), 1] ** 2 + points_3d[ 293 | int(y2), int(x2), 2] ** 2) ** 0.5) / 10 294 | dis2 = ((points_3d[int(y), int(x), 0] ** 2 + points_3d[int(y), int(x), 1] ** 2 + points_3d[ 295 | int(y), int(x), 2] ** 2) ** 0.5) / 10 296 | x1 = int(x1) 297 | y1 = int(y1) 298 | x2 = int(x2) 299 | y2 = int(y2) 300 | #------------------------------------------------------------ 301 | # ---------------------------在窗口显示目标像素的坐标中心点(x,y)------------------------- 302 | text_cxy = "*" 303 | frame = cv2.putText(frame, text_cxy, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 125), 1) 304 | 305 | # --------------------------在窗口显示目标的像素坐标(x1,y1)----------------------------- 306 | text_cxy = "*" 307 | frame = cv2.putText(frame, text_cxy, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1) 308 | 309 | # --------------------------在窗口显示目标的像素坐标(x2,y2)----------------------------- 310 | text_cxy = "*" 311 | frame = cv2.putText(frame, text_cxy, (x2, y2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1) 312 | #--------------------------------------------------------------------- 313 | if text_cxy: 314 | # -----x1点坐标-------- "x:%.1fcm",,"y:%.1fcm",,"z:%.1fcm" 315 | text_x = "x:%.1f" % 
(points_3d[int(y1), int(x1), 0] / 10) 316 | text_y = "y:%.1f" % (points_3d[int(y1), int(x1), 1] / 10) 317 | text_z = "z:%.1f" % (points_3d[int(y1), int(x1), 2] / 10) 318 | text_dis = "dis:%.1fcm" % dis 319 | # -----x2点坐标 320 | text_x1 = "x:%.1f" % (points_3d[int(y2), int(x2), 0] / 10) 321 | text_y1 = "y:%.1f" % (points_3d[int(y2), int(x2), 1] / 10) 322 | text_z1 = "z:%.1f" % (points_3d[int(y2), int(x2), 2] / 10) 323 | text_dis1 = "dis:%.1fcm" % dis1 324 | # -----x_center点坐标 325 | text_xcenter = "x:%.1f" % (points_3d[int(y), int(x), 0] / 10) 326 | text_ycenter = "y:%.1f" % (points_3d[int(y), int(x), 1] / 10) 327 | text_zcenter = "z:%.1f" % (points_3d[int(y), int(x), 2] / 10) 328 | text_dis_center = "dis:%.1fcm" % dis2 329 | 330 | # -----------------------在窗口显示像素点(x,y)的三维世界坐标-------------------------------- 331 | x_left = int(x_left) 332 | x_top = int(x_top) 333 | x_right = int(x_right) 334 | # ---------------------绘制x1点在视窗口的显示 335 | cv2.putText(frame, text_x, (x_left + (x_right - x_left) + 5, x_top + 10), cv2.FONT_ITALIC, 0.5, 336 | (0, 0, 255), 1) 337 | cv2.putText(frame, text_y, (x_left + (x_right - x_left) + 5, x_top + 30), cv2.FONT_ITALIC, 0.5, 338 | (0, 0, 255), 1) 339 | cv2.putText(frame, text_z, (x_left + (x_right - x_left) + 5, x_top + 50), cv2.FONT_ITALIC, 0.5, 340 | (0, 0, 255), 1) 341 | cv2.putText(frame, text_dis, (x_left + (x_right - x_left) + 5, x_top + 65), cv2.FONT_ITALIC, 342 | 0.5, (0, 0, 255), 1) 343 | # ---------------------绘制x2点在视窗口的显示 344 | cv2.putText(frame, text_x1, (x_left + (x_right - x_left) + 5, x_top + 80), cv2.FONT_ITALIC, 0.5, 345 | (0, 255, 255), 1) 346 | cv2.putText(frame, text_y1, (x_left + (x_right - x_left) + 5, x_top + 100), cv2.FONT_ITALIC, 347 | 0.5, 348 | (0, 255, 255), 1) 349 | cv2.putText(frame, text_z1, (x_left + (x_right - x_left) + 5, x_top + 120), cv2.FONT_ITALIC, 350 | 0.5, 351 | (0, 255, 255), 1) 352 | cv2.putText(frame, text_dis1, (x_left + (x_right - x_left) + 5, x_top + 140), cv2.FONT_ITALIC, 353 | 0.5, (0, 255, 255), 1) 354 | # --------------------绘制x_center点在视窗口的显示 355 | cv2.putText(frame, text_xcenter, (x_temp // 2 - 60, y_temp // 2), cv2.FONT_ITALIC, 0.5, 356 | (255, 0, 125), 1) 357 | cv2.putText(frame, text_ycenter, (x_temp // 2 - 60, y_temp // 2 + 15), cv2.FONT_ITALIC, 358 | 0.5, 359 | (255, 0, 125), 1) 360 | cv2.putText(frame, text_zcenter, (x_temp // 2 - 60, y_temp // 2 + 30), cv2.FONT_ITALIC, 361 | 0.5, 362 | (255, 0, 125), 1) 363 | cv2.putText(frame, text_dis_center, (x_temp // 2 - 60, y_temp // 2 + 45), cv2.FONT_ITALIC, 364 | 0.5, (255, 0, 125), 1) 365 | 366 | fps = ( fps + (1./(time.time()-t1)) ) / 2 367 | print("fps= %.2f"%(fps)) 368 | frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) 369 | frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) 370 | # frame11 = cv2.cvtColor(frame11, cv2.COLOR_RGB2BGR) 371 | # ---这里实际上只是显示了左摄像机的一个窗口(作为目标检测框和对目标的三维定位(测距)框来使用) 372 | # 1280 480 left_frame = frame[0:480, 0:640] 373 | # 2560 720 left_frame = frame[0:720, 0:1280] 374 | # 1000 320 left_frame = frame[0:320, 0:500] 375 | left_frame = frame[0:480, 0:640] 376 | # right_frame = frame11[0:480, 0:640] 377 | # ---对桌面显示的窗口进行重命名(目标检测和目标定位) 378 | cv2.imshow("Object detection and object localization", left_frame) 379 | 380 | # --------------------- 将检测到的实时画面保存为图片 ----------------------- 381 | if xy: 382 | i += 1 383 | name = str(label[0]).split(' ')[0] 384 | cv2.imwrite('./stereo/result/left/{}_{}.bmp'.format(name,i),left_frame) 385 | #cv2.imwrite('./stereo/result/right/{}_{}.bmp'.format(name, i), 
right_frame) 386 | 387 | 388 | #cv2.imshow("video",frame) 389 | c= cv2.waitKey(1) & 0xff 390 | if video_save_path!="": 391 | out.write(frame) 392 | 393 | if c==27: 394 | capture.release() 395 | break 396 | capture.release() 397 | out.release() 398 | cv2.destroyAllWindows() 399 | # print("Video Detection Done!") 400 | # capture.release() 401 | # if video_save_path!="": 402 | # print("Save processed video to the path :" + video_save_path) 403 | # out.release() 404 | # cv2.destroyAllWindows() 405 | 406 | elif mode == "fps": 407 | img = Image.open('img/street.jpg') 408 | tact_time = yolo.get_FPS(img, test_interval) 409 | print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1') 410 | 411 | elif mode == "dir_predict": 412 | import os 413 | 414 | from tqdm import tqdm 415 | 416 | img_names = os.listdir(dir_origin_path) 417 | for img_name in tqdm(img_names): 418 | if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')): 419 | image_path = os.path.join(dir_origin_path, img_name) 420 | image = Image.open(image_path) 421 | r_image = yolo.detect_image(image) 422 | if not os.path.exists(dir_save_path): 423 | os.makedirs(dir_save_path) 424 | r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality=95, subsampling=0) 425 | 426 | else: 427 | raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps' or 'dir_predict'.") 428 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy==1.2.1 2 | numpy==1.17.0 3 | matplotlib==3.1.2 4 | opencv_python==4.1.2.30 5 | torch==1.2.0 6 | torchvision==0.4.0 7 | tqdm==4.60.0 8 | Pillow==8.2.0 9 | h5py==2.10.0 10 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/3code.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import time 4 | import numpy as np 5 | from PIL import Image 6 | 7 | def cat2images(limg, rimg): 8 | HEIGHT = limg.shape[0] 9 | WIDTH = limg.shape[1] 10 | imgcat = np.zeros((HEIGHT, WIDTH * 2 + 20, 3)) 11 | imgcat[:, :WIDTH, :] = limg 12 | imgcat[:, -WIDTH:, :] = rimg 13 | for i in range(int(HEIGHT / 32)): 14 | imgcat[i * 32, :, :] = 255 15 | return imgcat 16 | 17 | capture = cv2.VideoCapture(0,cv2.CAP_DSHOW) 18 | capture1 = cv2.VideoCapture(1, cv2.CAP_DSHOW) 19 | ret,frame = capture.read() 20 | ref1, frame1 = capture1.read() 21 | 22 | frame_left = frame 23 | frame_right = frame1 24 | 25 | 26 | 27 | left_image = cv2.imread("./yolo/left/left_7.bmp") 28 | right_image = cv2.imread("./yolo/right/right_7.bmp") 29 | imgcat_source = cat2images(left_image, right_image) 30 | 31 | #imgcat_source = cat2images(frame_left, frame_right) 32 | 33 | HEIGHT = frame_left.shape[0] 34 | WIDTH = frame_left.shape[1] 35 | cv2.imwrite('./yolo/xiaozheng_qian1.jpg', imgcat_source) 36 | 37 | camera_matrix0 = np.array([[757.894, -2.54024, 281.73356], 38 | [0, 748.34388, 230.360], 39 | [0, 0, 1]]).reshape((3, 3)) # 即上文标定得到的 cameraMatrix1 40 | 41 | distortion0 = np.array( 42 | [-0.219606598267299,1.538265449459559,-0.008759917468026,-0.012829271853108,-15.006098042168361]) # 即上文标定得到的 distCoeffs1 43 | 44 | camera_matrix1 = np.array([[756.2052, -3.677445,282.60554], 45 | [0, 751.20658,213.2578202], 46 | [0, 0 ,1]]).reshape((3, 3)) # 即上文标定得到的 cameraMatrix2 47 | distortion1 = 
np.array( 48 | [-0.045914586942381,-6.258346101399701,-0.008021351260658,-0.022897393264614,72.703142703152610]) # 即上文标定得到的 distCoeffs2 49 | 50 | R = np.array([[0.999989229476667,-0.001079697704587,-0.004513887850642], 51 | [9.617580928426970e-04,0.999660199821240,-0.026049182611556], 52 | [0.004540479273416,0.026044560780057,0.999650471365738]] 53 | ) # 即上文标定得到的 R 54 | T = np.array([-79.363393754544390,0.695577933585846,1.098609901964254]) # 即上文标定得到的T 55 | 56 | (R_l, R_r, P_l, P_r, Q, validPixROI1, validPixROI2) = \ 57 | cv2.stereoRectify(camera_matrix0, distortion0, camera_matrix1, distortion1, np.array([WIDTH, HEIGHT]), R, 58 | T) # 计算旋转矩阵和投影矩阵 59 | 60 | (map1, map2) = \ 61 | cv2.initUndistortRectifyMap(camera_matrix0, distortion0, R_l, P_l, np.array([WIDTH, HEIGHT]), 62 | cv2.CV_32FC1) # 计算校正查找映射表 63 | 64 | rect_left_image = cv2.remap(left_image, map1, map2, cv2.INTER_CUBIC) # 重映射 65 | 66 | # 左右图需要分别计算校正查找映射表以及重映射 67 | (map1, map2) = \ 68 | cv2.initUndistortRectifyMap(camera_matrix1, distortion1, R_r, P_r, np.array([WIDTH, HEIGHT]), cv2.CV_32FC1) 69 | 70 | rect_right_image = cv2.remap(right_image, map1, map2, cv2.INTER_CUBIC) 71 | 72 | imgcat_out = cat2images(rect_left_image, rect_right_image) 73 | cv2.imwrite('./yolo/xiaozheng_hou1.jpg', imgcat_out) 74 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/NCC.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | im1 = './yolo/left/left_7.bmp' 5 | im2 = './yolo/right/right_7.bmp' 6 | img1 = cv2.imread(im1, cv2.CV_8UC1) 7 | img2 = cv2.imread(im2, cv2.CV_8UC1) 8 | rows, cols = img1.shape 9 | 10 | print("图片的维度是:{}".format(img1.shape)) 11 | 12 | def translaton(image, shape): 13 | step = round((shape[0]-1)/2) 14 | print("迭代次数:{}".format(step)) 15 | shifted = [] 16 | for i in range(0, step+1): 17 | for j in range(0, step+1): 18 | if i==0 and j==0: 19 | M1 = np.float32([[1, 0, i], [0, 1, j]]) 20 | shifted.append(cv2.warpAffine(image, M1, (image.shape[1], image.shape[0]))) 21 | elif i==0 and j!=0: 22 | M1 = np.float32([[1, 0, i], [0, 1, j]]) 23 | M2 = np.float32([[1, 0, i], [0, 1, -j]]) 24 | shifted.append(cv2.warpAffine(image, M1, (image.shape[1], image.shape[0]))) 25 | shifted.append(cv2.warpAffine(image, M2, (image.shape[1], image.shape[0]))) 26 | elif i!=0 and j==0: 27 | M1 = np.float32([[1, 0, i], [0, 1, j]]) 28 | M2 = np.float32([[1, 0, -i], [0, 1, j]]) 29 | shifted.append(cv2.warpAffine(image, M1, (image.shape[1], image.shape[0]))) 30 | shifted.append(cv2.warpAffine(image, M2, (image.shape[1], image.shape[0]))) 31 | else: 32 | M1 = np.float32([[1, 0, i], [0, 1, j]]) 33 | M2 = np.float32([[1, 0, -i], [0, 1, -j]]) 34 | M3 = np.float32([[1, 0, -i], [0, 1, j]]) 35 | M4 = np.float32([[1, 0, i], [0, 1, -j]]) 36 | shifted .append(cv2.warpAffine(image, M1, (image.shape[1], image.shape[0]))) 37 | shifted.append(cv2.warpAffine(image, M2, (image.shape[1], image.shape[0]))) 38 | shifted.append(cv2.warpAffine(image, M3, (image.shape[1], image.shape[0]))) 39 | shifted.append(cv2.warpAffine(image, M4, (image.shape[1], image.shape[0]))) 40 | 41 | #print(len(shifted)) 42 | return np.array(shifted) 43 | 44 | #I(x,y)-avg(I(x,y)) 45 | def img_sub_avg(img_shifted, avg_img): 46 | len, height, width = img1_shifted.shape 47 | tmp_ncc1 = np.zeros([len, height, width]) 48 | for i in range(len): 49 | tmp_ncc1[i] = img_shifted[i] - avg_img 50 | #print(tmp_ncc1) 51 | return tmp_ncc1 52 | 53 | def NCC(img1_sub_avg,img2_sub_avg, 
threshold, max_d): 54 | #设立阈值 55 | len, height, width = img1_sub_avg.shape 56 | thershould_shifted = np.zeros([len, height, width]) 57 | ncc_max = np.zeros([height, width]) 58 | ncc_d = np.zeros([height, width]) 59 | for j in range(3, max_d): 60 | tmp_ncc1 = np.zeros([height, width]) 61 | tmp_ncc2 = np.zeros([height, width]) 62 | tmp_ncc3 = np.zeros([height, width]) 63 | for k in range(len): 64 | M1 = np.float32([[1, 0, -j - 1], [0, 1, 0]]) 65 | thershould_shifted[k] = cv2.warpAffine(img1_sub_avg[k], M1, (img1_sub_avg.shape[2], img1_sub_avg.shape[1])) 66 | for i in range(len): 67 | tmp_ncc1 += (img2_sub_avg[i])*(thershould_shifted[i]) 68 | tmp_ncc2 += pow(img2_sub_avg[i], 2) 69 | tmp_ncc3 += pow(thershould_shifted[i], 2) 70 | 71 | tmp_ncc2 = tmp_ncc2*tmp_ncc3 72 | tmp_ncc2 = np.sqrt(tmp_ncc2) 73 | tmp_ncc4 = tmp_ncc1/tmp_ncc2 74 | for m in range(height): 75 | for n in range(width): 76 | if tmp_ncc4[m, n] > ncc_max[m ,n] and tmp_ncc4[m, n] > threshold: 77 | ncc_max[m, n] = tmp_ncc4[m, n] 78 | ncc_d[m , n] = j 79 | for i in ncc_d: 80 | print(i) 81 | return ncc_max, ncc_d 82 | 83 | if __name__ == "__main__": 84 | disparity = np.zeros([rows, cols]) 85 | NCC_value = np.zeros([rows, cols]) 86 | deeps = np.zeros([rows, cols]) 87 | # 用3*3卷积核做均值滤波 88 | avg_img1 = cv2.blur(img1, (7, 7)) 89 | avg_img2 = cv2.blur(img2, (7, 7)) 90 | fimg1 = img1.astype(np.float32) 91 | fimg2 = img2.astype(np.float32) 92 | avg_img1 = avg_img1.astype(np.float32) 93 | avg_img2 = avg_img2.astype(np.float32) 94 | img1_shifted = translaton(fimg1, [7, 7]) 95 | img2_shifted = translaton(fimg2, [7, 7]) 96 | img1_sub_avg = img_sub_avg(img1_shifted, avg_img1) 97 | img2_sub_avg = img_sub_avg(img2_shifted, avg_img2) 98 | ncc_max, ncc_d = NCC(img1_sub_avg,img2_sub_avg, threshold = 0.5, max_d = 64) 99 | 100 | print(img1_shifted.shape) 101 | 102 | disp = cv2.normalize(ncc_d, ncc_d, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, 103 | dtype=cv2.CV_8U) 104 | cv2.imwrite("./yolo/left.jpg", img1) 105 | cv2.imwrite("./yolo/right.jpg", img2) 106 | cv2.imwrite("./yolo/depth.jpg", disp) 107 | cv2.waitKey(0) # 等待按键按下 108 | cv2.destroyAllWindows()#清除所有窗口 -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/dianyuntu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import cv2 3 | import numpy as np 4 | import stereoconfig_040_2 #导入相机标定的参数 5 | import pcl 6 | import pcl.pcl_visualization 7 | import os 8 | 9 | # 预处理 10 | def preprocess(img1, img2): 11 | # 彩色图->灰度图 12 | if(img1.ndim == 3):#判断为三维数组 13 | img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY) # 通过OpenCV加载的图像通道顺序是BGR 14 | if(img2.ndim == 3): 15 | img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY) 16 | 17 | # 直方图均衡 18 | img1 = cv2.equalizeHist(img1) 19 | img2 = cv2.equalizeHist(img2) 20 | 21 | return img1, img2 22 | 23 | 24 | # 消除畸变 25 | def undistortion(image, camera_matrix, dist_coeff): 26 | undistortion_image = cv2.undistort(image, camera_matrix, dist_coeff) 27 | 28 | return undistortion_image 29 | 30 | 31 | # 获取畸变校正和立体校正的映射变换矩阵、重投影矩阵 32 | # @param:config是一个类,存储着双目标定的参数:config = stereoconfig.stereoCamera() 33 | def getRectifyTransform(height, width, config): 34 | # 读取内参和外参 35 | left_K = config.cam_matrix_left 36 | right_K = config.cam_matrix_right 37 | left_distortion = config.distortion_l 38 | right_distortion = config.distortion_r 39 | R = config.R 40 | T = config.T 41 | 42 | # 计算校正变换 43 | R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(left_K, left_distortion, right_K, 
right_distortion, 44 | (width, height), R, T, alpha=0) 45 | 46 | map1x, map1y = cv2.initUndistortRectifyMap(left_K, left_distortion, R1, P1, (width, height), cv2.CV_32FC1) 47 | map2x, map2y = cv2.initUndistortRectifyMap(right_K, right_distortion, R2, P2, (width, height), cv2.CV_32FC1) 48 | 49 | return map1x, map1y, map2x, map2y, Q 50 | 51 | 52 | # 畸变校正和立体校正 53 | def rectifyImage(image1, image2, map1x, map1y, map2x, map2y): 54 | rectifyed_img1 = cv2.remap(image1, map1x, map1y, cv2.INTER_AREA) 55 | rectifyed_img2 = cv2.remap(image2, map2x, map2y, cv2.INTER_AREA) 56 | 57 | return rectifyed_img1, rectifyed_img2 58 | 59 | 60 | # 立体校正检验----画线 61 | def draw_line(image1, image2): 62 | # 建立输出图像 63 | height = max(image1.shape[0], image2.shape[0]) 64 | width = image1.shape[1] + image2.shape[1] 65 | 66 | output = np.zeros((height, width, 3), dtype=np.uint8) 67 | output[0:image1.shape[0], 0:image1.shape[1]] = image1 68 | output[0:image2.shape[0], image1.shape[1]:] = image2 69 | 70 | # 绘制等间距平行线 71 | line_interval = 50 # 直线间隔:50 72 | for k in range(height // line_interval): 73 | cv2.line(output, (0, line_interval * (k + 1)), (2 * width, line_interval * (k + 1)), (0, 255, 0), thickness=2, lineType=cv2.LINE_AA) 74 | 75 | return output 76 | 77 | 78 | # 视差计算 79 | def stereoMatchSGBM(left_image, right_image, down_scale=False): 80 | # SGBM匹配参数设置 81 | if left_image.ndim == 2: 82 | img_channels = 1 83 | else: 84 | img_channels = 3 85 | blockSize = 3 86 | paraml = {'minDisparity': 0, 87 | 'numDisparities': 128, 88 | 'blockSize': blockSize, 89 | 'P1': 8 * img_channels * blockSize ** 2, 90 | 'P2': 32 * img_channels * blockSize ** 2, 91 | 'disp12MaxDiff': 1, 92 | 'preFilterCap': 63, 93 | 'uniquenessRatio': 15, 94 | 'speckleWindowSize': 100, 95 | 'speckleRange': 1, 96 | 'mode': cv2.STEREO_SGBM_MODE_SGBM_3WAY 97 | } 98 | 99 | # 构建SGBM对象 100 | left_matcher = cv2.StereoSGBM_create(**paraml) 101 | paramr = paraml 102 | paramr['minDisparity'] = -paraml['numDisparities'] 103 | right_matcher = cv2.StereoSGBM_create(**paramr) 104 | 105 | # 计算视差图 106 | size = (left_image.shape[1], left_image.shape[0]) 107 | if down_scale == False: 108 | disparity_left = left_matcher.compute(left_image, right_image) 109 | disparity_right = right_matcher.compute(right_image, left_image) 110 | 111 | else: 112 | left_image_down = cv2.pyrDown(left_image) 113 | right_image_down = cv2.pyrDown(right_image) 114 | factor = left_image.shape[1] / left_image_down.shape[1] 115 | 116 | disparity_left_half = left_matcher.compute(left_image_down, right_image_down) 117 | disparity_right_half = right_matcher.compute(right_image_down, left_image_down) 118 | disparity_left = cv2.resize(disparity_left_half, size, interpolation=cv2.INTER_AREA) 119 | disparity_right = cv2.resize(disparity_right_half, size, interpolation=cv2.INTER_AREA) 120 | disparity_left = factor * disparity_left 121 | disparity_right = factor * disparity_right 122 | 123 | # 真实视差(因为SGBM算法得到的视差是×16的) 124 | trueDisp_left = disparity_left.astype(np.float32) / 16. 125 | trueDisp_right = disparity_right.astype(np.float32) / 16. 
126 | 127 | return trueDisp_left, trueDisp_right 128 | 129 | 130 | # 将h×w×3数组转换为N×3的数组 131 | def hw3ToN3(points): 132 | height, width = points.shape[0:2] 133 | 134 | points_1 = points[:, :, 0].reshape(height * width, 1) 135 | points_2 = points[:, :, 1].reshape(height * width, 1) 136 | points_3 = points[:, :, 2].reshape(height * width, 1) 137 | 138 | points_ = np.hstack((points_1, points_2, points_3)) 139 | 140 | return points_ 141 | 142 | 143 | # 深度、颜色转换为点云 144 | def DepthColor2Cloud(points_3d, colors): 145 | rows, cols = points_3d.shape[0:2] 146 | size = rows * cols 147 | 148 | points_ = hw3ToN3(points_3d) 149 | colors_ = hw3ToN3(colors).astype(np.int64) 150 | 151 | # 颜色信息 152 | blue = colors_[:, 0].reshape(size, 1) 153 | green = colors_[:, 1].reshape(size, 1) 154 | red = colors_[:, 2].reshape(size, 1) 155 | 156 | rgb = np.left_shift(blue, 0) + np.left_shift(green, 8) + np.left_shift(red, 16) 157 | 158 | # 将坐标+颜色叠加为点云数组 159 | pointcloud = np.hstack((points_, rgb)).astype(np.float32) 160 | 161 | # 删掉一些不合适的点 162 | X = pointcloud[:, 0] 163 | Y = pointcloud[:, 1] 164 | Z = pointcloud[:, 2] 165 | 166 | remove_idx1 = np.where(Z <= 0) 167 | remove_idx2 = np.where(Z > 15000) 168 | remove_idx3 = np.where(X > 10000) 169 | remove_idx4 = np.where(X < -10000) 170 | remove_idx5 = np.where(Y > 10000) 171 | remove_idx6 = np.where(Y < -10000) 172 | remove_idx = np.hstack((remove_idx1[0], remove_idx2[0], remove_idx3[0], remove_idx4[0], remove_idx5[0], remove_idx6[0])) 173 | 174 | pointcloud_1 = np.delete(pointcloud, remove_idx, 0) 175 | 176 | return pointcloud_1 177 | 178 | 179 | # 点云显示 180 | def view_cloud(pointcloud): 181 | cloud = pcl.PointCloud_PointXYZRGBA() 182 | cloud.from_array(pointcloud) 183 | 184 | try: 185 | visual = pcl.pcl_visualization.CloudViewing() 186 | visual.ShowColorACloud(cloud) 187 | v = True 188 | while v: 189 | v = not (visual.WasStopped()) 190 | except: 191 | pass 192 | 193 | 194 | if __name__ == '__main__': 195 | 196 | i = 4 #8 197 | string = 'Val' 198 | # 读取数据集的图片 199 | iml = cv2.imread('G://pycharm//study_code//LYD-Champion-Photo-Camera//yolov5+stereo//stereo//yolo//zuo//left%d.bmp' %i) 200 | #iml = cv2.imread('./stereo/yolo/zuo/%sleft%d.bmp' %(string,i) ) # 左图 201 | imr = cv2.imread('G://pycharm//study_code//LYD-Champion-Photo-Camera//yolov5+stereo//stereo//yolo//you//right%d.bmp' %i) 202 | #imr = cv2.imread('./yolo/you/%sright%d.bmp'%(string,i) ) # 右图 203 | 204 | print("左图的尺寸{}:".format(iml.shape)) 205 | print("右图的尺寸{}:".format(imr.shape)) 206 | 207 | height, width = iml.shape[0:2] 208 | 209 | print("width = %d \n" % width) 210 | print("height = %d \n" % height) 211 | 212 | 213 | # 读取相机内参和外参 214 | config = stereoconfig_040_2.stereoCamera() 215 | 216 | # 立体校正 217 | map1x, map1y, map2x, map2y, Q = getRectifyTransform(height, width, config) # 获取用于畸变校正和立体校正的映射矩阵以及用于计算像素空间坐标的重投影矩阵 218 | iml_rectified, imr_rectified = rectifyImage(iml, imr, map1x, map1y, map2x, map2y) 219 | 220 | print("Print Q!") 221 | print(Q) 222 | 223 | # 绘制等间距平行线,检查立体校正的效果 224 | line = draw_line(iml_rectified, imr_rectified) 225 | cv2.imwrite('./%s检验%d.png' %(string,i), line) 226 | 227 | # 消除畸变 228 | iml = undistortion(iml, config.cam_matrix_left, config.distortion_l) 229 | imr = undistortion(imr, config.cam_matrix_right, config.distortion_r) 230 | 231 | # 立体匹配 232 | iml_, imr_ = preprocess(iml, imr) # 预处理,一般可以削弱光照不均的影响,不做也可以 233 | 234 | iml_rectified_l, imr_rectified_r = rectifyImage(iml_, imr_, map1x, map1y, map2x, map2y) 235 | 236 | disp, _ = stereoMatchSGBM(iml_rectified_l, imr_rectified_r, True) 237 | 
cv2.imwrite('./%s视差%d.png' %(string,i), disp) 238 | 239 | 240 | 241 | # 计算像素点的3D坐标(左相机坐标系下) 242 | points_3d = cv2.reprojectImageTo3D(disp, Q) # 可以使用上文的stereo_config.py给出的参数 243 | 244 | #points_3d = points_3d 245 | 246 | # 鼠标点击事件 247 | def onMouse(event, x, y, flags, param): 248 | if event == cv2.EVENT_LBUTTONDOWN: 249 | print('点 (%d, %d) 的三维坐标 (x:%.3fm, y:%.3fm, z:%.3fm)' % (x, y, points_3d[y, x, 0]/1000, points_3d[y, x, 1]/1000, points_3d[y, x, 2]/1000)) 250 | dis = ( (points_3d[y, x, 0] ** 2 + points_3d[y, x, 1] ** 2 + points_3d[y, x, 2] **2) ** 0.5) / 1000 251 | print('点 (%d, %d) 距离左摄像头的相对距离为 %0.3f m' %(x, y, dis) ) 252 | 253 | # 显示图片 254 | cv2.namedWindow("disparity",0) 255 | cv2.imshow("disparity", disp) 256 | cv2.setMouseCallback("disparity", onMouse, 0) 257 | 258 | 259 | 260 | # 构建点云--Point_XYZRGBA格式 261 | pointcloud = DepthColor2Cloud(points_3d, iml) 262 | 263 | # 显示点云 264 | view_cloud(pointcloud) 265 | 266 | cv2.waitKey(0) 267 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/dianyuntu_yolo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import cv2 3 | import numpy as np 4 | 5 | from stereo.stereoconfig_040_2 import stereoCamera 6 | #from stereoconfig_040_2 import stereoCamera 7 | #import stereoconfig_040_2 8 | #import stereoconfig_040_2 #导入相机标定的参数 9 | import pcl 10 | import pcl.pcl_visualization 11 | #config = stereoCamera() 12 | # 预处理 13 | #config = stereoconfig_040_2.stereoCamera() 14 | 15 | def preprocess(img1, img2): 16 | # 彩色图->灰度图 17 | if(img1.ndim == 3):#判断为三维数组 18 | img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY) # 通过OpenCV加载的图像通道顺序是BGR 19 | if(img2.ndim == 3): 20 | img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY) 21 | 22 | # 直方图均衡 23 | img1 = cv2.equalizeHist(img1) 24 | img2 = cv2.equalizeHist(img2) 25 | 26 | return img1, img2 27 | 28 | 29 | # 消除畸变 30 | def undistortion(image, camera_matrix, dist_coeff): 31 | undistortion_image = cv2.undistort(image, camera_matrix, dist_coeff) 32 | 33 | return undistortion_image 34 | 35 | 36 | # 获取畸变校正和立体校正的映射变换矩阵、重投影矩阵 37 | # @param:config是一个类,存储着双目标定的参数:config = stereoconfig.stereoCamera() 38 | def getRectifyTransform(height, width, config): 39 | # 读取内参和外参 40 | left_K = config.cam_matrix_left 41 | right_K = config.cam_matrix_right 42 | left_distortion = config.distortion_l 43 | right_distortion = config.distortion_r 44 | R = config.R 45 | #print("R{}".format(config.R)) 46 | T = config.T 47 | height = int(height) 48 | width = int(width) 49 | #R1 = stereoCamera.R1 50 | # 计算校正变换 51 | # stereoRectify() 的作用是为每个摄像头计算立体校正的映射矩阵。 52 | # 所以其运行结果并不是直接将图片进行立体矫正,而是得出进行立体矫正所需要的映射矩阵。 53 | R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(left_K, left_distortion, right_K, right_distortion, 54 | (width, height), R, T, flags=1, alpha=-1) 55 | #这个函数用于计算无畸变和修成转换关系 56 | #输出左图和右图的X和Y坐标的重映射参数 57 | map1x, map1y = cv2.initUndistortRectifyMap(left_K, left_distortion, R1, P1, (width, height), cv2.CV_32FC1)#cv2.CV_16SC2 58 | map2x, map2y = cv2.initUndistortRectifyMap(right_K, right_distortion, R2, P2, (width, height), cv2.CV_32FC1) 59 | 60 | return map1x, map1y, map2x, map2y, Q 61 | 62 | 63 | # 畸变校正和立体校正 64 | def rectifyImage(image1, image2, map1x, map1y, map2x, map2y): 65 | #一幅图像中某位置的像素放置到另一个图片指定位置 #cv2.INTER_LINEAR 66 | rectifyed_img1 = cv2.remap(image1, map1x, map1y, cv2.INTER_LANCZOS4, cv2.BORDER_CONSTANT, 0)#cv2.INTER_LANCZOS4, cv2.BORDER_CONSTANT, 0 67 | rectifyed_img2 = cv2.remap(image2, map2x, map2y, 
cv2.INTER_LANCZOS4, cv2.BORDER_CONSTANT, 0) 68 | #----------------------- 69 | return rectifyed_img1, rectifyed_img2 70 | 71 | 72 | # 立体校正检验----画线 73 | def draw_line(image1, image2): 74 | # 建立输出图像 75 | height = max(image1.shape[0], image2.shape[0]) 76 | width = image1.shape[1] + image2.shape[1] 77 | 78 | output = np.zeros((height, width, 3), dtype=np.uint8) 79 | output[0:image1.shape[0], 0:image1.shape[1]] = image1 80 | output[0:image2.shape[0], image1.shape[1]:] = image2 81 | 82 | # 绘制等间距平行线 83 | line_interval = 50 # 直线间隔:50 84 | for k in range(height // line_interval): 85 | cv2.line(output, (0, line_interval * (k + 1)), (2 * width, line_interval * (k + 1)), (0, 255, 0), thickness=2, lineType=cv2.LINE_AA) 86 | print("立体校正完成————————————") 87 | return output 88 | 89 | 90 | # 视差计算 91 | def stereoMatchSGBM(left_image, right_image, down_scale=False): 92 | 93 | #-----------------设置参数可调窗口显示----------------- 94 | num = cv2.getTrackbarPos("num", "set") 95 | blockSize = cv2.getTrackbarPos("blockSize", "set") 96 | if blockSize % 1 == 0: 97 | blockSize += 1 98 | if blockSize < 1: 99 | blockSize = 1 100 | if num < 2: 101 | num = 2 102 | #---------------------------END------------------------- 103 | # SGBM匹配参数设置 104 | if left_image.ndim == 2: 105 | img_channels = 1 106 | else: 107 | img_channels = 3 108 | # num = 8 109 | # blockSize = 3 # 3-->5---> 110 | paraml = {'minDisparity': 0, 111 | 'numDisparities': 16 * num, 112 | #'numDisparities': 128, 113 | 'blockSize': blockSize, 114 | 'P1': 8 * img_channels * blockSize ** 2, 115 | 'P2': 32 * img_channels * blockSize ** 2, 116 | 'disp12MaxDiff': 1, 117 | 'preFilterCap': 63, 118 | 'uniquenessRatio': 15, 119 | 'speckleWindowSize': 100, 120 | 'speckleRange': 2, 121 | 'mode': cv2.STEREO_SGBM_MODE_SGBM_3WAY 122 | } 123 | 124 | # 构建SGBM对象 125 | left_matcher = cv2.StereoSGBM_create(**paraml) 126 | paramr = paraml 127 | paramr['minDisparity'] = -paraml['numDisparities'] 128 | right_matcher = cv2.StereoSGBM_create(**paramr) 129 | 130 | # 计算视差图 131 | size = (left_image.shape[1], left_image.shape[0]) 132 | if down_scale == False: 133 | disparity_left = left_matcher.compute(left_image, right_image) 134 | disparity_right = right_matcher.compute(right_image, left_image) 135 | 136 | else: 137 | left_image_down = cv2.pyrDown(left_image) 138 | right_image_down = cv2.pyrDown(right_image) 139 | factor = left_image.shape[1] / left_image_down.shape[1] 140 | 141 | disparity_left_half = left_matcher.compute(left_image_down, right_image_down) 142 | disparity_right_half = right_matcher.compute(right_image_down, left_image_down) 143 | disparity_left = cv2.resize(disparity_left_half, size, interpolation=cv2.INTER_AREA) 144 | disparity_right = cv2.resize(disparity_right_half, size, interpolation=cv2.INTER_AREA) 145 | disparity_left = factor * disparity_left 146 | disparity_right = factor * disparity_right 147 | 148 | # 真实视差(因为SGBM算法得到的视差是×16的) 149 | trueDisp_left = disparity_left.astype(np.float32) / 16. 150 | trueDisp_right = disparity_right.astype(np.float32) / 16. 
151 | 152 | return trueDisp_left, trueDisp_right 153 | 154 | #-----------------------New-SGBM算法-------------- 155 | def new_SGBM(left_image, right_image, down_scale = False): 156 | # -----------------设置参数可调窗口显示----------------- 157 | num = cv2.getTrackbarPos("num", "set") 158 | blockSize = cv2.getTrackbarPos("blockSize", "set") 159 | if blockSize % 1 == 0: 160 | blockSize += 1 161 | if blockSize < 1: 162 | blockSize = 1 163 | if num < 2: 164 | num = 2 165 | # ---------------------------END------------------------- 166 | # SGBM匹配参数设置 167 | if left_image.ndim == 2: # python-opencv读取的灰度图像是二维列表(数组),彩色图像是三位列表(数组),.ndim返回的是数组的维度 168 | img_channels = 1 169 | else: 170 | img_channels = 3 171 | # ------------------------------ 172 | # blockSize = 3 173 | # ---------------end------------- 174 | paraml = {'minDisparity': 0, 175 | 'numDisparities': 16 * num, # 64 176 | 'blockSize': blockSize, 177 | 'P1': 8 * img_channels * blockSize ** 2, 178 | 'P2': 32 * img_channels * blockSize ** 2, 179 | 'disp12MaxDiff': 1, 180 | 'preFilterCap': 5, # 63 181 | 'uniquenessRatio': 15, 182 | 'speckleWindowSize': 100, 183 | 'speckleRange': 1, 184 | 'mode': cv2.STEREO_SGBM_MODE_SGBM_3WAY 185 | } 186 | 187 | # 构建SGBM对象 188 | stereo = cv2.StereoSGBM_create(**paraml) 189 | disp = stereo.compute(left_image, right_image) 190 | 191 | # 转换为单通道图片 192 | #disp = cv2.normalize(disp, disp, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U) 193 | 194 | return disp 195 | 196 | 197 | 198 | 199 | #-----------------------BM算法----------------------------------- 200 | def stereoMatchBM(left_image, right_image): 201 | # BM算法用到的 202 | #cv2.namedWindow("left") 203 | #cv2.namedWindow("right") 204 | cv2.namedWindow("depth") 205 | #cv2.moveWindow("left", 0, 0) 206 | #cv2.moveWindow("right", 600, 0) 207 | cv2.createTrackbar("num", "depth", 0, 20, lambda x: None) 208 | cv2.createTrackbar("blockSize", "depth", 1, 25, lambda x: None) 209 | # 两个trackbar用来调节不同的参数查看效果 210 | num = cv2.getTrackbarPos("num", "depth") 211 | blockSize = cv2.getTrackbarPos("blockSize", "depth") 212 | if blockSize % 2 == 0: 213 | blockSize += 1 214 | if blockSize < 5: 215 | blockSize = 5 216 | # 根据Block Maching方法生成差异图(opencv里也提供了SGBM/Semi-Global Block Matching算法,有兴趣可以试试) 217 | stereo = cv2.StereoBM_create(numDisparities=16 * num, blockSize=blockSize) 218 | # stereo = cv2.StereoSGBM_create(numDisparities=16 * num, blockSize=blockSize) 219 | disparity = stereo.compute(left_image, right_image) 220 | disp = cv2.normalize(disparity, disparity, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U) 221 | return disp 222 | #----------------------------------------------------------- 223 | 224 | # 将h×w×3数组转换为N×3的数组 225 | def hw3ToN3(points): 226 | height, width = points.shape[0:2] 227 | 228 | points_1 = points[:, :, 0].reshape(height * width, 1) 229 | points_2 = points[:, :, 1].reshape(height * width, 1) 230 | points_3 = points[:, :, 2].reshape(height * width, 1) 231 | 232 | points_ = np.hstack((points_1, points_2, points_3)) 233 | 234 | return points_ 235 | 236 | 237 | # 深度、颜色转换为点云 238 | def DepthColor2Cloud(points_3d, colors): 239 | rows, cols = points_3d.shape[0:2] 240 | size = rows * cols 241 | 242 | points_ = hw3ToN3(points_3d) 243 | colors_ = hw3ToN3(colors).astype(np.int64) 244 | 245 | # 颜色信息 246 | blue = colors_[:, 0].reshape(size, 1) 247 | green = colors_[:, 1].reshape(size, 1) 248 | red = colors_[:, 2].reshape(size, 1) 249 | 250 | rgb = np.left_shift(blue, 0) + np.left_shift(green, 8) + np.left_shift(red, 16) 251 | 252 | # 将坐标+颜色叠加为点云数组 253 | pointcloud = np.hstack((points_, 
rgb)).astype(np.float32) 254 | 255 | # 删掉一些不合适的点 256 | X = pointcloud[:, 0] 257 | Y = pointcloud[:, 1] 258 | Z = pointcloud[:, 2] 259 | 260 | remove_idx1 = np.where(Z <= 0) 261 | remove_idx2 = np.where(Z > 10000) 262 | remove_idx3 = np.where(X > 10000) 263 | remove_idx4 = np.where(X < -10000) 264 | remove_idx5 = np.where(Y > 10000) 265 | remove_idx6 = np.where(Y < -10000) 266 | remove_idx = np.hstack((remove_idx1[0], remove_idx2[0], remove_idx3[0], remove_idx4[0], remove_idx5[0], remove_idx6[0])) 267 | 268 | pointcloud_1 = np.delete(pointcloud, remove_idx, 0) 269 | 270 | return pointcloud_1 271 | 272 | 273 | # 点云显示 274 | def view_cloud(pointcloud): 275 | cloud = pcl.PointCloud_PointXYZRGBA() 276 | cloud.from_array(pointcloud) 277 | 278 | try: 279 | visual = pcl.pcl_visualization.CloudViewing() 280 | visual.ShowColorACloud(cloud) 281 | v = True 282 | while v: 283 | v = not (visual.WasStopped()) 284 | except: 285 | pass 286 | 287 | 288 | if __name__ == '__main__': 289 | 290 | #i = 1 291 | #string = '' 292 | # 读取数据集的图片 293 | iml = cv2.imread('./yolo/left/left_37.bmp' ) # 左图 294 | imr = cv2.imread('./yolo/right/right_37.bmp' ) # 右图 295 | height, width = iml.shape[0:2] 296 | #***************************************************************** 297 | print("左图的尺寸{}:".format(iml.shape)) 298 | print("右图的尺寸{}:".format(imr.shape)) 299 | #***************************************************************** 300 | print("width = %d \n" % width) 301 | print("height = %d \n" % height) 302 | 303 | 304 | # 读取相机内参和外参 305 | config = stereoCamera() 306 | 307 | # 立体校正 308 | # 获取用于畸变校正和立体校正的映射矩阵以及用于计算像素空间坐标的重投影矩阵 309 | map1x, map1y, map2x, map2y, Q = getRectifyTransform(height, width, config) 310 | iml_rectified, imr_rectified = rectifyImage(iml, imr, map1x, map1y, map2x, map2y) 311 | 312 | print("Print Q!") 313 | print(Q[2,3]) 314 | 315 | # 绘制等间距平行线,检查立体校正的效果 316 | line = draw_line(iml_rectified, imr_rectified) 317 | cv2.imwrite('./yolo/1_j.png', line) 318 | 319 | # 消除畸变 320 | iml = undistortion(iml, config.cam_matrix_left, config.distortion_l) 321 | imr = undistortion(imr, config.cam_matrix_right, config.distortion_r) 322 | 323 | # 立体匹配 324 | iml_, imr_ = preprocess(iml, imr) # 预处理,一般可以削弱光照不均的影响,不做也可以 325 | 326 | iml_rectified_l, imr_rectified_r = rectifyImage(iml_, imr_, map1x, map1y, map2x, map2y) 327 | 328 | disp, _ = stereoMatchSGBM(iml_rectified_l, imr_rectified_r, True) 329 | cv2.imwrite('./yolo/1.png', disp) 330 | 331 | 332 | 333 | # 计算像素点的3D坐标(左相机坐标系下) 334 | points_3d = cv2.reprojectImageTo3D(disp, Q) # 可以使用上文的stereo_config.py给出的参数 335 | 336 | #points_3d = points_3d 337 | 338 | # 鼠标点击事件 339 | def onMouse(event, x, y, flags, param): 340 | if event == cv2.EVENT_LBUTTONDOWN: 341 | print('点 (%d, %d) 的三维坐标 (x:%.3fm, y:%.3fm, z:%.3fm)' % (x, y, points_3d[y, x, 0]/1000, points_3d[y, x, 1]/1000, points_3d[y, x, 2]/1000)) 342 | dis = ( (points_3d[y, x, 0] ** 2 + points_3d[y, x, 1] ** 2 + points_3d[y, x, 2] **2) ** 0.5) / 1000 343 | print('点 (%d, %d) 距离左摄像头的相对距离为 %0.3f m' %(x, y, dis) ) 344 | 345 | # 显示图片 346 | cv2.namedWindow("disparity",0) 347 | cv2.imshow("disparity", disp) 348 | cv2.setMouseCallback("disparity", onMouse, 0) 349 | 350 | 351 | 352 | # 构建点云--Point_XYZRGBA格式 353 | pointcloud = DepthColor2Cloud(points_3d, iml) 354 | 355 | # 显示点云 356 | view_cloud(pointcloud) 357 | 358 | cv2.waitKey(0) 359 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/new_BM.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | from stereo import stereo_config 5 | 6 | 7 | cv2.namedWindow("left") 8 | cv2.namedWindow("right") 9 | cv2.namedWindow("depth") 10 | cv2.moveWindow("left", 0, 0) 11 | cv2.moveWindow("right", 600, 0) 12 | cv2.createTrackbar("num", "depth", 0, 20, lambda x: None) 13 | cv2.createTrackbar("blockSize", "depth", 1, 25, lambda x: None) 14 | 15 | #camera1 = cv2.VideoCapture(0) 16 | #camera2 = cv2.VideoCapture(1) 17 | 18 | # 彩色图->灰度图 19 | def preprocess(img1, img2): 20 | if(img1.ndim == 3):#判断为三维数组 21 | img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY) # 通过OpenCV加载的图像通道顺序是BGR 22 | if(img2.ndim == 3): 23 | img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY) 24 | # 直方图均衡 25 | img1 = cv2.equalizeHist(img1) 26 | img2 = cv2.equalizeHist(img2) 27 | return img1, img2 28 | 29 | # 消除畸变 30 | def undistortion(image, camera_matrix, dist_coeff): 31 | undistortion_image = cv2.undistort(image, camera_matrix, dist_coeff) 32 | return undistortion_image 33 | 34 | # 获取畸变校正和立体校正的映射变换矩阵、重投影矩阵 35 | def getRectifyTransform(height,weight): 36 | Q = stereo_config.Q 37 | left_map1 = stereo_config.left_map1 38 | left_map2 = stereo_config.left_map2 39 | right_map1 = stereo_config.right_map1 40 | right_map2 = stereo_config.right_map2 41 | return Q,left_map1,left_map2,right_map1,right_map2 42 | 43 | # 畸变校正和立体校正 44 | def rectifyImage(image1,image2,left_map1,left_map2,rignt_map1,right_map2): 45 | rectifyed_img1 = cv2.remap(image1, stereo_config.left_map1, stereo_config.left_map2, cv2.INTER_LINEAR) 46 | rectifyed_img2 = cv2.remap(image2, stereo_config.right_map1, stereo_config.right_map2, cv2.INTER_LINEAR) 47 | return rectifyed_img1,rectifyed_img2 48 | 49 | #立体匹配算法 50 | def stereoMatchBM(left_image, right_image): 51 | # 两个trackbar用来调节不同的参数查看效果 52 | num = cv2.getTrackbarPos("num", "depth") 53 | blockSize = cv2.getTrackbarPos("blockSize", "depth") 54 | if blockSize % 2 == 0: 55 | blockSize += 1 56 | if blockSize < 5: 57 | blockSize = 5 58 | # 根据Block Maching方法生成差异图(opencv里也提供了SGBM/Semi-Global Block Matching算法,有兴趣可以试试) 59 | stereo = cv2.StereoBM_create(numDisparities=16 * num, blockSize=blockSize) 60 | # stereo = cv2.StereoSGBM_create(numDisparities=16 * num, blockSize=blockSize) 61 | disparity = stereo.compute(left_image, right_image) 62 | disp = cv2.normalize(disparity, disparity, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U) 63 | return disp 64 | 65 | # 视差计算 66 | def stereoMatchSGBM(left_image, right_image, down_scale=False): 67 | # SGBM匹配参数设置 68 | if left_image.ndim == 2: 69 | img_channels = 1 70 | else: 71 | img_channels = 3 72 | blockSize = 3 73 | paraml = {'minDisparity': 0, 74 | 'numDisparities': 128, 75 | 'blockSize': blockSize, 76 | 'P1': 8 * img_channels * blockSize ** 2, 77 | 'P2': 32 * img_channels * blockSize ** 2, 78 | 'disp12MaxDiff': 1, 79 | 'preFilterCap': 63, 80 | 'uniquenessRatio': 15, 81 | 'speckleWindowSize': 100, 82 | 'speckleRange': 2, 83 | 'mode': cv2.STEREO_SGBM_MODE_SGBM_3WAY 84 | } 85 | 86 | # 构建SGBM对象 87 | left_matcher = cv2.StereoSGBM_create(**paraml) 88 | paramr = paraml 89 | paramr['minDisparity'] = -paraml['numDisparities'] 90 | right_matcher = cv2.StereoSGBM_create(**paramr) 91 | 92 | # 计算视差图 93 | size = (left_image.shape[1], left_image.shape[0]) 94 | if down_scale == False: 95 | disparity_left = left_matcher.compute(left_image, right_image) 96 | disparity_right = right_matcher.compute(right_image, left_image) 97 | 98 | else: 99 | left_image_down = 
cv2.pyrDown(left_image) 100 | right_image_down = cv2.pyrDown(right_image) 101 | factor = left_image.shape[1] / left_image_down.shape[1] 102 | 103 | disparity_left_half = left_matcher.compute(left_image_down, right_image_down) 104 | disparity_right_half = right_matcher.compute(right_image_down, left_image_down) 105 | disparity_left = cv2.resize(disparity_left_half, size, interpolation=cv2.INTER_AREA) 106 | disparity_right = cv2.resize(disparity_right_half, size, interpolation=cv2.INTER_AREA) 107 | disparity_left = factor * disparity_left 108 | disparity_right = factor * disparity_right 109 | 110 | # 真实视差(因为SGBM算法得到的视差是×16的) 111 | trueDisp_left = disparity_left.astype(np.float32) / 16. 112 | trueDisp_right = disparity_right.astype(np.float32) / 16. 113 | 114 | return trueDisp_left, trueDisp_right 115 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/result/left/['bottle']_3.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxy293/Camera-YOLOX/a7b3e9bc4daa6e0a879284833740ee5e714c39e1/YOLOX-pytorch-camera/stereo/result/left/['bottle']_3.bmp -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/result/left/['bottle']_4.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxy293/Camera-YOLOX/a7b3e9bc4daa6e0a879284833740ee5e714c39e1/YOLOX-pytorch-camera/stereo/result/left/['bottle']_4.bmp -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/stereo_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | # 双目相机参数 5 | cam_matrix_left = np.array([ [824.2550965681365, -6.828558820006515, 310.1707580237725], 6 | [0, 815.4139580995355, 249.1939684854760], 7 | [0, 0, 1]]) 8 | # 右相机内参 9 | cam_matrix_right = np.array([ [822.9498573074617, -6.900112764184275,297.7290467865062], 10 | [0, 818.8382890544332,222.9237421035944], 11 | [0, 0 ,1]]) 12 | 13 | # 左右相机畸变系数:[k1, k2, p1, p2, k3] 14 | distortion_l = np.array([-0.246284288363976,-0.361503832978841, -0.010028059937390,-0.015986382957965, 74.621557080121280]) 15 | distortion_r = np.array([-0.246284288363976,-0.361503832978841,-0.010618446977411,-0.026827033332358,73.641634617150690]) 16 | #-0.034424624717455,-5.965881599855729 17 | # 旋转矩阵 18 | R = np.array([[0.999989229476667,-0.001079697704587,-0.004513887850642], 19 | [9.617580928426970e-04,0.999660199821240,-0.026049182611556], 20 | [0.004540479273416,0.026044560780057,0.999650471365738]]) 21 | # 平移矩阵 22 | T = np.array([[-79.431038317632310],[0.900649741211188],[1.327465851666942]]) 23 | 24 | # 焦距 25 | focal_length = 819.83445 # 默认值,一般取立体校正后的重投影矩阵Q中的 Q[2,3] 26 | 27 | # 基线距离 28 | baseline = 79.431038317632310 # 单位:mm, 为平移向量的第一个参数(取绝对值) 29 | width = 640 30 | heigh = 480 31 | size = (640,480) 32 | # 进行立体更正 33 | R1, R2, P1, P2, Q, validPixROI1, validPixROI2 = cv2.stereoRectify(cam_matrix_left,distortion_l, 34 | cam_matrix_right,distortion_r, size, 35 | R,T) 36 | # 计算更正map 37 | left_map1, left_map2 = cv2.initUndistortRectifyMap(cam_matrix_left,distortion_l, R1, P1, size, 38 | cv2.CV_16SC2) 39 | right_map1, right_map2 = cv2.initUndistortRectifyMap(cam_matrix_right,distortion_r, R2, P2, size, 40 | cv2.CV_16SC2) 41 | 42 | -------------------------------------------------------------------------------- 
/YOLOX-pytorch-camera/stereo/stereoconfig_040_2-原代码.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | ####################仅仅是一个示例################################### 5 | 6 | 7 | # 双目相机参数 8 | class stereoCamera(object): 9 | def __init__(self): 10 | # 左相机内参 11 | self.cam_matrix_left = np.array([ [480.1827401743536, -3.996193819153865, 313.2484204798977], 12 | [0, 477.3816416456625, 240.3265232955334], 13 | [0, 0, 1]]) 14 | # 右相机内参 15 | self.cam_matrix_right = np.array([ [487.3771447246162, -5.130691903427561, 274.7902060436339], 16 | [ 0, 485.3107014237426, 252.7409169924596], 17 | [ 0, 0, 1]]) 18 | 19 | # 左右相机畸变系数:[k1, k2, p1, p2, k3] 20 | self.distortion_l = np.array([[0.0891,-0.4382,0,0,1.3420]]) 21 | self.distortion_r = np.array([[0.0461050900425548, -0.0472360135958562, 0,0,0.0220064865167176]]) 22 | 23 | # 旋转矩阵 24 | self.R = np.array([ [0.996924143460104, -0.001204726123944, -0.078363261936352], 25 | [0.002139716549006, 0.999927513315018, 0.011848617165509], 26 | [0.078343307304638, -0.011979847687312, 0.996854457506190]]) 27 | # 平移矩阵 28 | self.T = np.array([[-44.046409551751990], [0.058862432865362], [6.362708016358077]]) 29 | 30 | # 焦距 31 | self.focal_length = 478.78219 # 默认值,一般取立体校正后的重投影矩阵Q中的 Q[2,3] 32 | 33 | # 基线距离 34 | self.baseline = 44.046409551751990 # 单位:mm, 为平移向量的第一个参数(取绝对值) 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/stereoconfig_040_2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | ####################仅仅是一个示例################################### 5 | 6 | 7 | # 双目相机参数 8 | class stereoCamera(object): 9 | def __init__(self): 10 | # 左相机内参 11 | self.cam_matrix_left = np.array([ [824.2550965681365, -6.828558820006515, 310.1707580237725], 12 | [0, 815.4139580995355, 249.1939684854760], 13 | [0, 0, 1]]) 14 | # 右相机内参 15 | self.cam_matrix_right = np.array([ [822.9498573074617, -6.900112764184275,297.7290467865062], 16 | [0, 818.8382890544332,222.9237421035944], 17 | [0, 0 ,1]]) 18 | 19 | # 左右相机畸变系数:[k1, k2, p1, p2, k3] 20 | self.distortion_l = np.array([-0.246284288363976,-0.361503832978841, -0.010028059937390,-0.015986382957965, 74.621557080121280]) 21 | self.distortion_r = np.array([-0.246284288363976,-0.361503832978841,-0.010618446977411,-0.026827033332358,73.641634617150690]) 22 | #-0.034424624717455,-5.965881599855729 23 | # 旋转矩阵 24 | self.R = np.array([[0.999989229476667, -0.001079697704587, -0.004513887850642], 25 | [9.617580928426970e-04, 0.999660199821240, -0.026049182611556], 26 | [0.004540479273416, 0.026044560780057, 0.999650471365738]]) 27 | # 平移矩阵 28 | self.T = np.array([[-79.431038317632310],[0.900649741211188],[1.327465851666942]]) 29 | 30 | # 焦距 31 | self.focal_length = 819.83445 # 默认值,一般取立体校正后的重投影矩阵Q中的 Q[2,3] 32 | 33 | # 基线距离 34 | self.baseline = 79.431038317632310 # 单位:mm, 为平移向量的第一个参数(取绝对值) 35 | width = 640 36 | heigh = 480 37 | #——————————————————————新加的———————————————————————————————————— 38 | # ( R1, R2, P1, P2, Q, roi1, roi2) = \ 39 | # cv2.stereoRectify(self.cam_matrix_left, self.distortion_l, self.cam_matrix_right, self.distortion_r, np.array([width,heigh]), self.R,self.T) # 计算旋转矩阵和投影矩阵 40 | # 41 | # 42 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/yolo/xiaozheng_hou.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cxy293/Camera-YOLOX/a7b3e9bc4daa6e0a879284833740ee5e714c39e1/YOLOX-pytorch-camera/stereo/yolo/xiaozheng_hou.jpg -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/yolo/xiaozheng_hou1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxy293/Camera-YOLOX/a7b3e9bc4daa6e0a879284833740ee5e714c39e1/YOLOX-pytorch-camera/stereo/yolo/xiaozheng_hou1.jpg -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/yolo/xiaozheng_qian.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxy293/Camera-YOLOX/a7b3e9bc4daa6e0a879284833740ee5e714c39e1/YOLOX-pytorch-camera/stereo/yolo/xiaozheng_qian.jpg -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/stereo/yolo/xiaozheng_qian1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxy293/Camera-YOLOX/a7b3e9bc4daa6e0a879284833740ee5e714c39e1/YOLOX-pytorch-camera/stereo/yolo/xiaozheng_qian1.jpg -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/summary.py: -------------------------------------------------------------------------------- 1 | #--------------------------------------------# 2 | # 该部分代码用于看网络结构 3 | #--------------------------------------------# 4 | import torch 5 | from torchsummary import summary 6 | 7 | from nets.yolo import YoloBody 8 | 9 | if __name__ == "__main__": 10 | # 需要使用device来指定网络在GPU还是CPU运行 11 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 12 | m = YoloBody(80, 's').to(device) 13 | 14 | summary(m, input_size=(3, 640, 640)) 15 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/train.py: -------------------------------------------------------------------------------- 1 | #-------------------------------------# 2 | # 对数据集进行训练 3 | #-------------------------------------# 4 | import numpy as np 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from torch.utils.data import DataLoader 10 | 11 | from nets.yolo import YoloBody 12 | from nets.yolo_training import (YOLOLoss, get_lr_scheduler, set_optimizer_lr, 13 | weights_init) 14 | from utils.callbacks import LossHistory 15 | from utils.dataloader import YoloDataset, yolo_dataset_collate 16 | from utils.utils import get_classes 17 | from utils.utils_fit import fit_one_epoch 18 | import os 19 | 20 | if __name__ == "__main__": 21 | #---------------------------------# 22 | # Cuda 是否使用Cuda 23 | # 没有GPU可以设置成False 24 | #---------------------------------# 25 | Cuda = False 26 | #---------------------------------------------------------------------# 27 | # classes_path 指向model_data下的txt,与自己训练的数据集相关 28 | # 训练前一定要修改classes_path,使其对应自己的数据集 29 | #---------------------------------------------------------------------# 30 | classes_path = 'model_data/coco_classes.txt' 31 | 32 | # 此处使用的是整个模型的权重,因此是在train.py进行加载的。 33 | # 如果想要让模型从0开始训练,则设置model_path = '',下面的Freeze_Train = Fasle,此时从0开始训练,且没有冻结主干的过程。 34 | # 35 | # 一般来讲,网络从0开始的训练效果会很差,因为权值太过随机,特征提取效果不明显,因此非常、非常、非常不建议大家从0开始训练! 
36 | #----------------------------------------------------------------------------------------------------------------------------# 37 | model_path = 'model_data/yolox_nano.pth' 38 | #------------------------------------------------------# 39 | # input_shape 输入的shape大小,一定要是32的倍数 40 | #------------------------------------------------------# 41 | input_shape = [416,416] 42 | #------------------------------------------------------# 43 | # 所使用的YoloX的版本。nano、tiny、s、m、l、x 44 | #------------------------------------------------------# 45 | phi = 'nano' 46 | #------------------------------------------------------# 47 | # Yolov4的tricks应用 48 | # mosaic 马赛克数据增强 49 | # 参考YoloX,由于Mosaic生成的训练图片, 50 | # 远远脱离自然图片的真实分布。 51 | # 本代码会在训练结束前的N个epoch自动关掉Mosaic 52 | # 100个世代会关闭30个世代(比例可在dataloader.py调整) 53 | # 54 | # 余弦退火算法的参数放到下面的lr_decay_type中设置 55 | #------------------------------------------------------# 56 | mosaic = False 57 | 58 | #----------------------------------------------------------------------------------------------------------------------------# 59 | # 训练分为两个阶段,分别是冻结阶段和解冻阶段。设置冻结阶段是为了满足机器性能不足的同学的训练需求。 60 | # 冻结训练需要的显存较小,显卡非常差的情况下,可设置Freeze_Epoch等于UnFreeze_Epoch,此时仅仅进行冻结训练。 61 | # 62 | # 在此提供若干参数设置建议,各位训练者根据自己的需求进行灵活调整: 63 | # (一)从整个模型的预训练权重开始训练: 64 | # Init_Epoch = 0,Freeze_Epoch = 50,UnFreeze_Epoch = 100,Freeze_Train = True(默认参数) 65 | # Init_Epoch = 0,UnFreeze_Epoch = 100,Freeze_Train = False(不冻结训练) 66 | # 其中:UnFreeze_Epoch可以在100-300之间调整。optimizer_type = 'sgd',Init_lr = 1e-2。 67 | # (二)从0开始训练: 68 | # Init_Epoch = 0,UnFreeze_Epoch >= 300,Unfreeze_batch_size >= 16,Freeze_Train = False(不冻结训练) 69 | # 其中:UnFreeze_Epoch尽量不小于300。optimizer_type = 'sgd',Init_lr = 1e-2,mosaic = True。 70 | # (三)batch_size的设置: 71 | # 在显卡能够接受的范围内,以大为好。显存不足与数据集大小无关,提示显存不足(OOM或者CUDA out of memory)请调小batch_size。 72 | # 受到BatchNorm层影响,batch_size最小为2,不能为1。 73 | # 正常情况下Freeze_batch_size建议为Unfreeze_batch_size的1-2倍。不建议设置的差距过大,因为关系到学习率的自动调整。 74 | #----------------------------------------------------------------------------------------------------------------------------# 75 | #------------------------------------------------------------------# 76 | # 冻结阶段训练参数 77 | # 此时模型的主干被冻结了,特征提取网络不发生改变 78 | # 占用的显存较小,仅对网络进行微调 79 | # Init_Epoch 模型当前开始的训练世代,其值可以大于Freeze_Epoch,如设置: 80 | # Init_Epoch = 60、Freeze_Epoch = 50、UnFreeze_Epoch = 100 81 | # 会跳过冻结阶段,直接从60代开始,并调整对应的学习率。 82 | # (断点续练时使用) 83 | # Freeze_Epoch 模型冻结训练的Freeze_Epoch 84 | # (当Freeze_Train=False时失效) 85 | # Freeze_batch_size 模型冻结训练的batch_size 86 | # (当Freeze_Train=False时失效) 87 | #------------------------------------------------------------------# 88 | Init_Epoch = 0 89 | Freeze_Epoch = 2 90 | Freeze_batch_size = 1 91 | #------------------------------------------------------------------# 92 | # 解冻阶段训练参数 93 | # 此时模型的主干不被冻结了,特征提取网络会发生改变 94 | # 占用的显存较大,网络所有的参数都会发生改变 95 | # UnFreeze_Epoch 模型总共训练的epoch 96 | # Unfreeze_batch_size 模型在解冻后的batch_size 97 | #------------------------------------------------------------------# 98 | UnFreeze_Epoch = 15 99 | Unfreeze_batch_size = 1 100 | #------------------------------------------------------------------# 101 | # Freeze_Train 是否进行冻结训练 102 | # 默认先冻结主干训练后解冻训练。 103 | #------------------------------------------------------------------# 104 | Freeze_Train = True 105 | 106 | #------------------------------------------------------------------# 107 | # 其它训练参数:学习率、优化器、学习率下降有关 108 | #------------------------------------------------------------------# 109 | #------------------------------------------------------------------# 110 | # Init_lr 模型的最大学习率 111 | # 
Min_lr 模型的最小学习率,默认为最大学习率的0.01 112 | #------------------------------------------------------------------# 113 | Init_lr = 1e-2 114 | Min_lr = Init_lr * 0.01 115 | #------------------------------------------------------------------# 116 | # optimizer_type 使用到的优化器种类,可选的有adam、sgd 117 | # 当使用Adam优化器时建议设置 Init_lr=1e-3 118 | # 当使用SGD优化器时建议设置 Init_lr=1e-2 119 | # momentum 优化器内部使用到的momentum参数 120 | # weight_decay 权值衰减,可防止过拟合 121 | # adam会导致weight_decay错误,使用adam时建议设置为0。 122 | #------------------------------------------------------------------# 123 | optimizer_type = "sgd" 124 | momentum = 0.937 125 | weight_decay = 5e-4 126 | #------------------------------------------------------------------# 127 | # lr_decay_type 使用到的学习率下降方式,可选的有step、cos 128 | #------------------------------------------------------------------# 129 | lr_decay_type = "cos" 130 | #------------------------------------------------------------------# 131 | # save_period 多少个epoch保存一次权值,默认每个世代都保存 132 | #------------------------------------------------------------------# 133 | save_period = 1 134 | #------------------------------------------------------------------# 135 | # save_dir 权值与日志文件保存的文件夹 136 | #------------------------------------------------------------------# 137 | save_dir = 'logs' 138 | #------------------------------------------------------------------# 139 | # num_workers 用于设置是否使用多线程读取数据 140 | # 开启后会加快数据读取速度,但是会占用更多内存 141 | # 内存较小的电脑可以设置为2或者0 142 | #------------------------------------------------------------------# 143 | num_workers = 2 144 | #---------------------------------------------------------------- 145 | resume = False 146 | resume_path = 'G:\\pycharm\\study_code\\LYD-Champion-YOLOX\\YOLOX-gai_jin\\resume\\checkpoint\\ckpt_best_2.pth' 147 | #---------------------------------------------------------------- 148 | 149 | #----------------------------------------------------# 150 | # 获得图片路径和标签 151 | #----------------------------------------------------# 152 | train_annotation_path = '2007_train.txt' 153 | val_annotation_path = '2007_val.txt' 154 | 155 | #----------------------------------------------------# 156 | # 获取classes和anchor 157 | #----------------------------------------------------# 158 | class_names, num_classes = get_classes(classes_path) 159 | 160 | #------------------------------------------------------# 161 | # 创建yolo模型 162 | #------------------------------------------------------# 163 | model = YoloBody(num_classes, phi) 164 | weights_init(model) 165 | if model_path != '': 166 | #------------------------------------------------------# 167 | # 权值文件请看README,百度网盘下载 168 | #------------------------------------------------------# 169 | print('Load weights {}.'.format(model_path)) 170 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 171 | model_dict = model.state_dict() 172 | pretrained_dict = torch.load(model_path, map_location = device) 173 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)} 174 | model_dict.update(pretrained_dict) 175 | model.load_state_dict(model_dict) 176 | 177 | yolo_loss = YOLOLoss(num_classes) 178 | loss_history = LossHistory(save_dir, model, input_shape=input_shape) 179 | 180 | model_train = model.train() 181 | if Cuda: 182 | model_train = torch.nn.DataParallel(model) 183 | cudnn.benchmark = True 184 | model_train = model_train.cuda() 185 | 186 | #---------------------------# 187 | # 读取数据集对应的txt 188 | #---------------------------# 189 | with open(train_annotation_path, encoding='utf-8') as f: 190 | train_lines 
= f.readlines() 191 | with open(val_annotation_path, encoding='utf-8') as f: 192 | val_lines = f.readlines() 193 | num_train = len(train_lines) 194 | num_val = len(val_lines) 195 | 196 | #------------------------------------------------------# 197 | # 主干特征提取网络特征通用,冻结训练可以加快训练速度 198 | # 也可以在训练初期防止权值被破坏。 199 | # Init_Epoch为起始世代 200 | # Freeze_Epoch为冻结训练的世代 201 | # UnFreeze_Epoch总训练世代 202 | # 提示OOM或者显存不足请调小Batch_size 203 | #------------------------------------------------------# 204 | if True: 205 | UnFreeze_flag = False 206 | #------------------------------------# 207 | # 冻结一定部分训练 208 | #------------------------------------# 209 | if Freeze_Train: 210 | for param in model.backbone.parameters(): 211 | param.requires_grad = False 212 | 213 | #-------------------------------------------------------------------# 214 | # 如果不冻结训练的话,直接设置batch_size为Unfreeze_batch_size 215 | #-------------------------------------------------------------------# 216 | batch_size = Freeze_batch_size if Freeze_Train else Unfreeze_batch_size 217 | 218 | #-------------------------------------------------------------------# 219 | # 判断当前batch_size与64的差别,自适应调整学习率 220 | #-------------------------------------------------------------------# 221 | nbs = 64 222 | Init_lr_fit = max(batch_size / nbs * Init_lr, 3e-4) 223 | Min_lr_fit = max(batch_size / nbs * Min_lr, 3e-6) 224 | 225 | #---------------------------------------# 226 | # 根据optimizer_type选择优化器 227 | #---------------------------------------# 228 | pg0, pg1, pg2 = [], [], [] 229 | for k, v in model.named_modules(): 230 | if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter): 231 | pg2.append(v.bias) 232 | if isinstance(v, nn.BatchNorm2d) or "bn" in k: 233 | pg0.append(v.weight) 234 | elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter): 235 | pg1.append(v.weight) 236 | optimizer = { 237 | 'adam' : optim.Adam(pg0, Init_lr_fit, betas = (momentum, 0.999)), 238 | 'sgd' : optim.SGD(pg0, Init_lr_fit, momentum = momentum, nesterov=True) 239 | }[optimizer_type] 240 | optimizer.add_param_group({"params": pg1, "weight_decay": weight_decay}) 241 | optimizer.add_param_group({"params": pg2}) 242 | 243 | #---------------------------------------# 244 | # 获得学习率下降的公式 245 | #---------------------------------------# 246 | lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch) 247 | 248 | #---------------------------------------# 249 | # 判断每一个世代的长度 250 | #---------------------------------------# 251 | epoch_step = num_train // batch_size 252 | epoch_step_val = num_val // batch_size 253 | 254 | if epoch_step == 0 or epoch_step_val == 0: 255 | raise ValueError("数据集过小,无法继续进行训练,请扩充数据集。") 256 | #---------------------------------------------------------------------------------------- 257 | if resume: 258 | ckpt = torch.load(resume_path) 259 | begin_epoch = ckpt['epoch'] 260 | print(f"begin_epoch: {begin_epoch}") 261 | optimizer.load_state_dict(ckpt['optimizer']) 262 | start_epoch = begin_epoch 263 | #----------------------------------------------------------------------------------- 264 | #---------------------------------------# 265 | #------训练 266 | #---------------------------------------# 267 | train_dataset = YoloDataset(train_lines, input_shape, num_classes, epoch_length = UnFreeze_Epoch, mosaic=mosaic, train = True) 268 | val_dataset = YoloDataset(val_lines, input_shape, num_classes, epoch_length = UnFreeze_Epoch, mosaic=False, train = False) 269 | gen = DataLoader(train_dataset, shuffle = True, batch_size = batch_size, num_workers = 
num_workers, pin_memory=True, 270 | drop_last=True, collate_fn=yolo_dataset_collate) 271 | gen_val = DataLoader(val_dataset , shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory=True, 272 | drop_last=True, collate_fn=yolo_dataset_collate) 273 | 274 | #---------------------------------------# 275 | # 开始模型训练 276 | #---------------------------------------# 277 | for epoch in range(Init_Epoch, UnFreeze_Epoch): 278 | #---------------------------------------# 279 | # 如果模型有冻结学习部分 280 | # 则解冻,并设置参数 281 | #---------------------------------------# 282 | if epoch >= Freeze_Epoch and not UnFreeze_flag and Freeze_Train: 283 | batch_size = Unfreeze_batch_size 284 | 285 | #-------------------------------------------------------------------# 286 | # 判断当前batch_size与64的差别,自适应调整学习率 287 | #-------------------------------------------------------------------# 288 | nbs = 64 289 | Init_lr_fit = max(batch_size / nbs * Init_lr, 3e-4) 290 | Min_lr_fit = max(batch_size / nbs * Min_lr, 3e-6) 291 | #---------------------------------------# 292 | # 获得学习率下降的公式 293 | #---------------------------------------# 294 | lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch) 295 | 296 | for param in model.backbone.parameters(): 297 | param.requires_grad = True 298 | 299 | epoch_step = num_train // batch_size 300 | epoch_step_val = num_val // batch_size 301 | 302 | if epoch_step == 0 or epoch_step_val == 0: 303 | raise ValueError("数据集过小,无法继续进行训练,请扩充数据集。") 304 | 305 | gen = DataLoader(train_dataset, shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory=True, 306 | drop_last=True, collate_fn=yolo_dataset_collate) 307 | gen_val = DataLoader(val_dataset , shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory=True, 308 | drop_last=True, collate_fn=yolo_dataset_collate) 309 | 310 | UnFreeze_flag = True 311 | 312 | gen.dataset.epoch_now = epoch 313 | gen_val.dataset.epoch_now = epoch 314 | 315 | set_optimizer_lr(optimizer, lr_scheduler_func, epoch) 316 | 317 | fit_one_epoch(model_train, model, yolo_loss, loss_history, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, UnFreeze_Epoch, Cuda, save_period, save_dir) 318 | #———————————————————————————————————————————————————————————————————————— 319 | checkpoint = { 320 | "model": model.state_dict(), 321 | 'optimizer': optimizer.state_dict(), 322 | "epoch": epoch 323 | } 324 | if not os.path.isdir("./resume/checkpoint"): 325 | os.mkdir("./resume/checkpoint") 326 | torch.save(checkpoint, './resume/checkpoint/ckpt_best_%s.pth' % (str(epoch))) 327 | print(f"模型已保存,路径为:./resume/checkpoint/ckpt_best_{str(epoch)}.pth") 328 | # ———————————————————————————————————————————————————————————————————————————————————————————————————————————————————————— 329 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/utils/callbacks.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | import torch 5 | import matplotlib 6 | matplotlib.use('Agg') 7 | import scipy.signal 8 | from matplotlib import pyplot as plt 9 | from torch.utils.tensorboard import SummaryWriter 10 | 11 | 12 | class LossHistory(): 13 | def __init__(self, log_dir, model, input_shape): 14 | time_str 
= datetime.datetime.strftime(datetime.datetime.now(),'%Y_%m_%d_%H_%M_%S') 15 | self.log_dir = os.path.join(log_dir, "loss_" + str(time_str)) 16 | self.losses = [] 17 | self.val_loss = [] 18 | 19 | os.makedirs(self.log_dir) 20 | self.writer = SummaryWriter(self.log_dir) 21 | try: 22 | dummy_input = torch.randn(2, 3, input_shape[0], input_shape[1]) 23 | self.writer.add_graph(model, dummy_input) 24 | except: 25 | pass 26 | 27 | def append_loss(self, epoch, loss, val_loss): 28 | if not os.path.exists(self.log_dir): 29 | os.makedirs(self.log_dir) 30 | 31 | self.losses.append(loss) 32 | self.val_loss.append(val_loss) 33 | 34 | with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f: 35 | f.write(str(loss)) 36 | f.write("\n") 37 | with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f: 38 | f.write(str(val_loss)) 39 | f.write("\n") 40 | 41 | self.writer.add_scalar('loss', loss, epoch) 42 | self.writer.add_scalar('val_loss', val_loss, epoch) 43 | self.loss_plot() 44 | 45 | def loss_plot(self): 46 | iters = range(len(self.losses)) 47 | 48 | plt.figure() 49 | plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss') 50 | plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss') 51 | try: 52 | if len(self.losses) < 25: 53 | num = 5 54 | else: 55 | num = 15 56 | 57 | plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss') 58 | plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss') 59 | except: 60 | pass 61 | 62 | plt.grid(True) 63 | plt.xlabel('Epoch') 64 | plt.ylabel('Loss') 65 | plt.legend(loc="upper right") 66 | 67 | plt.savefig(os.path.join(self.log_dir, "epoch_loss.png")) 68 | 69 | plt.cla() 70 | plt.close("all") 71 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/utils/dataloader.py: -------------------------------------------------------------------------------- 1 | from random import sample, shuffle 2 | 3 | import cv2 4 | import numpy as np 5 | from PIL import Image 6 | from torch.utils.data.dataset import Dataset 7 | 8 | from utils.utils import cvtColor, preprocess_input 9 | 10 | 11 | class YoloDataset(Dataset): 12 | def __init__(self, annotation_lines, input_shape, num_classes, epoch_length, mosaic, train, mosaic_ratio = 0.7): 13 | super(YoloDataset, self).__init__() 14 | self.annotation_lines = annotation_lines 15 | self.input_shape = input_shape 16 | self.num_classes = num_classes 17 | self.epoch_length = epoch_length 18 | self.mosaic = mosaic 19 | self.train = train 20 | self.mosaic_ratio = mosaic_ratio 21 | 22 | self.epoch_now = -1 23 | self.length = len(self.annotation_lines) 24 | 25 | def __len__(self): 26 | return self.length 27 | 28 | def __getitem__(self, index): 29 | index = index % self.length 30 | 31 | #---------------------------------------------------# 32 | # 训练时进行数据的随机增强 33 | # 验证时不进行数据的随机增强 34 | #---------------------------------------------------# 35 | if self.mosaic: 36 | if self.rand() < 0.5 and self.epoch_now < self.epoch_length * self.mosaic_ratio: 37 | lines = sample(self.annotation_lines, 3) 38 | lines.append(self.annotation_lines[index]) 39 | shuffle(lines) 40 | image, box = self.get_random_data_with_Mosaic(lines, self.input_shape) 41 | else: 42 | image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train) 43 | else: 44 | image, box = 
self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train) 45 | image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1)) 46 | box = np.array(box, dtype=np.float32) 47 | if len(box) != 0: 48 | box[:, 2:4] = box[:, 2:4] - box[:, 0:2] 49 | box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2 50 | return image, box 51 | 52 | def rand(self, a=0, b=1): 53 | return np.random.rand()*(b-a) + a 54 | 55 | def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True): 56 | line = annotation_line.split() 57 | #------------------------------# 58 | # 读取图像并转换成RGB图像 59 | #------------------------------# 60 | image = Image.open(line[0]) 61 | image = cvtColor(image) 62 | #------------------------------# 63 | # 获得图像的高宽与目标高宽 64 | #------------------------------# 65 | iw, ih = image.size 66 | h, w = input_shape 67 | #------------------------------# 68 | # 获得预测框 69 | #------------------------------# 70 | box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) 71 | 72 | if not random: 73 | scale = min(w/iw, h/ih) 74 | nw = int(iw*scale) 75 | nh = int(ih*scale) 76 | dx = (w-nw)//2 77 | dy = (h-nh)//2 78 | 79 | #---------------------------------# 80 | # 将图像多余的部分加上灰条 81 | #---------------------------------# 82 | image = image.resize((nw,nh), Image.BICUBIC) 83 | new_image = Image.new('RGB', (w,h), (128,128,128)) 84 | new_image.paste(image, (dx, dy)) 85 | image_data = np.array(new_image, np.float32) 86 | 87 | #---------------------------------# 88 | # 对真实框进行调整 89 | #---------------------------------# 90 | if len(box)>0: 91 | np.random.shuffle(box) 92 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 93 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 94 | box[:, 0:2][box[:, 0:2]<0] = 0 95 | box[:, 2][box[:, 2]>w] = w 96 | box[:, 3][box[:, 3]>h] = h 97 | box_w = box[:, 2] - box[:, 0] 98 | box_h = box[:, 3] - box[:, 1] 99 | box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box 100 | 101 | return image_data, box 102 | 103 | #------------------------------------------# 104 | # 对图像进行缩放并且进行长和宽的扭曲 105 | #------------------------------------------# 106 | new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter) 107 | scale = self.rand(.25, 2) 108 | if new_ar < 1: 109 | nh = int(scale*h) 110 | nw = int(nh*new_ar) 111 | else: 112 | nw = int(scale*w) 113 | nh = int(nw/new_ar) 114 | image = image.resize((nw,nh), Image.BICUBIC) 115 | 116 | #------------------------------------------# 117 | # 将图像多余的部分加上灰条 118 | #------------------------------------------# 119 | dx = int(self.rand(0, w-nw)) 120 | dy = int(self.rand(0, h-nh)) 121 | new_image = Image.new('RGB', (w,h), (128,128,128)) 122 | new_image.paste(image, (dx, dy)) 123 | image = new_image 124 | 125 | #------------------------------------------# 126 | # 翻转图像 127 | #------------------------------------------# 128 | flip = self.rand()<.5 129 | if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT) 130 | 131 | image_data = np.array(image, np.uint8) 132 | #---------------------------------# 133 | # 对图像进行色域变换 134 | # 计算色域变换的参数 135 | #---------------------------------# 136 | r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1 137 | #---------------------------------# 138 | # 将图像转到HSV上 139 | #---------------------------------# 140 | hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV)) 141 | dtype = image_data.dtype 142 | #---------------------------------# 143 | # 应用变换 144 | #---------------------------------# 145 | x = np.arange(0, 
256, dtype=r.dtype) 146 | lut_hue = ((x * r[0]) % 180).astype(dtype) 147 | lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) 148 | lut_val = np.clip(x * r[2], 0, 255).astype(dtype) 149 | 150 | image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) 151 | image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB) 152 | 153 | #---------------------------------# 154 | # 对真实框进行调整 155 | #---------------------------------# 156 | if len(box)>0: 157 | np.random.shuffle(box) 158 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 159 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 160 | if flip: box[:, [0,2]] = w - box[:, [2,0]] 161 | box[:, 0:2][box[:, 0:2]<0] = 0 162 | box[:, 2][box[:, 2]>w] = w 163 | box[:, 3][box[:, 3]>h] = h 164 | box_w = box[:, 2] - box[:, 0] 165 | box_h = box[:, 3] - box[:, 1] 166 | box = box[np.logical_and(box_w>1, box_h>1)] 167 | 168 | return image_data, box 169 | 170 | def merge_bboxes(self, bboxes, cutx, cuty): 171 | merge_bbox = [] 172 | for i in range(len(bboxes)): 173 | for box in bboxes[i]: 174 | tmp_box = [] 175 | x1, y1, x2, y2 = box[0], box[1], box[2], box[3] 176 | 177 | if i == 0: 178 | if y1 > cuty or x1 > cutx: 179 | continue 180 | if y2 >= cuty and y1 <= cuty: 181 | y2 = cuty 182 | if x2 >= cutx and x1 <= cutx: 183 | x2 = cutx 184 | 185 | if i == 1: 186 | if y2 < cuty or x1 > cutx: 187 | continue 188 | if y2 >= cuty and y1 <= cuty: 189 | y1 = cuty 190 | if x2 >= cutx and x1 <= cutx: 191 | x2 = cutx 192 | 193 | if i == 2: 194 | if y2 < cuty or x2 < cutx: 195 | continue 196 | if y2 >= cuty and y1 <= cuty: 197 | y1 = cuty 198 | if x2 >= cutx and x1 <= cutx: 199 | x1 = cutx 200 | 201 | if i == 3: 202 | if y1 > cuty or x2 < cutx: 203 | continue 204 | if y2 >= cuty and y1 <= cuty: 205 | y2 = cuty 206 | if x2 >= cutx and x1 <= cutx: 207 | x1 = cutx 208 | tmp_box.append(x1) 209 | tmp_box.append(y1) 210 | tmp_box.append(x2) 211 | tmp_box.append(y2) 212 | tmp_box.append(box[-1]) 213 | merge_bbox.append(tmp_box) 214 | return merge_bbox 215 | 216 | def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4): 217 | h, w = input_shape 218 | min_offset_x = self.rand(0.3, 0.7) 219 | min_offset_y = self.rand(0.3, 0.7) 220 | 221 | image_datas = [] 222 | box_datas = [] 223 | index = 0 224 | for line in annotation_line: 225 | #---------------------------------# 226 | # 每一行进行分割 227 | #---------------------------------# 228 | line_content = line.split() 229 | #---------------------------------# 230 | # 打开图片 231 | #---------------------------------# 232 | image = Image.open(line_content[0]) 233 | image = cvtColor(image) 234 | 235 | #---------------------------------# 236 | # 图片的大小 237 | #---------------------------------# 238 | iw, ih = image.size 239 | #---------------------------------# 240 | # 保存框的位置 241 | #---------------------------------# 242 | box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]]) 243 | 244 | #---------------------------------# 245 | # 是否翻转图片 246 | #---------------------------------# 247 | flip = self.rand()<.5 248 | if flip and len(box)>0: 249 | image = image.transpose(Image.FLIP_LEFT_RIGHT) 250 | box[:, [0,2]] = iw - box[:, [2,0]] 251 | 252 | #------------------------------------------# 253 | # 对图像进行缩放并且进行长和宽的扭曲 254 | #------------------------------------------# 255 | new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter) 256 | scale = self.rand(.4, 1) 257 | if new_ar < 1: 258 | nh = int(scale*h) 259 | nw = int(nh*new_ar) 260 | else: 261 
| nw = int(scale*w) 262 | nh = int(nw/new_ar) 263 | image = image.resize((nw, nh), Image.BICUBIC) 264 | 265 | #-----------------------------------------------# 266 | # 将图片进行放置,分别对应四张分割图片的位置 267 | #-----------------------------------------------# 268 | if index == 0: 269 | dx = int(w*min_offset_x) - nw 270 | dy = int(h*min_offset_y) - nh 271 | elif index == 1: 272 | dx = int(w*min_offset_x) - nw 273 | dy = int(h*min_offset_y) 274 | elif index == 2: 275 | dx = int(w*min_offset_x) 276 | dy = int(h*min_offset_y) 277 | elif index == 3: 278 | dx = int(w*min_offset_x) 279 | dy = int(h*min_offset_y) - nh 280 | 281 | new_image = Image.new('RGB', (w,h), (128,128,128)) 282 | new_image.paste(image, (dx, dy)) 283 | image_data = np.array(new_image) 284 | 285 | index = index + 1 286 | box_data = [] 287 | #---------------------------------# 288 | # 对box进行重新处理 289 | #---------------------------------# 290 | if len(box)>0: 291 | np.random.shuffle(box) 292 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 293 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 294 | box[:, 0:2][box[:, 0:2]<0] = 0 295 | box[:, 2][box[:, 2]>w] = w 296 | box[:, 3][box[:, 3]>h] = h 297 | box_w = box[:, 2] - box[:, 0] 298 | box_h = box[:, 3] - box[:, 1] 299 | box = box[np.logical_and(box_w>1, box_h>1)] 300 | box_data = np.zeros((len(box),5)) 301 | box_data[:len(box)] = box 302 | 303 | image_datas.append(image_data) 304 | box_datas.append(box_data) 305 | 306 | #---------------------------------# 307 | # 将图片分割,放在一起 308 | #---------------------------------# 309 | cutx = int(w * min_offset_x) 310 | cuty = int(h * min_offset_y) 311 | 312 | new_image = np.zeros([h, w, 3]) 313 | new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :] 314 | new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :] 315 | new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :] 316 | new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :] 317 | 318 | new_image = np.array(new_image, np.uint8) 319 | #---------------------------------# 320 | # 对图像进行色域变换 321 | # 计算色域变换的参数 322 | #---------------------------------# 323 | r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1 324 | #---------------------------------# 325 | # 将图像转到HSV上 326 | #---------------------------------# 327 | hue, sat, val = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV)) 328 | dtype = new_image.dtype 329 | #---------------------------------# 330 | # 应用变换 331 | #---------------------------------# 332 | x = np.arange(0, 256, dtype=r.dtype) 333 | lut_hue = ((x * r[0]) % 180).astype(dtype) 334 | lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) 335 | lut_val = np.clip(x * r[2], 0, 255).astype(dtype) 336 | 337 | new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) 338 | new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB) 339 | 340 | #---------------------------------# 341 | # 对框进行进一步的处理 342 | #---------------------------------# 343 | new_boxes = self.merge_bboxes(box_datas, cutx, cuty) 344 | 345 | return new_image, new_boxes 346 | 347 | # DataLoader中collate_fn使用 348 | def yolo_dataset_collate(batch): 349 | images = [] 350 | bboxes = [] 351 | for img, box in batch: 352 | images.append(img) 353 | bboxes.append(box) 354 | images = np.array(images) 355 | return images, bboxes 356 | 357 | 358 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/utils/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | 5 | 
#---------------------------------------------------------# 6 | # 将图像转换成RGB图像,防止灰度图在预测时报错。 7 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 8 | #---------------------------------------------------------# 9 | def cvtColor(image): 10 | if len(np.shape(image)) == 3 and np.shape(image)[2] == 3: 11 | return image 12 | else: 13 | image = image.convert('RGB') 14 | return image 15 | 16 | #---------------------------------------------------# 17 | # 对输入图像进行resize 18 | #---------------------------------------------------# 19 | def resize_image(image, size, letterbox_image): 20 | iw, ih = image.size 21 | w, h = size 22 | if letterbox_image: 23 | scale = min(w/iw, h/ih) 24 | nw = int(iw*scale) 25 | nh = int(ih*scale) 26 | 27 | image = image.resize((nw,nh), Image.BICUBIC) 28 | new_image = Image.new('RGB', size, (128,128,128)) 29 | new_image.paste(image, ((w-nw)//2, (h-nh)//2)) 30 | else: 31 | new_image = image.resize((w, h), Image.BICUBIC) 32 | return new_image 33 | 34 | #---------------------------------------------------# 35 | # 获得类 36 | #---------------------------------------------------# 37 | def get_classes(classes_path): 38 | with open(classes_path, encoding='utf-8') as f: 39 | class_names = f.readlines() 40 | class_names = [c.strip() for c in class_names] 41 | return class_names, len(class_names) 42 | 43 | def preprocess_input(image): 44 | image /= 255.0 45 | image -= np.array([0.485, 0.456, 0.406]) 46 | image /= np.array([0.229, 0.224, 0.225]) 47 | return image 48 | 49 | #---------------------------------------------------# 50 | # 获得学习率 51 | #---------------------------------------------------# 52 | def get_lr(optimizer): 53 | for param_group in optimizer.param_groups: 54 | return param_group['lr'] -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/utils/utils_bbox.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torchvision.ops import nms, boxes 4 | 5 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image): 6 | #-----------------------------------------------------------------# 7 | # 把y轴放前面是因为方便预测框和图像的宽高进行相乘 8 | #-----------------------------------------------------------------# 9 | box_yx = box_xy[..., ::-1] 10 | box_hw = box_wh[..., ::-1] 11 | input_shape = np.array(input_shape) 12 | image_shape = np.array(image_shape) 13 | 14 | if letterbox_image: 15 | #-----------------------------------------------------------------# 16 | # 这里求出来的offset是图像有效区域相对于图像左上角的偏移情况 17 | # new_shape指的是宽高缩放情况 18 | #-----------------------------------------------------------------# 19 | new_shape = np.round(image_shape * np.min(input_shape/image_shape)) 20 | offset = (input_shape - new_shape)/2./input_shape 21 | scale = input_shape/new_shape 22 | 23 | box_yx = (box_yx - offset) * scale 24 | box_hw *= scale 25 | 26 | box_mins = box_yx - (box_hw / 2.) 27 | box_maxes = box_yx + (box_hw / 2.) 
28 | boxes = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1) 29 | boxes *= np.concatenate([image_shape, image_shape], axis=-1) 30 | return boxes 31 | 32 | def decode_outputs(outputs, input_shape): 33 | grids = [] 34 | strides = [] 35 | hw = [x.shape[-2:] for x in outputs] 36 | #---------------------------------------------------# 37 | # outputs输入前代表每个特征层的预测结果 38 | # batch_size, 4 + 1 + num_classes, 80, 80 => batch_size, 4 + 1 + num_classes, 6400 39 | # batch_size, 5 + num_classes, 40, 40 40 | # batch_size, 5 + num_classes, 20, 20 41 | # batch_size, 4 + 1 + num_classes, 6400 + 1600 + 400 -> batch_size, 4 + 1 + num_classes, 8400 42 | # 堆叠后为batch_size, 8400, 5 + num_classes 43 | #---------------------------------------------------# 44 | outputs = torch.cat([x.flatten(start_dim=2) for x in outputs], dim=2).permute(0, 2, 1) 45 | #---------------------------------------------------# 46 | # 获得每一个特征点属于每一个种类的概率 47 | #---------------------------------------------------# 48 | outputs[:, :, 4:] = torch.sigmoid(outputs[:, :, 4:]) 49 | for h, w in hw: 50 | #---------------------------# 51 | # 根据特征层的高宽生成网格点 52 | #---------------------------# 53 | grid_y, grid_x = torch.meshgrid([torch.arange(h), torch.arange(w)]) 54 | #---------------------------# 55 | # 1, 6400, 2 56 | # 1, 1600, 2 57 | # 1, 400, 2 58 | #---------------------------# 59 | grid = torch.stack((grid_x, grid_y), 2).view(1, -1, 2) 60 | shape = grid.shape[:2] 61 | 62 | grids.append(grid) 63 | strides.append(torch.full((shape[0], shape[1], 1), input_shape[0] / h)) 64 | #---------------------------# 65 | # 将网格点堆叠到一起 66 | # 1, 6400, 2 67 | # 1, 1600, 2 68 | # 1, 400, 2 69 | # 70 | # 1, 8400, 2 71 | #---------------------------# 72 | grids = torch.cat(grids, dim=1).type(outputs.type()) 73 | strides = torch.cat(strides, dim=1).type(outputs.type()) 74 | #------------------------# 75 | # 根据网格点进行解码 76 | #------------------------# 77 | outputs[..., :2] = (outputs[..., :2] + grids) * strides 78 | outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides 79 | #-----------------# 80 | # 归一化 81 | #-----------------# 82 | outputs[..., [0,2]] = outputs[..., [0,2]] / input_shape[1] 83 | outputs[..., [1,3]] = outputs[..., [1,3]] / input_shape[0] 84 | return outputs 85 | 86 | def non_max_suppression(prediction, num_classes, input_shape, image_shape, letterbox_image, conf_thres=0.5, nms_thres=0.4): 87 | #----------------------------------------------------------# 88 | # 将预测结果的格式转换成左上角右下角的格式。 89 | # prediction [batch_size, num_anchors, 85] 90 | #----------------------------------------------------------# 91 | box_corner = prediction.new(prediction.shape) 92 | box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 93 | box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 94 | box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 95 | box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 96 | prediction[:, :, :4] = box_corner[:, :, :4] 97 | 98 | output = [None for _ in range(len(prediction))] 99 | #----------------------------------------------------------# 100 | # 对输入图片进行循环,一般只会进行一次 101 | #----------------------------------------------------------# 102 | for i, image_pred in enumerate(prediction): 103 | #----------------------------------------------------------# 104 | # 对种类预测部分取max。 105 | # class_conf [num_anchors, 1] 种类置信度 106 | # class_pred [num_anchors, 1] 种类 107 | #----------------------------------------------------------# 108 | class_conf, 
class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True) 109 | 110 | #----------------------------------------------------------# 111 | # 利用置信度进行第一轮筛选 112 | #----------------------------------------------------------# 113 | conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze() 114 | 115 | if not image_pred.size(0): 116 | continue 117 | #-------------------------------------------------------------------------# 118 | # detections [num_anchors, 7] 119 | # 7的内容为:x1, y1, x2, y2, obj_conf, class_conf, class_pred 120 | #-------------------------------------------------------------------------# 121 | detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) 122 | detections = detections[conf_mask] 123 | 124 | nms_out_index = boxes.batched_nms( 125 | detections[:, :4], 126 | detections[:, 4] * detections[:, 5], 127 | detections[:, 6], 128 | nms_thres, 129 | ) 130 | 131 | output[i] = detections[nms_out_index] 132 | 133 | # #------------------------------------------# 134 | # # 获得预测结果中包含的所有种类 135 | # #------------------------------------------# 136 | # unique_labels = detections[:, -1].cpu().unique() 137 | 138 | # if prediction.is_cuda: 139 | # unique_labels = unique_labels.cuda() 140 | # detections = detections.cuda() 141 | 142 | # for c in unique_labels: 143 | # #------------------------------------------# 144 | # # 获得某一类得分筛选后全部的预测结果 145 | # #------------------------------------------# 146 | # detections_class = detections[detections[:, -1] == c] 147 | 148 | # #------------------------------------------# 149 | # # 使用官方自带的非极大抑制会速度更快一些! 150 | # #------------------------------------------# 151 | # keep = nms( 152 | # detections_class[:, :4], 153 | # detections_class[:, 4] * detections_class[:, 5], 154 | # nms_thres 155 | # ) 156 | # max_detections = detections_class[keep] 157 | 158 | # # # 按照存在物体的置信度排序 159 | # # _, conf_sort_index = torch.sort(detections_class[:, 4]*detections_class[:, 5], descending=True) 160 | # # detections_class = detections_class[conf_sort_index] 161 | # # # 进行非极大抑制 162 | # # max_detections = [] 163 | # # while detections_class.size(0): 164 | # # # 取出这一类置信度最高的,一步一步往下判断,判断重合程度是否大于nms_thres,如果是则去除掉 165 | # # max_detections.append(detections_class[0].unsqueeze(0)) 166 | # # if len(detections_class) == 1: 167 | # # break 168 | # # ious = bbox_iou(max_detections[-1], detections_class[1:]) 169 | # # detections_class = detections_class[1:][ious < nms_thres] 170 | # # # 堆叠 171 | # # max_detections = torch.cat(max_detections).data 172 | 173 | # # Add max detections to outputs 174 | # output[i] = max_detections if output[i] is None else torch.cat((output[i], max_detections)) 175 | 176 | if output[i] is not None: 177 | output[i] = output[i].cpu().numpy() 178 | box_xy, box_wh = (output[i][:, 0:2] + output[i][:, 2:4])/2, output[i][:, 2:4] - output[i][:, 0:2] 179 | output[i][:, :4] = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image) 180 | return output 181 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/utils/utils_fit.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | from tqdm import tqdm 5 | 6 | from utils.utils import get_lr 7 | 8 | 9 | def fit_one_epoch(model_train, model, yolo_loss, loss_history, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, Epoch, cuda, save_period, save_dir): 10 | loss = 0 11 | val_loss = 0 12 | 13 | model_train.train() 14 | print('Start 
Train') 15 | with tqdm(total=epoch_step,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar: 16 | for iteration, batch in enumerate(gen): 17 | if iteration >= epoch_step: 18 | break 19 | 20 | images, targets = batch[0], batch[1] 21 | with torch.no_grad(): 22 | if cuda: 23 | images = torch.from_numpy(images).type(torch.FloatTensor).cuda() 24 | targets = [torch.from_numpy(ann).type(torch.FloatTensor).cuda() for ann in targets] 25 | else: 26 | images = torch.from_numpy(images).type(torch.FloatTensor) 27 | targets = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets] 28 | #----------------------# 29 | # 清零梯度 30 | #----------------------# 31 | optimizer.zero_grad() 32 | #----------------------# 33 | # 前向传播 34 | #----------------------# 35 | outputs = model_train(images) 36 | 37 | #----------------------# 38 | # 计算损失 39 | #----------------------# 40 | loss_value = yolo_loss(outputs, targets) 41 | 42 | #----------------------# 43 | # 反向传播 44 | #----------------------# 45 | loss_value.backward() 46 | optimizer.step() 47 | 48 | loss += loss_value.item() 49 | 50 | pbar.set_postfix(**{'loss' : loss / (iteration + 1), 51 | 'lr' : get_lr(optimizer)}) 52 | pbar.update(1) 53 | 54 | print('Finish Train') 55 | 56 | model_train.eval() 57 | print('Start Validation') 58 | with tqdm(total=epoch_step_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar: 59 | for iteration, batch in enumerate(gen_val): 60 | if iteration >= epoch_step_val: 61 | break 62 | images, targets = batch[0], batch[1] 63 | with torch.no_grad(): 64 | if cuda: 65 | images = torch.from_numpy(images).type(torch.FloatTensor).cuda() 66 | targets = [torch.from_numpy(ann).type(torch.FloatTensor).cuda() for ann in targets] 67 | else: 68 | images = torch.from_numpy(images).type(torch.FloatTensor) 69 | targets = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets] 70 | #----------------------# 71 | # 清零梯度 72 | #----------------------# 73 | optimizer.zero_grad() 74 | #----------------------# 75 | # 前向传播 76 | #----------------------# 77 | outputs = model_train(images) 78 | 79 | #----------------------# 80 | # 计算损失 81 | #----------------------# 82 | loss_value = yolo_loss(outputs, targets) 83 | 84 | val_loss += loss_value.item() 85 | pbar.set_postfix(**{'val_loss': val_loss / (iteration + 1)}) 86 | pbar.update(1) 87 | 88 | print('Finish Validation') 89 | 90 | loss_history.append_loss(epoch + 1, loss / epoch_step, val_loss / epoch_step_val) 91 | print('Epoch:'+ str(epoch + 1) + '/' + str(Epoch)) 92 | print('Total Loss: %.3f || Val Loss: %.3f ' % (loss / epoch_step, val_loss / epoch_step_val)) 93 | if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch: 94 | torch.save(model.state_dict(), os.path.join(save_dir, "ep%03d-loss%.3f-val_loss%.3f.pth" % (epoch + 1, loss / epoch_step, val_loss / epoch_step_val))) 95 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/utils_coco/coco_annotation.py: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------# 2 | # 用于处理COCO数据集,根据json文件生成txt文件用于训练 3 | #-------------------------------------------------------# 4 | import json 5 | import os 6 | from collections import defaultdict 7 | 8 | #-------------------------------------------------------# 9 | # 指向了COCO训练集与验证集图片的路径 10 | #-------------------------------------------------------# 11 | train_datasets_path = "coco_dataset/train2017" 12 | val_datasets_path = 
"coco_dataset/val2017" 13 | 14 | #-------------------------------------------------------# 15 | # 指向了COCO训练集与验证集标签的路径 16 | #-------------------------------------------------------# 17 | train_annotation_path = "coco_dataset/annotations/instances_train2017.json" 18 | val_annotation_path = "coco_dataset/annotations/instances_val2017.json" 19 | 20 | #-------------------------------------------------------# 21 | # 生成的txt文件路径 22 | #-------------------------------------------------------# 23 | train_output_path = "coco_train.txt" 24 | val_output_path = "coco_val.txt" 25 | 26 | if __name__ == "__main__": 27 | name_box_id = defaultdict(list) 28 | id_name = dict() 29 | f = open(train_annotation_path, encoding='utf-8') 30 | data = json.load(f) 31 | 32 | annotations = data['annotations'] 33 | for ant in annotations: 34 | id = ant['image_id'] 35 | name = os.path.join(train_datasets_path, '%012d.jpg' % id) 36 | cat = ant['category_id'] 37 | if cat >= 1 and cat <= 11: 38 | cat = cat - 1 39 | elif cat >= 13 and cat <= 25: 40 | cat = cat - 2 41 | elif cat >= 27 and cat <= 28: 42 | cat = cat - 3 43 | elif cat >= 31 and cat <= 44: 44 | cat = cat - 5 45 | elif cat >= 46 and cat <= 65: 46 | cat = cat - 6 47 | elif cat == 67: 48 | cat = cat - 7 49 | elif cat == 70: 50 | cat = cat - 9 51 | elif cat >= 72 and cat <= 82: 52 | cat = cat - 10 53 | elif cat >= 84 and cat <= 90: 54 | cat = cat - 11 55 | name_box_id[name].append([ant['bbox'], cat]) 56 | 57 | f = open(train_output_path, 'w') 58 | for key in name_box_id.keys(): 59 | f.write(key) 60 | box_infos = name_box_id[key] 61 | for info in box_infos: 62 | x_min = int(info[0][0]) 63 | y_min = int(info[0][1]) 64 | x_max = x_min + int(info[0][2]) 65 | y_max = y_min + int(info[0][3]) 66 | 67 | box_info = " %d,%d,%d,%d,%d" % ( 68 | x_min, y_min, x_max, y_max, int(info[1])) 69 | f.write(box_info) 70 | f.write('\n') 71 | f.close() 72 | 73 | name_box_id = defaultdict(list) 74 | id_name = dict() 75 | f = open(val_annotation_path, encoding='utf-8') 76 | data = json.load(f) 77 | 78 | annotations = data['annotations'] 79 | for ant in annotations: 80 | id = ant['image_id'] 81 | name = os.path.join(val_datasets_path, '%012d.jpg' % id) 82 | cat = ant['category_id'] 83 | if cat >= 1 and cat <= 11: 84 | cat = cat - 1 85 | elif cat >= 13 and cat <= 25: 86 | cat = cat - 2 87 | elif cat >= 27 and cat <= 28: 88 | cat = cat - 3 89 | elif cat >= 31 and cat <= 44: 90 | cat = cat - 5 91 | elif cat >= 46 and cat <= 65: 92 | cat = cat - 6 93 | elif cat == 67: 94 | cat = cat - 7 95 | elif cat == 70: 96 | cat = cat - 9 97 | elif cat >= 72 and cat <= 82: 98 | cat = cat - 10 99 | elif cat >= 84 and cat <= 90: 100 | cat = cat - 11 101 | name_box_id[name].append([ant['bbox'], cat]) 102 | 103 | f = open(val_output_path, 'w') 104 | for key in name_box_id.keys(): 105 | f.write(key) 106 | box_infos = name_box_id[key] 107 | for info in box_infos: 108 | x_min = int(info[0][0]) 109 | y_min = int(info[0][1]) 110 | x_max = x_min + int(info[0][2]) 111 | y_max = y_min + int(info[0][3]) 112 | 113 | box_info = " %d,%d,%d,%d,%d" % ( 114 | x_min, y_min, x_max, y_max, int(info[1])) 115 | f.write(box_info) 116 | f.write('\n') 117 | f.close() 118 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/utils_coco/get_map_coco.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import numpy as np 5 | import torch 6 | from PIL import Image 7 | from pycocotools.coco import COCO 8 | from 
pycocotools.cocoeval import COCOeval 9 | from tqdm import tqdm 10 | from utils.utils import cvtColor, get_classes, preprocess_input, resize_image 11 | from utils.utils_bbox import decode_outputs, non_max_suppression 12 | from yolo import YOLO 13 | 14 | #---------------------------------------------------------------------------# 15 | # map_mode用于指定该文件运行时计算的内容 16 | # map_mode为0代表整个map计算流程,包括获得预测结果、计算map。 17 | # map_mode为1代表仅仅获得预测结果。 18 | # map_mode为2代表仅仅获得计算map。 19 | #---------------------------------------------------------------------------# 20 | map_mode = 0 21 | #-------------------------------------------------------# 22 | # 指向了验证集标签与图片路径 23 | #-------------------------------------------------------# 24 | cocoGt_path = 'coco_dataset/annotations/instances_val2017.json' 25 | dataset_img_path = 'coco_dataset/val2017' 26 | #-------------------------------------------------------# 27 | # 结果输出的文件夹,默认为map_out 28 | #-------------------------------------------------------# 29 | temp_save_path = 'map_out/coco_eval' 30 | 31 | class mAP_YOLO(YOLO): 32 | #---------------------------------------------------# 33 | # 检测图片 34 | #---------------------------------------------------# 35 | def detect_image(self, image_id, image, results): 36 | #---------------------------------------------------# 37 | # 计算输入图片的高和宽 38 | #---------------------------------------------------# 39 | image_shape = np.array(np.shape(image)[0:2]) 40 | #---------------------------------------------------------# 41 | # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 42 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 43 | #---------------------------------------------------------# 44 | image = cvtColor(image) 45 | #---------------------------------------------------------# 46 | # 给图像增加灰条,实现不失真的resize 47 | # 也可以直接resize进行识别 48 | #---------------------------------------------------------# 49 | image_data = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image) 50 | #---------------------------------------------------------# 51 | # 添加上batch_size维度 52 | #---------------------------------------------------------# 53 | image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) 54 | 55 | with torch.no_grad(): 56 | images = torch.from_numpy(image_data) 57 | if self.cuda: 58 | images = images.cuda() 59 | #---------------------------------------------------------# 60 | # 将图像输入网络当中进行预测! 
61 | #---------------------------------------------------------# 62 | outputs = self.net(images) 63 | outputs = decode_outputs(outputs, self.input_shape) 64 | #---------------------------------------------------------# 65 | # 将预测框进行堆叠,然后进行非极大抑制 66 | #---------------------------------------------------------# 67 | outputs = non_max_suppression(outputs, self.num_classes, self.input_shape, 68 | image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou) 69 | 70 | if outputs[0] is None: 71 | return results 72 | 73 | top_label = np.array(outputs[0][:, 6], dtype = 'int32') 74 | top_conf = outputs[0][:, 4] * outputs[0][:, 5] 75 | top_boxes = outputs[0][:, :4] 76 | 77 | for i, c in enumerate(top_label): 78 | result = {} 79 | top, left, bottom, right = top_boxes[i] 80 | 81 | result["image_id"] = int(image_id) 82 | result["category_id"] = clsid2catid[c] 83 | result["bbox"] = [float(left),float(top),float(right-left),float(bottom-top)] 84 | result["score"] = float(top_conf[i]) 85 | results.append(result) 86 | return results 87 | 88 | if __name__ == "__main__": 89 | if not os.path.exists(temp_save_path): 90 | os.makedirs(temp_save_path) 91 | 92 | cocoGt = COCO(cocoGt_path) 93 | ids = list(cocoGt.imgToAnns.keys()) 94 | clsid2catid = cocoGt.getCatIds() 95 | 96 | if map_mode == 0 or map_mode == 1: 97 | yolo = mAP_YOLO(confidence = 0.001, nms_iou = 0.65) 98 | 99 | with open(os.path.join(temp_save_path, 'eval_results.json'),"w") as f: 100 | results = [] 101 | for image_id in tqdm(ids): 102 | image_path = os.path.join(dataset_img_path, cocoGt.loadImgs(image_id)[0]['file_name']) 103 | image = Image.open(image_path) 104 | results = yolo.detect_image(image_id, image, results) 105 | json.dump(results, f) 106 | 107 | if map_mode == 0 or map_mode == 2: 108 | cocoDt = cocoGt.loadRes(os.path.join(temp_save_path, 'eval_results.json')) 109 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') 110 | cocoEval.evaluate() 111 | cocoEval.accumulate() 112 | cocoEval.summarize() 113 | print("Get map done.") 114 | -------------------------------------------------------------------------------- /YOLOX-pytorch-camera/voc_annotation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import xml.etree.ElementTree as ET 4 | 5 | from utils.utils import get_classes 6 | 7 | #--------------------------------------------------------------------------------------------------------------------------------# 8 | # annotation_mode用于指定该文件运行时计算的内容 9 | # annotation_mode为0代表整个标签处理过程,包括获得VOCdevkit/VOC2007/ImageSets里面的txt以及训练用的2007_train.txt、2007_val.txt 10 | # annotation_mode为1代表获得VOCdevkit/VOC2007/ImageSets里面的txt 11 | # annotation_mode为2代表获得训练用的2007_train.txt、2007_val.txt 12 | #--------------------------------------------------------------------------------------------------------------------------------# 13 | annotation_mode = 0 14 | #-------------------------------------------------------------------# 15 | # 必须要修改,用于生成2007_train.txt、2007_val.txt的目标信息 16 | # 与训练和预测所用的classes_path一致即可 17 | # 如果生成的2007_train.txt里面没有目标信息 18 | # 那么就是因为classes没有设定正确 19 | # 仅在annotation_mode为0和2的时候有效 20 | #-------------------------------------------------------------------# 21 | classes_path = 'model_data/lyd_classes.txt' 22 | #--------------------------------------------------------------------------------------------------------------------------------# 23 | # trainval_percent用于指定(训练集+验证集)与测试集的比例,默认情况下 (训练集+验证集):测试集 = 9:1 24 | # train_percent用于指定(训练集+验证集)中训练集与验证集的比例,默认情况下 
训练集:验证集 = 9:1 25 | # 仅在annotation_mode为0和1的时候有效 26 | #--------------------------------------------------------------------------------------------------------------------------------# 27 | trainval_percent = 0.9 28 | train_percent = 0.9 29 | #-------------------------------------------------------# 30 | # 指向VOC数据集所在的文件夹 31 | # 默认指向根目录下的VOC数据集 32 | #-------------------------------------------------------# 33 | VOCdevkit_path = 'VOCdevkit' 34 | 35 | VOCdevkit_sets = [('2007', 'train'), ('2007', 'val')] 36 | classes, _ = get_classes(classes_path) 37 | 38 | def convert_annotation(year, image_id, list_file): 39 | #in_file = open(os.path.join(VOCdevkit_path, 'VOC%s/Annotations/%s.xml' % (year, image_id)), encoding='utf-8') 40 | in_file = open(os.path.join(VOCdevkit_path, 'VOC%s/Annotations/%s.xml'%(year, image_id)), encoding='gbk') 41 | tree=ET.parse(in_file) 42 | root = tree.getroot() 43 | 44 | for obj in root.iter('object'): 45 | difficult = 0 46 | if obj.find('difficult')!=None: 47 | difficult = obj.find('difficult').text 48 | cls = obj.find('name').text 49 | if cls not in classes or int(difficult)==1: 50 | continue 51 | cls_id = classes.index(cls) 52 | xmlbox = obj.find('bndbox') 53 | b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text))) 54 | list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id)) 55 | 56 | if __name__ == "__main__": 57 | random.seed(0) 58 | if annotation_mode == 0 or annotation_mode == 1: 59 | print("Generate txt in ImageSets.") 60 | xmlfilepath = os.path.join(VOCdevkit_path, 'VOC2007/Annotations') 61 | saveBasePath = os.path.join(VOCdevkit_path, 'VOC2007/ImageSets/Main') 62 | temp_xml = os.listdir(xmlfilepath) 63 | total_xml = [] 64 | for xml in temp_xml: 65 | if xml.endswith(".xml"): 66 | total_xml.append(xml) 67 | 68 | num = len(total_xml) 69 | list = range(num) 70 | tv = int(num*trainval_percent) 71 | tr = int(tv*train_percent) 72 | trainval= random.sample(list,tv) 73 | train = random.sample(trainval,tr) 74 | 75 | print("train and val size",tv) 76 | print("train size",tr) 77 | ftrainval = open(os.path.join(saveBasePath,'trainval.txt'), 'w') 78 | ftest = open(os.path.join(saveBasePath,'test.txt'), 'w') 79 | ftrain = open(os.path.join(saveBasePath,'train.txt'), 'w') 80 | fval = open(os.path.join(saveBasePath,'val.txt'), 'w') 81 | 82 | for i in list: 83 | name=total_xml[i][:-4]+'\n' 84 | if i in trainval: 85 | ftrainval.write(name) 86 | if i in train: 87 | ftrain.write(name) 88 | else: 89 | fval.write(name) 90 | else: 91 | ftest.write(name) 92 | 93 | ftrainval.close() 94 | ftrain.close() 95 | fval.close() 96 | ftest.close() 97 | print("Generate txt in ImageSets done.") 98 | 99 | if annotation_mode == 0 or annotation_mode == 2: 100 | print("Generate 2007_train.txt and 2007_val.txt for train.") 101 | for year, image_set in VOCdevkit_sets: 102 | image_ids = open(os.path.join(VOCdevkit_path, 'VOC%s/ImageSets/Main/%s.txt'%(year, image_set)), encoding='utf-8').read().strip().split() 103 | list_file = open('%s_%s.txt'%(year, image_set), 'w', encoding='utf-8') 104 | for image_id in image_ids: 105 | list_file.write('%s/VOC%s/JPEGImages/%s.jpg'%(os.path.abspath(VOCdevkit_path), year, image_id)) 106 | 107 | convert_annotation(year, image_id, list_file) 108 | list_file.write('\n') 109 | list_file.close() 110 | print("Generate 2007_train.txt and 2007_val.txt for train done.") 111 | -------------------------------------------------------------------------------- 
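Both annotation scripts above (coco_annotation.py and voc_annotation.py) emit the same one-line-per-image format: an image path followed by space-separated `xmin,ymin,xmax,ymax,class_id` groups. Below is a minimal, illustrative sketch of reading that format back; the helper name is hypothetical, it is not part of the repository, and it assumes the image path itself contains no spaces.

```python
import numpy as np

def parse_annotation_line(line):
    # "path/img.jpg 48,240,195,371,11 8,12,352,498,14" -> (path, (N, 5) int array)
    parts = line.strip().split()
    image_path = parts[0]                       # assumption: the path contains no spaces
    boxes = [list(map(int, b.split(','))) for b in parts[1:]]
    return image_path, np.array(boxes, dtype=np.int32).reshape(-1, 5)

if __name__ == "__main__":
    # 2007_train.txt is the file written by voc_annotation.py above
    with open("2007_train.txt", encoding="utf-8") as f:
        for line in f:
            path, boxes = parse_annotation_line(line)
            print(path, boxes.shape)            # (N, 5): xmin, ymin, xmax, ymax, class_id
```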
/YOLOX-pytorch-camera/yolo.py: -------------------------------------------------------------------------------- 1 | import colorsys 2 | import os 3 | import time 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | from PIL import ImageDraw, ImageFont 9 | 10 | from nets.yolo import YoloBody 11 | from utils.utils import cvtColor, get_classes, preprocess_input, resize_image 12 | from utils.utils_bbox import decode_outputs, non_max_suppression 13 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 14 | 15 | class YOLO(object): 16 | _defaults = { 17 | "model_path" : 'model_data/yolox_s.pth', 18 | "classes_path" : 'model_data/coco_classes.txt', 19 | #---------------------------------------------------------------------# 20 | # 输入图片的大小,必须为32的倍数。 21 | #---------------------------------------------------------------------# 22 | "input_shape" : [416,416], 23 | #---------------------------------------------------------------------# 24 | # 所使用的YoloX的版本。nano、tiny、s、m、l、x 25 | #---------------------------------------------------------------------# 26 | "phi" : 's', 27 | #---------------------------------------------------------------------# 28 | # 只有得分大于置信度的预测框会被保留下来 29 | #---------------------------------------------------------------------# 30 | "confidence" : 0.5, 31 | #---------------------------------------------------------------------# 32 | # 非极大抑制所用到的nms_iou大小 33 | #---------------------------------------------------------------------# 34 | "nms_iou" : 0.3, 35 | #---------------------------------------------------------------------# 36 | # 该变量用于控制是否使用letterbox_image对输入图像进行不失真的resize, 37 | # 在多次测试后,发现关闭letterbox_image直接resize的效果更好 38 | #---------------------------------------------------------------------# 39 | "letterbox_image" : True, 40 | #-------------------------------# 41 | # 是否使用Cuda 42 | # 没有GPU可以设置成False 43 | #-------------------------------# 44 | "cuda" : False, 45 | } 46 | 47 | @classmethod 48 | def get_defaults(cls, n): 49 | if n in cls._defaults: 50 | return cls._defaults[n] 51 | else: 52 | return "Unrecognized attribute name '" + n + "'" 53 | 54 | #---------------------------------------------------# 55 | # 初始化YOLO 56 | #---------------------------------------------------# 57 | def __init__(self, **kwargs): 58 | self.__dict__.update(self._defaults) 59 | for name, value in kwargs.items(): 60 | setattr(self, name, value) 61 | 62 | #---------------------------------------------------# 63 | # 获得种类和先验框的数量 64 | #---------------------------------------------------# 65 | self.class_names, self.num_classes = get_classes(self.classes_path) 66 | 67 | #---------------------------------------------------# 68 | # 画框设置不同的颜色 69 | #---------------------------------------------------# 70 | hsv_tuples = [(x / self.num_classes, 1., 1.) 
for x in range(self.num_classes)] 71 | self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 72 | self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors)) 73 | self.generate() 74 | 75 | #---------------------------------------------------# 76 | # 生成模型 77 | #---------------------------------------------------# 78 | def generate(self): 79 | self.net = YoloBody(self.num_classes, self.phi) 80 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 81 | self.net.load_state_dict(torch.load(self.model_path, map_location=device)) 82 | self.net = self.net.eval() 83 | print('{} model, and classes loaded.'.format(self.model_path)) 84 | 85 | if self.cuda: 86 | self.net = nn.DataParallel(self.net) 87 | self.net = self.net.cuda() 88 | 89 | #---------------------------------------------------# 90 | # 检测图片 91 | #---------------------------------------------------# 92 | def detect_image(self, image, crop = False): 93 | 94 | #------------标签----------------- 95 | labels = [] 96 | #------------坐标----------------- 97 | xy = [] 98 | #------------------------------- 99 | #---------------------------------------------------# 100 | # 获得输入图片的高和宽 101 | #---------------------------------------------------# 102 | image_shape = np.array(np.shape(image)[0:2]) 103 | #---------------------------------------------------------# 104 | # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 105 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 106 | #---------------------------------------------------------# 107 | image = cvtColor(image) 108 | #---------------------------------------------------------# 109 | # 给图像增加灰条,实现不失真的resize 110 | # 也可以直接resize进行识别 111 | #---------------------------------------------------------# 112 | image_data = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image) 113 | #---------------------------------------------------------# 114 | # 添加上batch_size维度 115 | #---------------------------------------------------------# 116 | image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) 117 | 118 | with torch.no_grad(): 119 | images = torch.from_numpy(image_data) 120 | if self.cuda: 121 | #images = images.cuda() 122 | images = images.to(device) 123 | #---------------------------------------------------------# 124 | # 将图像输入网络当中进行预测! 
125 | #---------------------------------------------------------# 126 | outputs = self.net(images) 127 | outputs = decode_outputs(outputs, self.input_shape) 128 | #---------------------------------------------------------# 129 | # 将预测框进行堆叠,然后进行非极大抑制 130 | #---------------------------------------------------------# 131 | results = non_max_suppression(outputs, self.num_classes, self.input_shape, 132 | image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou) 133 | 134 | if results[0] is None: 135 | return image 136 | 137 | top_label = np.array(results[0][:, 6], dtype = 'int32') 138 | top_conf = results[0][:, 4] * results[0][:, 5] 139 | top_boxes = results[0][:, :4] 140 | #---------------------------------------------------------# 141 | # 设置字体与边框厚度 142 | #---------------------------------------------------------# 143 | font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) 144 | thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1)) 145 | 146 | #---------------------------------------------------------# 147 | # 是否进行目标的裁剪 148 | #---------------------------------------------------------# 149 | if crop: 150 | for i, c in list(enumerate(top_label)): 151 | top, left, bottom, right = top_boxes[i] 152 | top = max(0, np.floor(top).astype('int32')) 153 | left = max(0, np.floor(left).astype('int32')) 154 | bottom = min(image.size[1], np.floor(bottom).astype('int32')) 155 | right = min(image.size[0], np.floor(right).astype('int32')) 156 | 157 | dir_save_path = "img_crop" 158 | if not os.path.exists(dir_save_path): 159 | os.makedirs(dir_save_path) 160 | crop_image = image.crop([left, top, right, bottom]) 161 | crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0) 162 | print("save crop_" + str(i) + ".png to " + dir_save_path) 163 | 164 | #---------------------------------------------------------# 165 | # 图像绘制 166 | #---------------------------------------------------------# 167 | for i, c in list(enumerate(top_label)): 168 | predicted_class = self.class_names[int(c)] 169 | box = top_boxes[i] 170 | score = top_conf[i] 171 | 172 | top, left, bottom, right = box 173 | 174 | top = max(0, np.floor(top).astype('int32')) 175 | left = max(0, np.floor(left).astype('int32')) 176 | bottom = min(image.size[1], np.floor(bottom).astype('int32')) 177 | right = min(image.size[0], np.floor(right).astype('int32')) 178 | 179 | label = '{} {:.2f}'.format(predicted_class, score) 180 | draw = ImageDraw.Draw(image) 181 | label_size = draw.textsize(label, font) 182 | label = label.encode('utf-8') 183 | print(label, top, left, bottom, right) 184 | 185 | # ************************ 有目标物时输出的目标物的信息 (目标边框的中心点坐标 x 和 y )******************************************************* 186 | 187 | # #预测框的中心点 188 | # x = ((right - left)/2 + left) 189 | # #print("x的值是:{}".format(x)) 190 | # y = ((bottom - top)/2 + top) 191 | # xy.append(x) 192 | # #print("xy的值是:{}".format(xy)) 193 | # xy.append(y) 194 | # 195 | # xy.append(left) 196 | # xy.append(top) 197 | # xy.append(right) 198 | # xy.append(bottom) 199 | xy.append(top) 200 | xy.append(left) 201 | xy.append(bottom) 202 | xy.append(right) 203 | labels.append(predicted_class) 204 | print("原始xy的值:{}".format(xy)) 205 | # ************************************************************** 206 | if top - label_size[1] >= 0: 207 | text_origin = np.array([left, top - label_size[1]]) 208 | else: 209 | text_origin = np.array([left, top + 1]) 210 
| 211 | for i in range(thickness): 212 | draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c]) 213 | draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c]) 214 | draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font) 215 | del draw 216 | 217 | return labels,image,xy 218 | 219 | def get_FPS(self, image, test_interval): 220 | image_shape = np.array(np.shape(image)[0:2]) 221 | #---------------------------------------------------------# 222 | # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 223 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 224 | #---------------------------------------------------------# 225 | image = cvtColor(image) 226 | #---------------------------------------------------------# 227 | # 给图像增加灰条,实现不失真的resize 228 | # 也可以直接resize进行识别 229 | #---------------------------------------------------------# 230 | image_data = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image) 231 | #---------------------------------------------------------# 232 | # 添加上batch_size维度 233 | #---------------------------------------------------------# 234 | image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) 235 | 236 | with torch.no_grad(): 237 | images = torch.from_numpy(image_data) 238 | if self.cuda: 239 | images = images.cuda() 240 | #---------------------------------------------------------# 241 | # 将图像输入网络当中进行预测! 242 | #---------------------------------------------------------# 243 | outputs = self.net(images) 244 | outputs = decode_outputs(outputs, self.input_shape) 245 | #---------------------------------------------------------# 246 | # 将预测框进行堆叠,然后进行非极大抑制 247 | #---------------------------------------------------------# 248 | results = non_max_suppression(outputs, self.num_classes, self.input_shape, 249 | image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou) 250 | 251 | t1 = time.time() 252 | for _ in range(test_interval): 253 | with torch.no_grad(): 254 | #---------------------------------------------------------# 255 | # 将图像输入网络当中进行预测! 
256 | #---------------------------------------------------------# 257 | outputs = self.net(images) 258 | outputs = decode_outputs(outputs, self.input_shape) 259 | #---------------------------------------------------------# 260 | # 将预测框进行堆叠,然后进行非极大抑制 261 | #---------------------------------------------------------# 262 | results = non_max_suppression(outputs, self.num_classes, self.input_shape, 263 | image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou) 264 | 265 | t2 = time.time() 266 | tact_time = (t2 - t1) / test_interval 267 | return tact_time 268 | 269 | def get_map_txt(self, image_id, image, class_names, map_out_path): 270 | f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w") 271 | image_shape = np.array(np.shape(image)[0:2]) 272 | #---------------------------------------------------------# 273 | # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 274 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 275 | #---------------------------------------------------------# 276 | image = cvtColor(image) 277 | #---------------------------------------------------------# 278 | # 给图像增加灰条,实现不失真的resize 279 | # 也可以直接resize进行识别 280 | #---------------------------------------------------------# 281 | image_data = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image) 282 | #---------------------------------------------------------# 283 | # 添加上batch_size维度 284 | #---------------------------------------------------------# 285 | image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) 286 | 287 | with torch.no_grad(): 288 | images = torch.from_numpy(image_data) 289 | if self.cuda: 290 | images = images.cuda() 291 | #---------------------------------------------------------# 292 | # 将图像输入网络当中进行预测! 
293 | #---------------------------------------------------------# 294 | outputs = self.net(images) 295 | outputs = decode_outputs(outputs, self.input_shape) 296 | #---------------------------------------------------------# 297 | # 将预测框进行堆叠,然后进行非极大抑制 298 | #---------------------------------------------------------# 299 | results = non_max_suppression(outputs, self.num_classes, self.input_shape, 300 | image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou) 301 | 302 | if results[0] is None: 303 | return 304 | 305 | top_label = np.array(results[0][:, 6], dtype = 'int32') 306 | top_conf = results[0][:, 4] * results[0][:, 5] 307 | top_boxes = results[0][:, :4] 308 | 309 | for i, c in list(enumerate(top_label)): 310 | predicted_class = self.class_names[int(c)] 311 | box = top_boxes[i] 312 | score = str(top_conf[i]) 313 | 314 | top, left, bottom, right = box 315 | if predicted_class not in class_names: 316 | continue 317 | 318 | f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom)))) 319 | 320 | f.close() 321 | return 322 | -------------------------------------------------------------------------------- /result_1.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxy293/Camera-YOLOX/a7b3e9bc4daa6e0a879284833740ee5e714c39e1/result_1.bmp -------------------------------------------------------------------------------- /result_2.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxy293/Camera-YOLOX/a7b3e9bc4daa6e0a879284833740ee5e714c39e1/result_2.bmp -------------------------------------------------------------------------------- /result_3.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxy293/Camera-YOLOX/a7b3e9bc4daa6e0a879284833740ee5e714c39e1/result_3.bmp -------------------------------------------------------------------------------- /result_4.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxy293/Camera-YOLOX/a7b3e9bc4daa6e0a879284833740ee5e714c39e1/result_4.bmp --------------------------------------------------------------------------------
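To close, a minimal usage sketch of the modified `YOLO.detect_image()` listed above. This is not the repository's predict_one_point.py; the input image path and variable names are illustrative, and it assumes the weight, class and font files named in `_defaults` are present under model_data/.

```python
from PIL import Image
from yolo import YOLO

yolo = YOLO()                                        # built from the _defaults shown in yolo.py
frame = Image.open("stereo/yolo/xiaozheng_hou.jpg")  # any single camera frame will do

result = yolo.detect_image(frame)
if not isinstance(result, tuple):
    # detect_image() returns only the drawn image when nothing is detected
    print("no detections")
else:
    labels, drawn, xy = result                       # xy is flat: top, left, bottom, right per box
    for i, name in enumerate(labels):
        top, left, bottom, right = xy[4 * i: 4 * i + 4]
        # centre of the predicted box (cf. the commented-out block inside detect_image)
        cx, cy = (left + right) / 2.0, (top + bottom) / 2.0
        print(name, "box:", (left, top, right, bottom), "centre:", (cx, cy))
    drawn.show()
```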