├── .gitignore ├── bilibili-guide.md ├── cv-attention ├── A2Attention.py ├── BAM.py ├── Biformer.py ├── CAA.py ├── CBAM.py ├── CPCA.py ├── CloAttention.py ├── CoTAttention.py ├── CoordAttention.py ├── DAttention.py ├── ECA.py ├── ELA.py ├── EMA.py ├── EffectiveSE.py ├── GAM.py ├── GC.py ├── GE.py ├── LSKA.py ├── LSKBlock.py ├── MHSA.py ├── MLCA.py ├── MobileViTAttention.py ├── ParNetAttention.py ├── PolarizedSelfAttention.py ├── S2Attention.py ├── SE.py ├── SGE.py ├── SK.py ├── SequentialSelfAttention.py ├── ShuffleAttention.py ├── SimAM.py ├── TripletAttention.py └── readme.md ├── cvpr2025-deim-project.md ├── damo-yolo ├── Annotations │ └── ReadMe.md ├── JPEGImages │ └── ReadMe.md ├── readme.md └── voc2coco.py ├── data-offline-aug ├── object_detection_data_aug.py ├── readme.md └── segment_data_aug.py ├── mmdet-course ├── config │ ├── atss_r50_fpn_dyhead_1x_visdrone.py │ ├── cascade-rcnn_r50_fpn_1x_visdrone.py │ ├── ddq-detr-4scale_r50_8xb2-12e_visdrone.py │ ├── dino-4scale_r50_8xb2-12e_visdrone.py │ ├── faster-rcnn_r50_fpn_ciou_1x_visdrone.py │ ├── gfl_r50_fpn_1x_visdrone.py │ ├── retinanet_r50_fpn_1x_visdrone.py │ ├── rtmdet_tiny_8xb32-300e_visdrone.py │ ├── tood_r50_fpn_1x_visdrone.py │ └── yolox_tiny_8xb8-300e_visdrone.py ├── mmdet2yolo.py ├── readme.md └── yolo2coco.py ├── mustread-paper ├── MobileNets.pdf └── RTMDet.pdf ├── mutilmodel-project.md ├── objectdetection-tricks ├── readme.md ├── tricks_1.py ├── tricks_10.py ├── tricks_11.py ├── tricks_12.py ├── tricks_13.py ├── tricks_14.py ├── tricks_2.py ├── tricks_3.py ├── tricks_4.py ├── tricks_5.py ├── tricks_6.py ├── tricks_7.py ├── tricks_8.py └── tricks_9.py ├── readme.md ├── visdrone2019-benchmark └── readme.md ├── yolo-gradcam ├── README.md ├── yolov11_heatmap.py ├── yolov5_heatmap.py ├── yolov7_heatmap.py ├── yolov8_heatmap.py └── yolov9_heatmap.py ├── yolo-improve ├── CAM.py ├── iou.py ├── paper.md ├── readme.md ├── rtdetr-compress.md ├── rtdetr-distill.md ├── rtdetr-project.md ├── yolov11-project.md ├── yolov5-AIFI.py ├── yolov5-AUX │ ├── benchmarks.py │ ├── data │ │ ├── Argoverse.yaml │ │ ├── GlobalWheat2020.yaml │ │ ├── ImageNet.yaml │ │ ├── Objects365.yaml │ │ ├── SKU-110K.yaml │ │ ├── VOC.yaml │ │ ├── VisDrone.yaml │ │ ├── coco.yaml │ │ ├── coco128-seg.yaml │ │ ├── coco128.yaml │ │ ├── hyps │ │ │ ├── hyp.Objects365.yaml │ │ │ ├── hyp.VOC.yaml │ │ │ ├── hyp.no-augmentation.yaml │ │ │ ├── hyp.scratch-high.yaml │ │ │ ├── hyp.scratch-low.yaml │ │ │ └── hyp.scratch-med.yaml │ │ ├── images │ │ │ ├── bus.jpg │ │ │ └── zidane.jpg │ │ ├── scripts │ │ │ ├── download_weights.sh │ │ │ ├── get_coco.sh │ │ │ ├── get_coco128.sh │ │ │ └── get_imagenet.sh │ │ └── xView.yaml │ ├── detect.py │ ├── export.py │ ├── hubconf.py │ ├── models │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── common.cpython-38.pyc │ │ │ ├── experimental.cpython-38.pyc │ │ │ └── yolo.cpython-38.pyc │ │ ├── common.py │ │ ├── experimental.py │ │ ├── hub │ │ │ ├── anchors.yaml │ │ │ ├── yolov3-spp.yaml │ │ │ ├── yolov3-tiny.yaml │ │ │ ├── yolov3.yaml │ │ │ ├── yolov5-bifpn.yaml │ │ │ ├── yolov5-fpn.yaml │ │ │ ├── yolov5-p2.yaml │ │ │ ├── yolov5-p34.yaml │ │ │ ├── yolov5-p6.yaml │ │ │ ├── yolov5-p7.yaml │ │ │ ├── yolov5-panet.yaml │ │ │ ├── yolov5l6.yaml │ │ │ ├── yolov5m6.yaml │ │ │ ├── yolov5n6.yaml │ │ │ ├── yolov5s-LeakyReLU.yaml │ │ │ ├── yolov5s-ghost.yaml │ │ │ ├── yolov5s-transformer.yaml │ │ │ ├── yolov5s6.yaml │ │ │ └── yolov5x6.yaml │ │ ├── segment │ │ │ ├── yolov5l-seg.yaml │ │ │ ├── yolov5m-seg.yaml │ │ │ ├── 
yolov5n-seg.yaml │ │ │ ├── yolov5s-seg.yaml │ │ │ └── yolov5x-seg.yaml │ │ ├── tf.py │ │ ├── yolo.py │ │ ├── yolov5_aux.yaml │ │ ├── yolov5l.yaml │ │ ├── yolov5m.yaml │ │ ├── yolov5n.yaml │ │ ├── yolov5s.yaml │ │ └── yolov5x.yaml │ ├── train.py │ ├── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── augmentations.cpython-38.pyc │ │ │ ├── autoanchor.cpython-38.pyc │ │ │ ├── autobatch.cpython-38.pyc │ │ │ ├── callbacks.cpython-38.pyc │ │ │ ├── dataloaders.cpython-38.pyc │ │ │ ├── downloads.cpython-38.pyc │ │ │ ├── general.cpython-38.pyc │ │ │ ├── loss.cpython-38.pyc │ │ │ ├── metrics.cpython-38.pyc │ │ │ ├── plots.cpython-38.pyc │ │ │ └── torch_utils.cpython-38.pyc │ │ ├── activations.py │ │ ├── augmentations.py │ │ ├── autoanchor.py │ │ ├── autobatch.py │ │ ├── aws │ │ │ ├── __init__.py │ │ │ ├── mime.sh │ │ │ ├── resume.py │ │ │ └── userdata.sh │ │ ├── callbacks.py │ │ ├── dataloaders.py │ │ ├── docker │ │ │ ├── Dockerfile │ │ │ ├── Dockerfile-arm64 │ │ │ └── Dockerfile-cpu │ │ ├── downloads.py │ │ ├── flask_rest_api │ │ │ ├── README.md │ │ │ ├── example_request.py │ │ │ └── restapi.py │ │ ├── general.py │ │ ├── google_app_engine │ │ │ ├── Dockerfile │ │ │ ├── additional_requirements.txt │ │ │ └── app.yaml │ │ ├── loggers │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ └── __init__.cpython-38.pyc │ │ │ ├── clearml │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ └── clearml_utils.cpython-38.pyc │ │ │ │ ├── clearml_utils.py │ │ │ │ └── hpo.py │ │ │ ├── comet │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ └── comet_utils.cpython-38.pyc │ │ │ │ ├── comet_utils.py │ │ │ │ ├── hpo.py │ │ │ │ └── optimizer_config.json │ │ │ └── wandb │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── wandb_utils.cpython-38.pyc │ │ │ │ └── wandb_utils.py │ │ ├── loss.py │ │ ├── metrics.py │ │ ├── plots.py │ │ ├── segment │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── general.cpython-38.pyc │ │ │ ├── augmentations.py │ │ │ ├── dataloaders.py │ │ │ ├── general.py │ │ │ ├── loss.py │ │ │ ├── metrics.py │ │ │ └── plots.py │ │ ├── torch_utils.py │ │ └── triton.py │ ├── val.py │ └── yolov5-AUX.zip ├── yolov5-C3RFEM.py ├── yolov5-CARAFE.py ├── yolov5-CCFM.py ├── yolov5-ContextAggregation.py ├── yolov5-CoordConv.py ├── yolov5-DBB.py ├── yolov5-DCN.py ├── yolov5-DCNV3 │ ├── commod.py │ └── ops_dcnv3 │ │ ├── DCNv3.egg-info │ │ ├── PKG-INFO │ │ ├── SOURCES.txt │ │ ├── dependency_links.txt │ │ └── top_level.txt │ │ ├── build │ │ ├── lib.linux-x86_64-cpython-38 │ │ │ ├── DCNv3.cpython-38-x86_64-linux-gnu.so │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── dcnv3_func.py │ │ │ └── modules │ │ │ │ ├── __init__.py │ │ │ │ └── dcnv3.py │ │ └── temp.linux-x86_64-cpython-38 │ │ │ └── home │ │ │ └── hjj │ │ │ └── Desktop │ │ │ └── python_project │ │ │ └── InternImage-master │ │ │ └── classification │ │ │ └── ops_dcnv3 │ │ │ └── src │ │ │ ├── cpu │ │ │ └── dcnv3_cpu.o │ │ │ ├── cuda │ │ │ └── dcnv3_cuda.o │ │ │ └── vision.o │ │ ├── dist │ │ └── DCNv3-1.0-py3.8-linux-x86_64.egg │ │ ├── functions │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── dcnv3_func.cpython-38.pyc │ │ └── dcnv3_func.py │ │ ├── make.sh │ │ ├── modules │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── dcnv3.cpython-38.pyc 
│ │ └── dcnv3.py │ │ ├── setup.py │ │ ├── src │ │ ├── cpu │ │ │ ├── dcnv3_cpu.cpp │ │ │ └── dcnv3_cpu.h │ │ ├── cuda │ │ │ ├── dcnv3_cuda.cu │ │ │ ├── dcnv3_cuda.h │ │ │ └── dcnv3_im2col_cuda.cuh │ │ ├── dcnv3.h │ │ └── vision.cpp │ │ └── test.py ├── yolov5-DSConv.py ├── yolov5-DecoupledHead.py ├── yolov5-DySnakeConv.py ├── yolov5-EVC.py ├── yolov5-FasterBlock.py ├── yolov5-GFPN │ ├── extra_modules.py │ └── yolov5_GFPN.yaml ├── yolov5-GOLDYOLO │ ├── common.py │ ├── yolo.py │ ├── yolov5n-goldyolo.yaml │ ├── yolov7-goldyolo.yaml │ └── yolov7-tiny-goldyolo.yaml ├── yolov5-NWD.py ├── yolov5-OTA │ └── loss.py ├── yolov5-RepNCSPELAN.py ├── yolov5-SAConv.py ├── yolov5-TSCODE.py ├── yolov5-aLRPLoss.py ├── yolov5-asf.py ├── yolov5-backbone │ ├── CVPR2023-EfficientViT │ │ └── EfficientViT.py │ ├── CVPR2024-StarNet │ │ └── starnet.py │ ├── ConvNextV2 │ │ └── convnextv2.py │ ├── EMO │ │ └── emo.py │ ├── EfficientFormerV2 │ │ └── EfficientFormerV2.py │ ├── EfficientViT │ │ └── efficientViT.py │ ├── FocalNet │ │ └── FocalNet.py │ ├── LSKNet │ │ └── lsknet.py │ ├── MobileNetV4 │ │ └── mobilenetv4.py │ ├── NextViT │ │ └── NextViT.py │ ├── ODConv │ │ ├── __pycache__ │ │ │ ├── od_mobilenetv2.cpython-38.pyc │ │ │ ├── od_resnet.cpython-38.pyc │ │ │ └── odconv.cpython-38.pyc │ │ ├── od_mobilenetv2.py │ │ ├── od_resnet.py │ │ └── odconv.py │ ├── ODConvFuse │ │ ├── __pycache__ │ │ │ ├── od_mobilenetv2.cpython-38.pyc │ │ │ ├── od_resnet.cpython-38.pyc │ │ │ └── odconv.cpython-38.pyc │ │ ├── od_mobilenetv2.py │ │ ├── od_resnet.py │ │ └── odconv.py │ ├── PoolFormer │ │ └── poolformer.py │ ├── RIFormer │ │ └── RIFormer.py │ ├── RepViT │ │ └── repvit.py │ ├── SwinTransformer │ │ └── SwinTransformer.py │ ├── UniRepLKNet │ │ └── unireplknet.py │ ├── VanillaNet │ │ └── VanillaNet.py │ ├── fasternet │ │ ├── faster_cfg │ │ │ ├── fasternet_l.yaml │ │ │ ├── fasternet_m.yaml │ │ │ ├── fasternet_s.yaml │ │ │ ├── fasternet_t0.yaml │ │ │ ├── fasternet_t1.yaml │ │ │ └── fasternet_t2.yaml │ │ └── fasternet.py │ ├── inceptionnext │ │ └── inceptionnext.py │ ├── main.py │ ├── yolo.py │ └── yolov5-custom.yaml ├── yolov5-dyhead.py ├── yolov5-res2block.py ├── yolov5-softnms.py ├── yolov5v7-light.md ├── yolov7-CoordConv.py ├── yolov7-DBB.py ├── yolov7-DCN.py ├── yolov7-DCNV3.py ├── yolov7-DSConv.py ├── yolov7-DecoupledHead.py ├── yolov7-DySnakeConv.py ├── yolov7-EVC.py ├── yolov7-MPDiou.py ├── yolov7-NWD.py ├── yolov7-PConv.py ├── yolov7-RFEM.py ├── yolov7-RepNCSPELAN.py ├── yolov7-SAConv.py ├── yolov7-asf.py ├── yolov7-head │ ├── yolov7-tiny-5-heads.yaml │ ├── yolov7-tiny-P2.yaml │ └── yolov7-tiny-P6.yaml ├── yolov7-iou.py ├── yolov7-odconv.py ├── yolov7-slimneck.py ├── yolov7-softnms.py ├── yolov8-DCN.py ├── yolov8-compress.md ├── yolov8-distill.md ├── yolov8-erf.py ├── yolov8-objectcount.py ├── yolov8-track.py ├── yolov8.py ├── yolov8v10-project.md └── yolov9-backbone │ ├── yolo.py │ └── yolov9-c-custom.yaml └── yolo ├── data.yaml ├── dataset ├── VOCdevkit │ ├── Annotations │ │ └── ReadMe.md │ ├── JPEGImages │ │ └── ReadMe.md │ └── txt │ │ └── ReadMe.md ├── split_data.py └── xml2txt.py └── readme.md /cv-attention/A2Attention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | from torch.nn import functional as F 6 | 7 | 8 | 9 | class DoubleAttention(nn.Module): 10 | 11 | def __init__(self, in_channels,c_m=128,c_n=128,reconstruct = True): 12 | super().__init__() 13 | self.in_channels=in_channels 14 | 
self.reconstruct = reconstruct 15 | self.c_m=c_m 16 | self.c_n=c_n 17 | self.convA=nn.Conv2d(in_channels,c_m,1) 18 | self.convB=nn.Conv2d(in_channels,c_n,1) 19 | self.convV=nn.Conv2d(in_channels,c_n,1) 20 | if self.reconstruct: 21 | self.conv_reconstruct = nn.Conv2d(c_m, in_channels, kernel_size = 1) 22 | self.init_weights() 23 | 24 | 25 | def init_weights(self): 26 | for m in self.modules(): 27 | if isinstance(m, nn.Conv2d): 28 | init.kaiming_normal_(m.weight, mode='fan_out') 29 | if m.bias is not None: 30 | init.constant_(m.bias, 0) 31 | elif isinstance(m, nn.BatchNorm2d): 32 | init.constant_(m.weight, 1) 33 | init.constant_(m.bias, 0) 34 | elif isinstance(m, nn.Linear): 35 | init.normal_(m.weight, std=0.001) 36 | if m.bias is not None: 37 | init.constant_(m.bias, 0) 38 | 39 | def forward(self, x): 40 | b, c, h,w=x.shape 41 | assert c==self.in_channels 42 | A=self.convA(x) #b,c_m,h,w 43 | B=self.convB(x) #b,c_n,h,w 44 | V=self.convV(x) #b,c_n,h,w 45 | tmpA=A.view(b,self.c_m,-1) 46 | attention_maps=F.softmax(B.view(b,self.c_n,-1)) 47 | attention_vectors=F.softmax(V.view(b,self.c_n,-1)) 48 | # step 1: feature gating 49 | global_descriptors=torch.bmm(tmpA,attention_maps.permute(0,2,1)) #b.c_m,c_n 50 | # step 2: feature distribution 51 | tmpZ = global_descriptors.matmul(attention_vectors) #b,c_m,h*w 52 | tmpZ=tmpZ.view(b,self.c_m,h,w) #b,c_m,h,w 53 | if self.reconstruct: 54 | tmpZ=self.conv_reconstruct(tmpZ) 55 | 56 | return tmpZ 57 | 58 | 59 | if __name__ == '__main__': 60 | input=torch.randn(50,512,7,7) 61 | a2 = DoubleAttention(512) 62 | output=a2(input) 63 | print(output.shape) -------------------------------------------------------------------------------- /cv-attention/CAA.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | def autopad(k, p=None, d=1): # kernel, padding, dilation 4 | """Pad to 'same' shape outputs.""" 5 | if d > 1: 6 | k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size 7 | if p is None: 8 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 9 | return p 10 | 11 | 12 | class Conv(nn.Module): 13 | """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation).""" 14 | 15 | default_act = nn.SiLU() # default activation 16 | 17 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True): 18 | """Initialize Conv layer with given arguments including activation.""" 19 | super().__init__() 20 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False) 21 | self.bn = nn.BatchNorm2d(c2) 22 | self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() 23 | 24 | def forward(self, x): 25 | """Apply convolution, batch normalization and activation to input tensor.""" 26 | return self.act(self.bn(self.conv(x))) 27 | 28 | def forward_fuse(self, x): 29 | """Perform transposed convolution of 2D data.""" 30 | return self.act(self.conv(x)) 31 | 32 | class CAA(nn.Module): 33 | def __init__(self, ch, h_kernel_size = 11, v_kernel_size = 11) -> None: 34 | super().__init__() 35 | 36 | self.avg_pool = nn.AvgPool2d(7, 1, 3) 37 | self.conv1 = Conv(ch, ch) 38 | self.h_conv = nn.Conv2d(ch, ch, (1, h_kernel_size), 1, (0, h_kernel_size // 2), 1, ch) 39 | self.v_conv = nn.Conv2d(ch, ch, (v_kernel_size, 1), 1, (v_kernel_size // 2, 0), 1, ch) 40 | self.conv2 = Conv(ch, ch) 41 | self.act = nn.Sigmoid() 42 | 43 | def forward(self, x): 44 | attn_factor = 
self.act(self.conv2(self.v_conv(self.h_conv(self.conv1(self.avg_pool(x)))))) 45 | return attn_factor * x -------------------------------------------------------------------------------- /cv-attention/CoTAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import flatten, nn 4 | from torch.nn import init 5 | from torch.nn.modules.activation import ReLU 6 | from torch.nn.modules.batchnorm import BatchNorm2d 7 | from torch.nn import functional as F 8 | 9 | 10 | class CoTAttention(nn.Module): 11 | 12 | def __init__(self, dim=512, kernel_size=3): 13 | super().__init__() 14 | self.dim = dim 15 | self.kernel_size = kernel_size 16 | 17 | self.key_embed = nn.Sequential( 18 | nn.Conv2d(dim, dim, kernel_size=kernel_size, padding=kernel_size // 2, groups=4, bias=False), 19 | nn.BatchNorm2d(dim), 20 | nn.ReLU() 21 | ) 22 | self.value_embed = nn.Sequential( 23 | nn.Conv2d(dim, dim, 1, bias=False), 24 | nn.BatchNorm2d(dim) 25 | ) 26 | 27 | factor = 4 28 | self.attention_embed = nn.Sequential( 29 | nn.Conv2d(2 * dim, 2 * dim // factor, 1, bias=False), 30 | nn.BatchNorm2d(2 * dim // factor), 31 | nn.ReLU(), 32 | nn.Conv2d(2 * dim // factor, kernel_size * kernel_size * dim, 1) 33 | ) 34 | 35 | def forward(self, x): 36 | bs, c, h, w = x.shape 37 | k1 = self.key_embed(x) # bs,c,h,w 38 | v = self.value_embed(x).view(bs, c, -1) # bs,c,h,w 39 | 40 | y = torch.cat([k1, x], dim=1) # bs,2c,h,w 41 | att = self.attention_embed(y) # bs,c*k*k,h,w 42 | att = att.reshape(bs, c, self.kernel_size * self.kernel_size, h, w) 43 | att = att.mean(2, keepdim=False).view(bs, c, -1) # bs,c,h*w 44 | k2 = F.softmax(att, dim=-1) * v 45 | k2 = k2.view(bs, c, h, w) 46 | 47 | return k1 + k2 48 | 49 | 50 | if __name__ == '__main__': 51 | input = torch.randn(50, 512, 7, 7) 52 | cot = CoTAttention(dim=512, kernel_size=3) 53 | output = cot(input) 54 | print(output.shape) 55 | -------------------------------------------------------------------------------- /cv-attention/CoordAttention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class h_sigmoid(nn.Module): 7 | def __init__(self, inplace=True): 8 | super(h_sigmoid, self).__init__() 9 | self.relu = nn.ReLU6(inplace=inplace) 10 | 11 | def forward(self, x): 12 | return self.relu(x + 3) / 6 13 | 14 | 15 | class h_swish(nn.Module): 16 | def __init__(self, inplace=True): 17 | super(h_swish, self).__init__() 18 | self.sigmoid = h_sigmoid(inplace=inplace) 19 | 20 | def forward(self, x): 21 | return x * self.sigmoid(x) 22 | 23 | 24 | class CoordAtt(nn.Module): 25 | def __init__(self, inp, reduction=32): 26 | super(CoordAtt, self).__init__() 27 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 28 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 29 | 30 | mip = max(8, inp // reduction) 31 | 32 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 33 | self.bn1 = nn.BatchNorm2d(mip) 34 | self.act = h_swish() 35 | 36 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 37 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 38 | 39 | def forward(self, x): 40 | identity = x 41 | 42 | n, c, h, w = x.size() 43 | x_h = self.pool_h(x) 44 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 45 | 46 | y = torch.cat([x_h, x_w], dim=2) 47 | y = self.conv1(y) 48 | y = self.bn1(y) 49 | y = self.act(y) 50 | 51 | x_h, x_w = torch.split(y, [h, w], dim=2) 
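# x_h is the per-row descriptor (n, mip, h, 1) and x_w the per-column descriptor (n, mip, w, 1) recovered from the shared 1x1-conv output; x_w is permuted back to (n, mip, 1, w) on the next line before the horizontal and vertical attention gates are computed.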
52 | x_w = x_w.permute(0, 1, 3, 2) 53 | 54 | a_h = self.conv_h(x_h).sigmoid() 55 | a_w = self.conv_w(x_w).sigmoid() 56 | 57 | out = identity * a_w * a_h 58 | 59 | return out 60 | 61 | if __name__ == '__main__': 62 | input = torch.randn(50, 512, 7, 7) 63 | pna = CoordAtt(inp=512) 64 | output = pna(input) 65 | print(output.shape) -------------------------------------------------------------------------------- /cv-attention/ECA.py: -------------------------------------------------------------------------------- 1 | import torch, math 2 | from torch import nn 3 | 4 | class EfficientChannelAttention(nn.Module): # Efficient Channel Attention module 5 | def __init__(self, c, b=1, gamma=2): 6 | super(EfficientChannelAttention, self).__init__() 7 | t = int(abs((math.log(c, 2) + b) / gamma)) 8 | k = t if t % 2 else t + 1 9 | 10 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 11 | self.conv1 = nn.Conv1d(1, 1, kernel_size=k, padding=int(k/2), bias=False) 12 | self.sigmoid = nn.Sigmoid() 13 | 14 | def forward(self, x): 15 | out = self.avg_pool(x) 16 | out = self.conv1(out.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 17 | out = self.sigmoid(out) 18 | return out * x 19 | 20 | 21 | if __name__ == '__main__': 22 | input = torch.randn(50, 512, 7, 7) 23 | eca = EfficientChannelAttention(c=512) 24 | output = eca(input) 25 | print(output.shape) -------------------------------------------------------------------------------- /cv-attention/ELA.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class ELA(nn.Module): 4 | def __init__(self, channels) -> None: 5 | super().__init__() 6 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 7 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 8 | self.conv1x1 = nn.Sequential( 9 | nn.Conv1d(channels, channels, 1), 10 | nn.GroupNorm(16, channels), 11 | nn.Sigmoid() 12 | ) 13 | 14 | def forward(self, x): 15 | b, c, h, w = x.size() 16 | x_h = self.conv1x1(self.pool_h(x).reshape((b, c, h))).reshape((b, c, h, 1)) 17 | x_w = self.conv1x1(self.pool_w(x).reshape((b, c, w))).reshape((b, c, 1, w)) 18 | return x * x_h * x_w -------------------------------------------------------------------------------- /cv-attention/EMA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class EMA(nn.Module): 5 | def __init__(self, channels, factor=8): 6 | super(EMA, self).__init__() 7 | self.groups = factor 8 | assert channels // self.groups > 0 9 | self.softmax = nn.Softmax(-1) 10 | self.agp = nn.AdaptiveAvgPool2d((1, 1)) 11 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 12 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 13 | self.gn = nn.GroupNorm(channels // self.groups, channels // self.groups) 14 | self.conv1x1 = nn.Conv2d(channels // self.groups, channels // self.groups, kernel_size=1, stride=1, padding=0) 15 | self.conv3x3 = nn.Conv2d(channels // self.groups, channels // self.groups, kernel_size=3, stride=1, padding=1) 16 | 17 | def forward(self, x): 18 | b, c, h, w = x.size() 19 | group_x = x.reshape(b * self.groups, -1, h, w) # b*g,c//g,h,w 20 | x_h = self.pool_h(group_x) 21 | x_w = self.pool_w(group_x).permute(0, 1, 3, 2) 22 | hw = self.conv1x1(torch.cat([x_h, x_w], dim=2)) 23 | x_h, x_w = torch.split(hw, [h, w], dim=2) 24 | x1 = self.gn(group_x * x_h.sigmoid() * x_w.permute(0, 1, 3, 2).sigmoid()) 25 | x2 = self.conv3x3(group_x) 26 | x11 = self.softmax(self.agp(x1).reshape(b * self.groups, -1, 1).permute(0, 2, 1)) 27 | x12 = 
x2.reshape(b * self.groups, c // self.groups, -1) # b*g, c//g, hw 28 | x21 = self.softmax(self.agp(x2).reshape(b * self.groups, -1, 1).permute(0, 2, 1)) 29 | x22 = x1.reshape(b * self.groups, c // self.groups, -1) # b*g, c//g, hw 30 | weights = (torch.matmul(x11, x12) + torch.matmul(x21, x22)).reshape(b * self.groups, 1, h, w) 31 | return (group_x * weights.sigmoid()).reshape(b, c, h, w) -------------------------------------------------------------------------------- /cv-attention/EffectiveSE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn as nn 3 | from timm.models.layers.create_act import create_act_layer 4 | 5 | 6 | class EffectiveSEModule(nn.Module): 7 | def __init__(self, channels, add_maxpool=False, gate_layer='hard_sigmoid'): 8 | super(EffectiveSEModule, self).__init__() 9 | self.add_maxpool = add_maxpool 10 | self.fc = nn.Conv2d(channels, channels, kernel_size=1, padding=0) 11 | self.gate = create_act_layer(gate_layer) 12 | 13 | def forward(self, x): 14 | x_se = x.mean((2, 3), keepdim=True) 15 | if self.add_maxpool: 16 | # experimental codepath, may remove or change 17 | x_se = 0.5 * x_se + 0.5 * x.amax((2, 3), keepdim=True) 18 | x_se = self.fc(x_se) 19 | return x * self.gate(x_se) 20 | 21 | if __name__ == '__main__': 22 | input=torch.randn(50,512,7,7) 23 | Ese = EffectiveSEModule(512) 24 | output=Ese(input) 25 | print(output.shape) -------------------------------------------------------------------------------- /cv-attention/GAM.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | class GAM_Attention(nn.Module): 5 | def __init__(self, in_channels, rate=4): 6 | super(GAM_Attention, self).__init__() 7 | 8 | self.channel_attention = nn.Sequential( 9 | nn.Linear(in_channels, int(in_channels / rate)), 10 | nn.ReLU(inplace=True), 11 | nn.Linear(int(in_channels / rate), in_channels) 12 | ) 13 | 14 | self.spatial_attention = nn.Sequential( 15 | nn.Conv2d(in_channels, int(in_channels / rate), kernel_size=7, padding=3), 16 | nn.BatchNorm2d(int(in_channels / rate)), 17 | nn.ReLU(inplace=True), 18 | nn.Conv2d(int(in_channels / rate), in_channels, kernel_size=7, padding=3), 19 | nn.BatchNorm2d(in_channels) 20 | ) 21 | 22 | def forward(self, x): 23 | b, c, h, w = x.shape 24 | x_permute = x.permute(0, 2, 3, 1).view(b, -1, c) 25 | x_att_permute = self.channel_attention(x_permute).view(b, h, w, c) 26 | x_channel_att = x_att_permute.permute(0, 3, 1, 2).sigmoid() 27 | 28 | x = x * x_channel_att 29 | 30 | x_spatial_att = self.spatial_attention(x).sigmoid() 31 | out = x * x_spatial_att 32 | 33 | return out 34 | 35 | if __name__ == '__main__': 36 | x = torch.randn(1, 64, 20, 20) 37 | b, c, h, w = x.shape 38 | net = GAM_Attention(in_channels=c) 39 | y = net(x) 40 | print(y.size()) -------------------------------------------------------------------------------- /cv-attention/LSKBlock.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class LSKblock(nn.Module): 5 | def __init__(self, dim): 6 | super().__init__() 7 | self.conv0 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim) 8 | self.conv_spatial = nn.Conv2d(dim, dim, 7, stride=1, padding=9, groups=dim, dilation=3) 9 | self.conv1 = nn.Conv2d(dim, dim//2, 1) 10 | self.conv2 = nn.Conv2d(dim, dim//2, 1) 11 | self.conv_squeeze = nn.Conv2d(2, 2, 7, padding=3) 12 | self.conv = nn.Conv2d(dim//2, dim, 1) 13 | 14 | def 
forward(self, x): 15 | attn1 = self.conv0(x) 16 | attn2 = self.conv_spatial(attn1) 17 | 18 | attn1 = self.conv1(attn1) 19 | attn2 = self.conv2(attn2) 20 | 21 | attn = torch.cat([attn1, attn2], dim=1) 22 | avg_attn = torch.mean(attn, dim=1, keepdim=True) 23 | max_attn, _ = torch.max(attn, dim=1, keepdim=True) 24 | agg = torch.cat([avg_attn, max_attn], dim=1) 25 | sig = self.conv_squeeze(agg).sigmoid() 26 | attn = attn1 * sig[:,0,:,:].unsqueeze(1) + attn2 * sig[:,1,:,:].unsqueeze(1) 27 | attn = self.conv(attn) 28 | return x * attn -------------------------------------------------------------------------------- /cv-attention/MHSA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class MHSA(nn.Module): 5 | def __init__(self, n_dims, width=14, height=14, heads=4, pos_emb=False): 6 | super(MHSA, self).__init__() 7 | 8 | self.heads = heads 9 | self.query = nn.Conv2d(n_dims, n_dims, kernel_size=1) 10 | self.key = nn.Conv2d(n_dims, n_dims, kernel_size=1) 11 | self.value = nn.Conv2d(n_dims, n_dims, kernel_size=1) 12 | self.pos = pos_emb 13 | if self.pos: 14 | self.rel_h_weight = nn.Parameter(torch.randn([1, heads, (n_dims) // heads, 1, int(height)]), 15 | requires_grad=True) 16 | self.rel_w_weight = nn.Parameter(torch.randn([1, heads, (n_dims) // heads, int(width), 1]), 17 | requires_grad=True) 18 | self.softmax = nn.Softmax(dim=-1) 19 | 20 | def forward(self, x): 21 | n_batch, C, width, height = x.size() 22 | q = self.query(x).view(n_batch, self.heads, C // self.heads, -1) 23 | k = self.key(x).view(n_batch, self.heads, C // self.heads, -1) 24 | v = self.value(x).view(n_batch, self.heads, C // self.heads, -1) 25 | content_content = torch.matmul(q.permute(0, 1, 3, 2), k) # 1,C,h*w,h*w 26 | c1, c2, c3, c4 = content_content.size() 27 | if self.pos: 28 | content_position = (self.rel_h_weight + self.rel_w_weight).view(1, self.heads, C // self.heads, -1).permute( 29 | 0, 1, 3, 2) # 1,4,1024,64 30 | 31 | content_position = torch.matmul(content_position, q) # ([1, 4, 1024, 256]) 32 | content_position = content_position if ( 33 | content_content.shape == content_position.shape) else content_position[:, :, :c3, ] 34 | assert (content_content.shape == content_position.shape) 35 | energy = content_content + content_position 36 | else: 37 | energy = content_content 38 | attention = self.softmax(energy) 39 | out = torch.matmul(v, attention.permute(0, 1, 3, 2)) # 1,4,256,64 40 | out = out.view(n_batch, C, width, height) 41 | return out 42 | 43 | if __name__ == '__main__': 44 | input = torch.randn(50, 512, 7, 7) 45 | mhsa = MHSA(n_dims=512) 46 | output = mhsa(input) 47 | print(output.shape) -------------------------------------------------------------------------------- /cv-attention/MLCA.py: -------------------------------------------------------------------------------- 1 | import math, torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | class MLCA(nn.Module): 6 | def __init__(self, in_size, local_size=5, gamma = 2, b = 1,local_weight=0.5): 7 | super(MLCA, self).__init__() 8 | 9 | # ECA 计算方法 10 | self.local_size=local_size 11 | self.gamma = gamma 12 | self.b = b 13 | t = int(abs(math.log(in_size, 2) + self.b) / self.gamma) # eca gamma=2 14 | k = t if t % 2 else t + 1 15 | 16 | self.conv = nn.Conv1d(1, 1, kernel_size=k, padding=(k - 1) // 2, bias=False) 17 | self.conv_local = nn.Conv1d(1, 1, kernel_size=k, padding=(k - 1) // 2, bias=False) 18 | 19 | self.local_weight=local_weight 20 | 21 | 
self.local_arv_pool = nn.AdaptiveAvgPool2d(local_size) 22 | self.global_arv_pool=nn.AdaptiveAvgPool2d(1) 23 | 24 | def forward(self, x): 25 | local_arv=self.local_arv_pool(x) 26 | global_arv=self.global_arv_pool(local_arv) 27 | 28 | b,c,m,n = x.shape 29 | b_local, c_local, m_local, n_local = local_arv.shape 30 | 31 | # (b,c,local_size,local_size) -> (b,c,local_size*local_size) -> (b,local_size*local_size,c) -> (b,1,local_size*local_size*c) 32 | temp_local= local_arv.view(b, c_local, -1).transpose(-1, -2).reshape(b, 1, -1) 33 | # (b,c,1,1) -> (b,c,1) -> (b,1,c) 34 | temp_global = global_arv.view(b, c, -1).transpose(-1, -2) 35 | 36 | y_local = self.conv_local(temp_local) 37 | y_global = self.conv(temp_global) 38 | 39 | # (b,c,local_size,local_size) <- (b,c,local_size*local_size)<-(b,local_size*local_size,c) <- (b,1,local_size*local_size*c) 40 | y_local_transpose=y_local.reshape(b, self.local_size * self.local_size,c).transpose(-1,-2).view(b, c, self.local_size , self.local_size) 41 | # (b,1,c) -> (b,c,1) -> (b,c,1,1) 42 | y_global_transpose = y_global.transpose(-1,-2).unsqueeze(-1) 43 | 44 | # 反池化 45 | att_local = y_local_transpose.sigmoid() 46 | att_global = F.adaptive_avg_pool2d(y_global_transpose.sigmoid(),[self.local_size, self.local_size]) 47 | att_all = F.adaptive_avg_pool2d(att_global*(1-self.local_weight)+(att_local*self.local_weight), [m, n]) 48 | 49 | x = x * att_all 50 | return x 51 | 52 | if __name__ == '__main__': 53 | attention = MLCA(in_size=256) 54 | inputs = torch.randn((2, 256, 16, 16)) 55 | result = attention(inputs) 56 | print(result.size()) -------------------------------------------------------------------------------- /cv-attention/ParNetAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | 7 | class ParNetAttention(nn.Module): 8 | 9 | def __init__(self, channel=512): 10 | super().__init__() 11 | self.sse = nn.Sequential( 12 | nn.AdaptiveAvgPool2d(1), 13 | nn.Conv2d(channel, channel, kernel_size=1), 14 | nn.Sigmoid() 15 | ) 16 | 17 | self.conv1x1 = nn.Sequential( 18 | nn.Conv2d(channel, channel, kernel_size=1), 19 | nn.BatchNorm2d(channel) 20 | ) 21 | self.conv3x3 = nn.Sequential( 22 | nn.Conv2d(channel, channel, kernel_size=3, padding=1), 23 | nn.BatchNorm2d(channel) 24 | ) 25 | self.silu = nn.SiLU() 26 | 27 | def forward(self, x): 28 | b, c, _, _ = x.size() 29 | x1 = self.conv1x1(x) 30 | x2 = self.conv3x3(x) 31 | x3 = self.sse(x) * x 32 | y = self.silu(x1 + x2 + x3) 33 | return y 34 | 35 | 36 | if __name__ == '__main__': 37 | input = torch.randn(50, 512, 7, 7) 38 | pna = ParNetAttention(channel=512) 39 | output = pna(input) 40 | print(output.shape) -------------------------------------------------------------------------------- /cv-attention/PolarizedSelfAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | 7 | 8 | class ParallelPolarizedSelfAttention(nn.Module): 9 | 10 | def __init__(self, channel=512): 11 | super().__init__() 12 | self.ch_wv=nn.Conv2d(channel,channel//2,kernel_size=(1,1)) 13 | self.ch_wq=nn.Conv2d(channel,1,kernel_size=(1,1)) 14 | self.softmax_channel=nn.Softmax(1) 15 | self.softmax_spatial=nn.Softmax(-1) 16 | self.ch_wz=nn.Conv2d(channel//2,channel,kernel_size=(1,1)) 17 | self.ln=nn.LayerNorm(channel) 18 | self.sigmoid=nn.Sigmoid() 19 | 
self.sp_wv=nn.Conv2d(channel,channel//2,kernel_size=(1,1)) 20 | self.sp_wq=nn.Conv2d(channel,channel//2,kernel_size=(1,1)) 21 | self.agp=nn.AdaptiveAvgPool2d((1,1)) 22 | 23 | def forward(self, x): 24 | b, c, h, w = x.size() 25 | 26 | #Channel-only Self-Attention 27 | channel_wv=self.ch_wv(x) #bs,c//2,h,w 28 | channel_wq=self.ch_wq(x) #bs,1,h,w 29 | channel_wv=channel_wv.reshape(b,c//2,-1) #bs,c//2,h*w 30 | channel_wq=channel_wq.reshape(b,-1,1) #bs,h*w,1 31 | channel_wq=self.softmax_channel(channel_wq) 32 | channel_wz=torch.matmul(channel_wv,channel_wq).unsqueeze(-1) #bs,c//2,1,1 33 | channel_weight=self.sigmoid(self.ln(self.ch_wz(channel_wz).reshape(b,c,1).permute(0,2,1))).permute(0,2,1).reshape(b,c,1,1) #bs,c,1,1 34 | channel_out=channel_weight*x 35 | 36 | #Spatial-only Self-Attention 37 | spatial_wv=self.sp_wv(x) #bs,c//2,h,w 38 | spatial_wq=self.sp_wq(x) #bs,c//2,h,w 39 | spatial_wq=self.agp(spatial_wq) #bs,c//2,1,1 40 | spatial_wv=spatial_wv.reshape(b,c//2,-1) #bs,c//2,h*w 41 | spatial_wq=spatial_wq.permute(0,2,3,1).reshape(b,1,c//2) #bs,1,c//2 42 | spatial_wq=self.softmax_spatial(spatial_wq) 43 | spatial_wz=torch.matmul(spatial_wq,spatial_wv) #bs,1,h*w 44 | spatial_weight=self.sigmoid(spatial_wz.reshape(b,1,h,w)) #bs,1,h,w 45 | spatial_out=spatial_weight*x 46 | out=spatial_out+channel_out 47 | return out 48 | 49 | 50 | if __name__ == '__main__': 51 | input=torch.randn(1,512,7,7) 52 | psa = ParallelPolarizedSelfAttention(channel=512) 53 | output=psa(input) 54 | print(output.shape) 55 | -------------------------------------------------------------------------------- /cv-attention/SE.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | 7 | 8 | class SEAttention(nn.Module): 9 | 10 | def __init__(self, channel=512,reduction=16): 11 | super().__init__() 12 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 13 | self.fc = nn.Sequential( 14 | nn.Linear(channel, channel // reduction, bias=False), 15 | nn.ReLU(inplace=True), 16 | nn.Linear(channel // reduction, channel, bias=False), 17 | nn.Sigmoid() 18 | ) 19 | 20 | 21 | def init_weights(self): 22 | for m in self.modules(): 23 | if isinstance(m, nn.Conv2d): 24 | init.kaiming_normal_(m.weight, mode='fan_out') 25 | if m.bias is not None: 26 | init.constant_(m.bias, 0) 27 | elif isinstance(m, nn.BatchNorm2d): 28 | init.constant_(m.weight, 1) 29 | init.constant_(m.bias, 0) 30 | elif isinstance(m, nn.Linear): 31 | init.normal_(m.weight, std=0.001) 32 | if m.bias is not None: 33 | init.constant_(m.bias, 0) 34 | 35 | def forward(self, x): 36 | b, c, _, _ = x.size() 37 | y = self.avg_pool(x).view(b, c) 38 | y = self.fc(y).view(b, c, 1, 1) 39 | return x * y.expand_as(x) 40 | 41 | 42 | if __name__ == '__main__': 43 | input=torch.randn(50,512,7,7) 44 | se = SEAttention(channel=512,reduction=8) 45 | output=se(input) 46 | print(output.shape) 47 | -------------------------------------------------------------------------------- /cv-attention/SGE.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | class SpatialGroupEnhance(nn.Module): 7 | def __init__(self, groups=8): 8 | super().__init__() 9 | self.groups=groups 10 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 11 | self.weight=nn.Parameter(torch.zeros(1,groups,1,1)) 12 | self.bias=nn.Parameter(torch.zeros(1,groups,1,1)) 13 | self.sig=nn.Sigmoid() 14 | 
self.init_weights() 15 | 16 | def init_weights(self): 17 | for m in self.modules(): 18 | if isinstance(m, nn.Conv2d): 19 | init.kaiming_normal_(m.weight, mode='fan_out') 20 | if m.bias is not None: 21 | init.constant_(m.bias, 0) 22 | elif isinstance(m, nn.BatchNorm2d): 23 | init.constant_(m.weight, 1) 24 | init.constant_(m.bias, 0) 25 | elif isinstance(m, nn.Linear): 26 | init.normal_(m.weight, std=0.001) 27 | if m.bias is not None: 28 | init.constant_(m.bias, 0) 29 | 30 | def forward(self, x): 31 | b, c, h,w=x.shape 32 | x=x.view(b*self.groups,-1,h,w) #bs*g,dim//g,h,w 33 | xn=x*self.avg_pool(x) #bs*g,dim//g,h,w 34 | xn=xn.sum(dim=1,keepdim=True) #bs*g,1,h,w 35 | t=xn.view(b*self.groups,-1) #bs*g,h*w 36 | 37 | t=t-t.mean(dim=1,keepdim=True) #bs*g,h*w 38 | std=t.std(dim=1,keepdim=True)+1e-5 39 | t=t/std #bs*g,h*w 40 | t=t.view(b,self.groups,h,w) #bs,g,h*w 41 | 42 | t=t*self.weight+self.bias #bs,g,h*w 43 | t=t.view(b*self.groups,1,h,w) #bs*g,1,h*w 44 | x=x*self.sig(t) 45 | x=x.view(b,c,h,w) 46 | return x 47 | 48 | 49 | if __name__ == '__main__': 50 | input=torch.randn(50,512,7,7) 51 | sge = SpatialGroupEnhance(groups=8) 52 | output=sge(input) 53 | print(output.shape) -------------------------------------------------------------------------------- /cv-attention/SK.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | from collections import OrderedDict 6 | 7 | 8 | class SKAttention(nn.Module): 9 | 10 | def __init__(self, channel=512, kernels=[1, 3, 5, 7], reduction=16, group=1, L=32): 11 | super().__init__() 12 | self.d = max(L, channel // reduction) 13 | self.convs = nn.ModuleList([]) 14 | for k in kernels: 15 | self.convs.append( 16 | nn.Sequential(OrderedDict([ 17 | ('conv', nn.Conv2d(channel, channel, kernel_size=k, padding=k // 2, groups=group)), 18 | ('bn', nn.BatchNorm2d(channel)), 19 | ('relu', nn.ReLU()) 20 | ])) 21 | ) 22 | self.fc = nn.Linear(channel, self.d) 23 | self.fcs = nn.ModuleList([]) 24 | for i in range(len(kernels)): 25 | self.fcs.append(nn.Linear(self.d, channel)) 26 | self.softmax = nn.Softmax(dim=0) 27 | 28 | def forward(self, x): 29 | bs, c, _, _ = x.size() 30 | conv_outs = [] 31 | ### split 32 | for conv in self.convs: 33 | conv_outs.append(conv(x)) 34 | feats = torch.stack(conv_outs, 0) # k,bs,channel,h,w 35 | 36 | ### fuse 37 | U = sum(conv_outs) # bs,c,h,w 38 | 39 | ### reduction channel 40 | S = U.mean(-1).mean(-1) # bs,c 41 | Z = self.fc(S) # bs,d 42 | 43 | ### calculate attention weight 44 | weights = [] 45 | for fc in self.fcs: 46 | weight = fc(Z) 47 | weights.append(weight.view(bs, c, 1, 1)) # bs,channel 48 | attention_weughts = torch.stack(weights, 0) # k,bs,channel,1,1 49 | attention_weughts = self.softmax(attention_weughts) # k,bs,channel,1,1 50 | 51 | ### fuse 52 | V = (attention_weughts * feats).sum(0) 53 | return V 54 | 55 | 56 | if __name__ == '__main__': 57 | input = torch.randn(50, 512, 7, 7) 58 | se = SKAttention(channel=512, reduction=8) 59 | output = se(input) 60 | print(output.shape) 61 | -------------------------------------------------------------------------------- /cv-attention/SequentialSelfAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | class SequentialPolarizedSelfAttention(nn.Module): 7 | 8 | def __init__(self, channel=512): 9 | super().__init__() 10 | 
self.ch_wv=nn.Conv2d(channel,channel//2,kernel_size=(1,1)) 11 | self.ch_wq=nn.Conv2d(channel,1,kernel_size=(1,1)) 12 | self.softmax_channel=nn.Softmax(1) 13 | self.softmax_spatial=nn.Softmax(-1) 14 | self.ch_wz=nn.Conv2d(channel//2,channel,kernel_size=(1,1)) 15 | self.ln=nn.LayerNorm(channel) 16 | self.sigmoid=nn.Sigmoid() 17 | self.sp_wv=nn.Conv2d(channel,channel//2,kernel_size=(1,1)) 18 | self.sp_wq=nn.Conv2d(channel,channel//2,kernel_size=(1,1)) 19 | self.agp=nn.AdaptiveAvgPool2d((1,1)) 20 | 21 | def forward(self, x): 22 | b, c, h, w = x.size() 23 | 24 | #Channel-only Self-Attention 25 | channel_wv=self.ch_wv(x) #bs,c//2,h,w 26 | channel_wq=self.ch_wq(x) #bs,1,h,w 27 | channel_wv=channel_wv.reshape(b,c//2,-1) #bs,c//2,h*w 28 | channel_wq=channel_wq.reshape(b,-1,1) #bs,h*w,1 29 | channel_wq=self.softmax_channel(channel_wq) 30 | channel_wz=torch.matmul(channel_wv,channel_wq).unsqueeze(-1) #bs,c//2,1,1 31 | channel_weight=self.sigmoid(self.ln(self.ch_wz(channel_wz).reshape(b,c,1).permute(0,2,1))).permute(0,2,1).reshape(b,c,1,1) #bs,c,1,1 32 | channel_out=channel_weight*x 33 | 34 | #Spatial-only Self-Attention 35 | spatial_wv=self.sp_wv(channel_out) #bs,c//2,h,w 36 | spatial_wq=self.sp_wq(channel_out) #bs,c//2,h,w 37 | spatial_wq=self.agp(spatial_wq) #bs,c//2,1,1 38 | spatial_wv=spatial_wv.reshape(b,c//2,-1) #bs,c//2,h*w 39 | spatial_wq=spatial_wq.permute(0,2,3,1).reshape(b,1,c//2) #bs,1,c//2 40 | spatial_wq=self.softmax_spatial(spatial_wq) 41 | spatial_wz=torch.matmul(spatial_wq,spatial_wv) #bs,1,h*w 42 | spatial_weight=self.sigmoid(spatial_wz.reshape(b,1,h,w)) #bs,1,h,w 43 | spatial_out=spatial_weight*channel_out 44 | return spatial_out 45 | 46 | if __name__ == '__main__': 47 | input=torch.randn(1,512,7,7) 48 | psa = SequentialPolarizedSelfAttention(channel=512) 49 | output=psa(input) 50 | print(output.shape) 51 | -------------------------------------------------------------------------------- /cv-attention/SimAM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SimAM(torch.nn.Module): 6 | def __init__(self, e_lambda=1e-4): 7 | super(SimAM, self).__init__() 8 | 9 | self.activaton = nn.Sigmoid() 10 | self.e_lambda = e_lambda 11 | 12 | def __repr__(self): 13 | s = self.__class__.__name__ + '(' 14 | s += ('lambda=%f)' % self.e_lambda) 15 | return s 16 | 17 | @staticmethod 18 | def get_module_name(): 19 | return "simam" 20 | 21 | def forward(self, x): 22 | b, c, h, w = x.size() 23 | 24 | n = w * h - 1 25 | 26 | x_minus_mu_square = (x - x.mean(dim=[2, 3], keepdim=True)).pow(2) 27 | y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(dim=[2, 3], keepdim=True) / n + self.e_lambda)) + 0.5 28 | 29 | return x * self.activaton(y) 30 | 31 | 32 | if __name__ == '__main__': 33 | input = torch.randn(3, 64, 7, 7) 34 | model = SimAM() 35 | outputs = model(input) 36 | print(outputs.shape) 37 | -------------------------------------------------------------------------------- /damo-yolo/Annotations/ReadMe.md: -------------------------------------------------------------------------------- 1 | # 存放VOC标注格式的文件夹 -------------------------------------------------------------------------------- /damo-yolo/JPEGImages/ReadMe.md: -------------------------------------------------------------------------------- 1 | # 存放图像的文件夹 -------------------------------------------------------------------------------- /damo-yolo/readme.md: -------------------------------------------------------------------------------- 1 | # 
Dataset processing files for DAMO-YOLO
2 | The scripts in this directory are dataset-processing scripts for DAMO-YOLO and support the following:
3 | 1. Convert VOC-format annotations to COCO format and generate train.json, val.json and test.json.
4 | 
5 | # Usage
6 | 1. Put the images in JPEGImages. The image extensions must be consistent, e.g. all jpg or all png; mixed extension formats (some jpg, some png) are not supported.
7 | 2. Put the VOC-format XML annotation files in Annotations.
8 | 3. Run voc2coco.py, where the postfix argument is the image extension used in JPEGImages, train_ratio is the training-set ratio, val_ratio is the validation-set ratio, and the remainder becomes the test set.
--------------------------------------------------------------------------------
/data-offline-aug/readme.md:
--------------------------------------------------------------------------------
1 | # data-offline-aug
2 | 
3 | ### Environment
4 | 
5 | pip install -i https://pypi.tuna.tsinghua.edu.cn/simple albumentations
6 | 
7 | ### 1. object_detection_data_aug.py
8 | 
9 | Offline data-augmentation script for yolo-format object-detection datasets.
10 | Video tutorial: https://www.bilibili.com/video/BV1bT421k7iq/
11 | 
12 | ### 2. segment_data_aug.py
13 | 
14 | Offline data-augmentation script for semantic segmentation.
15 | Video tutorial: https://www.bilibili.com/video/BV1xi421a7Gb/
16 | 
17 | # Reference
18 | https://github.com/albumentations-team/albumentations
--------------------------------------------------------------------------------
/mmdet-course/config/atss_r50_fpn_dyhead_1x_visdrone.py:
--------------------------------------------------------------------------------
1 | _base_ = 'atss_r50_fpn_dyhead_1x_coco.py'
2 | 
3 | model = dict(
4 |     bbox_head=dict(
5 |         num_classes=10
6 |     )
7 | )
8 | 
9 | # Modify dataset-related settings
10 | data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/'
11 | metainfo = {
12 |     'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'),
13 |     # 'palette': [
14 |     #     (220, 20, 60),
15 |     # ]
16 | }
17 | train_dataloader = dict(
18 |     batch_size=8,
19 |     num_workers=8,
20 |     dataset=dict(
21 |         data_root=data_root,
22 |         metainfo=metainfo,
23 |         ann_file='VisDrone2019-DET-train/annotations/train.json',
24 |         data_prefix=dict(img='VisDrone2019-DET-train/images/')))
25 | val_dataloader = dict(
26 |     batch_size=8,
27 |     num_workers=8,
28 |     dataset=dict(
29 |         data_root=data_root,
30 |         metainfo=metainfo,
31 |         ann_file='VisDrone2019-DET-val/annotations/val.json',
32 |         data_prefix=dict(img='VisDrone2019-DET-val/images/')))
33 | test_dataloader = dict(
34 |     batch_size=8,
35 |     num_workers=8,
36 |     dataset=dict(
37 |         data_root=data_root,
38 |         metainfo=metainfo,
39 |         ann_file='VisDrone2019-DET-test-dev/annotations/test.json',
40 |         data_prefix=dict(img='VisDrone2019-DET-test-dev/images/')))
41 | 
42 | # Modify evaluation-metric settings
43 | val_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-val/annotations/val.json')
44 | test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json')
45 | 
46 | # optim_wrapper = dict(type='AmpOptimWrapper')
47 | 
48 | default_hooks = dict(logger=dict(type='LoggerHook', interval=200))
49 | 
50 | load_from='atss_r50_fpn_dyhead_4x4_1x_coco_20211219_023314-eaa620c6.pth'
51 | 
52 | # nohup python tools/train.py configs/dyhead/atss_r50_fpn_dyhead_1x_visdrone.py > atss-dyhead-visdrone.log 2>&1 & tail -f atss-dyhead-visdrone.log
53 | # python tools/test.py configs/dyhead/atss_r50_fpn_dyhead_1x_visdrone.py work_dirs/tood_r50_fpn_1x_visdrone/epoch_12.pth --show --show-dir test_save
54 | # python tools/test.py configs/dyhead/atss_r50_fpn_dyhead_1x_visdrone.py work_dirs/tood_r50_fpn_1x_visdrone/epoch_12.pth --tta
--------------------------------------------------------------------------------
/mmdet-course/config/ddq-detr-4scale_r50_8xb2-12e_visdrone.py:
--------------------------------------------------------------------------------
1 | _base_ = 'ddq-detr-4scale_r50_8xb2-12e_coco.py'
2 | 
3 | 
model = dict( 4 | bbox_head=dict( 5 | type='DDQDETRHead', 6 | num_classes=10 7 | ) 8 | ) 9 | 10 | # 修改数据集相关配置 11 | data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/' 12 | metainfo = { 13 | 'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'), 14 | # 'palette': [ 15 | # (220, 20, 60), 16 | # ] 17 | } 18 | train_dataloader = dict( 19 | batch_size=2, 20 | num_workers=2, 21 | dataset=dict( 22 | data_root=data_root, 23 | metainfo=metainfo, 24 | ann_file='VisDrone2019-DET-train/annotations/train.json', 25 | data_prefix=dict(img='VisDrone2019-DET-train/images/'))) 26 | val_dataloader = dict( 27 | batch_size=2, 28 | num_workers=2, 29 | dataset=dict( 30 | data_root=data_root, 31 | metainfo=metainfo, 32 | ann_file='VisDrone2019-DET-val/annotations/val.json', 33 | data_prefix=dict(img='VisDrone2019-DET-val/images/'))) 34 | test_dataloader = dict( 35 | batch_size=2, 36 | num_workers=2, 37 | dataset=dict( 38 | data_root=data_root, 39 | metainfo=metainfo, 40 | ann_file='VisDrone2019-DET-test-dev/annotations/test.json', 41 | data_prefix=dict(img='VisDrone2019-DET-test-dev/images/'))) 42 | 43 | # 修改评价指标相关配置 44 | val_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-val/annotations/val.json') 45 | test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json') 46 | 47 | # optim_wrapper = dict(type='AmpOptimWrapper') 48 | 49 | default_hooks = dict(logger=dict(type='LoggerHook', interval=1000)) 50 | 51 | load_from='ddq-detr-4scale_r50_8xb2-12e_coco_20230809_170711-42528127.pth' 52 | 53 | # nohup python tools/train.py configs/ddq/ddq-detr-4scale_r50_8xb2-12e_visdrone.py > ddq-visdrone.log 2>&1 & tail -f ddq-visdrone.log 54 | # python tools/test.py configs/ddq/ddq-detr-4scale_r50_8xb2-12e_visdrone.py work_dirs/faster-rcnn_r50_fpn_ciou_1x_visdrone/epoch_12.pth --show --show-dir test_save 55 | # python tools/test.py configs/ddq/ddq-detr-4scale_r50_8xb2-12e_visdrone.py work_dirs/faster-rcnn_r50_fpn_ciou_1x_visdrone/epoch_12.pth --tta -------------------------------------------------------------------------------- /mmdet-course/config/dino-4scale_r50_8xb2-12e_visdrone.py: -------------------------------------------------------------------------------- 1 | _base_ = 'dino-4scale_r50_8xb2-12e_coco.py' 2 | 3 | model = dict( 4 | bbox_head=dict( 5 | type='DINOHead', 6 | num_classes=10, 7 | ) 8 | ) 9 | 10 | # 修改数据集相关配置 11 | data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/' 12 | metainfo = { 13 | 'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'), 14 | # 'palette': [ 15 | # (220, 20, 60), 16 | # ] 17 | } 18 | train_dataloader = dict( 19 | batch_size=4, 20 | num_workers=4, 21 | dataset=dict( 22 | data_root=data_root, 23 | metainfo=metainfo, 24 | ann_file='VisDrone2019-DET-train/annotations/train.json', 25 | data_prefix=dict(img='VisDrone2019-DET-train/images/'))) 26 | val_dataloader = dict( 27 | batch_size=4, 28 | num_workers=4, 29 | dataset=dict( 30 | data_root=data_root, 31 | metainfo=metainfo, 32 | ann_file='VisDrone2019-DET-val/annotations/val.json', 33 | data_prefix=dict(img='VisDrone2019-DET-val/images/'))) 34 | test_dataloader = dict( 35 | batch_size=4, 36 | num_workers=4, 37 | dataset=dict( 38 | data_root=data_root, 39 | metainfo=metainfo, 40 | ann_file='VisDrone2019-DET-test-dev/annotations/test.json', 41 | data_prefix=dict(img='VisDrone2019-DET-test-dev/images/'))) 42 | 43 | # 修改评价指标相关配置 44 | val_evaluator = dict(ann_file=data_root + 
'VisDrone2019-DET-val/annotations/val.json') 45 | test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json') 46 | 47 | # optim_wrapper = dict(type='AmpOptimWrapper') 48 | 49 | default_hooks = dict(logger=dict(type='LoggerHook', interval=500)) 50 | 51 | load_from='dino-4scale_r50_8xb2-12e_coco_20221202_182705-55b2bba2.pth' 52 | 53 | # nohup python tools/train.py configs/dino/dino-4scale_r50_8xb2-12e_visdrone.py > dino-visdrone.log 2>&1 & tail -f dino-visdrone.log 54 | # python tools/test.py configs/dino/dino-4scale_r50_8xb2-12e_visdrone.py work_dirs/tood_r50_fpn_1x_visdrone/epoch_12.pth --show --show-dir test_save 55 | # python tools/test.py configs/dino/dino-4scale_r50_8xb2-12e_visdrone.py work_dirs/tood_r50_fpn_1x_visdrone/epoch_12.pth --tta -------------------------------------------------------------------------------- /mmdet-course/config/faster-rcnn_r50_fpn_ciou_1x_visdrone.py: -------------------------------------------------------------------------------- 1 | _base_ = 'faster-rcnn_r50_fpn_ciou_1x_coco.py' 2 | 3 | # 我们还需要更改 head 中的 num_classes 以匹配数据集中的类别数 4 | model = dict( 5 | roi_head=dict( 6 | bbox_head=dict( 7 | type='Shared2FCBBoxHead', 8 | num_classes=10 9 | ) 10 | ) 11 | ) 12 | 13 | # 修改数据集相关配置 14 | data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/' 15 | metainfo = { 16 | 'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'), 17 | # 'palette': [ 18 | # (220, 20, 60), 19 | # ] 20 | } 21 | train_dataloader = dict( 22 | batch_size=8, 23 | num_workers=8, 24 | dataset=dict( 25 | data_root=data_root, 26 | metainfo=metainfo, 27 | ann_file='VisDrone2019-DET-train/annotations/train.json', 28 | data_prefix=dict(img='VisDrone2019-DET-train/images/'))) 29 | val_dataloader = dict( 30 | batch_size=8, 31 | num_workers=8, 32 | dataset=dict( 33 | data_root=data_root, 34 | metainfo=metainfo, 35 | ann_file='VisDrone2019-DET-val/annotations/val.json', 36 | data_prefix=dict(img='VisDrone2019-DET-val/images/'))) 37 | test_dataloader = dict( 38 | batch_size=8, 39 | num_workers=8, 40 | dataset=dict( 41 | data_root=data_root, 42 | metainfo=metainfo, 43 | ann_file='VisDrone2019-DET-test-dev/annotations/test.json', 44 | data_prefix=dict(img='VisDrone2019-DET-test-dev/images/'))) 45 | 46 | # 修改评价指标相关配置 47 | val_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-val/annotations/val.json') 48 | test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json') 49 | 50 | # optim_wrapper = dict(type='AmpOptimWrapper') 51 | 52 | default_hooks = dict(logger=dict(type='LoggerHook', interval=200)) 53 | 54 | load_from='faster_rcnn_r50_fpn_giou_1x_coco-0eada910.pth' 55 | 56 | # nohup python tools/train.py configs/faster_rcnn/faster-rcnn_r50_fpn_ciou_1x_visdrone.py > faster-rcnn-visdrone.log 2>&1 & tail -f faster-rcnn-visdrone.log 57 | # python tools/test.py configs/faster_rcnn/faster-rcnn_r50_fpn_ciou_1x_visdrone.py work_dirs/faster-rcnn_r50_fpn_ciou_1x_visdrone/epoch_12.pth --show --show-dir test_save 58 | # python tools/test.py configs/faster_rcnn/faster-rcnn_r50_fpn_ciou_1x_visdrone.py work_dirs/faster-rcnn_r50_fpn_ciou_1x_visdrone/epoch_12.pth --tta -------------------------------------------------------------------------------- /mmdet-course/config/gfl_r50_fpn_1x_visdrone.py: -------------------------------------------------------------------------------- 1 | _base_ = 'gfl_r50_fpn_1x_coco.py' 2 | 3 | # 我们还需要更改 head 中的 num_classes 以匹配数据集中的类别数 4 | model = dict( 5 | 
bbox_head=dict( 6 | num_classes=10 7 | ) 8 | ) 9 | 10 | # 修改数据集相关配置 11 | data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/' 12 | metainfo = { 13 | 'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'), 14 | # 'palette': [ 15 | # (220, 20, 60), 16 | # ] 17 | } 18 | train_dataloader = dict( 19 | batch_size=8, 20 | num_workers=8, 21 | dataset=dict( 22 | data_root=data_root, 23 | metainfo=metainfo, 24 | ann_file='VisDrone2019-DET-train/annotations/train.json', 25 | data_prefix=dict(img='VisDrone2019-DET-train/images/'))) 26 | val_dataloader = dict( 27 | batch_size=8, 28 | num_workers=8, 29 | dataset=dict( 30 | data_root=data_root, 31 | metainfo=metainfo, 32 | ann_file='VisDrone2019-DET-val/annotations/val.json', 33 | data_prefix=dict(img='VisDrone2019-DET-val/images/'))) 34 | test_dataloader = dict( 35 | batch_size=8, 36 | num_workers=8, 37 | dataset=dict( 38 | data_root=data_root, 39 | metainfo=metainfo, 40 | ann_file='VisDrone2019-DET-test-dev/annotations/test.json', 41 | data_prefix=dict(img='VisDrone2019-DET-test-dev/images/'))) 42 | 43 | # 修改评价指标相关配置 44 | val_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-val/annotations/val.json') 45 | test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json') 46 | 47 | # optim_wrapper = dict(type='AmpOptimWrapper') 48 | 49 | default_hooks = dict(logger=dict(type='LoggerHook', interval=200)) 50 | load_from='gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth' 51 | 52 | # nohup python tools/train.py configs/gfl/gfl_r50_fpn_1x_visdrone.py > gfl-visdrone.log 2>&1 & tail -f gfl-visdrone.log 53 | # python tools/test.py configs/gfl/gfl_r50_fpn_1x_visdrone.py work_dirs/gfl_r50_fpn_1x_visdrone/epoch_12.pth --show --show-dir test_save 54 | # python tools/test.py configs/gfl/gfl_r50_fpn_1x_visdrone.py work_dirs/gfl_r50_fpn_1x_visdrone/epoch_12.pth --tta 55 | # python tools/analysis_tools/get_flops.py configs/gfl/gfl_r50_fpn_1x_visdrone.py -------------------------------------------------------------------------------- /mmdet-course/config/retinanet_r50_fpn_1x_visdrone.py: -------------------------------------------------------------------------------- 1 | _base_ = 'retinanet_r50_fpn_1x_coco.py' 2 | 3 | # 我们还需要更改 head 中的 num_classes 以匹配数据集中的类别数 4 | model = dict( 5 | bbox_head=dict( 6 | num_classes=10 7 | ) 8 | ) 9 | 10 | # 修改数据集相关配置 11 | data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/' 12 | metainfo = { 13 | 'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'), 14 | # 'palette': [ 15 | # (220, 20, 60), 16 | # ] 17 | } 18 | train_dataloader = dict( 19 | batch_size=8, 20 | num_workers=8, 21 | dataset=dict( 22 | data_root=data_root, 23 | metainfo=metainfo, 24 | ann_file='VisDrone2019-DET-train/annotations/train.json', 25 | data_prefix=dict(img='VisDrone2019-DET-train/images/'))) 26 | val_dataloader = dict( 27 | batch_size=8, 28 | num_workers=8, 29 | dataset=dict( 30 | data_root=data_root, 31 | metainfo=metainfo, 32 | ann_file='VisDrone2019-DET-val/annotations/val.json', 33 | data_prefix=dict(img='VisDrone2019-DET-val/images/'))) 34 | test_dataloader = dict( 35 | batch_size=8, 36 | num_workers=8, 37 | dataset=dict( 38 | data_root=data_root, 39 | metainfo=metainfo, 40 | ann_file='VisDrone2019-DET-test-dev/annotations/test.json', 41 | data_prefix=dict(img='VisDrone2019-DET-test-dev/images/'))) 42 | 43 | # 修改评价指标相关配置 44 | val_evaluator = dict(ann_file=data_root + 
'VisDrone2019-DET-val/annotations/val.json') 45 | test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json') 46 | 47 | # optim_wrapper = dict(type='AmpOptimWrapper') 48 | 49 | default_hooks = dict(logger=dict(type='LoggerHook', interval=200)) 50 | 51 | load_from='retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' 52 | 53 | # nohup python tools/train.py configs/retinanet/retinanet_r50_fpn_1x_visdrone.py > retinanet-visdrone.log 2>&1 & tail -f retinanet-visdrone.log 54 | # python tools/test.py configs/retinanet/retinanet_r50_fpn_1x_visdrone.py work_dirs/tood_r50_fpn_1x_visdrone/epoch_12.pth --show --show-dir test_save 55 | # python tools/test.py configs/retinanet/retinanet_r50_fpn_1x_visdrone.py work_dirs/retinanet_r50_fpn_1x_visdrone/epoch_12.pth --tta 56 | # python tools/analysis_tools/get_flops.py configs/retinanet/retinanet_r50_fpn_1x_visdrone.py -------------------------------------------------------------------------------- /mmdet-course/config/rtmdet_tiny_8xb32-300e_visdrone.py: -------------------------------------------------------------------------------- 1 | _base_ = 'rtmdet_tiny_8xb32-300e_coco.py' 2 | 3 | model = dict( 4 | bbox_head=dict( 5 | num_classes=10 6 | ) 7 | ) 8 | 9 | # 修改数据集相关配置 10 | data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/' 11 | metainfo = { 12 | 'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'), 13 | # 'palette': [ 14 | # (220, 20, 60), 15 | # ] 16 | } 17 | train_dataloader = dict( 18 | batch_size=16, 19 | num_workers=8, 20 | dataset=dict( 21 | data_root=data_root, 22 | metainfo=metainfo, 23 | ann_file='VisDrone2019-DET-train/annotations/train.json', 24 | data_prefix=dict(img='VisDrone2019-DET-train/images/'))) 25 | val_dataloader = dict( 26 | batch_size=16, 27 | num_workers=8, 28 | dataset=dict( 29 | data_root=data_root, 30 | metainfo=metainfo, 31 | ann_file='VisDrone2019-DET-val/annotations/val.json', 32 | data_prefix=dict(img='VisDrone2019-DET-val/images/'))) 33 | test_dataloader = dict( 34 | batch_size=16, 35 | num_workers=8, 36 | dataset=dict( 37 | data_root=data_root, 38 | metainfo=metainfo, 39 | ann_file='VisDrone2019-DET-test-dev/annotations/test.json', 40 | data_prefix=dict(img='VisDrone2019-DET-test-dev/images/'))) 41 | 42 | # 修改评价指标相关配置 43 | val_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-val/annotations/val.json') 44 | test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json') 45 | 46 | # optim_wrapper = dict(type='AmpOptimWrapper') 47 | 48 | default_hooks = dict(logger=dict(type='LoggerHook', interval=200)) 49 | load_from='rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' 50 | 51 | # nohup python tools/train.py configs/rtmdet/rtmdet_tiny_8xb32-300e_visdrone.py > rtmdet-tiny-visdrone.log 2>&1 & tail -f rtmdet-tiny-visdrone.log 52 | # python tools/test.py configs/rtmdet/rtmdet_tiny_8xb32-300e_visdrone.py work_dirs/rtmdet_tiny_8xb32-300e_visdrone/epoch_300.pth --show --show-dir test_save 53 | # python tools/test.py configs/rtmdet/rtmdet_tiny_8xb32-300e_visdrone.py work_dirs/rtmdet_tiny_8xb32-300e_visdrone/epoch_300.pth --tta 54 | # python tools/analysis_tools/get_flops.py configs/rtmdet/rtmdet_tiny_8xb32-300e_visdrone.py -------------------------------------------------------------------------------- /mmdet-course/config/tood_r50_fpn_1x_visdrone.py: -------------------------------------------------------------------------------- 1 | _base_ = './tood_r50_fpn_1x_coco.py' 2 | 3 | # 我们还需要更改 
head 中的 num_classes 以匹配数据集中的类别数 4 | model = dict( 5 | bbox_head=dict( 6 | num_classes=10 7 | ) 8 | ) 9 | 10 | # 修改数据集相关配置 11 | data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/' 12 | metainfo = { 13 | 'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'), 14 | # 'palette': [ 15 | # (220, 20, 60), 16 | # ] 17 | } 18 | train_dataloader = dict( 19 | batch_size=8, 20 | num_workers=8, 21 | dataset=dict( 22 | data_root=data_root, 23 | metainfo=metainfo, 24 | ann_file='VisDrone2019-DET-train/annotations/train.json', 25 | data_prefix=dict(img='VisDrone2019-DET-train/images/'))) 26 | val_dataloader = dict( 27 | batch_size=8, 28 | num_workers=8, 29 | dataset=dict( 30 | data_root=data_root, 31 | metainfo=metainfo, 32 | ann_file='VisDrone2019-DET-val/annotations/val.json', 33 | data_prefix=dict(img='VisDrone2019-DET-val/images/'))) 34 | test_dataloader = dict( 35 | batch_size=8, 36 | num_workers=8, 37 | dataset=dict( 38 | data_root=data_root, 39 | metainfo=metainfo, 40 | ann_file='VisDrone2019-DET-test-dev/annotations/test.json', 41 | data_prefix=dict(img='VisDrone2019-DET-test-dev/images/'))) 42 | 43 | # 修改评价指标相关配置 44 | val_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-val/annotations/val.json') 45 | test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json') 46 | 47 | # optim_wrapper = dict(type='AmpOptimWrapper') 48 | 49 | default_hooks = dict(logger=dict(type='LoggerHook', interval=200)) 50 | 51 | load_from='tood_r50_fpn_1x_coco_20211210_103425-20e20746.pth' 52 | 53 | # nohup python tools/train.py configs/tood/tood_r50_fpn_1x_visdrone.py > tood-visdrone.log 2>&1 & tail -f tood-visdrone.log 54 | # python tools/test.py configs/tood/tood_r50_fpn_1x_visdrone.py work_dirs/tood_r50_fpn_1x_visdrone/epoch_12.pth --show --show-dir test_save 55 | # python tools/test.py configs/tood/tood_r50_fpn_1x_visdrone.py work_dirs/tood_r50_fpn_1x_visdrone/epoch_12.pth --tta -------------------------------------------------------------------------------- /mustread-paper/MobileNets.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/mustread-paper/MobileNets.pdf -------------------------------------------------------------------------------- /mustread-paper/RTMDet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/mustread-paper/RTMDet.pdf -------------------------------------------------------------------------------- /mutilmodel-project.md: -------------------------------------------------------------------------------- 1 | # 2025-YOLO|RTDETR多模态目标检测项目 2 | 对于当今的视觉任务来说,最简单入手的便是YOLO系列,通过ultralytics库的帮助下,无论是否来自计算机科班的同学基本都可以快速构建自己的目标检测模型。但是与简单方便相伴而来的是现在的YOLO系列模型的整体拒稿率越来越高,甚至与很多期刊或导师看到YOLO四个字便直接Reject,即使组合出性能优异的检测模型也难以发表到心仪的期刊上去,因此单靠单模态的YOLO发有点要求的期刊已经开始显得有些吃力。很多人尝试转向RT-DETR模型,对于从YOLO迁移过去的人来说一样简单好用,但是RTDETR的训练成本要比YOLO系列模型略高,因此对于部分没有服务器/自费服务器的同学来说可能有点难接受。虽然单模态的YOLO确实显得吃力,但是多模态的YOLO就不是这样了,从去年开始多模态就开始慢慢火起来,但由于缺乏相对应的教程,让很多人望而止步,从去年到今年,也越来越多人问,有没有多模态相关的YOLO改进项目?别急,它终于要来了,而且还不止YOLO,RTDETR的多模态也有! 3 | 4 | ## 1. 这个项目包含什么内容? 5 | 6 | 1. 这个项目主体思路是在尽可能的保证继承ultralytics库简单好用的基础上为YOLO与RT-DETR现阶段这两个最热门的目标检测器,提供出多模态的能力。<可以理解为YOLO|RTDETR的进阶版> 7 | 2. 这个项目的核心是在原有可见光(RGB图像的基础上)结合红外或深度图谱实现多模态信息结合的能力。 8 | 3. 
同时根据自身的工作经验,我们在本项目中提供的改进点均与多模态方面高度相关,不会用一些毫不相关的改进内容充数! 9 | 4. <基于Ultralytics的YOLOV8、YOLOV10、YOLO11、YOLO12、RTDETR>我们都会去支持其实现多模态能力。同时尽可能的使得这个扩展可以兼容魔导目前已经存在的其他改进项目实现交叉结合,拥有几百个改进点的改进项目再结合多模态直接起飞~ 10 | 5. 当前阶段仅考虑支持目标检测。实例分割,姿态检测,旋转目标检测暂时不考虑。 11 | 12 | ## 2. 这个项目会以什么形式开展? 13 | 14 | 1. 本次项目多模态部分主要是提供额外的一些多模态的文件,可以直接粘贴到自己的Ultralytics项目内(例如v8v10、v11v12、rtdetr改进项目中),这样可以做到像剪枝蒸馏项目一样,有一定的兼容性和移植性,同时会提供对应的视频教程。 15 | 2. 这个项目会以未来持续更新的态势进行扩展,包括支持更多模型的多模态,以及不同模态之间融合策略(早期、中期、后期的多模态融合策略),还有专属于多模态项目的改进模块。考虑到工作与时间上的问题这会是一个持续更新的过程。 16 | 3. 附带答疑群,群里主要是答疑实验,代码操作,代码报错等问题。考虑到个人空闲时间问题不一定每一个问题都能及时回答,也可以在群里询问其他大佬的帮助。一些反复出现的高频问题也会收集录制对应的答疑视频来给大家解答。我本人也会在群里给一些多模态写作投稿的思路与建议。 17 | 18 | ## 3. 入手须知 19 | 20 | 1. 本项目毕竟是为YOLO以及RT-DETR系列做的扩展,因此建议在已经有了ultralytics库的使用经验后来使用本项目。同时为了达到最佳效果,强烈建议搭配魔导的相关改进项目来配合使用。 21 | 以下人群入手此项目需要谨慎考虑: 22 | - 未入门、1000%计算机小白(可以考虑先补充相关的基础知识)。 23 | - 不想花时间学习,不想了解多模态结构,仅仅只想水论文。 24 | - 不喜欢看说明或使用文档的。 25 | 2. 此项目不涉及多模态数据中的配准相关问题。 26 | 3. 考虑到架构复杂性问题以及多模态结构的特殊性,所以不会考虑提供多模态的剪枝蒸馏在内。但是会考虑提供生成模态的办法作为数据集来源缺失的补充。(生成模态办法包括深度,红外等方面,采用成熟深度学习代码包括一些顶会的工作进行相关模态生成。由于生成模态的作用因此可以在单一模态数据集上进行额外扩展,实现一集多用的办法同时避免配准的问题。) 27 | 4. 本项目仅包含图像相关的多模态,不包含图像+文字的多模态。 28 | 5. 本项目的Ultralytics版本建议是v8.1.9(RTDETR改进项目),v8.2.50(YOLOV8V10改进项目),v8.3.9(YOLOV11V12改进项目),v8.3.117(给那些不需要基础改进项目的人使用)。 29 | 30 | ## 4. 价格 31 | 32 | 1. 本项目价格为288,购买过其中之一的优惠50,优惠后价格为238。没有时效限制。 33 | 2. 虚拟项目一经售出不退不换,需要入手前考虑清楚,如果你是初次入手我的项目,怕我不靠谱,可以先考虑入手个YOLO和RTDETR看下。 34 | 35 | ## 5. 项目使用问题 36 | 37 | 1. 购买本项目的使用者都会得到一个独一无二的用于解压7z的密码,到时候用于解压对应的压缩包,此密码自己妥善保管,请勿告诉他人。 38 | 2. 本项目的视频和直播回放统一都是加密视频,每个购买者都可以得到一个激活码,激活码在每个人专属的7z压缩文件内。 -------------------------------------------------------------------------------- /objectdetection-tricks/readme.md: -------------------------------------------------------------------------------- 1 | # objectdetection-tricks 2 | 这个项目主要是提供一些关于目标检测的tricks. 3 | 4 | # Explanation 5 | - **tricks_1** 6 | 可视化并统计目标检测中的TP,FP,FN. 7 | 视频教学地址:[可视化-哔哩哔哩](https://www.bilibili.com/video/BV18M411c7jN/). [统计-哔哩哔哩](https://www.bilibili.com/video/BV1yM4y1d7Gp/). 8 | - **tricks_2** 9 | 深度学习小实验-卷积家族(fps,flops,param)对比实验. 10 | 目前支持:Conv,DWConv,Ghost-Conv,GSConv,DSConv,PConv,DCNV2,DCNV3. 11 | 视频教学地址:[3.8 哔哩哔哩](https://www.bilibili.com/video/BV15x4y1T7Ly/). [3.19 哔哩哔哩](https://www.bilibili.com/video/BV1UL411R7Qr/). 12 | - **tricks_3** 13 | yolov5中的FeatureMap可视化(热力图格式). 14 | 视频教学地址:[哔哩哔哩](https://www.bilibili.com/video/BV1LV4y1R7w6/). 15 | - **tricks_4** 16 | 用于yolov5和v7中的yolo格式转换coco格式的脚本.(如何在v5和v7中输出ap_small,ap_middle,ap_large coco指标) 17 | 视频教学地址:[哔哩哔哩](https://www.bilibili.com/video/BV14T411s7Ts/). 18 | - **tricks_5** 19 | Segment Anything演示代码. 20 | 视频教学地址:[哔哩哔哩](https://www.bilibili.com/video/BV1hv4y1H7eg/). 21 | - **tricks_6** 22 | 固定随机种子以便在同一个主机上进行复现结果. 23 | 视频教学地址:[哔哩哔哩](https://www.bilibili.com/video/BV1bh4y1n7Yc/). 24 | - **tricks_7** 25 | 计算yolov5推理时间和FPS的脚本. 26 | 视频教学地址:[哔哩哔哩](https://www.bilibili.com/video/BV1Uu4y1C714/). 27 | - **tricks_8** 28 | 计算yolov7推理时间和FPS的脚本. 29 | 视频教学地址:[哔哩哔哩](https://www.bilibili.com/video/BV17p4y177Pe/). 30 | - **tricks_9** 31 | 深度学习小实验-YOLO-Block家族(fps,flops,param)对比实验. 32 | 目前支持:C3(Yolov5),ELAN(Yolov7),C2f(Yolov8)RepNCSPELAN(Yolov9). 33 | 视频教学地址:[哔哩哔哩](https://www.bilibili.com/video/BV17H4y1V7s9/). 34 | - **tricks_10** 35 | 输出YOLOV8、RTDETR各个层的计算量和参数量. 36 | 视频教学地址:[哔哩哔哩](https://www.bilibili.com/video/BV1tb421b7aB/). 37 | - **tricks_11** 38 | 以YOLOV8为例,保存多个模型的PR曲线的数据并进行读取绘制到一张图上. 39 | 视频教学地址:[哔哩哔哩](https://www.bilibili.com/video/BV1uC41177oE/). 40 | - **tricks_12** 41 | yolov5、v7、v8、v9、v10曲线对比图、推理时间vs精度对比图绘制手把手教程. 
42 | 视频教学地址:[哔哩哔哩](https://www.bilibili.com/video/BV1yf421X7t5/). 43 | - **tricks_13** 44 | YOLOV8-输出每一层的图特征图尺寸和通道数. 45 | 视频教学地址:[哔哩哔哩](https://www.bilibili.com/video/BV1Mz421B7xz/). 46 | - **tricks_14** 47 | YOLOV8V10V11V12更详细的输出精度结果. 48 | 视频教学地址:[哔哩哔哩](https://www.bilibili.com/video/BV1dBQDY6Ec5/). -------------------------------------------------------------------------------- /objectdetection-tricks/tricks_10.py: -------------------------------------------------------------------------------- 1 | import torch, thop 2 | from thop import profile 3 | from ultralytics import YOLO, RTDETR 4 | from prettytable import PrettyTable 5 | 6 | if __name__ == '__main__': 7 | batch_size, height, width = 1, 640, 640 8 | 9 | model = YOLO(r'ultralytics/cfg/models/yolov8/yolov8n.yaml').model # select your model.pt path 10 | # model = RTDETR(r'ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml').model 11 | model.fuse() 12 | input = torch.randn(batch_size, 3, height, width) 13 | total_flops, total_params, layers = profile(model, [input], verbose=True, ret_layer_info=True) 14 | FLOPs, Params = thop.clever_format([total_flops * 2 / batch_size, total_params], "%.3f") 15 | table = PrettyTable() 16 | table.title = f'Model Flops:{FLOPs} Params:{Params}' 17 | table.field_names = ['Layer ID', "FLOPs", "Params"] 18 | for layer_id in layers['model'][2]: 19 | data = layers['model'][2][layer_id] 20 | FLOPs, Params = thop.clever_format([data[0] * 2 / batch_size, data[1]], "%.3f") 21 | table.add_row([layer_id, FLOPs, Params]) 22 | print(table) -------------------------------------------------------------------------------- /objectdetection-tricks/tricks_11.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | if __name__ == '__main__': 6 | file_list = ['a/face_Box.csv', 'b/face_Box.csv'] 7 | names = ['improve', 'baseline'] 8 | ap = ['0.673', '0.639'] 9 | 10 | plt.figure(figsize=(6, 6)) 11 | for i in range(len(file_list)): 12 | pr_data = pd.read_csv(file_list[i], header=None) 13 | recall, precision = np.array(pr_data[0]), np.array(pr_data[1]) 14 | 15 | plt.plot(recall, precision, label=f'{names[i]} ap:{ap[i]}') 16 | plt.xlabel('Recall') 17 | plt.ylabel('Precision') 18 | plt.title('Precision-Recall Curve') 19 | plt.legend() 20 | plt.tight_layout() 21 | plt.savefig('pr.png') -------------------------------------------------------------------------------- /objectdetection-tricks/tricks_13.py: -------------------------------------------------------------------------------- 1 | if type(x) in {list, tuple}: 2 | if idx == (len(self.model) - 1): 3 | if type(x[1]) is dict: 4 | print(f'layer id:{idx:>2} {m.type:>50} output shape:{", ".join([str(x_.size()) for x_ in x[1]["one2one"]])}') 5 | else: 6 | print(f'layer id:{idx:>2} {m.type:>50} output shape:{", ".join([str(x_.size()) for x_ in x[1]])}') 7 | else: 8 | print(f'layer id:{idx:>2} {m.type:>50} output shape:{", ".join([str(x_.size()) for x_ in x if x_ is not None])}') 9 | elif type(x) is dict: 10 | print(f'layer id:{idx:>2} {m.type:>50} output shape:{", ".join([str(x_.size()) for x_ in x["one2one"]])}') 11 | else: 12 | if not hasattr(m, 'backbone'): 13 | print(f'layer id:{idx:>2} {m.type:>50} output shape:{x.size()}') -------------------------------------------------------------------------------- /objectdetection-tricks/tricks_3.py: -------------------------------------------------------------------------------- 1 | def feature_visualization(x, 
module_type, stage, n=32, save_dir=Path('runs/detect/exp')): 2 | """ 3 | x: Features to be visualized 4 | module_type: Module type 5 | stage: Module stage within model 6 | n: Maximum number of feature maps to plot 7 | save_dir: Directory to save results 8 | """ 9 | if 'Detect' not in module_type: 10 | batch, channels, height, width = x.shape # batch, channels, height, width 11 | if height > 1 and width > 1: 12 | f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename 13 | 14 | blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels 15 | n = min(n, channels) # number of plots 16 | fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols 17 | ax = ax.ravel() 18 | plt.subplots_adjust(wspace=0.05, hspace=0.05) 19 | for i in range(n): 20 | block = blocks[i].squeeze().detach().numpy() 21 | block = (block - np.min(block)) / (np.max(block) - np.min(block)) 22 | temp = np.array(block * 255.0, dtype=np.uint8) 23 | temp = cv2.applyColorMap(temp, cv2.COLORMAP_JET) 24 | ax[i].imshow(temp, cmap=plt.cm.jet) # cmap='gray' 25 | ax[i].axis('off') 26 | 27 | LOGGER.info(f'Saving {f}... ({n}/{channels})') 28 | plt.savefig(f, dpi=300, bbox_inches='tight') 29 | plt.close() 30 | np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy()) # npy save -------------------------------------------------------------------------------- /objectdetection-tricks/tricks_6.py: -------------------------------------------------------------------------------- 1 | import pkg_resources as pkg 2 | def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False, hard=False, verbose=False): 3 | # Check version vs. required version 4 | current, minimum = (pkg.parse_version(x) for x in (current, minimum)) 5 | result = (current == minimum) if pinned else (current >= minimum) # bool 6 | return result 7 | 8 | 9 | def set_seeds(seed=0, deterministic=False): 10 | # Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html 11 | random.seed(seed) 12 | np.random.seed(seed) 13 | torch.manual_seed(seed) 14 | torch.cuda.manual_seed(seed) 15 | torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe 16 | # torch.backends.cudnn.benchmark = True # AutoBatch problem https://github.com/ultralytics/yolov5/issues/9287 17 | if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 18 | torch.use_deterministic_algorithms(True) 19 | torch.backends.cudnn.deterministic = True 20 | os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' 21 | os.environ['PYTHONHASHSEED'] = str(seed) -------------------------------------------------------------------------------- /yolo-gradcam/README.md: -------------------------------------------------------------------------------- 1 | # yolo-gradcam 2 | yolo model with gradcam visual. 3 | 即插即用,不需要对源码进行任何修改! 4 | 5 | ## 哔哩哔哩视频教学地址 6 | 1. yolov5-[哔哩哔哩地址](https://www.bilibili.com/video/BV1F6421V77v/) 7 | 2. yolov7-[哔哩哔哩地址](https://www.bilibili.com/video/BV1F6421V77v/) 8 | 3. yolov8-[哔哩哔哩地址](https://www.bilibili.com/video/BV1T2N6eaEFD/) 9 | 4. yolov9-[哔哩哔哩地址](https://www.bilibili.com/video/BV14H4y157MP/) 10 | 5. yolov11-[哔哩哔哩地址](https://www.bilibili.com/video/BV1T2N6eaEFD/) 11 | 12 | ## 环境 13 | pip install grad-cam==1.4.8 -i https://pypi.tuna.tsinghua.edu.cn/simple 14 | 15 | ## 注意事项 16 | 1. yolov5是在v7.0进行编写和测试的。 17 | 2. yolov7是在2023.10.1号的版本进行编写和测试的。 18 | 3. yolov8是在2024.1.31号的版本进行编写和测试的。 19 | 4. 
yolov9是在2024.3.7号的版本进行编写和测试的。 20 | 5. 建议在新版本下进行使用,旧版本可能会有报错,需要自行解决。 21 | -------------------------------------------------------------------------------- /yolo-improve/paper.md: -------------------------------------------------------------------------------- 1 | # 基于YOLO和RT-DETR的论文全流程指导项目<此项目全程由E导主导> 2 | 3 | ### 1. 入手此项目后如果还需要一对一的服务享受9折优惠,此一对一为E导主导 4 | 5 | 1. 实验方面讲解 248/h (会员218/h) --(拒绝废话纯干货直击痛点) 6 | 2. 论文方面讲解 298/h (会员248/h) --(拒绝废话纯干货直击痛点) 7 | 8 | ### 2. 讲课相关安排 9 | 10 | - 1.进群须知: 11 | - (1) 从入群时间起,群内会员有效期为一年(一年后如有需要则续费即可) 12 | - (2) 1月份建群起开始直播讲课,逐渐直播+直播回放(而不是加群则提前录制好了全部课程) 13 | - (3) 讲课方式:qq群课堂or腾讯会议直播(具体群通知)(后进群或没参与直播的可看录屏回放) 14 | - (4) 每次直播附带直播答疑服务,每次直播约1-2小时 15 | - (5) 一周至少一次直播课,每次直播会按照以下流程告知讲课内容 16 | - (6) 项目不附带私人答疑服务,群里附带答疑服务,平时我有时间都会回复群里部分问题 17 | 18 | - 2. 答疑相关细节: 19 | - (1) 直播时答疑:课前excel收集群内近日答疑问题,直播时讲解答疑问题 20 | - (2) 群内日常答疑:群里附带答疑服务,平时我有时间都会回复群里部分问题 21 | - 3. 讲课流程: 22 | - (1) 课前 23 | - - 课前 先 提前告知讲课时间 && 收集讲课内容(群投票) 24 | - (群投票内容为讲课目录,投票最多的地方则为本次课需要讲解的地方,若无则按顺序讲解) 25 | - - 课前 中 选定课程目录后告知讲课内容 26 | - - 课前 后 Excel在线表格收集该内容的相关答疑问题,上课解决(答疑问题时本人必须在场) 27 | - (2) 课中(全程1小时左右/每次课) 28 | - - 课中 先 直播讲课(按照课前定好的目录) 29 | - - 课中 中 总结讲课内容 30 | - - 课中 后 直播答疑(按照课前Excel的收集表,课中弹幕出现的问题)--答疑期间可连麦可互动 31 | - (3) 课后 录制回放发群里,下次讲课时间再定(根据实际情况一周2次以上,上不封顶) 32 | - (4) 课后 每周群内某个时间段免费远程解决bug问题(可Todesk远程帮忙解决) 33 | - (5) 后续项目内容会逐渐完善,会员福利会逐渐更新补充,敬请期待 34 | 35 | ### 3. 论文项目课程目录(每次直播回放视频会对应课程目录内容,提供索引供大家后期检索) 36 | 37 | 1. 搜论文的几种方式 38 | 1.1 谷歌学术 web of science IEEE Springer MDPI ScienceDirection 等等 39 | 1.2 一些技巧(查看不能看的论文等等) 40 | 2. 如何参考相关论文,关键字搜索--针对性找到自己想要的参考论文 41 | 3. 写论文的方法(每个部分的写作逻辑和模版)(①介绍 ②相关工作 ③方法 ④实验 ⑤结论) 42 | 3.1 介绍-------只需要确定好课题方向即可开写(实验部分先空着) 43 | 3.1.1 写作逻辑和思路讲解 44 | 3.1.2 怎么写,该写什么 45 | 3.2 相关工作---可能会涉及到数据集,基线模型,三个创新点方面的相关工作 46 | 3.2.1 写作逻辑和思路讲解 47 | 3.2.2 怎么写,该写什么 48 | 3.3 方法-------整体框架+三到四个创新点 49 | 3.3.0 写作逻辑和思路讲解 50 | 3.3.1 画图(从入门到接近顶会水平) 51 | 3.3.2 公式(如何写公式等等) 52 | 3.3.3 文字描述创新点(快有快的方法,慢有慢的方法) 53 | 3.4 实验 54 | 3.4.0 写作逻辑和思路讲解 55 | 3.4.1 表格(该做哪些实验,该放哪些指标,授人以鱼并且授人以渔) 56 | 3.5 摘要,结论 57 | 3.5.0 写作逻辑和思路讲解 58 | 3.5.1 总结性内容一次性搞清 59 | 3.6 参考文献 60 | 3.6.1 如何引用,引用格式 61 | 4. 投稿选择(会议 or 期刊) 62 | 4.1 EI论文 63 | 4.2 CCF论文 64 | 4.3 SCI论文---如何筛选自己适合投哪些期刊 65 | 4.4 中文核心 or 北大核心 or 学报 66 | 5. 论文规范 67 | 5.1 审美,格式规范 68 | 5.2 论文逻辑严谨 69 | 5.3 论文书写有说服力 70 | 5.4 投稿前先预审稿 71 | 6. 独特技巧经验,高效技巧(讲课过程中会随机穿插小技巧,不过多解释,懂的都懂) 72 | 7. 投稿前的一些准备工作,根据期刊等级帮忙查看是否达到发论文的要求(一对一范畴) 73 | 8. 硕士毕业大论文书写 74 | 9. 持续更新中........ -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/data/GlobalWheat2020.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Saskatchewan 3 | # Example usage: python train.py --data GlobalWheat2020.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | # └── GlobalWheat2020 ← downloads here (7.0 GB) 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/GlobalWheat2020 # dataset root dir 12 | train: # train images (relative to 'path') 3422 images 13 | - images/arvalis_1 14 | - images/arvalis_2 15 | - images/arvalis_3 16 | - images/ethz_1 17 | - images/rres_1 18 | - images/inrae_1 19 | - images/usask_1 20 | val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1) 21 | - images/ethz_1 22 | test: # test images (optional) 1276 images 23 | - images/utokyo_1 24 | - images/utokyo_2 25 | - images/nau_1 26 | - images/uq_1 27 | 28 | # Classes 29 | names: 30 | 0: wheat_head 31 | 32 | 33 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 34 | download: | 35 | from utils.general import download, Path 36 | 37 | 38 | # Download 39 | dir = Path(yaml['path']) # dataset root dir 40 | urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip', 41 | 'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip'] 42 | download(urls, dir=dir) 43 | 44 | # Make Directories 45 | for p in 'annotations', 'images', 'labels': 46 | (dir / p).mkdir(parents=True, exist_ok=True) 47 | 48 | # Move 49 | for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \ 50 | 'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1': 51 | (dir / p).rename(dir / 'images' / p) # move to /images 52 | f = (dir / p).with_suffix('.json') # json file 53 | if f.exists(): 54 | f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations 55 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/data/coco128-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics 3 | # Example usage: python train.py --data coco128.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | # └── coco128-seg ← downloads here (7 MB) 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/coco128-seg # dataset root dir 12 | train: images/train2017 # train images (relative to 'path') 128 images 13 | val: images/train2017 # val images (relative to 'path') 128 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | 100 | # Download script/URL (optional) 101 | download: https://ultralytics.com/assets/coco128-seg.zip 102 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/data/coco128.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics 3 | # Example usage: python train.py --data coco128.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | # └── coco128 ← downloads here (7 MB) 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
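The comment above lists the three accepted forms for a split entry (a directory, an images .txt file, or a list of directories), all interpreted relative to `path`. A minimal sketch of resolving such an entry follows — `resolve_split` and the local YAML paths are illustrative assumptions, not part of the repository:

```python
# Sketch: normalize a YOLOv5-style data YAML split entry (dir, .txt file, or list of dirs)
# into absolute paths rooted at the YAML's `path` field.
from pathlib import Path
import yaml  # pip install pyyaml

def resolve_split(data_yaml, split='train'):
    cfg = yaml.safe_load(Path(data_yaml).read_text())
    root = Path(cfg.get('path', '.'))
    entry = cfg.get(split)
    if entry is None:                                  # e.g. an empty `test:` field
        return []
    entries = entry if isinstance(entry, list) else [entry]
    return [str((root / e).resolve()) for e in entries]

if __name__ == '__main__':
    print(resolve_split('data/coco128.yaml', 'train'))        # single-directory form
    print(resolve_split('data/GlobalWheat2020.yaml', 'val'))  # list-of-directories form
```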
11 | path: ../datasets/coco128 # dataset root dir 12 | train: images/train2017 # train images (relative to 'path') 128 images 13 | val: images/train2017 # val images (relative to 'path') 128 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | 100 | # Download script/URL (optional) 101 | download: https://ultralytics.com/assets/coco128.zip 102 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/data/hyps/hyp.Objects365.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Hyperparameters for Objects365 training 3 | # python train.py --weights yolov5m.pt --data Objects365.yaml --evolve 4 | # See Hyperparameter Evolution tutorial for details https://github.com/ultralytics/yolov5#tutorials 5 | 6 | lr0: 0.00258 7 | lrf: 0.17 8 | momentum: 0.779 9 | weight_decay: 0.00058 10 | warmup_epochs: 1.33 11 | warmup_momentum: 0.86 12 | warmup_bias_lr: 0.0711 13 | box: 0.0539 14 | cls: 0.299 15 | cls_pw: 0.825 16 | obj: 0.632 17 | obj_pw: 1.0 18 | iou_t: 0.2 19 | anchor_t: 3.44 20 | anchors: 3.2 21 | fl_gamma: 0.0 22 | hsv_h: 0.0188 23 | hsv_s: 0.704 24 | hsv_v: 0.36 25 | degrees: 0.0 26 | translate: 0.0902 27 | scale: 0.491 28 | shear: 0.0 29 | perspective: 0.0 30 | flipud: 0.0 31 | fliplr: 0.5 32 | mosaic: 1.0 33 | mixup: 0.0 34 | copy_paste: 0.0 35 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/data/hyps/hyp.VOC.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Hyperparameters for VOC training 3 | # python train.py --batch 128 --weights yolov5m6.pt --data VOC.yaml --epochs 50 --img 512 --hyp hyp.scratch-med.yaml --evolve 4 | # See Hyperparameter Evolution tutorial for details https://github.com/ultralytics/yolov5#tutorials 5 | 6 | # YOLOv5 Hyperparameter Evolution Results 7 | # Best generation: 467 8 | # Last generation: 996 9 | # metrics/precision, metrics/recall, metrics/mAP_0.5, metrics/mAP_0.5:0.95, val/box_loss, val/obj_loss, val/cls_loss 10 | # 0.87729, 0.85125, 0.91286, 
0.72664, 0.0076739, 0.0042529, 0.0013865 11 | 12 | lr0: 0.00334 13 | lrf: 0.15135 14 | momentum: 0.74832 15 | weight_decay: 0.00025 16 | warmup_epochs: 3.3835 17 | warmup_momentum: 0.59462 18 | warmup_bias_lr: 0.18657 19 | box: 0.02 20 | cls: 0.21638 21 | cls_pw: 0.5 22 | obj: 0.51728 23 | obj_pw: 0.67198 24 | iou_t: 0.2 25 | anchor_t: 3.3744 26 | fl_gamma: 0.0 27 | hsv_h: 0.01041 28 | hsv_s: 0.54703 29 | hsv_v: 0.27739 30 | degrees: 0.0 31 | translate: 0.04591 32 | scale: 0.75544 33 | shear: 0.0 34 | perspective: 0.0 35 | flipud: 0.0 36 | fliplr: 0.5 37 | mosaic: 0.85834 38 | mixup: 0.04266 39 | copy_paste: 0.0 40 | anchors: 3.412 41 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/data/hyps/hyp.no-augmentation.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Hyperparameters when using Albumentations frameworks 3 | # python train.py --hyp hyp.no-augmentation.yaml 4 | # See https://github.com/ultralytics/yolov5/pull/3882 for YOLOv5 + Albumentations Usage examples 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.3 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 0.7 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 3 # anchors per output layer (0 to ignore) 21 | # this parameters are all zero since we want to use albumentation framework 22 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 23 | hsv_h: 0 # image HSV-Hue augmentation (fraction) 24 | hsv_s: 00 # image HSV-Saturation augmentation (fraction) 25 | hsv_v: 0 # image HSV-Value augmentation (fraction) 26 | degrees: 0.0 # image rotation (+/- deg) 27 | translate: 0 # image translation (+/- fraction) 28 | scale: 0 # image scale (+/- gain) 29 | shear: 0 # image shear (+/- deg) 30 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 31 | flipud: 0.0 # image flip up-down (probability) 32 | fliplr: 0.0 # image flip left-right (probability) 33 | mosaic: 0.0 # image mosaic (probability) 34 | mixup: 0.0 # image mixup (probability) 35 | copy_paste: 0.0 # segment copy-paste (probability) 36 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/data/hyps/hyp.scratch-high.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Hyperparameters for high-augmentation COCO training from scratch 3 | # python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300 4 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial 
momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.3 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 0.7 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 3 # anchors per output layer (0 to ignore) 21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 24 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 25 | degrees: 0.0 # image rotation (+/- deg) 26 | translate: 0.1 # image translation (+/- fraction) 27 | scale: 0.9 # image scale (+/- gain) 28 | shear: 0.0 # image shear (+/- deg) 29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 30 | flipud: 0.0 # image flip up-down (probability) 31 | fliplr: 0.5 # image flip left-right (probability) 32 | mosaic: 1.0 # image mosaic (probability) 33 | mixup: 0.1 # image mixup (probability) 34 | copy_paste: 0.1 # segment copy-paste (probability) 35 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/data/hyps/hyp.scratch-low.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Hyperparameters for low-augmentation COCO training from scratch 3 | # python train.py --batch 64 --cfg yolov5n6.yaml --weights '' --data coco.yaml --img 640 --epochs 300 --linear 4 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.5 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 1.0 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 3 # anchors per output layer (0 to ignore) 21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 24 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 25 | degrees: 0.0 # image rotation (+/- deg) 26 | translate: 0.1 # image translation (+/- fraction) 27 | scale: 0.5 # image scale (+/- gain) 28 | shear: 0.0 # image shear (+/- deg) 29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 30 | flipud: 0.0 # image flip up-down (probability) 31 | fliplr: 0.5 # image flip left-right (probability) 32 | mosaic: 1.0 # image mosaic (probability) 33 | mixup: 0.0 # image mixup (probability) 34 | copy_paste: 0.0 # segment copy-paste (probability) 35 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/data/hyps/hyp.scratch-med.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Hyperparameters for medium-augmentation COCO training from scratch 
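The hyp.scratch-low, -med and -high files share the same keys and differ only in a handful of augmentation and loss-gain values (e.g. lrf, cls, obj, scale, mixup, copy_paste). A quick way to see the exact differences is sketched below — `diff_hyps` and the relative paths are assumptions, run from the yolov5-AUX root:

```python
# Sketch: print the hyperparameters whose values differ between two hyp YAML files.
from pathlib import Path
import yaml  # pip install pyyaml

def diff_hyps(file_a, file_b):
    a = yaml.safe_load(Path(file_a).read_text())
    b = yaml.safe_load(Path(file_b).read_text())
    for key in sorted(set(a) | set(b)):
        if a.get(key) != b.get(key):
            print(f'{key:12s} {a.get(key)!s:>8} -> {b.get(key)!s:>8}')

if __name__ == '__main__':
    diff_hyps('data/hyps/hyp.scratch-low.yaml', 'data/hyps/hyp.scratch-high.yaml')
```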
3 | # python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300 4 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.3 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 0.7 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 3 # anchors per output layer (0 to ignore) 21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 24 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 25 | degrees: 0.0 # image rotation (+/- deg) 26 | translate: 0.1 # image translation (+/- fraction) 27 | scale: 0.9 # image scale (+/- gain) 28 | shear: 0.0 # image shear (+/- deg) 29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 30 | flipud: 0.0 # image flip up-down (probability) 31 | fliplr: 0.5 # image flip left-right (probability) 32 | mosaic: 1.0 # image mosaic (probability) 33 | mixup: 0.1 # image mixup (probability) 34 | copy_paste: 0.0 # segment copy-paste (probability) 35 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/data/images/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/data/images/bus.jpg -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/data/images/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/data/images/zidane.jpg -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/data/scripts/download_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 3 | # Download latest models from https://github.com/ultralytics/yolov5/releases 4 | # Example usage: bash data/scripts/download_weights.sh 5 | # parent 6 | # └── yolov5 7 | # ├── yolov5s.pt ← downloads here 8 | # ├── yolov5m.pt 9 | # └── ... 
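The script hands off to an inline Python snippet to fetch the checkpoints into the weights/ folder shown in the tree above. As a rough stand-alone equivalent, the following sketch (the release tag, asset names and target directory are assumptions) downloads a couple of them with torch.hub:

```python
# Sketch: download a few YOLOv5 release checkpoints into ./weights.
# The v7.0 release tag and asset names are assumptions; adjust to the release you need.
from pathlib import Path
import torch

BASE = 'https://github.com/ultralytics/yolov5/releases/download/v7.0'
out = Path('weights')
out.mkdir(exist_ok=True)
for name in ('yolov5n.pt', 'yolov5s.pt'):
    dst = out / name
    if not dst.exists():
        torch.hub.download_url_to_file(f'{BASE}/{name}', str(dst))
        print(f'downloaded {dst}')
```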
10 | 11 | python - < 1 or (nd == 0 and torch.cuda.device_count() > 1) # distributed data parallel 31 | 32 | if ddp: # multi-GPU 33 | port += 1 34 | cmd = f'python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py --resume {last}' 35 | else: # single-GPU 36 | cmd = f'python train.py --resume {last}' 37 | 38 | cmd += ' > /dev/null 2>&1 &' # redirect output to dev/null and run in daemon thread 39 | print(cmd) 40 | os.system(cmd) 41 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/aws/userdata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html 3 | # This script will run only once on first instance start (for a re-start script see mime.sh) 4 | # /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir 5 | # Use >300 GB SSD 6 | 7 | cd home/ubuntu 8 | if [ ! -d yolov5 ]; then 9 | echo "Running first-time script." # install dependencies, download COCO, pull Docker 10 | git clone https://github.com/ultralytics/yolov5 -b master && sudo chmod -R 777 yolov5 11 | cd yolov5 12 | bash data/scripts/get_coco.sh && echo "COCO done." & 13 | sudo docker pull ultralytics/yolov5:latest && echo "Docker done." & 14 | python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." & 15 | wait && echo "All tasks done." # finish background tasks 16 | else 17 | echo "Running re-start script." # resume interrupted runs 18 | i=0 19 | list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour' 20 | while IFS= read -r id; do 21 | ((i++)) 22 | echo "restarting container $i: $id" 23 | sudo docker start $id 24 | # sudo docker exec -it $id python train.py --resume # single-GPU 25 | sudo docker exec -d $id python utils/aws/resume.py # multi-scenario 26 | done <<<"$list" 27 | fi 28 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/docker/Dockerfile-arm64: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Builds ultralytics/yolov5:latest-arm64 image on DockerHub https://hub.docker.com/r/ultralytics/yolov5 3 | # Image is aarch64-compatible for Apple M1 and other ARM architectures i.e. Jetson Nano and Raspberry Pi 4 | 5 | # Start FROM Ubuntu image https://hub.docker.com/_/ubuntu 6 | FROM arm64v8/ubuntu:rolling 7 | 8 | # Downloads to user config dir 9 | ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/ 10 | 11 | # Install linux packages 12 | ENV DEBIAN_FRONTEND noninteractive 13 | RUN apt update 14 | RUN TZ=Etc/UTC apt install -y tzdata 15 | RUN apt install --no-install-recommends -y python3-pip git zip curl htop gcc libgl1-mesa-glx libglib2.0-0 libpython3-dev 16 | # RUN alias python=python3 17 | 18 | # Install pip packages 19 | COPY requirements.txt . 20 | RUN python3 -m pip install --upgrade pip wheel 21 | RUN pip install --no-cache -r requirements.txt albumentations gsutil notebook \ 22 | coremltools onnx onnxruntime 23 | # tensorflow-aarch64 tensorflowjs \ 24 | 25 | # Create working directory 26 | RUN mkdir -p /usr/src/app 27 | WORKDIR /usr/src/app 28 | 29 | # Copy contents 30 | # COPY . 
/usr/src/app (issues as not a .git directory) 31 | RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app 32 | ENV DEBIAN_FRONTEND teletype 33 | 34 | 35 | # Usage Examples ------------------------------------------------------------------------------------------------------- 36 | 37 | # Build and Push 38 | # t=ultralytics/yolov5:latest-arm64 && sudo docker build --platform linux/arm64 -f utils/docker/Dockerfile-arm64 -t $t . && sudo docker push $t 39 | 40 | # Pull and Run 41 | # t=ultralytics/yolov5:latest-arm64 && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t 42 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/docker/Dockerfile-cpu: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Builds ultralytics/yolov5:latest-cpu image on DockerHub https://hub.docker.com/r/ultralytics/yolov5 3 | # Image is CPU-optimized for ONNX, OpenVINO and PyTorch YOLOv5 deployments 4 | 5 | # Start FROM Ubuntu image https://hub.docker.com/_/ubuntu 6 | FROM ubuntu:rolling 7 | 8 | # Downloads to user config dir 9 | ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/ 10 | 11 | # Install linux packages 12 | ENV DEBIAN_FRONTEND noninteractive 13 | RUN apt update 14 | RUN TZ=Etc/UTC apt install -y tzdata 15 | RUN apt install --no-install-recommends -y python3-pip git zip curl htop libgl1-mesa-glx libglib2.0-0 libpython3-dev gnupg 16 | # RUN alias python=python3 17 | 18 | # Install pip packages 19 | COPY requirements.txt . 20 | RUN python3 -m pip install --upgrade pip wheel 21 | RUN pip install --no-cache -r requirements.txt albumentations gsutil notebook \ 22 | coremltools onnx onnx-simplifier onnxruntime 'openvino-dev>=2022.3' \ 23 | # tensorflow tensorflowjs \ 24 | --extra-index-url https://download.pytorch.org/whl/cpu 25 | 26 | # Create working directory 27 | RUN mkdir -p /usr/src/app 28 | WORKDIR /usr/src/app 29 | 30 | # Copy contents 31 | # COPY . /usr/src/app (issues as not a .git directory) 32 | RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app 33 | ENV DEBIAN_FRONTEND teletype 34 | 35 | 36 | # Usage Examples ------------------------------------------------------------------------------------------------------- 37 | 38 | # Build and Push 39 | # t=ultralytics/yolov5:latest-cpu && sudo docker build -f utils/docker/Dockerfile-cpu -t $t . && sudo docker push $t 40 | 41 | # Pull and Run 42 | # t=ultralytics/yolov5:latest-cpu && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t 43 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/flask_rest_api/README.md: -------------------------------------------------------------------------------- 1 | # Flask REST API 2 | 3 | [REST](https://en.wikipedia.org/wiki/Representational_state_transfer) [API](https://en.wikipedia.org/wiki/API)s are 4 | commonly used to expose Machine Learning (ML) models to other services. This folder contains an example REST API 5 | created using Flask to expose the YOLOv5s model from [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/). 6 | 7 | ## Requirements 8 | 9 | [Flask](https://palletsprojects.com/p/flask/) is required. 
Install with: 10 | 11 | ```shell 12 | $ pip install Flask 13 | ``` 14 | 15 | ## Run 16 | 17 | After Flask installation run: 18 | 19 | ```shell 20 | $ python3 restapi.py --port 5000 21 | ``` 22 | 23 | Then use [curl](https://curl.se/) to perform a request: 24 | 25 | ```shell 26 | $ curl -X POST -F image=@zidane.jpg 'http://localhost:5000/v1/object-detection/yolov5s' 27 | ``` 28 | 29 | The model inference results are returned as a JSON response: 30 | 31 | ```json 32 | [ 33 | { 34 | "class": 0, 35 | "confidence": 0.8900438547, 36 | "height": 0.9318675399, 37 | "name": "person", 38 | "width": 0.3264600933, 39 | "xcenter": 0.7438579798, 40 | "ycenter": 0.5207948685 41 | }, 42 | { 43 | "class": 0, 44 | "confidence": 0.8440024257, 45 | "height": 0.7155083418, 46 | "name": "person", 47 | "width": 0.6546785235, 48 | "xcenter": 0.427829951, 49 | "ycenter": 0.6334488392 50 | }, 51 | { 52 | "class": 27, 53 | "confidence": 0.3771208823, 54 | "height": 0.3902671337, 55 | "name": "tie", 56 | "width": 0.0696444362, 57 | "xcenter": 0.3675483763, 58 | "ycenter": 0.7991207838 59 | }, 60 | { 61 | "class": 27, 62 | "confidence": 0.3527112305, 63 | "height": 0.1540903747, 64 | "name": "tie", 65 | "width": 0.0336618312, 66 | "xcenter": 0.7814827561, 67 | "ycenter": 0.5065554976 68 | } 69 | ] 70 | ``` 71 | 72 | An example python script to perform inference using [requests](https://docs.python-requests.org/en/master/) is given 73 | in `example_request.py` 74 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/flask_rest_api/example_request.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Perform test request 4 | """ 5 | 6 | import pprint 7 | 8 | import requests 9 | 10 | DETECTION_URL = 'http://localhost:5000/v1/object-detection/yolov5s' 11 | IMAGE = 'zidane.jpg' 12 | 13 | # Read image 14 | with open(IMAGE, 'rb') as f: 15 | image_data = f.read() 16 | 17 | response = requests.post(DETECTION_URL, files={'image': image_data}).json() 18 | 19 | pprint.pprint(response) 20 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/flask_rest_api/restapi.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Run a Flask REST API exposing one or more YOLOv5s models 4 | """ 5 | 6 | import argparse 7 | import io 8 | 9 | import torch 10 | from flask import Flask, request 11 | from PIL import Image 12 | 13 | app = Flask(__name__) 14 | models = {} 15 | 16 | DETECTION_URL = '/v1/object-detection/' 17 | 18 | 19 | @app.route(DETECTION_URL, methods=['POST']) 20 | def predict(model): 21 | if request.method != 'POST': 22 | return 23 | 24 | if request.files.get('image'): 25 | # Method 1 26 | # with request.files["image"] as f: 27 | # im = Image.open(io.BytesIO(f.read())) 28 | 29 | # Method 2 30 | im_file = request.files['image'] 31 | im_bytes = im_file.read() 32 | im = Image.open(io.BytesIO(im_bytes)) 33 | 34 | if model in models: 35 | results = models[model](im, size=640) # reduce size=320 for faster inference 36 | return results.pandas().xyxy[0].to_json(orient='records') 37 | 38 | 39 | if __name__ == '__main__': 40 | parser = argparse.ArgumentParser(description='Flask API exposing YOLOv5 model') 41 | parser.add_argument('--port', default=5000, type=int, help='port number') 42 | parser.add_argument('--model', nargs='+', 
default=['yolov5s'], help='model(s) to run, i.e. --model yolov5n yolov5s') 43 | opt = parser.parse_args() 44 | 45 | for m in opt.model: 46 | models[m] = torch.hub.load('ultralytics/yolov5', m, force_reload=True, skip_validation=True) 47 | 48 | app.run(host='0.0.0.0', port=opt.port) # debug=True causes Restarting with stat 49 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/google_app_engine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/google-appengine/python 2 | 3 | # Create a virtualenv for dependencies. This isolates these packages from 4 | # system-level packages. 5 | # Use -p python3 or -p python3.7 to select python version. Default is version 2. 6 | RUN virtualenv /env -p python3 7 | 8 | # Setting these environment variables are the same as running 9 | # source /env/bin/activate. 10 | ENV VIRTUAL_ENV /env 11 | ENV PATH /env/bin:$PATH 12 | 13 | RUN apt-get update && apt-get install -y python-opencv 14 | 15 | # Copy the application's requirements.txt and run pip to install all 16 | # dependencies into the virtualenv. 17 | ADD requirements.txt /app/requirements.txt 18 | RUN pip install -r /app/requirements.txt 19 | 20 | # Add the application source code. 21 | ADD . /app 22 | 23 | # Run a WSGI server to serve the application. gunicorn must be declared as 24 | # a dependency in requirements.txt. 25 | CMD gunicorn -b :$PORT main:app 26 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/google_app_engine/additional_requirements.txt: -------------------------------------------------------------------------------- 1 | # add these requirements in your app on top of the existing ones 2 | pip==21.1 3 | Flask==1.0.2 4 | gunicorn==19.10.0 5 | werkzeug>=2.2.3 # not directly required, pinned by Snyk to avoid a vulnerability 6 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/google_app_engine/app.yaml: -------------------------------------------------------------------------------- 1 | runtime: custom 2 | env: flex 3 | 4 | service: yolov5app 5 | 6 | liveness_check: 7 | initial_delay_sec: 600 8 | 9 | manual_scaling: 10 | instances: 1 11 | resources: 12 | cpu: 1 13 | memory_gb: 4 14 | disk_size_gb: 20 15 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/loggers/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/utils/loggers/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/loggers/clearml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/utils/loggers/clearml/__init__.py -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/loggers/clearml/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/utils/loggers/clearml/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/loggers/clearml/__pycache__/clearml_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/utils/loggers/clearml/__pycache__/clearml_utils.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/loggers/comet/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/utils/loggers/comet/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/loggers/comet/__pycache__/comet_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/utils/loggers/comet/__pycache__/comet_utils.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/loggers/wandb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/utils/loggers/wandb/__init__.py -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/loggers/wandb/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/utils/loggers/wandb/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/loggers/wandb/__pycache__/wandb_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/utils/loggers/wandb/__pycache__/wandb_utils.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/segment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/utils/segment/__init__.py -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/segment/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/utils/segment/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/utils/segment/__pycache__/general.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/utils/segment/__pycache__/general.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-AUX/yolov5-AUX.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-AUX/yolov5-AUX.zip -------------------------------------------------------------------------------- /yolo-improve/yolov5-CARAFE.py: -------------------------------------------------------------------------------- 1 | class CARAFE(nn.Module): 2 | def __init__(self, c, k_enc=3, k_up=5, c_mid=64, scale=2): 3 | """ The unofficial implementation of the CARAFE module. 4 | The details are in "https://arxiv.org/abs/1905.02188". 5 | Args: 6 | c: The channel number of the input and the output. 7 | c_mid: The channel number after compression. 8 | scale: The expected upsample scale. 9 | k_up: The size of the reassembly kernel. 10 | k_enc: The kernel size of the encoder. 11 | Returns: 12 | X: The upsampled feature map. 13 | """ 14 | super(CARAFE, self).__init__() 15 | self.scale = scale 16 | 17 | self.comp = Conv(c, c_mid) 18 | self.enc = Conv(c_mid, (scale*k_up)**2, k=k_enc, act=False) 19 | self.pix_shf = nn.PixelShuffle(scale) 20 | 21 | self.upsmp = nn.Upsample(scale_factor=scale, mode='nearest') 22 | self.unfold = nn.Unfold(kernel_size=k_up, dilation=scale, 23 | padding=k_up//2*scale) 24 | 25 | def forward(self, X): 26 | b, c, h, w = X.size() 27 | h_, w_ = h * self.scale, w * self.scale 28 | 29 | W = self.comp(X) # b * m * h * w 30 | W = self.enc(W) # b * 100 * h * w 31 | W = self.pix_shf(W) # b * 25 * h_ * w_ 32 | W = torch.softmax(W, dim=1) # b * 25 * h_ * w_ 33 | 34 | X = self.upsmp(X) # b * c * h_ * w_ 35 | X = self.unfold(X) # b * 25c * h_ * w_ 36 | X = X.view(b, c, -1, h_, w_) # b * 25 * c * h_ * w_ 37 | 38 | X = torch.einsum('bkhw,bckhw->bchw', [W, X]) # b * c * h_ * w_ 39 | return X 40 | 41 | elif m is CARAFE: 42 | c2 = ch[f] 43 | args = [c2, *args] -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/commod.py: -------------------------------------------------------------------------------- 1 | from models.ops_dcnv3.modules import DCNv3 2 | class DCNV3_YoLo(nn.Module): 3 | def __init__(self, inc, ouc, k=1, s=1, p=None, g=1, d=1, act=True): 4 | super().__init__() 5 | 6 | self.conv = Conv(inc, ouc, k=1) 7 | self.dcnv3 = DCNv3(ouc, kernel_size=k, stride=s, group=g, dilation=d) 8 | self.bn = nn.BatchNorm2d(ouc) 9 | self.act = Conv.default_act 10 | 11 | def forward(self, x): 12 | x = self.conv(x) 13 | x = x.permute(0, 2, 3, 1) 14 | x = self.dcnv3(x) 15 | x = x.permute(0, 3, 1, 2) 16 | x = self.act(self.bn(x)) 17 | return x 18 | 19 | class Bottleneck_DCNV3(nn.Module): 20 | # Standard bottleneck 21 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 22 | super().__init__() 23 
| c_ = int(c2 * e) # hidden channels 24 | self.cv1 = Conv(c1, c_, 1, 1) 25 | self.cv2 = DCNV3_YoLo(c_, c2, 3, 1, g=g) 26 | self.add = shortcut and c1 == c2 27 | 28 | def forward(self, x): 29 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 30 | 31 | class C3_DCNV3(nn.Module): 32 | # CSP Bottleneck with 3 convolutions 33 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 34 | super().__init__() 35 | c_ = int(c2 * e) # hidden channels 36 | self.cv1 = Conv(c1, c_, 1, 1) 37 | self.cv2 = Conv(c1, c_, 1, 1) 38 | self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2) 39 | self.m = nn.Sequential(*(Bottleneck_DCNV3(c_, c_, shortcut, g, e=1.0) for _ in range(n))) 40 | 41 | def forward(self, x): 42 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1)) 43 | 44 | # models/yolo.py DetectionModel class 45 | self.model.to(torch.device('cuda')) 46 | m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s).to(torch.device('cuda')))]).cpu() # forward 47 | self.model.cpu() -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/DCNv3.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: DCNv3 3 | Version: 1.0 4 | Summary: PyTorch Wrapper for CUDA Functions of DCNv3 5 | Home-page: https://github.com/OpenGVLab/InternImage 6 | Author: InternImage 7 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/DCNv3.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | setup.py 2 | /home/hjj/Desktop/python_project/InternImage-master/classification/ops_dcnv3/src/vision.cpp 3 | /home/hjj/Desktop/python_project/InternImage-master/classification/ops_dcnv3/src/cpu/dcnv3_cpu.cpp 4 | /home/hjj/Desktop/python_project/InternImage-master/classification/ops_dcnv3/src/cuda/dcnv3_cuda.cu 5 | /home/hjj/Desktop/python_project/yolov5-master/models/ops_dcnv3/src/vision.cpp 6 | /home/hjj/Desktop/python_project/yolov5-master/models/ops_dcnv3/src/cpu/dcnv3_cpu.cpp 7 | /home/hjj/Desktop/python_project/yolov5-master/models/ops_dcnv3/src/cuda/dcnv3_cuda.cu 8 | DCNv3.egg-info/PKG-INFO 9 | DCNv3.egg-info/SOURCES.txt 10 | DCNv3.egg-info/dependency_links.txt 11 | DCNv3.egg-info/top_level.txt 12 | functions/__init__.py 13 | functions/dcnv3_func.py 14 | modules/__init__.py 15 | modules/dcnv3.py -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/DCNv3.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/DCNv3.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | DCNv3 2 | functions 3 | modules 4 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/build/lib.linux-x86_64-cpython-38/DCNv3.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-DCNV3/ops_dcnv3/build/lib.linux-x86_64-cpython-38/DCNv3.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/build/lib.linux-x86_64-cpython-38/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3_func import DCNv3Function, dcnv3_core_pytorch 8 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/build/lib.linux-x86_64-cpython-38/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3 import DCNv3 -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/build/temp.linux-x86_64-cpython-38/home/hjj/Desktop/python_project/InternImage-master/classification/ops_dcnv3/src/cpu/dcnv3_cpu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-DCNV3/ops_dcnv3/build/temp.linux-x86_64-cpython-38/home/hjj/Desktop/python_project/InternImage-master/classification/ops_dcnv3/src/cpu/dcnv3_cpu.o -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/build/temp.linux-x86_64-cpython-38/home/hjj/Desktop/python_project/InternImage-master/classification/ops_dcnv3/src/cuda/dcnv3_cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-DCNV3/ops_dcnv3/build/temp.linux-x86_64-cpython-38/home/hjj/Desktop/python_project/InternImage-master/classification/ops_dcnv3/src/cuda/dcnv3_cuda.o -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/build/temp.linux-x86_64-cpython-38/home/hjj/Desktop/python_project/InternImage-master/classification/ops_dcnv3/src/vision.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-DCNV3/ops_dcnv3/build/temp.linux-x86_64-cpython-38/home/hjj/Desktop/python_project/InternImage-master/classification/ops_dcnv3/src/vision.o -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/dist/DCNv3-1.0-py3.8-linux-x86_64.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-DCNV3/ops_dcnv3/dist/DCNv3-1.0-py3.8-linux-x86_64.egg 
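The build artifacts above (the .so, .o and .egg files) are produced by ops_dcnv3/make.sh further below, i.e. by running python setup.py build install, and the resulting DCNv3 extension is what yolov5-DCNV3/commod.py imports. Below is a minimal, hypothetical smoke test (not part of the repository) for checking that the extension is importable before wiring DCNV3_YoLo into a model; the top-level module names DCNv3, functions and modules are taken from top_level.txt, and dcnv3_core_pytorch is the pure-PyTorch reference exported by functions/__init__.py.

import torch

try:
    import DCNv3  # compiled CUDA extension installed by `python setup.py build install`
    print('DCNv3 extension found at:', DCNv3.__file__)
except ImportError:
    # Without the compiled op, functions.dcnv3_func.dcnv3_core_pytorch can act as a slower fallback.
    print('DCNv3 extension not built yet - run ops_dcnv3/make.sh first')

print('CUDA available:', torch.cuda.is_available())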
-------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3_func import DCNv3Function, dcnv3_core_pytorch 8 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/functions/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-DCNV3/ops_dcnv3/functions/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/functions/__pycache__/dcnv3_func.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-DCNV3/ops_dcnv3/functions/__pycache__/dcnv3_func.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -------------------------------------------------------- 3 | # InternImage 4 | # Copyright (c) 2022 OpenGVLab 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | python setup.py build install 9 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3 import DCNv3 -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/modules/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-DCNV3/ops_dcnv3/modules/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/modules/__pycache__/dcnv3.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-DCNV3/ops_dcnv3/modules/__pycache__/dcnv3.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/src/cpu/dcnv3_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 
2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | at::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &offset, 18 | const at::Tensor &mask, const int kernel_h, 19 | const int kernel_w, const int stride_h, 20 | const int stride_w, const int pad_h, 21 | const int pad_w, const int dilation_h, 22 | const int dilation_w, const int group, 23 | const int group_channels, const float offset_scale, 24 | const int im2col_step) { 25 | AT_ERROR("Not implement on cpu"); 26 | } 27 | 28 | std::vector 29 | dcnv3_cpu_backward(const at::Tensor &input, const at::Tensor &offset, 30 | const at::Tensor &mask, const int kernel_h, 31 | const int kernel_w, const int stride_h, const int stride_w, 32 | const int pad_h, const int pad_w, const int dilation_h, 33 | const int dilation_w, const int group, 34 | const int group_channels, const float offset_scale, 35 | const at::Tensor &grad_output, const int im2col_step) { 36 | AT_ERROR("Not implement on cpu"); 37 | } 38 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/src/cpu/dcnv3_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #pragma once 13 | #include 14 | 15 | at::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &offset, 16 | const at::Tensor &mask, const int kernel_h, 17 | const int kernel_w, const int stride_h, 18 | const int stride_w, const int pad_h, 19 | const int pad_w, const int dilation_h, 20 | const int dilation_w, const int group, 21 | const int group_channels, const float offset_scale, 22 | const int im2col_step); 23 | 24 | std::vector 25 | dcnv3_cpu_backward(const at::Tensor &input, const at::Tensor &offset, 26 | const at::Tensor &mask, const int kernel_h, 27 | const int kernel_w, const int stride_h, const int stride_w, 28 | const int pad_h, const int pad_w, const int dilation_h, 29 | const int dilation_w, const int group, 30 | const int group_channels, const float offset_scale, 31 | const at::Tensor &grad_output, const int im2col_step); 32 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/src/cuda/dcnv3_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #pragma once 13 | #include 14 | 15 | at::Tensor dcnv3_cuda_forward(const at::Tensor &input, const at::Tensor &offset, 16 | const at::Tensor &mask, const int kernel_h, 17 | const int kernel_w, const int stride_h, 18 | const int stride_w, const int pad_h, 19 | const int pad_w, const int dilation_h, 20 | const int dilation_w, const int group, 21 | const int group_channels, 22 | const float offset_scale, const int im2col_step); 23 | 24 | std::vector 25 | dcnv3_cuda_backward(const at::Tensor &input, const at::Tensor &offset, 26 | const at::Tensor &mask, const int kernel_h, 27 | const int kernel_w, const int stride_h, const int stride_w, 28 | const int pad_h, const int pad_w, const int dilation_h, 29 | const int dilation_w, const int group, 30 | const int group_channels, const float offset_scale, 31 | const at::Tensor &grad_output, const int im2col_step); 32 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-DCNV3/ops_dcnv3/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #include "dcnv3.h" 13 | 14 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 15 | m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward"); 16 | m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward"); 17 | } 18 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-GFPN/yolov5_GFPN.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # DAMO-YOLO GFPN Head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], # 10 30 | [6, 1, Conv, [512, 3, 2]], 31 | [[-1, 10], 1, Concat, [1]], 32 | [-1, 3, CSPStage, [512]], 
# 13 33 | 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], #14 35 | [4, 1, Conv, [256, 3, 2]], # 15 36 | [[14, -1, 6], 1, Concat, [1]], 37 | [-1, 3, CSPStage, [512]], # 17 38 | 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [[-1, 4], 1, Concat, [1]], 41 | [-1, 3, CSPStage, [256]], # 20 42 | 43 | [-1, 1, Conv, [256, 3, 2]], 44 | [[-1, 17], 1, Concat, [1]], 45 | [-1, 3, CSPStage, [512]], # 23 46 | 47 | [17, 1, Conv, [256, 3, 2]], # 24 48 | [23, 1, Conv, [256, 3, 2]], # 25 49 | [[13, 24, -1], 1, Concat, [1]], 50 | [-1, 3, CSPStage, [1024]], # 27 51 | 52 | [[20, 23, 27], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 53 | ] -------------------------------------------------------------------------------- /yolo-improve/yolov5-GOLDYOLO/yolo.py: -------------------------------------------------------------------------------- 1 | elif m is SimFusion_4in: 2 | c2 = sum(ch[x] for x in f) 3 | elif m is SimFusion_3in: 4 | c2 = args[0] 5 | if c2 != no: # if not output 6 | c2 = make_divisible(c2 * gw, 8) 7 | args = [[ch[f_] for f_ in f], c2] 8 | elif m is IFM: 9 | c1 = ch[f] 10 | c2 = sum(args[0]) 11 | args = [c1, *args] 12 | elif m is InjectionMultiSum_Auto_pool: 13 | c1 = ch[f[0]] 14 | c2 = args[0] 15 | args = [c1, *args] 16 | elif m is PyramidPoolAgg: 17 | c2 = args[0] 18 | args = [sum([ch[f_] for f_ in f]), *args] 19 | elif m is AdvPoolFusion: 20 | c2 = sum(ch[x] for x in f) 21 | elif m is TopBasicLayer: 22 | c2 = sum(args[1]) -------------------------------------------------------------------------------- /yolo-improve/yolov5-GOLDYOLO/yolov5n-goldyolo.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[[2, 4, 6, 9], 1, SimFusion_4in, []], # 10 30 | [-1, 1, IFM, [[64, 32]]], # 11 31 | 32 | [9, 1, Conv, [512, 1, 1]], # 12 33 | [[4, 6, -1], 1, SimFusion_3in, [512]], # 13 34 | [[-1, 11], 1, InjectionMultiSum_Auto_pool, [512, [64, 32], 0]], # 14 35 | [-1, 3, C3, [512, False]], # 15 36 | 37 | [6, 1, Conv, [256, 1, 1]], # 16 38 | [[2, 4, -1], 1, SimFusion_3in, [256]], # 17 39 | [[-1, 11], 1, InjectionMultiSum_Auto_pool, [256, [64, 32], 1]], # 18 40 | [-1, 3, C3, [256, False]], # 19 41 | 42 | [[19, 15, 9], 1, PyramidPoolAgg, [352, 2]], # 20 43 | [-1, 1, TopBasicLayer, [352, [64, 128]]], # 21 44 | 45 | [[19, 16], 1, AdvPoolFusion, []], # 22 46 | [[-1, 21], 1, InjectionMultiSum_Auto_pool, [256, [64, 128], 0]], # 23 47 | [-1, 3, C3, [256, False]], # 24 48 | 49 | [[-1, 12], 1, AdvPoolFusion, []], # 25 50 | [[-1, 21], 1, InjectionMultiSum_Auto_pool, [512, [64, 128], 1]], # 26 51 | [-1, 3, C3, [512, False]], # 27 52 | 53 | [[19, 24, 27], 1, Detect, [nc, anchors]] # 28 54 | ] 55 | -------------------------------------------------------------------------------- 
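The yolo.py snippet above shows the elif branches added to YOLOv5's parse_model so the Gold-YOLO modules receive correct channel arguments. The sketch below is illustrative only (the layer channel values are made up and width scaling is ignored); it just spells out what two of those branches compute.

# Illustrative sketch of the parse_model channel bookkeeping above; not repo code.
ch = [3, 16, 32, 64, 64, 128, 128, 256, 256, 256]  # ch[i] = output channels of layer i (made-up values)

# SimFusion_4in / AdvPoolFusion concatenate their inputs, so c2 is the sum of the input channels.
f = [2, 4, 6, 9]                      # 'from' indices, as in the goldyolo head
c2 = sum(ch[x] for x in f)            # 32 + 64 + 128 + 256 = 480
print('SimFusion_4in c2:', c2)

# InjectionMultiSum_Auto_pool takes c1 from its first input layer and c2 from args[0].
f, args = [4, 11], [256, [64, 32], 0]
c1, c2 = ch[f[0]], args[0]
print('InjectionMultiSum_Auto_pool c1, c2:', c1, c2)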
/yolo-improve/yolov5-GOLDYOLO/yolov7-goldyolo.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # yolov7 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 18 | [-1, 1, Conv, [64, 3, 1]], 19 | 20 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 21 | [-1, 1, Yolov7_E_ELAN, [256, 64]], # 4 22 | 23 | [-1, 1, V7DownSampling, [128]], # 5-P3/8 24 | [-1, 1, Yolov7_E_ELAN, [512, 128]], # 6 25 | 26 | [-1, 1, V7DownSampling, [256]], # 7-P4/16 27 | [-1, 1, Yolov7_E_ELAN, [1024, 256]], # 8 28 | 29 | [-1, 1, V7DownSampling, [512]], # 9-P5/32 30 | [-1, 1, Yolov7_E_ELAN, [1024, 256]], # 10 31 | ] 32 | 33 | # yolov7 head 34 | head: 35 | [[-1, 1, SPPCSPC, [512]], # 11-Yolov7-tiny-spp 36 | [[4, 6, 8, 11], 1, SimFusion_4in, []], # 12 37 | [-1, 1, IFM, [[64, 32]]], # 13 38 | 39 | [11, 1, Conv, [1024, 1, 1]], # 14 40 | [[6, 8, -1], 1, SimFusion_3in, [256]], # 15 41 | [[-1, 13], 1, InjectionMultiSum_Auto_pool, [256, [64, 32], 0]], # 16 42 | [-1, 1, Yolov7_E_ELAN_NECK, [256, 128]], # 17 43 | 44 | [8, 1, Conv, [128, 1, 1]], # 18 45 | [[4, 6, -1], 1, SimFusion_3in, [128]], # 19 46 | [[-1, 13], 1, InjectionMultiSum_Auto_pool, [128, [64, 32], 1]], # 20 47 | [-1, 1, Yolov7_E_ELAN_NECK, [128, 64]], # 21 48 | 49 | [[21, 17, 11], 1, PyramidPoolAgg, [352, 2]], # 22 50 | [-1, 1, TopBasicLayer, [352, [64, 128]]], # 23 51 | 52 | [[21, 18], 1, AdvPoolFusion, []], # 24 53 | [[-1, 23], 1, InjectionMultiSum_Auto_pool, [256, [64, 128], 0]], # 25 54 | [-1, 1, Yolov7_E_ELAN_NECK, [256, 128]], # 26 55 | 56 | [[-1, 14], 1, AdvPoolFusion, []], # 27 57 | [[-1, 23], 1, InjectionMultiSum_Auto_pool, [512, [64, 128], 1]], # 28 58 | [-1, 1, Yolov7_E_ELAN_NECK, [512, 256]], # 29 59 | 60 | [21, 1, RepConv, [256, 3, 1]], # 30-P3 61 | [26, 1, RepConv, [512, 3, 1]], # 31-P4 62 | [29, 1, RepConv, [1024, 3, 1]], # 32-P5 63 | 64 | [[30, 31, 32], 1, IDetect, [nc, anchors]] # 33 65 | ] -------------------------------------------------------------------------------- /yolo-improve/yolov5-GOLDYOLO/yolov7-tiny-goldyolo.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # yolov7-tiny backbone 13 | backbone: 14 | # [from, number, module, args] c2, k=1, s=1, p=None, g=1, act=True 15 | [[-1, 1, Conv, [32, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 0-P1/2 16 | 17 | [-1, 1, Conv, [64, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 1-P2/4 18 | 19 | [-1, 1, Yolov7_Tiny_E_ELAN, [64, 32, nn.LeakyReLU(0.1)]], # 2 20 | 21 | [-1, 1, MP, []], # 3-P3/8 22 | [-1, 1, Yolov7_Tiny_E_ELAN, [128, 64, nn.LeakyReLU(0.1)]], # 4 23 | 24 | [-1, 1, MP, []], # 5-P4/16 25 | [-1, 1, Yolov7_Tiny_E_ELAN, [256, 128, nn.LeakyReLU(0.1)]], # 6 26 | 27 | [-1, 1, MP, []], # 7-P5/32 28 | [-1, 1, Yolov7_Tiny_E_ELAN, [512, 256, nn.LeakyReLU(0.1)]], # 8 29 | ] 30 | 31 | # yolov7-tiny head 32 | head: 33 | [[-1, 1, Yolov7_Tiny_SPP, [256, nn.LeakyReLU(0.1)]], # 
9-Yolov7-tiny-spp 34 | [[2, 4, 6, 9], 1, SimFusion_4in, []], # 10 35 | [-1, 1, IFM, [[64, 32]]], # 11 36 | 37 | [9, 1, Conv, [256, 1, 1]], # 12 38 | [[4, 6, -1], 1, SimFusion_3in, [256]], # 13 39 | [[-1, 11], 1, InjectionMultiSum_Auto_pool, [256, [64, 32], 0]], # 14 40 | [-1, 1, Yolov7_Tiny_E_ELAN, [128, 64, nn.LeakyReLU(0.1)]], # 15 41 | 42 | [6, 1, Conv, [128, 1, 1]], # 16 43 | [[2, 4, -1], 1, SimFusion_3in, [128]], # 17 44 | [[-1, 11], 1, InjectionMultiSum_Auto_pool, [128, [64, 32], 1]], # 18 45 | [-1, 1, Yolov7_Tiny_E_ELAN, [64, 32, nn.LeakyReLU(0.1)]], # 19 46 | 47 | [[19, 15, 9], 1, PyramidPoolAgg, [352, 2]], # 20 48 | [-1, 1, TopBasicLayer, [352, [64, 128]]], # 21 49 | 50 | [[19, 16], 1, AdvPoolFusion, []], # 22 51 | [[-1, 21], 1, InjectionMultiSum_Auto_pool, [128, [64, 128], 0]], # 23 52 | [-1, 1, Yolov7_Tiny_E_ELAN, [128, 64, nn.LeakyReLU(0.1)]], # 24 53 | 54 | [[-1, 12], 1, AdvPoolFusion, []], # 25 55 | [[-1, 21], 1, InjectionMultiSum_Auto_pool, [256, [64, 128], 1]], # 26 56 | [-1, 1, Yolov7_Tiny_E_ELAN, [256, 128, nn.LeakyReLU(0.1)]], # 27 57 | 58 | [19, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], # 28-P3 59 | [24, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], # 29-P4 60 | [27, 1, Conv, [512, 3, 1, None, 1, nn.LeakyReLU(0.1)]], # 30-P5 61 | 62 | [[28, 29, 30], 1, IDetect, [nc, anchors]] # 28 63 | ] -------------------------------------------------------------------------------- /yolo-improve/yolov5-NWD.py: -------------------------------------------------------------------------------- 1 | def wasserstein_loss(pred, target, eps=1e-7, constant=12.8): 2 | r"""`Implementation of paper `Enhancing Geometric Factors into 3 | Model Learning and Inference for Object Detection and Instance 4 | Segmentation `_. 5 | Code is modified from https://github.com/Zzh-tju/CIoU. 6 | Args: 7 | pred (Tensor): Predicted bboxes of format (x_center, y_center, w, h), 8 | shape (n, 4). 9 | target (Tensor): Corresponding gt bboxes, shape (n, 4). 10 | eps (float): Eps to avoid log(0). 11 | Return: 12 | Tensor: Loss tensor. 
13 | """ 14 | 15 | center1 = pred[:, :2] 16 | center2 = target[:, :2] 17 | 18 | whs = center1[:, :2] - center2[:, :2] 19 | 20 | center_distance = whs[:, 0] * whs[:, 0] + whs[:, 1] * whs[:, 1] + eps # 21 | 22 | w1 = pred[:, 2] + eps 23 | h1 = pred[:, 3] + eps 24 | w2 = target[:, 2] + eps 25 | h2 = target[:, 3] + eps 26 | 27 | wh_distance = ((w1 - w2) ** 2 + (h1 - h2) ** 2) / 4 28 | 29 | wasserstein_2 = center_distance + wh_distance 30 | return torch.exp(-torch.sqrt(wasserstein_2) / constant) 31 | 32 | 33 | nwd = wasserstein_loss(pbox, tbox[i]).squeeze() 34 | iou_ratio = 0.5 35 | lbox += (1 - iou_ratio) * (1.0 - nwd).mean() + iou_ratio * (1.0 - iou).mean() # iou loss 36 | 37 | # Objectness 38 | iou = (iou.detach() * iou_ratio + nwd.detach() * (1 - iou_ratio)).clamp(0, 1).type(tobj.dtype) -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/ODConv/__pycache__/od_mobilenetv2.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-backbone/ODConv/__pycache__/od_mobilenetv2.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/ODConv/__pycache__/od_resnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-backbone/ODConv/__pycache__/od_resnet.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/ODConv/__pycache__/odconv.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-backbone/ODConv/__pycache__/odconv.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/ODConvFuse/__pycache__/od_mobilenetv2.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-backbone/ODConvFuse/__pycache__/od_mobilenetv2.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/ODConvFuse/__pycache__/od_resnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-backbone/ODConvFuse/__pycache__/od_resnet.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/ODConvFuse/__pycache__/odconv.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z1069614715/objectdetection_script/0630fb72f2d247cbf1113be7f6273be70024b057/yolo-improve/yolov5-backbone/ODConvFuse/__pycache__/odconv.cpython-38.pyc -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/fasternet/faster_cfg/fasternet_l.yaml: 
-------------------------------------------------------------------------------- 1 | mlp_ratio: 2 2 | embed_dim: 192 3 | depths: [3, 4, 18, 3] 4 | feature_dim: 1280 5 | patch_size: 4 6 | patch_stride: 4 7 | patch_size2: 2 8 | patch_stride2: 2 9 | layer_scale_init_value: 0 # no layer scale 10 | drop_path_rate: 0.3 11 | norm_layer: BN 12 | act_layer: RELU 13 | n_div: 4 -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/fasternet/faster_cfg/fasternet_m.yaml: -------------------------------------------------------------------------------- 1 | mlp_ratio: 2 2 | embed_dim: 144 3 | depths: [3, 4, 18, 3] 4 | feature_dim: 1280 5 | patch_size: 4 6 | patch_stride: 4 7 | patch_size2: 2 8 | patch_stride2: 2 9 | layer_scale_init_value: 0 # no layer scale 10 | drop_path_rate: 0.2 11 | norm_layer: BN 12 | act_layer: RELU 13 | n_div: 4 -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/fasternet/faster_cfg/fasternet_s.yaml: -------------------------------------------------------------------------------- 1 | mlp_ratio: 2 2 | embed_dim: 128 3 | depths: [1, 2, 13, 2] 4 | feature_dim: 1280 5 | patch_size: 4 6 | patch_stride: 4 7 | patch_size2: 2 8 | patch_stride2: 2 9 | layer_scale_init_value: 0 # no layer scale 10 | drop_path_rate: 0.1 11 | norm_layer: BN 12 | act_layer: RELU 13 | n_div: 4 -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/fasternet/faster_cfg/fasternet_t0.yaml: -------------------------------------------------------------------------------- 1 | mlp_ratio: 2 2 | embed_dim: 40 3 | depths: [1, 2, 8, 2] 4 | feature_dim: 1280 5 | patch_size: 4 6 | patch_stride: 4 7 | patch_size2: 2 8 | patch_stride2: 2 9 | layer_scale_init_value: 0 # no layer scale 10 | drop_path_rate: 0. 
11 | norm_layer: BN 12 | act_layer: GELU 13 | n_div: 4 14 | -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/fasternet/faster_cfg/fasternet_t1.yaml: -------------------------------------------------------------------------------- 1 | mlp_ratio: 2 2 | embed_dim: 64 3 | depths: [1, 2, 8, 2] 4 | feature_dim: 1280 5 | patch_size: 4 6 | patch_stride: 4 7 | patch_size2: 2 8 | patch_stride2: 2 9 | layer_scale_init_value: 0 # no layer scale 10 | drop_path_rate: 0.02 11 | norm_layer: BN 12 | act_layer: GELU 13 | n_div: 4 -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/fasternet/faster_cfg/fasternet_t2.yaml: -------------------------------------------------------------------------------- 1 | mlp_ratio: 2 2 | embed_dim: 96 3 | depths: [1, 2, 8, 2] 4 | feature_dim: 1280 5 | patch_size: 4 6 | patch_stride: 4 7 | patch_size2: 2 8 | patch_stride2: 2 9 | layer_scale_init_value: 0 # no layer scale 10 | drop_path_rate: 0.05 11 | norm_layer: BN 12 | act_layer: RELU 13 | n_div: 4 -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/main.py: -------------------------------------------------------------------------------- 1 | import torch, timm 2 | from thop import clever_format, profile 3 | 4 | # print(timm.list_models()) 5 | 6 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 7 | dummy_input = torch.randn(1, 3, 640, 640).to(device) 8 | 9 | # model = timm.create_model('edgenext_small', pretrained=False, features_only=True) 10 | model = timm.create_model('vovnet39a', pretrained=False, features_only=True) 11 | model.to(device) 12 | model.eval() 13 | 14 | print(model.feature_info.channels()) 15 | for feature in model(dummy_input): 16 | print(feature.size()) 17 | 18 | flops, params = profile(model.to(device), (dummy_input,), verbose=False) 19 | flops, params = clever_format([flops * 2, params], "%.3f") 20 | print('Total FLOPS: %s' % (flops)) 21 | print('Total params: %s' % (params)) -------------------------------------------------------------------------------- /yolo-improve/yolov5-backbone/yolov5-custom.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # 0-P1/2 13 | # 1-P2/4 14 | # 2-P3/8 15 | # 3-P4/16 16 | # 4-P5/32 17 | 18 | # YOLOv5 v6.0 backbone 19 | backbone: 20 | # [from, number, module, args] 21 | [[-1, 1, vovnet39a, [False]], # 4 22 | [-1, 1, SPPF, [1024, 5]], # 5 23 | ] 24 | 25 | # YOLOv5 v6.0 head 26 | head: 27 | [[-1, 1, Conv, [512, 1, 1]], # 6 28 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], # 7 29 | [[-1, 3], 1, Concat, [1]], # cat backbone P4 8 30 | [-1, 3, C3, [512, False]], # 9 31 | 32 | [-1, 1, Conv, [256, 1, 1]], # 10 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], # 11 34 | [[-1, 2], 1, Concat, [1]], # cat backbone P3 12 35 | [-1, 3, C3, [256, False]], # 13 (P3/8-small) 36 | 37 | [-1, 1, Conv, [256, 3, 2]], # 14 38 | [[-1, 10], 1, Concat, [1]], # cat head P4 15 39 | [-1, 3, C3, [512, False]], # 16 (P4/16-medium) 40 | 41 | [-1, 1, Conv, [512, 3, 2]], # 17 42 | [[-1, 5], 1, Concat, [1]], # cat head P5 18 43 | [-1, 3, C3, [1024, 
False]], # 19 (P5/32-large) 44 | 45 | [[13, 16, 19], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 46 | ] -------------------------------------------------------------------------------- /yolo-improve/yolov5-res2block.py: -------------------------------------------------------------------------------- 1 | class Bottle2neck(nn.Module): 2 | expansion = 1 3 | 4 | def __init__(self, inplanes, planes, shortcut, baseWidth=26, scale = 4): 5 | """ Constructor 6 | Args: 7 | inplanes: input channel dimensionality 8 | planes: output channel dimensionality 9 | baseWidth: basic width of conv3x3 10 | scale: number of scale. 11 | """ 12 | super(Bottle2neck, self).__init__() 13 | 14 | width = int(math.floor(planes * (baseWidth/64.0))) 15 | self.conv1 = Conv(inplanes, width*scale, k=1) 16 | 17 | if scale == 1: 18 | self.nums = 1 19 | else: 20 | self.nums = scale -1 21 | convs = [] 22 | for i in range(self.nums): 23 | convs.append(Conv(width, width, k=3)) 24 | self.convs = nn.ModuleList(convs) 25 | 26 | self.conv3 = Conv(width*scale, planes * self.expansion, k=1, act=False) 27 | 28 | self.silu = nn.SiLU(inplace=True) 29 | self.scale = scale 30 | self.width = width 31 | self.shortcut = shortcut 32 | 33 | def forward(self, x): 34 | print(1) 35 | if self.shortcut: 36 | residual = x 37 | out = self.conv1(x) 38 | spx = torch.split(out, self.width, 1) 39 | for i in range(self.nums): 40 | if i==0: 41 | sp = spx[i] 42 | else: 43 | sp = sp + spx[i] 44 | sp = self.convs[i](sp) 45 | if i==0: 46 | out = sp 47 | else: 48 | out = torch.cat((out, sp), 1) 49 | if self.scale != 1: 50 | out = torch.cat((out, spx[self.nums]),1) 51 | 52 | out = self.conv3(out) 53 | if self.shortcut: 54 | out += residual 55 | out = self.silu(out) 56 | return out 57 | 58 | class C3_Res2Block(C3): 59 | # CSP Bottleneck with 3 convolutions 60 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 61 | super().__init__(c1, c2, n, shortcut, g, e) 62 | c_ = int(c2 * e) # hidden channels 63 | self.m = nn.Sequential(*(Bottle2neck(c_, c_, shortcut) for _ in range(n))) -------------------------------------------------------------------------------- /yolo-improve/yolov7-DCNV3.py: -------------------------------------------------------------------------------- 1 | from models.ops_dcnv3.modules import DCNv3 2 | class DCNV3_YoLo(nn.Module): 3 | def __init__(self, inc, ouc, k=1, s=1, p=None, g=1, act=True): 4 | super().__init__() 5 | 6 | self.conv = Conv(inc, ouc, k=1) 7 | self.dcnv3 = DCNv3(ouc, kernel_size=k, stride=s, group=g) 8 | self.bn = nn.BatchNorm2d(ouc) 9 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 10 | 11 | def forward(self, x): 12 | x = self.conv(x) 13 | x = x.permute(0, 2, 3, 1) 14 | x = self.dcnv3(x) 15 | x = x.permute(0, 3, 1, 2) 16 | x = self.act(self.bn(x)) 17 | return x 18 | 19 | if isinstance(m, Detect): 20 | s = 256 # 2x min stride 21 | self.model.to(torch.device('cuda')) 22 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s).to(torch.device('cuda')))]).cpu() # forward 23 | self.model.cpu() 24 | check_anchor_order(m) 25 | m.anchors /= m.stride.view(-1, 1, 1) 26 | self.stride = m.stride 27 | self._initialize_biases() # only run once 28 | # print('Strides: %s' % m.stride.tolist()) 29 | if isinstance(m, IDetect): 30 | s = 256 # 2x min stride 31 | self.model.to(torch.device('cuda')) 32 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, 
s).to(torch.device('cuda')))]).cpu() # forward 33 | self.model.cpu() 34 | check_anchor_order(m) 35 | m.anchors /= m.stride.view(-1, 1, 1) 36 | self.stride = m.stride 37 | self._initialize_biases() # only run once 38 | # print('Strides: %s' % m.stride.tolist()) 39 | if isinstance(m, IAuxDetect): 40 | s = 256 # 2x min stride 41 | self.model.to(torch.device('cuda')) 42 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s).to(torch.device('cuda')))[:4]]).cpu() # forward 43 | self.model.cpu() 44 | #print(m.stride) 45 | check_anchor_order(m) 46 | m.anchors /= m.stride.view(-1, 1, 1) 47 | self.stride = m.stride 48 | self._initialize_aux_biases() # only run once 49 | # print('Strides: %s' % m.stride.tolist()) -------------------------------------------------------------------------------- /yolo-improve/yolov7-EVC.py: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # yolov7-tiny backbone 13 | backbone: 14 | # [from, number, module, args] c2, k=1, s=1, p=None, g=1, act=True 15 | [[-1, 1, Conv, [32, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 0-P1/2 16 | 17 | [-1, 1, Conv, [64, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 1-P2/4 18 | 19 | [-1, 1, Yolov7_Tiny_E_ELAN, [64, 32, nn.LeakyReLU(0.1)]], # 2 20 | 21 | [-1, 1, MP, []], # 3-P3/8 22 | [-1, 1, Yolov7_Tiny_E_ELAN, [128, 64, nn.LeakyReLU(0.1)]], # 4 23 | 24 | [-1, 1, MP, []], # 5-P4/16 25 | [-1, 1, Yolov7_Tiny_E_ELAN, [256, 128, nn.LeakyReLU(0.1)]], # 6 26 | 27 | [-1, 1, MP, []], # 7-P5/32 28 | [-1, 1, Yolov7_Tiny_E_ELAN, [512, 256, nn.LeakyReLU(0.1)]], # 8 29 | ] 30 | 31 | # yolov7-tiny head 32 | head: 33 | [[-1, 1, Yolov7_Tiny_SPP, [256, nn.LeakyReLU(0.1)]], # 9-Yolov7-tiny-spp 34 | 35 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [6, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P4 38 | [-1, 1, EVCBlock, []], 39 | [[-1, -3], 1, Concat, [1]], 40 | [-1, 1, Yolov7_Tiny_E_ELAN, [128, 64, nn.LeakyReLU(0.1)]], # 15 41 | 42 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [4, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P3 45 | [[-1, -2], 1, Concat, [1]], 46 | [-1, 1, Yolov7_Tiny_E_ELAN, [64, 32, nn.LeakyReLU(0.1)]], # 20 47 | 48 | [-1, 1, Conv, [128, 3, 2, None, 1, nn.LeakyReLU(0.1)]], 49 | [[-1, 15], 1, Concat, [1]], 50 | [-1, 1, Yolov7_Tiny_E_ELAN, [128, 64, nn.LeakyReLU(0.1)]], # 23 51 | 52 | [-1, 1, Conv, [256, 3, 2, None, 1, nn.LeakyReLU(0.1)]], 53 | [[-1, 9], 1, Concat, [1]], 54 | 55 | [-1, 1, Yolov7_Tiny_E_ELAN, [256, 128, nn.LeakyReLU(0.1)]], # 26 56 | 57 | [20, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], # 27-P3 58 | [23, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], # 28-P4 59 | [26, 1, Conv, [512, 3, 1, None, 1, nn.LeakyReLU(0.1)]], # 29-P5 60 | 61 | [[27, 28, 29], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 62 | ] -------------------------------------------------------------------------------- /yolo-improve/yolov7-MPDiou.py: -------------------------------------------------------------------------------- 1 | def bbox_mpdiou(box1, box2, x1y1x2y2=True, mpdiou_hw=None, grid=None, eps=1e-7): 2 | # Returns the 
IoU of box1 to box2. box1 is 4, box2 is nx4 3 | box2 = box2.T 4 | box1[:2] += grid 5 | box2[:2] += grid 6 | 7 | # Get the coordinates of bounding boxes 8 | if x1y1x2y2: # x1, y1, x2, y2 = box1 9 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 10 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 11 | else: # transform from xywh to xyxy 12 | b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 13 | b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 14 | b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 15 | b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 16 | 17 | # Intersection area 18 | inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ 19 | (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) 20 | 21 | # Union Area 22 | w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps 23 | w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps 24 | union = w1 * h1 + w2 * h2 - inter + eps 25 | 26 | iou = inter / union 27 | d1 = (b2_x1 - b1_x1) ** 2 + (b2_y1 - b1_y1) ** 2 28 | d2 = (b2_x2 - b1_x2) ** 2 + (b2_y2 - b1_y2) ** 2 29 | return iou - d1 / mpdiou_hw - d2 / mpdiou_hw # MPDIoU 30 | 31 | # ComputeLoss 32 | iou = bbox_mpdiou(pbox.T, tbox[i], x1y1x2y2=False, mpdiou_hw=pi.size(2) ** 2 + pi.size(3) ** 2, grid=torch.stack([gj, gi])) # iou(prediction, target) 33 | 34 | # ComputeLossOTA 35 | iou = bbox_mpdiou(pbox.T, selected_tbox, x1y1x2y2=False, mpdiou_hw=pi.size(2) ** 2 + pi.size(3) ** 2, grid=torch.stack([gj, gi])) # iou(prediction, target) -------------------------------------------------------------------------------- /yolo-improve/yolov7-NWD.py: -------------------------------------------------------------------------------- 1 | def wasserstein_loss(pred, target, eps=1e-7, constant=12.8): 2 | r"""`Implementation of paper `Enhancing Geometric Factors into 3 | Model Learning and Inference for Object Detection and Instance 4 | Segmentation `_. 5 | Code is modified from https://github.com/Zzh-tju/CIoU. 6 | Args: 7 | pred (Tensor): Predicted bboxes of format (x_center, y_center, w, h), 8 | shape (n, 4). 9 | target (Tensor): Corresponding gt bboxes, shape (n, 4). 10 | eps (float): Eps to avoid log(0). 11 | Return: 12 | Tensor: Loss tensor. 
13 | """ 14 | 15 | center1 = pred[:, :2] 16 | center2 = target[:, :2] 17 | 18 | whs = center1[:, :2] - center2[:, :2] 19 | 20 | center_distance = whs[:, 0] * whs[:, 0] + whs[:, 1] * whs[:, 1] + eps # 21 | 22 | w1 = pred[:, 2] + eps 23 | h1 = pred[:, 3] + eps 24 | w2 = target[:, 2] + eps 25 | h2 = target[:, 3] + eps 26 | 27 | wh_distance = ((w1 - w2) ** 2 + (h1 - h2) ** 2) / 4 28 | 29 | wasserstein_2 = center_distance + wh_distance 30 | return torch.exp(-torch.sqrt(wasserstein_2) / constant) 31 | 32 | nwd = wasserstein_loss(pbox, tbox[i]) 33 | iou_ratio = 0.5 34 | lbox += (1 - iou_ratio) * (1.0 - nwd).mean() + iou_ratio * (1.0 - iou).mean() # iou loss 35 | 36 | # Objectness 37 | iou = (iou.detach() * iou_ratio + nwd.detach() * (1 - iou_ratio)).clamp(0, 1).type(tobj.dtype) -------------------------------------------------------------------------------- /yolo-improve/yolov7-head/yolov7-tiny-P6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 3 8 | 9 | # yolov7-tiny backbone 10 | backbone: 11 | # [from, number, module, args] c2, k=1, s=1, p=None, g=1, act=True 12 | [[-1, 1, Conv, [32, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 0-P1/2 13 | 14 | [-1, 1, Conv, [64, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 1-P2/4 15 | 16 | [-1, 1, Yolov7_Tiny_E_ELAN, [64, 32, nn.LeakyReLU(0.1)]], # 2 17 | 18 | [-1, 1, MP, []], # 3-P3/8 19 | [-1, 1, Yolov7_Tiny_E_ELAN, [128, 64, nn.LeakyReLU(0.1)]], # 4 20 | 21 | [-1, 1, MP, []], # 5-P4/16 22 | [-1, 1, Yolov7_Tiny_E_ELAN, [256, 128, nn.LeakyReLU(0.1)]], # 6 23 | 24 | [-1, 1, MP, []], # 7-P5/32 25 | [-1, 1, Yolov7_Tiny_E_ELAN, [512, 256, nn.LeakyReLU(0.1)]], # 8 26 | ] 27 | 28 | # yolov7-tiny head 29 | head: 30 | [[-1, 1, Yolov7_Tiny_SPP, [256, nn.LeakyReLU(0.1)]], # 9-Yolov7-tiny-spp 31 | 32 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [6, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P4 35 | [[-1, -2], 1, Concat, [1]], 36 | [-1, 1, Yolov7_Tiny_E_ELAN, [128, 64, nn.LeakyReLU(0.1)]], # 14 37 | 38 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [4, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P3 41 | [[-1, -2], 1, Concat, [1]], 42 | [-1, 1, Yolov7_Tiny_E_ELAN, [64, 32, nn.LeakyReLU(0.1)]], # 19 43 | 44 | [-1, 1, Conv, [128, 3, 2, None, 1, nn.LeakyReLU(0.1)]], 45 | [[-1, 14], 1, Concat, [1]], 46 | [-1, 1, Yolov7_Tiny_E_ELAN, [128, 64, nn.LeakyReLU(0.1)]], # 22 47 | 48 | [-1, 1, Conv, [256, 3, 2, None, 1, nn.LeakyReLU(0.1)]], 49 | [[-1, 9], 1, Concat, [1]], 50 | [-1, 1, Yolov7_Tiny_E_ELAN, [256, 128, nn.LeakyReLU(0.1)]], # 25 51 | 52 | [19, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], # 26-P3 53 | [22, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], # 27-P4 54 | [25, 1, Conv, [512, 3, 1, None, 1, nn.LeakyReLU(0.1)]], # 28-P5 55 | 56 | [25, 1, MP, []], # 29-P6/64 57 | [-1, 1, Yolov7_Tiny_E_ELAN, [512, 256, nn.LeakyReLU(0.1)]], # 30 58 | 59 | [[26, 27, 28, 30], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] -------------------------------------------------------------------------------- /yolo-improve/yolov8-objectcount.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings('ignore') 3 | import cv2, 
os, shutil 4 | import numpy as np 5 | from ultralytics import YOLO 6 | 7 | def get_video_cfg(path): 8 | video = cv2.VideoCapture(path) 9 | size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))) 10 | fps = int(video.get(cv2.CAP_PROP_FPS)) 11 | return cv2.VideoWriter_fourcc(*'XVID'), size, fps 12 | 13 | def plot_and_counting(result): 14 | image_plot = result.plot() 15 | box_count = result.boxes.shape[0] 16 | cv2.putText(image_plot, f'Object Counts:{box_count}', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 4) 17 | return image_plot 18 | 19 | if __name__ == '__main__': 20 | output_dir = 'result' 21 | if os.path.exists(output_dir): 22 | shutil.rmtree(output_dir) 23 | os.makedirs(output_dir, exist_ok=True) 24 | 25 | model = YOLO('yolov8n.pt') # select your model.pt path 26 | 27 | # ----------------------for images or images-folder---------------------- 28 | for result in model.predict(source='ultralytics/assets', 29 | stream=True, 30 | imgsz=640, 31 | save=False, 32 | # conf=0.2, 33 | ): 34 | image_plot = plot_and_counting(result) 35 | cv2.imwrite(f'{output_dir}/{os.path.basename(result.path)}', image_plot) 36 | 37 | # ----------------------for video-folder---------------------- 38 | # video_base_path = 'video' 39 | # for video_path in os.listdir(video_base_path): 40 | # fourcc, size, fps = get_video_cfg(f'{video_base_path}/{video_path}') 41 | # video_output = cv2.VideoWriter(f'{output_dir}/{video_path}', fourcc, fps, size) 42 | # for result in model.predict(source=f'{video_base_path}/{video_path}', 43 | # stream=True, 44 | # imgsz=640, 45 | # save=False, 46 | # # conf=0.2, 47 | # ): 48 | # image_plot = plot_and_counting(result) 49 | # video_output.write(image_plot) 50 | # video_output.release() -------------------------------------------------------------------------------- /yolo-improve/yolov8.py: -------------------------------------------------------------------------------- 1 | from ultralytics import YOLO 2 | 3 | # Installation command 4 | # python setup.py develop 5 | 6 | # Example dataset (Baidu Netdisk link) 7 | # Link: https://pan.baidu.com/s/19FM7XnKEFC83vpiRdtNA8A?pwd=n93i 8 | # Extraction code: n93i 9 | 10 | if __name__ == '__main__': 11 | # Create the model directly from pretrained weights. 12 | model = YOLO('yolov8n.pt') 13 | model.train(**{'cfg':'ultralytics/cfg/exp1.yaml', 'data':'dataset/data.yaml'}) 14 | 15 | # Create the model from a yaml config file and load pretrained weights.
16 | model = YOLO('ultralytics/cfg/models/v8/yolov8.yaml') 17 | model.load('yolov8n.pt') 18 | model.train(**{'cfg':'ultralytics/cfg/exp1.yaml', 'data':'dataset/data.yaml'}) 19 | 20 | # Model validation 21 | model = YOLO('runs/detect/yolov8n_exp/weights/best.pt') 22 | model.val(**{'data':'dataset/data.yaml'}) 23 | 24 | # Model inference 25 | model = YOLO('runs/detect/yolov8n_exp/weights/best.pt') 26 | model.predict(source='dataset/images/test', **{'save':True}) -------------------------------------------------------------------------------- /yolo/data.yaml: -------------------------------------------------------------------------------- 1 | # dataset path 2 | train: ./dataset/images/train 3 | val: ./dataset/images/val 4 | test: ./dataset/images/test 5 | 6 | # number of classes 7 | nc: 8 | 9 | # class names 10 | names: [] -------------------------------------------------------------------------------- /yolo/dataset/VOCdevkit/Annotations/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Folder for VOC-format (XML) annotation files -------------------------------------------------------------------------------- /yolo/dataset/VOCdevkit/JPEGImages/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Folder for the images -------------------------------------------------------------------------------- /yolo/dataset/VOCdevkit/txt/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Folder for YOLO-format (txt) annotation files -------------------------------------------------------------------------------- /yolo/dataset/split_data.py: -------------------------------------------------------------------------------- 1 | import os, shutil, random 2 | random.seed(0) 3 | import numpy as np 4 | from sklearn.model_selection import train_test_split 5 | 6 | val_size = 0.1 7 | test_size = 0.2 8 | postfix = 'jpg' 9 | imgpath = 'VOCdevkit/JPEGImages' 10 | txtpath = 'VOCdevkit/txt' 11 | 12 | os.makedirs('images/train', exist_ok=True) 13 | os.makedirs('images/val', exist_ok=True) 14 | os.makedirs('images/test', exist_ok=True) 15 | os.makedirs('labels/train', exist_ok=True) 16 | os.makedirs('labels/val', exist_ok=True) 17 | os.makedirs('labels/test', exist_ok=True) 18 | 19 | listdir = np.array([i for i in os.listdir(txtpath) if 'txt' in i]) 20 | random.shuffle(listdir) 21 | train, val, test = listdir[:int(len(listdir) * (1 - val_size - test_size))], listdir[int(len(listdir) * (1 - val_size - test_size)):int(len(listdir) * (1 - test_size))], listdir[int(len(listdir) * (1 - test_size)):] 22 | print(f'train set size:{len(train)} val set size:{len(val)} test set size:{len(test)}') 23 | 24 | for i in train: 25 | shutil.copy('{}/{}.{}'.format(imgpath, i[:-4], postfix), 'images/train/{}.{}'.format(i[:-4], postfix)) 26 | shutil.copy('{}/{}'.format(txtpath, i), 'labels/train/{}'.format(i)) 27 | 28 | for i in val: 29 | shutil.copy('{}/{}.{}'.format(imgpath, i[:-4], postfix), 'images/val/{}.{}'.format(i[:-4], postfix)) 30 | shutil.copy('{}/{}'.format(txtpath, i), 'labels/val/{}'.format(i)) 31 | 32 | for i in test: 33 | shutil.copy('{}/{}.{}'.format(imgpath, i[:-4], postfix), 'images/test/{}.{}'.format(i[:-4], postfix)) 34 | shutil.copy('{}/{}'.format(txtpath, i), 'labels/test/{}'.format(i)) -------------------------------------------------------------------------------- /yolo/readme.md: -------------------------------------------------------------------------------- 1 | # Dataset processing scripts for YOLOv5, YOLOv7 and YOLOv8 2 | The scripts in this directory are dataset processing scripts for yolov5, v7 and v8, and support the following: 3 | 1. Converting VOC-format annotations to YOLO-format annotations. 4 |
2. Splitting the dataset into training, validation and test sets. 5 | 6 | # Usage with a VOC-format (XML) dataset 7 | 1. Put the images in dataset\VOCdevkit\JPEGImages. All images must share the same file extension, e.g. all jpg or all png; mixing extensions (some jpg, some png) is not supported. 8 | 2. Put the VOC-format XML annotation files in dataset\VOCdevkit\Annotations. 9 | 3. Run xml2txt.py; it converts the XML annotations in Annotations into YOLO-format annotation files in txt. The postfix parameter in xml2txt.py is the image extension used in JPEGImages; set it to match your images (default jpg). For example, if all your images are png, change postfix to png. While this script runs it prints the classes found in your dataset; copy that class list into names in data.yaml and set nc to your number of classes, i.e. the length of names. 10 | 4. Run split_data.py to split the data into training, validation and test sets. You can change val_size **(validation split ratio)** and test_size **(test split ratio)** in split_data.py, and the postfix parameter is again your image extension (default jpg); change it if your images do not end in jpg. 11 | 12 | # Usage with a YOLO-format dataset 13 | 1. Put the images in dataset\VOCdevkit\JPEGImages. All images must share the same file extension, e.g. all jpg or all png; mixing extensions (some jpg, some png) is not supported. 14 | 2. Put the YOLO-format TXT annotation files in dataset\VOCdevkit\txt. 15 | 3. Run split_data.py to split the data into training, validation and test sets. You can change val_size **(validation split ratio)** and test_size **(test split ratio)** in split_data.py, and the postfix parameter is again your image extension (default jpg); change it if your images do not end in jpg. 16 | 4. Set your classes in names in data.yaml; it is a list. For example, if in my YOLO-format dataset 0 stands for face and 1 stands for body, then data.yaml should contain names: ['face', 'body'] and nc: 2, where nc is the number of classes. 17 | --------------------------------------------------------------------------------
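To make step 4 above concrete, here is a small hypothetical helper (not part of the repository, and assuming PyYAML is installed) that writes a data.yaml matching the face/body example from the readme.

# Hypothetical helper: writes a data.yaml for the two-class face/body example above.
import yaml

data_cfg = {
    'train': './dataset/images/train',
    'val': './dataset/images/val',
    'test': './dataset/images/test',
    'nc': 2,                      # number of classes = len(names)
    'names': ['face', 'body'],    # class 0 -> face, class 1 -> body
}

with open('data.yaml', 'w') as f:
    yaml.safe_dump(data_cfg, f, sort_keys=False)

print(open('data.yaml', encoding='utf-8').read())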