├── 特征融合
│   ├── 1
│   ├── (ICMR 2022)CMF_Block(多模态融合).py
│   └── (TIP2024)CGA特征融合模块.py
├── 频域
│   ├── 1
│   └── (CVPR 2024)FRFN.py
├── 目标检测
│   ├── WCMFandFACMA.pdf
│   ├── WCMF.py
│   ├── FACMA.py
│   └── GFM.py
├── assets
│   ├── image-20240906165609184.png
│   ├── image-20240906165641092.png
│   ├── data-20241122T035935Z-001.zip
│   ├── 18b0c599180d157e714daf7f21b1fdc.jpg
│   └── aae37e8f13bc88eb6aca66535d49a7e.jpg
├── 特征维度转换.py
├── 缝合代码示例
│   ├── 维度转换.py
│   ├── HWD小波下采样.py
│   ├── LSK.py
│   ├── 部分卷积.py
│   ├── MobileViTv2Attention.py
│   └── DilateFormer.py
├── (arxiv)Arelu.py
├── README.md
├── (ICCV 2021) RA.py
├── 采样
│   ├── (PR2023) 小波下采样.py
│   └── EUCB.py
├── 注意力
│   ├── (ICML 2021)SimAM.py
│   ├── (IEEE 2023)AGCA.py
│   ├── (arxiv2023)ema.py
│   ├── (WACV 2021)TripletAttention.py
│   ├── (TPAMI 2021)OutlookAttention.py
│   ├── (CVPR 2024)SHSA.py
│   ├── (tmm2023)多尺度膨胀注意力机制.py
│   └── (CVPR 2024)CAA.py
├── GhostModule.py
├── DFF2d.py
├── 卷积
│   ├── (CVPR 2023) 部分卷积.py
│   ├── (ICCV 2023)大核选择模块LSK.py
│   ├── (ICCV 2021)CTR-GC(图卷积).py
│   └── (CVPR 2022)dgcnn.py
├── (ICCV2023)SAFM.py
├── (CVPR 2024)IDC.py
├── UCDC.py
├── (arXiv 2021) EA.py
├── GCTattention.py
├── scSE.py
├── (arXiv 2019) ECA.py
├── PGM.py
├── f_sampling.py
├── 3D
│   ├── (CVPR 2024)IDC3d.py
│   ├── (IEEE 2024)SFFusion3d特征融合.py
│   └── (CVPR 2022)DFE.py
├── MDTA.py
├── (ACCV 2024) LIA.py
├── GAU.py
├── 1D模块
│   ├── (ICCV 2023)EAA.py
│   └── (KDD 2020)CorNet(NLP).py
├── (arXiv 2021) AFT.py
├── (ECCV2024)SMFA.py
├── (TPAMI 2022) ViP.py
├── MLAttention.py
├── SWA.py
├── (arXiv 2021) MobileViTv2.py
├── FCA.py
├── DPTAM.py
├── (ICLR 2023)ContraNorm(对比归一化层).py
├── LGAG.py
├── 图像超分
│   ├── SGFN.py
│   └── FMM.py
├── (arXiv 2021) S2Attention.py
├── cleegn.py
├── MCM.py
├── LAE.py
├── tfcm.py
├── (arXiv 2021) PSA.py
├── (ICPR 2021)CAN(人群计数,CV2维任务通用).py
├── SPConv.py
├── LPA.py
├── DA.py
├── FECAttention.py
├── ULSAM.py
├── 点云
│   └── Attention.py
├── MixStructure.py
├── CPAM.py
├── (arXiv 2020 ) SSAN.py
├── LMFLoss.py
├── (Elsevier 2024)CF_loss.py
├── (arXiv 2023) ScaledDotProductAttention.py
├── (CVPR 2019) DCNv2.py
├── FEM.py
├── BFAM.py
├── (ACM MM 2023)Deepfake(深度伪造检测).py
├── MHIASA.py
├── (CVPR2020)strip_pooling.py
├── CPCA2d.py
├── PCBAM.py
└── 遥感
    └── MSAA.py

/特征融合/1:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/频域/1:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/目标检测/WCMFandFACMA.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-dawang/PlugNPlay-Modules/HEAD/目标检测/WCMFandFACMA.pdf
--------------------------------------------------------------------------------
/assets/image-20240906165609184.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-dawang/PlugNPlay-Modules/HEAD/assets/image-20240906165609184.png
--------------------------------------------------------------------------------
/assets/image-20240906165641092.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-dawang/PlugNPlay-Modules/HEAD/assets/image-20240906165641092.png
--------------------------------------------------------------------------------
/assets/data-20241122T035935Z-001.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-dawang/PlugNPlay-Modules/HEAD/assets/data-20241122T035935Z-001.zip -------------------------------------------------------------------------------- /assets/18b0c599180d157e714daf7f21b1fdc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-dawang/PlugNPlay-Modules/HEAD/assets/18b0c599180d157e714daf7f21b1fdc.jpg -------------------------------------------------------------------------------- /assets/aae37e8f13bc88eb6aca66535d49a7e.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-dawang/PlugNPlay-Modules/HEAD/assets/aae37e8f13bc88eb6aca66535d49a7e.jpg -------------------------------------------------------------------------------- /特征维度转换.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from einops import rearrange 3 | 4 | 5 | def to_3d(x): 6 | return rearrange(x, 'b c h w -> b (h w) c') 7 | 8 | 9 | def to_4d(x, h, w): 10 | return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w) 11 | 12 | 13 | if __name__ == '__main__': 14 | input = torch.randn(3, 32, 64, 64) # 假设输入tensor B C H W 15 | 16 | output = to_3d(input) 17 | print(output.size()) #输出shape b n c 18 | 19 | output1 =to_4d(output, 64, 64) # 指定高宽 h*w =n 20 | print(output1.size()) 21 | -------------------------------------------------------------------------------- /缝合代码示例/维度转换.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from einops import rearrange 3 | 4 | 5 | def to_3d(x): 6 | return rearrange(x, 'b c h w -> b (h w) c') 7 | 8 | 9 | def to_4d(x, h, w): 10 | return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w) 11 | 12 | 13 | # x = x.permute(0, 2, 3, 1) # 【B, C, H, W】 -> 【B, H, W, C】 14 | # x= x.permute(0, 3, 1, 2) # 【B, H, W, C】 -> 【B, C, H, W】 15 | 16 | if __name__ == '__main__': 17 | input = torch.randn(3, 32, 64, 64) # 假设输入tensor B C H W 18 | 19 | output = to_3d(input) 20 | print(output.size()) #输出shape b n c 21 | 22 | output1 =to_4d(output, 64, 64) # 指定高宽 h*w =n 23 | print(output1.size()) -------------------------------------------------------------------------------- /(arxiv)Arelu.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | # github地址:https://github.com/densechen/AReLU/blob/master/activations/arelu.py 7 | # 论文:ARELU: ATTENTION-BASED RECTIFIED LINEAR UNIT 8 | class AReLU(nn.Module): 9 | def __init__(self, alpha=0.90, beta=2.0): 10 | super().__init__() 11 | self.alpha = nn.Parameter(torch.tensor([alpha])) 12 | self.beta = nn.Parameter(torch.tensor([beta])) 13 | 14 | def forward(self, input): 15 | alpha = torch.clamp(self.alpha, min=0.01, max=0.99) 16 | beta = 1 + torch.sigmoid(self.beta) 17 | 18 | return F.relu(input) * beta - F.relu(-input) * alpha 19 | -------------------------------------------------------------------------------- /缝合代码示例/HWD小波下采样.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from pytorch_wavelets import DWTForward 4 | 5 | 6 | class Down_wt(nn.Module): 7 | def __init__(self, in_ch, out_ch): 8 | super(Down_wt, self).__init__() 9 | self.wt = DWTForward(J=1, mode='zero', wave='haar') 10 | self.conv_bn_relu = nn.Sequential( 11 | nn.Conv2d(in_ch * 4, out_ch, kernel_size=1, stride=1), 12 | nn.BatchNorm2d(out_ch), 13 
| nn.ReLU(inplace=True), 14 | ) 15 | 16 | def forward(self, x): 17 | yL, yH = self.wt(x) 18 | y_HL = yH[0][:, :, 0, ::] 19 | y_LH = yH[0][:, :, 1, ::] 20 | y_HH = yH[0][:, :, 2, ::] 21 | x = torch.cat([yL, y_HL, y_LH, y_HH], dim=1) 22 | x = self.conv_bn_relu(x) 23 | return x 24 | 25 | 26 | # 输入 B C H W, 输出 B C H W 27 | if __name__ == '__main__': 28 | block = Down_wt(64, 96) # 输入通道数,输出通道数 29 | input = torch.rand(3, 64, 64, 64) 30 | output = block(input) 31 | print(output.size()) 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 全网最全最新的即插即用模块:目前进度70% 2 | 包括卷积 注意力机制 下采样 特征融合模块等 3 | 持续更新~ 4 | 详细论文讲解关注公众号【ai缝合大王】和B站【ai缝合大王】 5 | 模块分享、缝合交流进q群: 6 | 994264161 7 | 更多细分方向群:① 目标检测 ② 图像分类 ③ 语义分割 ④ 人脸识别 ⑤ 三维重建 ⑥ 多模态融合 ⑦ 姿态估计 ⑧ 超分辨率⑨ 自动驾驶 ⑩ 图像生成 ⑪ 遥感影像 ⑫ 医学图像 ⑬ 底层视觉 ⑭ YOLO 系列 ⑮ Mamba 等新架构⑯ 视频处理 ⑰ 3D ⑱ 大模型 ⑲ 重识别(ReID)⑳ 图像去雨/去噪/去模糊 8 | 细分方向群为微信群,扫描二维码添加微信,扣1-20拉你进群。 9 | 10 | ![8fe957e64594b1526077b0f75c6f496](https://github.com/user-attachments/assets/392ad630-081a-454d-ad38-40d24c4a8990) 11 | 12 | 目前主要更新二维图像模块,所有二维图像都可以用,图像分类、分割、目标检测、超分辨率重建、图像去雾、暗光增强等所有图像任务都可以用 3d模块 和1d模块 后续会陆续更新。 13 | 14 | ![...](assets/18b0c599180d157e714daf7f21b1fdc.jpg) 15 | 16 | 17 | 18 | 这里我介绍一下,加入深度学习论文指南: 19 | 第一,如果你是新手小白,代码论文都看不懂,我们会给出最优的学习路线,让你少走弯路,节省很多时间。 20 | 21 | 第二,这里提供缝合模块的技巧,让你轻松使用github上的模块即插即用,插入自己的模型中涨点。 22 | 23 | 第三,如果你缝合了很多模块发现没有效果,不妨来这里看看,这里提供了高阶缝合技巧,结构缝合,串并联交互缝合,创新点缝合等,并且还有自制即插即用模块分享。 24 | 25 | 第四,如果缝合好模块之后,不知道怎么编故事,怎么写论文的思路,这里也会进行提供。 26 | 27 | 第五,每个成员都可以发布主题,我们也会针对主题进行提问,随着人数的增多,你想知道的任何这方面的问题几乎都有模板答案。 28 | 29 | 第六,这是一个长期项目,不是说就几个视频加pdf,是一年的时间内所有内容。 30 | 31 | 32 | -------------------------------------------------------------------------------- /(ICCV 2021) RA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/2108.02456 7 | # 论文:Residual Attention: A Simple but Effective Method for Multi-Label Recognition 8 | 9 | 10 | 11 | class ResidualAttention(nn.Module): 12 | 13 | def __init__(self, channel=512 , num_class=1000,la=0.2): 14 | super().__init__() 15 | self.la=la 16 | self.fc=nn.Conv2d(in_channels=channel,out_channels=num_class,kernel_size=1,stride=1,bias=False) 17 | 18 | def forward(self, x): 19 | b,c,h,w=x.shape 20 | y_raw=self.fc(x).flatten(2) #b,num_class,hxw 21 | y_avg=torch.mean(y_raw,dim=2) #b,num_class 22 | y_max=torch.max(y_raw,dim=2)[0] #b,num_class 23 | score=y_avg+self.la*y_max 24 | return score 25 | 26 | 27 | 28 | 29 | if __name__ == '__main__': 30 | input=torch.randn(50,512,7,7) 31 | resatt = ResidualAttention(channel=512,num_class=1000,la=0.2) 32 | output=resatt(input) 33 | print(output.shape) 34 | 35 | 36 | -------------------------------------------------------------------------------- /采样/(PR2023) 小波下采样.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from pytorch_wavelets import DWTForward 4 | # GitHub地址 :https://github.com/apple1986/HWD 5 | # 论文地址:https://www.sciencedirect.com/science/article/pii/S0031320323005174 6 | class Down_wt(nn.Module): 7 | def __init__(self, in_ch, out_ch): 8 | super(Down_wt, self).__init__() 9 | self.wt = DWTForward(J=1, mode='zero', wave='haar') 10 | self.conv_bn_relu = nn.Sequential( 11 | nn.Conv2d(in_ch * 4, out_ch, kernel_size=1, stride=1), 12 | nn.BatchNorm2d(out_ch), 13 | 
nn.ReLU(inplace=True), 14 | ) 15 | 16 | def forward(self, x): 17 | yL, yH = self.wt(x) 18 | y_HL = yH[0][:, :, 0, ::] 19 | y_LH = yH[0][:, :, 1, ::] 20 | y_HH = yH[0][:, :, 2, ::] 21 | x = torch.cat([yL, y_HL, y_LH, y_HH], dim=1) 22 | x = self.conv_bn_relu(x) 23 | return x 24 | 25 | 26 | if __name__ == '__main__': 27 | block = Down_wt(64, 64) # 输入通道数,输出通道数 28 | input = torch.rand(3, 64, 64, 64) # 输入B C H W 29 | output = block(input) 30 | print(output.size()) 31 | -------------------------------------------------------------------------------- /注意力/(ICML 2021)SimAM.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------- 2 | # Simam: A simple, parameter-free attention module for convolutional neural networks (ICML 2021) 3 | # Github:https://github.com/ZjjConan/SimAM 4 | # --------------------------------------- 5 | import torch 6 | import torch.nn as nn 7 | from thop import profile 8 | 9 | 10 | class Simam_module(torch.nn.Module): 11 | def __init__(self, e_lambda=1e-4): 12 | super(Simam_module, self).__init__() 13 | self.act = nn.Sigmoid() 14 | self.e_lambda = e_lambda 15 | 16 | def forward(self, x): 17 | b, c, h, w = x.size() 18 | n = w * h - 1 19 | x_minus_mu_square = (x - x.mean(dim=[2, 3], keepdim=True)).pow(2) 20 | y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(dim=[2, 3], keepdim=True) / n + self.e_lambda)) + 0.5 21 | 22 | return x * self.act(y) 23 | 24 | 25 | # 无参注意力机制 输入 N C H W, 输出 N C H W 26 | if __name__ == '__main__': 27 | model = Simam_module().cuda() 28 | x = torch.randn(1, 3, 64, 64).cuda() 29 | y = model(x) 30 | print(y.size()) 31 | flops, params = profile(model, (x,)) 32 | print(flops / 1e9) 33 | print(params) 34 | -------------------------------------------------------------------------------- /缝合代码示例/LSK.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LSKblock(nn.Module): 6 | def __init__(self, dim): 7 | super().__init__() 8 | self.conv0 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim) 9 | self.conv_spatial = nn.Conv2d(dim, dim, 7, stride=1, padding=9, groups=dim, dilation=3) 10 | self.conv1 = nn.Conv2d(dim, dim // 2, 1) 11 | self.conv2 = nn.Conv2d(dim, dim // 2, 1) 12 | self.conv_squeeze = nn.Conv2d(2, 2, 7, padding=3) 13 | self.conv = nn.Conv2d(dim // 2, dim, 1) 14 | 15 | def forward(self, x): 16 | attn1 = self.conv0(x) 17 | attn2 = self.conv_spatial(attn1) 18 | 19 | attn1 = self.conv1(attn1) 20 | attn2 = self.conv2(attn2) 21 | 22 | attn = torch.cat([attn1, attn2], dim=1) 23 | avg_attn = torch.mean(attn, dim=1, keepdim=True) 24 | max_attn, _ = torch.max(attn, dim=1, keepdim=True) 25 | agg = torch.cat([avg_attn, max_attn], dim=1) 26 | sig = self.conv_squeeze(agg).sigmoid() 27 | attn = attn1 * sig[:, 0, :, :].unsqueeze(1) + attn2 * sig[:, 1, :, :].unsqueeze(1) 28 | attn = self.conv(attn) 29 | return x * attn 30 | 31 | 32 | # 输入 B C H W, 输出 B C H W 33 | if __name__ == '__main__': 34 | block = LSKblock(64) 35 | input = torch.rand(1, 64, 64, 64) 36 | output = block(input) 37 | print(input.size(), output.size()) 38 | -------------------------------------------------------------------------------- /缝合代码示例/部分卷积.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | 4 | 5 | class Partial_conv3(nn.Module): 6 | 7 | def __init__(self, dim, n_div, forward): 8 | super().__init__() 9 | self.dim_conv3 = dim // n_div 10 | 
self.dim_untouched = dim - self.dim_conv3 11 | self.partial_conv3 = nn.Conv2d(self.dim_conv3, self.dim_conv3, 3, 1, 1, bias=False) 12 | 13 | if forward == 'slicing': 14 | self.forward = self.forward_slicing 15 | elif forward == 'split_cat': 16 | self.forward = self.forward_split_cat 17 | else: 18 | raise NotImplementedError 19 | 20 | def forward_slicing(self, x): 21 | # only for inference 22 | x = x.clone() # !!! Keep the original input intact for the residual connection later 23 | x[:, :self.dim_conv3, :, :] = self.partial_conv3(x[:, :self.dim_conv3, :, :]) 24 | 25 | return x 26 | 27 | def forward_split_cat(self, x): 28 | # for training/inference 29 | x1, x2 = torch.split(x, [self.dim_conv3, self.dim_untouched], dim=1) 30 | x1 = self.partial_conv3(x1) 31 | x = torch.cat((x1, x2), 1) 32 | 33 | return x 34 | 35 | # 输入 B C H W, 输出 B C H W 36 | if __name__ == '__main__': 37 | block = Partial_conv3(64, 2, 'split_cat') 38 | input = torch.rand(1, 64, 64, 64) 39 | output = block(input) 40 | print(input.size(), output.size()) 41 | -------------------------------------------------------------------------------- /GhostModule.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch 4 | #GhostNet: More Features from Cheap Operations 5 | 6 | class GhostModule(nn.Module): 7 | def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True): 8 | super(GhostModule, self).__init__() 9 | self.oup = oup 10 | init_channels = math.ceil(oup / ratio) 11 | new_channels = init_channels*(ratio-1) 12 | 13 | self.primary_conv = nn.Sequential( 14 | nn.Conv2d(inp, init_channels, kernel_size, 15 | stride, kernel_size//2, bias=False), 16 | nn.BatchNorm2d(init_channels), 17 | nn.ReLU(inplace=True) if relu else nn.Sequential(), 18 | ) 19 | 20 | self.cheap_operation = nn.Sequential( 21 | nn.Conv2d(init_channels, new_channels, dw_size, 1, 22 | dw_size//2, groups=init_channels, bias=False), 23 | nn.BatchNorm2d(new_channels), 24 | nn.ReLU(inplace=True) if relu else nn.Sequential(), 25 | ) 26 | 27 | def forward(self, x): 28 | x1 = self.primary_conv(x) 29 | x2 = self.cheap_operation(x1) 30 | out = torch.cat([x1, x2], dim=1) 31 | return out[:, :self.oup, :, :] 32 | 33 | 34 | if __name__ == "__main__": 35 | block = GhostModule(128, 256) 36 | 37 | input = torch.zeros((2, 128, 64, 64)) 38 | 39 | output = block(input) 40 | 41 | print(output.size()) -------------------------------------------------------------------------------- /DFF2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | #论文:D-Net: Dynamic Large Kernel with Dynamic Feature Fusion for Volumetric Medical Image Segmentation 4 | #论文地址:https://arxiv.org/abs/2403.10674 5 | 6 | class DFF(nn.Module): 7 | def __init__(self, dim): 8 | super().__init__() 9 | 10 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 11 | self.conv_atten = nn.Sequential( 12 | nn.Conv2d(dim * 2, dim * 2, kernel_size=1, bias=False), 13 | nn.Sigmoid() 14 | ) 15 | self.conv_redu = nn.Conv2d(dim * 2, dim, kernel_size=1, bias=False) 16 | 17 | self.conv1 = nn.Conv2d(dim, 1, kernel_size=1, stride=1, bias=True) 18 | self.conv2 = nn.Conv2d(dim, 1, kernel_size=1, stride=1, bias=True) 19 | self.nonlin = nn.Sigmoid() 20 | 21 | def forward(self, x, skip): 22 | output = torch.cat([x, skip], dim=1) 23 | 24 | att = self.conv_atten(self.avg_pool(output)) 25 | output = output * att 26 | output = self.conv_redu(output) 27 | 28 | att = 
self.conv1(x) + self.conv2(skip) 29 | att = self.nonlin(att) 30 | output = output * att 31 | return output 32 | 33 | if __name__ == '__main__': 34 | 35 | x = torch.randn(1, 48, 128, 128) 36 | skip = torch.randn(1, 48, 128, 128) 37 | 38 | block = DFF(48) 39 | 40 | output = block(x, skip) 41 | 42 | print("Input shape (x):", x.size()) 43 | print("Input shape (skip):", skip.size()) 44 | print("Output shape:", output.size()) 45 | -------------------------------------------------------------------------------- /卷积/(CVPR 2023) 部分卷积.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | #论文地址:https://arxiv.org/pdf/2303.03667 4 | #GitHub地址:https://github.com/JierunChen/FasterNet 5 | 6 | class Partial_conv3(nn.Module): 7 | 8 | def __init__(self, dim, n_div, forward): 9 | super().__init__() 10 | self.dim_conv3 = dim // n_div 11 | self.dim_untouched = dim - self.dim_conv3 12 | self.partial_conv3 = nn.Conv2d(self.dim_conv3, self.dim_conv3, 3, 1, 1, bias=False) 13 | 14 | if forward == 'slicing': 15 | self.forward = self.forward_slicing 16 | elif forward == 'split_cat': 17 | self.forward = self.forward_split_cat 18 | else: 19 | raise NotImplementedError 20 | 21 | def forward_slicing(self, x): 22 | # only for inference 23 | x = x.clone() # !!! Keep the original input intact for the residual connection later 24 | x[:, :self.dim_conv3, :, :] = self.partial_conv3(x[:, :self.dim_conv3, :, :]) 25 | 26 | return x 27 | 28 | def forward_split_cat(self, x): 29 | # for training/inference 30 | x1, x2 = torch.split(x, [self.dim_conv3, self.dim_untouched], dim=1) 31 | x1 = self.partial_conv3(x1) 32 | x = torch.cat((x1, x2), 1) 33 | 34 | return x 35 | 36 | 37 | if __name__ == '__main__': 38 | block = Partial_conv3(64, 2, 'split_cat').cuda() 39 | input = torch.rand(3, 64, 64, 64).cuda() #输入shape b c h w 40 | output = block(input) 41 | print(input.size(), output.size()) 42 | -------------------------------------------------------------------------------- /(ICCV2023)SAFM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | #https://github.com/sunny2109/SAFMN 5 | #论文:https://arxiv.org/pdf/2302.13800 6 | class SAFM(nn.Module): 7 | def __init__(self, dim, n_levels=4): 8 | super().__init__() 9 | self.n_levels = n_levels 10 | chunk_dim = dim // n_levels 11 | 12 | # Spatial Weighting 13 | self.mfr = nn.ModuleList( 14 | [nn.Conv2d(chunk_dim, chunk_dim, 3, 1, 1, groups=chunk_dim) for i in range(self.n_levels)]) 15 | 16 | # # Feature Aggregation 17 | self.aggr = nn.Conv2d(dim, dim, 1, 1, 0) 18 | 19 | # Activation 20 | self.act = nn.GELU() 21 | 22 | def forward(self, x): 23 | h, w = x.size()[-2:] 24 | 25 | xc = x.chunk(self.n_levels, dim=1) 26 | out = [] 27 | for i in range(self.n_levels): 28 | if i > 0: 29 | p_size = (h // 2 ** i, w // 2 ** i) 30 | s = F.adaptive_max_pool2d(xc[i], p_size) 31 | s = self.mfr[i](s) 32 | s = F.interpolate(s, size=(h, w), mode='nearest') 33 | else: 34 | s = self.mfr[i](xc[i]) 35 | out.append(s) 36 | 37 | out = self.aggr(torch.cat(out, dim=1)) 38 | out = self.act(out) * x 39 | return out 40 | 41 | 42 | if __name__ == '__main__': 43 | input = torch.randn(3,36,64,64) #输入b c h w 44 | 45 | block = SAFM(dim=36) 46 | output =block(input) 47 | print(output.size()) 48 | -------------------------------------------------------------------------------- /缝合代码示例/MobileViTv2Attention.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import init 4 | 5 | 6 | class MobileViTv2Attention(nn.Module): 7 | ''' 8 | Scaled dot-product attention 9 | ''' 10 | 11 | def __init__(self, d_model): 12 | ''' 13 | :param d_model: Output dimensionality of the model 14 | :param d_k: Dimensionality of queries and keys 15 | :param d_v: Dimensionality of values 16 | :param h: Number of heads 17 | ''' 18 | super(MobileViTv2Attention, self).__init__() 19 | self.fc_i = nn.Linear(d_model, 1) 20 | self.fc_k = nn.Linear(d_model, d_model) 21 | self.fc_v = nn.Linear(d_model, d_model) 22 | self.fc_o = nn.Linear(d_model, d_model) 23 | 24 | self.d_model = d_model 25 | 26 | def forward(self, input): 27 | ''' 28 | Computes 29 | :param queries: Queries (b_s, nq, d_model) 30 | :return: 31 | ''' 32 | i = self.fc_i(input) # (bs,nq,1) 33 | weight_i = torch.softmax(i, dim=1) # bs,nq,1 34 | context_score = weight_i * self.fc_k(input) # bs,nq,d_model 35 | context_vector = torch.sum(context_score, dim=1, keepdim=True) # bs,1,d_model 36 | v = self.fc_v(input) * context_vector # bs,nq,d_model 37 | out = self.fc_o(v) # bs,nq,d_model 38 | 39 | return out 40 | 41 | 42 | # 输入 B N C 输出 B N C 43 | if __name__ == '__main__': 44 | block = MobileViTv2Attention(d_model=31) 45 | input = torch.rand(64, 61, 31) 46 | output = block(input) 47 | print(input.size(), output.size()) 48 | -------------------------------------------------------------------------------- /(CVPR 2024)IDC.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | # 论文地址:https://arxiv.org/pdf/2303.16900 4 | # 论文:InceptionNeXt: When Inception Meets ConvNeXt (CVPR 2024) 5 | # 全网最全100➕即插即用模块GitHub地址:https://github.com/ai-dawang/PlugNPlay-Modules 6 | class InceptionDWConv2d(nn.Module): 7 | """ Inception depthweise convolution 8 | """ 9 | 10 | def __init__(self, in_channels, square_kernel_size=3, band_kernel_size=11, branch_ratio=0.125): 11 | super().__init__() 12 | 13 | gc = int(in_channels * branch_ratio) # channel numbers of a convolution branch 14 | self.dwconv_hw = nn.Conv2d(gc, gc, square_kernel_size, padding=square_kernel_size // 2, groups=gc) 15 | self.dwconv_w = nn.Conv2d(gc, gc, kernel_size=(1, band_kernel_size), padding=(0, band_kernel_size // 2), 16 | groups=gc) 17 | self.dwconv_h = nn.Conv2d(gc, gc, kernel_size=(band_kernel_size, 1), padding=(band_kernel_size // 2, 0), 18 | groups=gc) 19 | self.split_indexes = (in_channels - 3 * gc, gc, gc, gc) 20 | 21 | def forward(self, x): 22 | x_id, x_hw, x_w, x_h = torch.split(x, self.split_indexes, dim=1) 23 | return torch.cat( 24 | (x_id, self.dwconv_hw(x_hw), self.dwconv_w(x_w), self.dwconv_h(x_h)), 25 | dim=1, 26 | ) 27 | 28 | 29 | if __name__ == '__main__': 30 | 31 | block = InceptionDWConv2d(64) #输入C 32 | input = torch.randn(1, 64, 224, 224) #输入 B C H W 33 | output = block(input) 34 | print(input.size()) 35 | print(output.size()) 36 | -------------------------------------------------------------------------------- /UCDC.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.utils.data 3 | import torch 4 | #论文:ABC: Attention with Bilinear Correlation for Infrared Small Target Detection ICME2023 5 | #论文地址:https://arxiv.org/pdf/2303.10321 6 | 7 | def conv_relu_bn(in_channel, out_channel, dirate): 8 | return nn.Sequential( 9 | nn.Conv2d(in_channels=in_channel, 
out_channels=out_channel, kernel_size=3, stride=1, padding=dirate, 10 | dilation=dirate), 11 | nn.BatchNorm2d(out_channel), 12 | nn.ReLU(inplace=True) 13 | ) 14 | 15 | 16 | #u-shaped convolution-dilated convolution (UCDC) 17 | class UCDC(nn.Module): 18 | """ 19 | Convolution Block 20 | """ 21 | 22 | def __init__(self, in_ch, out_ch): 23 | super(UCDC, self).__init__() 24 | self.conv1 = conv_relu_bn(in_ch, out_ch, 1) 25 | self.dconv1 = conv_relu_bn(out_ch, out_ch // 2, 2) 26 | self.dconv2 = conv_relu_bn(out_ch // 2, out_ch // 2, 4) 27 | self.dconv3 = conv_relu_bn(out_ch, out_ch, 2) 28 | self.conv2 = conv_relu_bn(out_ch * 2, out_ch, 1) 29 | 30 | def forward(self, x): 31 | x1 = self.conv1(x) 32 | dx1 = self.dconv1(x1) 33 | dx2 = self.dconv2(dx1) 34 | dx3 = self.dconv3(torch.cat((dx1, dx2), dim=1)) 35 | out = self.conv2(torch.cat((x1, dx3), dim=1)) 36 | return out 37 | 38 | 39 | if __name__ == '__main__': 40 | 41 | block = UCDC(64, 64) 42 | 43 | 44 | input = torch.randn(1, 64, 32, 32) 45 | 46 | print(input.size()) 47 | 48 | output = block(input) 49 | 50 | print(output.size()) 51 | -------------------------------------------------------------------------------- /(arXiv 2021) EA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/abs/2105.02358 7 | # 论文:Beyond Self-attention: External Attention using Two Linear Layers for Visual Tasks 8 | 9 | 10 | class ExternalAttention(nn.Module): 11 | 12 | def __init__(self, d_model,S=64): 13 | super().__init__() 14 | self.mk=nn.Linear(d_model,S,bias=False) 15 | self.mv=nn.Linear(S,d_model,bias=False) 16 | self.softmax=nn.Softmax(dim=1) 17 | self.init_weights() 18 | 19 | 20 | def init_weights(self): 21 | for m in self.modules(): 22 | if isinstance(m, nn.Conv2d): 23 | init.kaiming_normal_(m.weight, mode='fan_out') 24 | if m.bias is not None: 25 | init.constant_(m.bias, 0) 26 | elif isinstance(m, nn.BatchNorm2d): 27 | init.constant_(m.weight, 1) 28 | init.constant_(m.bias, 0) 29 | elif isinstance(m, nn.Linear): 30 | init.normal_(m.weight, std=0.001) 31 | if m.bias is not None: 32 | init.constant_(m.bias, 0) 33 | 34 | def forward(self, queries): 35 | attn=self.mk(queries) #bs,n,S 36 | attn=self.softmax(attn) #bs,n,S 37 | attn=attn/torch.sum(attn,dim=2,keepdim=True) #bs,n,S 38 | out=self.mv(attn) #bs,n,d_model 39 | 40 | return out 41 | 42 | 43 | if __name__ == '__main__': 44 | input=torch.randn(50,49,512) 45 | block = ExternalAttention(d_model=512,S=8) 46 | output=block(input) 47 | print(output.shape) 48 | 49 | -------------------------------------------------------------------------------- /GCTattention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | #论文:title:Gated Channel Transformation for Visual Recognition 4 | #论文地址:https://arxiv.org/abs/1909.11519 5 | 6 | # 定义 GCT 模块 7 | class GCT(nn.Module): 8 | def __init__(self, num_channels, epsilon=1e-5, mode='l2', after_relu=False): 9 | super(GCT, self).__init__() 10 | self.alpha = nn.Parameter(torch.ones(1, num_channels, 1, 1)) 11 | self.gamma = nn.Parameter(torch.zeros(1, num_channels, 1, 1)) 12 | self.beta = nn.Parameter(torch.zeros(1, num_channels, 1, 1)) 13 | self.epsilon = epsilon 14 | self.mode = mode 15 | self.after_relu = after_relu 16 | 17 | def forward(self, x): 18 | if self.mode == 'l2': 19 | embedding = (x.pow(2).sum((2, 3), keepdim=True) + self.epsilon).pow(0.5) * 
self.alpha 20 | norm = self.gamma / (embedding.pow(2).mean(dim=1, keepdim=True) + self.epsilon).pow(0.5) 21 | elif self.mode == 'l1': 22 | _x = torch.abs(x) if not self.after_relu else x 23 | embedding = _x.sum((2, 3), keepdim=True) * self.alpha 24 | norm = self.gamma / (torch.abs(embedding).mean(dim=1, keepdim=True) + self.epsilon) 25 | else: 26 | raise ValueError('Unknown mode: {}'.format(self.mode)) 27 | 28 | gate = 1. + torch.tanh(embedding * norm + self.beta) 29 | return x * gate 30 | 31 | 32 | if __name__ == '__main__': 33 | 34 | 35 | input = torch.randn(1, 16, 32, 32) 36 | 37 | print(input.size()) 38 | 39 | block = GCT(num_channels=16) 40 | 41 | output = block(input) 42 | 43 | print(output.size()) -------------------------------------------------------------------------------- /scSE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | #Concurrent Spatial and Channel ‘Squeeze & Excitation’ in Fully Convolutional Networks 4 | 5 | 6 | class cSE(nn.Module): 7 | 8 | def __init__(self, channel, reduction=2): 9 | super().__init__() 10 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 11 | self.fc = nn.Sequential( 12 | nn.Conv2d(channel, channel // reduction, kernel_size=1, bias=False), 13 | nn.ReLU(inplace=True), 14 | nn.Conv2d(channel // reduction, channel, kernel_size=1, bias=False), 15 | nn.Sigmoid() 16 | ) 17 | 18 | def forward(self, x): 19 | y = self.avg_pool(x) 20 | y = self.fc(y) 21 | return x * y.expand_as(x) 22 | 23 | class sSE(nn.Module): 24 | def __init__(self, in_channel): 25 | super().__init__() 26 | self.Conv1x1 = nn.Conv2d(in_channel, 1, kernel_size=1, bias=False) 27 | self.norm = nn.Sigmoid() 28 | 29 | def forward(self, x): 30 | y = self.Conv1x1(x) 31 | y = self.norm(y) 32 | return x * y 33 | 34 | class scSE(nn.Module): 35 | def __init__(self, in_channel): 36 | super().__init__() 37 | self.cSE = cSE(in_channel) 38 | self.sSE = sSE(in_channel) 39 | 40 | def forward(self, U): 41 | U_sse = self.sSE(U) 42 | U_cse = self.cSE(U) 43 | return torch.max(U_cse, U_sse) # Taking the element-wise maximum 44 | 45 | 46 | if __name__ == '__main__': 47 | input = torch.randn(3, 32, 64, 64) #B C H W 48 | block = scSE(in_channel=32) 49 | output = block(input) 50 | print(output.size()) -------------------------------------------------------------------------------- /(arXiv 2019) ECA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | from collections import OrderedDict 6 | 7 | # 论文地址:https://arxiv.org/pdf/1910.03151 8 | # 论文:ECA-Net: Efficient Channel Attention for Deep Convolutional Neural Networks 9 | 10 | 11 | 12 | 13 | class ECAAttention(nn.Module): 14 | 15 | def __init__(self, kernel_size=3): 16 | super().__init__() 17 | self.gap=nn.AdaptiveAvgPool2d(1) 18 | self.conv=nn.Conv1d(1,1,kernel_size=kernel_size,padding=(kernel_size-1)//2) 19 | self.sigmoid=nn.Sigmoid() 20 | 21 | def init_weights(self): 22 | for m in self.modules(): 23 | if isinstance(m, nn.Conv2d): 24 | init.kaiming_normal_(m.weight, mode='fan_out') 25 | if m.bias is not None: 26 | init.constant_(m.bias, 0) 27 | elif isinstance(m, nn.BatchNorm2d): 28 | init.constant_(m.weight, 1) 29 | init.constant_(m.bias, 0) 30 | elif isinstance(m, nn.Linear): 31 | init.normal_(m.weight, std=0.001) 32 | if m.bias is not None: 33 | init.constant_(m.bias, 0) 34 | 35 | def forward(self, x): 36 | y=self.gap(x) #bs,c,1,1 37 | 
y=y.squeeze(-1).permute(0,2,1) #bs,1,c 38 | y=self.conv(y) #bs,1,c 39 | y=self.sigmoid(y) #bs,1,c 40 | y=y.permute(0,2,1).unsqueeze(-1) #bs,c,1,1 41 | return x*y.expand_as(x) 42 | 43 | 44 | 45 | 46 | 47 | 48 | if __name__ == '__main__': 49 | input=torch.randn(50,512,7,7) 50 | block = ECAAttention(kernel_size=3) 51 | output=block(input) 52 | print(output.shape) 53 | 54 | -------------------------------------------------------------------------------- /PGM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class PromptGenBlock(nn.Module): 7 | def __init__(self, prompt_dim=128, prompt_len=5, prompt_size=96, lin_dim=192): 8 | super(PromptGenBlock, self).__init__() 9 | self.prompt_param = nn.Parameter(torch.rand(1, prompt_len, prompt_dim, prompt_size, prompt_size)) 10 | self.linear_layer = nn.Linear(lin_dim, prompt_len) 11 | self.conv3x3 = nn.Conv2d(prompt_dim, prompt_dim, kernel_size=3, stride=1, padding=1, bias=False) 12 | 13 | def forward(self, x): 14 | B, C, H, W = x.shape 15 | emb = x.mean(dim=(-2, -1)) 16 | prompt_weights = F.softmax(self.linear_layer(emb), dim=1) 17 | prompt = prompt_weights.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) * self.prompt_param.unsqueeze(0).repeat(B, 1, 18 | 1, 1, 19 | 1, 20 | 1).squeeze( 21 | 1) 22 | prompt = torch.sum(prompt, dim=1) 23 | prompt = F.interpolate(prompt, (H, W), mode="bilinear") 24 | prompt = self.conv3x3(prompt) 25 | 26 | return prompt 27 | if __name__ == '__main__': 28 | 29 | block = PromptGenBlock(prompt_dim=3, prompt_len=4, prompt_size=96, lin_dim=3)#修改这里来对齐 30 | input = torch.randn(4, 3, 64, 64) # B C H W 31 | output = block(input) 32 | print(input.size()) 33 | print(output.size()) 34 | -------------------------------------------------------------------------------- /卷积/(ICCV 2023)大核选择模块LSK.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | #Github地址:https://github.com/zcablii/Large-Selective-Kernel-Network 4 | #论文地址:https://openaccess.thecvf.com/content/ICCV2023/papers/Li_Large_Selective_Kernel_Network_for_Remote_Sensing_Object_Detection_ICCV_2023_paper.pdf 5 | 6 | class LSKblock(nn.Module): 7 | def __init__(self, dim): 8 | super().__init__() 9 | self.conv0 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim) 10 | self.conv_spatial = nn.Conv2d(dim, dim, 7, stride=1, 11 | padding=9, groups=dim, dilation=3) 12 | self.conv1 = nn.Conv2d(dim, dim // 2, 1) 13 | self.conv2 = nn.Conv2d(dim, dim // 2, 1) 14 | self.conv_squeeze = nn.Conv2d(2, 2, 7, padding=3) 15 | self.conv = nn.Conv2d(dim // 2, dim, 1) 16 | 17 | def forward(self, x): 18 | attn1 = self.conv0(x) 19 | attn2 = self.conv_spatial(attn1) 20 | 21 | attn1 = self.conv1(attn1) 22 | attn2 = self.conv2(attn2) 23 | 24 | attn = torch.cat([attn1, attn2], dim=1) 25 | avg_attn = torch.mean(attn, dim=1, keepdim=True) 26 | max_attn, _ = torch.max(attn, dim=1, keepdim=True) 27 | agg = torch.cat([avg_attn, max_attn], dim=1) 28 | sig = self.conv_squeeze(agg).sigmoid() 29 | attn = attn1 * sig[:, 0, :, :].unsqueeze(1) + \ 30 | attn2 * sig[:, 1, :, :].unsqueeze(1) 31 | attn = self.conv(attn) 32 | return x * attn 33 | 34 | 35 | if __name__ == '__main__': 36 | block = LSKblock(64).cuda() 37 | input = torch.rand(3, 64, 32, 32).cuda() #输入B C H W 38 | output = block(input) 39 | print(input.size(), output.size()) 40 | -------------------------------------------------------------------------------- /f_sampling.py: 
-------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | #论文:Multi-Scale Temporal Frequency Convolutional Network With Axial Attention for Speech Enhancement (ICASSP 2022) 4 | #论文地址:https://ieeexplore.ieee.org/document/9746610 5 | 6 | class FD(nn.Module): 7 | def __init__(self, cin, cout, K=(7, 1), S=(4, 1), P=(2, 0)): 8 | super(FD, self).__init__() 9 | self.fd = nn.Sequential( 10 | nn.Conv2d(cin, cout, K, S, P, groups=2), 11 | nn.BatchNorm2d(cout), 12 | nn.PReLU(cout) 13 | ) 14 | 15 | def forward(self, x): 16 | return self.fd(x) 17 | 18 | 19 | class FU(nn.Module): 20 | def __init__(self, cin, cout, K=(7, 1), S=(4, 1), P=(2, 0), O=(1, 0)): 21 | super(FU, self).__init__() 22 | self.pconv1 = nn.Sequential( 23 | nn.Conv2d(cin*2, cin, (1, 1)), 24 | nn.BatchNorm2d(cin), 25 | nn.Tanh(), 26 | ) 27 | self.pconv2 = nn.Sequential( 28 | nn.Conv2d(cin, cout, (1, 1)), 29 | nn.BatchNorm2d(cout), 30 | nn.PReLU(cout), 31 | ) 32 | # 22/06/13 update, add groups = 2 33 | self.conv3 = nn.Sequential( 34 | nn.ConvTranspose2d(cout, cout, K, S, P, O, groups=2), 35 | nn.BatchNorm2d(cout), 36 | nn.PReLU(cout) 37 | ) 38 | 39 | def forward(self, fu, fd): 40 | """ 41 | fu, fd: B C F T 42 | """ 43 | outs = self.pconv1(th.cat([fu, fd], dim=1))*fd 44 | outs = self.pconv2(outs) 45 | outs = self.conv3(outs) 46 | return outs 47 | 48 | 49 | def test_fd(): 50 | net = FD(4, 8) 51 | inps = th.randn(3, 4, 256, 101) 52 | print(net(inps).shape) 53 | 54 | 55 | if __name__ == "__main__": 56 | test_fd() -------------------------------------------------------------------------------- /3D/(CVPR 2024)IDC3d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | # 论文地址:https://arxiv.org/pdf/2303.16900 4 | # 论文:InceptionNeXt: When Inception Meets ConvNeXt (CVPR 2024) 5 | # 全网最全100➕即插即用模块GitHub地址:https://github.com/ai-dawang/PlugNPlay-Modules 6 | class InceptionDWConv3d(nn.Module): 7 | """ Inception depthwise convolution for 3D data 8 | """ 9 | 10 | def __init__(self, in_channels, cube_kernel_size=3, band_kernel_size=11, branch_ratio=0.125): 11 | super().__init__() 12 | 13 | gc = int(in_channels * branch_ratio) # channel numbers of a convolution branch 14 | self.dwconv_hwd = nn.Conv3d(gc, gc, cube_kernel_size, padding=cube_kernel_size // 2, groups=gc) 15 | self.dwconv_wd = nn.Conv3d(gc, gc, kernel_size=(1, 1, band_kernel_size), padding=(0, 0, band_kernel_size // 2), 16 | groups=gc) 17 | self.dwconv_hd = nn.Conv3d(gc, gc, kernel_size=(1, band_kernel_size, 1), padding=(0, band_kernel_size // 2, 0), 18 | groups=gc) 19 | self.dwconv_hw = nn.Conv3d(gc, gc, kernel_size=(band_kernel_size, 1, 1), padding=(band_kernel_size // 2, 0, 0), 20 | groups=gc) 21 | self.split_indexes = (in_channels - 4 * gc, gc, gc, gc, gc) 22 | 23 | def forward(self, x): 24 | x_id, x_hwd, x_wd, x_hd, x_hw = torch.split(x, self.split_indexes, dim=1) 25 | return torch.cat( 26 | (x_id, self.dwconv_hwd(x_hwd), self.dwconv_wd(x_wd), self.dwconv_hd(x_hd), self.dwconv_hw(x_hw)), 27 | dim=1, 28 | ) 29 | 30 | 31 | if __name__ == '__main__': 32 | block = InceptionDWConv3d(64) # 输入 C 33 | input = torch.randn(1, 64, 16, 224, 224) # 输入B C D H W 34 | output = block(input) 35 | print(input.size()) 36 | print(output.size()) 37 | -------------------------------------------------------------------------------- /目标检测/WCMF.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn 
as nn 3 | # 论文:FCMNet: Frequency-aware cross-modality attention networks for RGB-D salient object detection 4 | # 论文地址:https://www.sciencedirect.com/science/article/abs/pii/S0925231222003848 5 | class WCMF(nn.Module): 6 | def __init__(self,channel=256): 7 | super(WCMF, self).__init__() 8 | self.conv_r1 = nn.Sequential(nn.Conv2d(channel, channel, 1, 1, 0), nn.BatchNorm2d(channel), nn.ReLU()) 9 | self.conv_d1 = nn.Sequential(nn.Conv2d(channel, channel, 1, 1, 0), nn.BatchNorm2d(channel), nn.ReLU()) 10 | 11 | self.conv_c1 = nn.Sequential(nn.Conv2d(2*channel, channel, 3, 1, 1), nn.BatchNorm2d(channel), nn.ReLU()) 12 | self.conv_c2 = nn.Sequential(nn.Conv2d(channel, 2, 3, 1, 1), nn.BatchNorm2d(2), nn.ReLU()) 13 | self.avgpool = nn.AdaptiveAvgPool2d((1,1)) 14 | def fusion(self,f1,f2,f_vec): 15 | 16 | w1 = f_vec[:, 0, :, :].unsqueeze(1) 17 | w2 = f_vec[:, 1, :, :].unsqueeze(1) 18 | out1 = (w1 * f1) + (w2 * f2) 19 | out2 = (w1 * f1) * (w2 * f2) 20 | return out1 + out2 21 | def forward(self,rgb,depth): 22 | Fr = self.conv_r1(rgb) 23 | Fd = self.conv_d1(depth) 24 | f = torch.cat([Fr, Fd],dim=1) 25 | f = self.conv_c1(f) 26 | f = self.conv_c2(f) 27 | # f = self.avgpool(f) 28 | Fo = self.fusion(Fr, Fd, f) 29 | return Fo 30 | 31 | 32 | if __name__ == '__main__': 33 | 34 | block = WCMF(channel=256) 35 | 36 | # 创建RGB和深度输入的假设张量 37 | rgb_input = torch.randn(1, 256, 224, 224) 38 | depth_input = torch.randn(1, 256, 224, 224) 39 | 40 | # 通过WCMF模型 41 | output = block(rgb_input, depth_input) 42 | 43 | # 打印输入和输出的shape 44 | print(rgb_input.size()) 45 | print(depth_input.size()) 46 | print(output.size()) -------------------------------------------------------------------------------- /MDTA.py: -------------------------------------------------------------------------------- 1 | ## Multi-DConv Head Transposed Self-Attention (MDTA) 2 | import torch 3 | from einops import rearrange 4 | from torch import nn 5 | 6 | 7 | class Attention(nn.Module): 8 | def __init__(self, dim, num_heads = 4, bias = True): 9 | super(Attention, self).__init__() 10 | self.num_heads = num_heads 11 | self.temperature = nn.Parameter(torch.ones(num_heads, 1, 1)) 12 | 13 | self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1, bias=bias) 14 | self.qkv_dwconv = nn.Conv2d(dim * 3, dim * 3, kernel_size=3, stride=1, padding=1, groups=dim * 3, bias=bias) 15 | self.project_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias) 16 | 17 | def forward(self, x): 18 | b, c, h, w = x.shape 19 | 20 | 21 | qkv = self.qkv_dwconv(self.qkv(x)) 22 | q, k, v = qkv.chunk(3, dim=1) 23 | 24 | q = rearrange(q, 'b (head c) h w -> b head c (h w)', head=self.num_heads) 25 | k = rearrange(k, 'b (head c) h w -> b head c (h w)', head=self.num_heads) 26 | v = rearrange(v, 'b (head c) h w -> b head c (h w)', head=self.num_heads) 27 | 28 | q = torch.nn.functional.normalize(q, dim=-1) 29 | k = torch.nn.functional.normalize(k, dim=-1) 30 | 31 | # [B, head, C/head, HW] * [B, head, HW, C/head] * [head, 1, 1] ==> [B, head, C/head, C/head] 32 | attn = (q @ k.transpose(-2, -1)) * self.temperature 33 | attn = attn.softmax(dim=-1) 34 | 35 | # [B, head, C/head, C/head] * [B, head, C/head, HW] ==> [B, head, C/head, HW] 36 | out = (attn @ v) 37 | 38 | # [B, head, C/head, HW] ==> [B, head, C/head, H, W] 39 | out = rearrange(out, 'b head c (h w) -> b (head c) h w', head=self.num_heads, h=h, w=w) 40 | 41 | out = self.project_out(out) 42 | return out 43 | 44 | if __name__ == '__main__': 45 | block = Attention(64) 46 | input = torch.rand(3, 64, 128, 128) 47 | output = block(input) 48 | 49 | 
print(input.size()) 50 | print(output.size()) 51 | -------------------------------------------------------------------------------- /注意力/(IEEE 2023)AGCA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | # 论文:AGCA: An Adaptive Graph Channel Attention Module for Steel Surface Defect Detection 5 | # 论文地址:https://ieeexplore.ieee.org/document/10050536 6 | 7 | class AGCA(nn.Module): 8 | def __init__(self, in_channel, ratio): 9 | super(AGCA, self).__init__() 10 | hide_channel = in_channel // ratio 11 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 12 | self.conv1 = nn.Conv2d(in_channel, hide_channel, kernel_size=1, bias=False) 13 | self.softmax = nn.Softmax(2) 14 | # Choose to deploy A0 on GPU or CPU according to your needs 15 | self.A0 = torch.eye(hide_channel).to('cuda') 16 | # self.A0 = torch.eye(hide_channel) 17 | # A2 is initialized to 1e-6 18 | self.A2 = nn.Parameter(torch.FloatTensor(torch.zeros((hide_channel, hide_channel))), requires_grad=True) 19 | init.constant_(self.A2, 1e-6) 20 | self.conv2 = nn.Conv1d(1, 1, kernel_size=1, bias=False) 21 | self.conv3 = nn.Conv1d(1, 1, kernel_size=1, bias=False) 22 | self.relu = nn.ReLU(inplace=True) 23 | self.conv4 = nn.Conv2d(hide_channel, in_channel, kernel_size=1, bias=False) 24 | self.sigmoid = nn.Sigmoid() 25 | 26 | def forward(self, x): 27 | y = self.avg_pool(x) 28 | y = self.conv1(y) 29 | B, C, _, _ = y.size() 30 | y = y.flatten(2).transpose(1, 2) 31 | A1 = self.softmax(self.conv2(y)) 32 | A1 = A1.expand(B, C, C) 33 | A = (self.A0 * A1) + self.A2 34 | y = torch.matmul(y, A) 35 | y = self.relu(self.conv3(y)) 36 | y = y.transpose(1, 2).view(-1, C, 1, 1) 37 | y = self.sigmoid(self.conv4(y)) 38 | 39 | return x * y 40 | 41 | if __name__ == '__main__': 42 | block = AGCA(in_channel=64, ratio=4).to('cuda') 43 | input = torch.rand(1, 64, 32, 32).to('cuda') 44 | output = block(input) 45 | print(input.size()) 46 | print(output.size()) 47 | -------------------------------------------------------------------------------- /(ACCV 2024) LIA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | # 论文题目:PlainUSR: Chasing Faster ConvNet for Efficient Super-Resolution 6 | # 论文地址:https://openaccess.thecvf.com/content/ACCV2024/papers/Wang_PlainUSR_Chasing_Faster_ConvNet_for_Efficient_Super-Resolution_ACCV_2024_paper.pdf 7 | 8 | class SoftPooling2D(torch.nn.Module): 9 | def __init__(self,kernel_size,stride=None,padding=0): 10 | super(SoftPooling2D, self).__init__() 11 | self.avgpool = torch.nn.AvgPool2d(kernel_size,stride,padding, count_include_pad=False) 12 | def forward(self, x): 13 | x_exp = torch.exp(x) 14 | x_exp_pool = self.avgpool(x_exp) 15 | x = self.avgpool(x_exp*x) 16 | return x/x_exp_pool 17 | 18 | class LocalAttention(nn.Module): 19 | ''' attention based on local importance''' 20 | def __init__(self, channels, f=16): 21 | super().__init__() 22 | self.body = nn.Sequential( 23 | # sample importance 24 | nn.Conv2d(channels, f, 1), 25 | SoftPooling2D(7, stride=3), 26 | nn.Conv2d(f, f, kernel_size=3, stride=2, padding=1), 27 | nn.Conv2d(f, channels, 3, padding=1), 28 | # to heatmap 29 | nn.Sigmoid(), 30 | ) 31 | self.gate = nn.Sequential( 32 | nn.Sigmoid(), 33 | ) 34 | def forward(self, x): 35 | ''' forward ''' 36 | # interpolate the heat map 37 | g = self.gate(x[:,:1].clone()) 38 | w = F.interpolate(self.body(x), (x.size(2), x.size(3)), 
mode='bilinear', align_corners=False) 39 | 40 | return x * w * g #(w + g) #self.gate(x, w) 41 | 42 | if __name__ == '__main__': 43 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 44 | print(f"Using device: {device}") 45 | 46 | block = LocalAttention(channels=32).to(device) 47 | input = torch.rand(1, 32, 256, 256).to(device) 48 | 49 | output = block(input) 50 | print(input.shape) 51 | print(output.shape) 52 | -------------------------------------------------------------------------------- /3D/(IEEE 2024)SFFusion3d特征融合.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # 论文:A Multilevel Multimodal Fusion Transformer for Remote Sensing Semantic Segmentation 5 | # 全网最全100➕即插即用模块GitHub地址:https://github.com/ai-dawang/PlugNPlay-Modules 6 | class SqueezeAndExcitation3D(nn.Module): 7 | def __init__(self, channel, reduction=16, activation=nn.ReLU(inplace=True)): 8 | super(SqueezeAndExcitation3D, self).__init__() 9 | self.fc = nn.Sequential( 10 | nn.Conv3d(channel, channel // reduction, kernel_size=1), 11 | activation, 12 | nn.Conv3d(channel // reduction, channel, kernel_size=1), 13 | nn.Sigmoid() 14 | ) 15 | 16 | def forward(self, x): 17 | weighting = F.adaptive_avg_pool3d(x, 1) 18 | weighting = self.fc(weighting) 19 | y = x * weighting 20 | return y 21 | 22 | class SqueezeAndExciteFusionAdd3D(nn.Module): 23 | def __init__(self, channels_in, activation=nn.ReLU(inplace=True)): 24 | super(SqueezeAndExciteFusionAdd3D, self).__init__() 25 | 26 | self.se_1 = SqueezeAndExcitation3D(channels_in, activation=activation) 27 | self.se_2 = SqueezeAndExcitation3D(channels_in, activation=activation) 28 | 29 | def forward(self, se1, se2): 30 | se1 = self.se_1(se1) 31 | se2 = self.se_2(se2) 32 | out = se1 + se2 33 | return out 34 | 35 | # 示例用法 36 | if __name__ == "__main__": 37 | # 假设的输入数据 38 | input_1 = torch.randn(32, 64, 16, 128, 128) # 输入 B C D H W 39 | input_2 = torch.randn(32, 64, 16, 128, 128) # 同上 40 | 41 | # 打印输入数据的形状 42 | print(input_1.size()) # 输出: (32, 64, 16, 128, 128) 43 | print(input_2.size()) # 输出: (32, 64, 16, 128, 128) 44 | 45 | # 创建SqueezeAndExciteFusionAdd3D模块的实例 46 | block = SqueezeAndExciteFusionAdd3D(channels_in=64) 47 | 48 | # 将输入通过SqueezeAndExciteFusionAdd3D模块获得输出 49 | output = block(input_1, input_2) 50 | 51 | # 打印输出数据的形状 52 | print(output.size()) # 输出应该和输入形状相同: (32, 64, 16, 128, 128) 53 | -------------------------------------------------------------------------------- /注意力/(arxiv2023)ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | #GitHub地址:https://github.com/YOLOonMe/EMA-attention-module 4 | #论文地址:https://arxiv.org/abs/2305.13563v2 5 | class EMA(nn.Module): 6 | def __init__(self, channels, factor=8): 7 | super(EMA, self).__init__() 8 | self.groups = factor 9 | assert channels // self.groups > 0 10 | self.softmax = nn.Softmax(-1) 11 | self.agp = nn.AdaptiveAvgPool2d((1, 1)) 12 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 13 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 14 | self.gn = nn.GroupNorm(channels // self.groups, channels // self.groups) 15 | self.conv1x1 = nn.Conv2d(channels // self.groups, channels // self.groups, kernel_size=1, stride=1, padding=0) 16 | self.conv3x3 = nn.Conv2d(channels // self.groups, channels // self.groups, kernel_size=3, stride=1, padding=1) 17 | 18 | def forward(self, x): 19 | b, c, h, w = x.size() 20 | group_x = x.reshape(b * 
self.groups, -1, h, w) # b*g,c//g,h,w 21 | x_h = self.pool_h(group_x) 22 | x_w = self.pool_w(group_x).permute(0, 1, 3, 2) 23 | hw = self.conv1x1(torch.cat([x_h, x_w], dim=2)) 24 | x_h, x_w = torch.split(hw, [h, w], dim=2) 25 | x1 = self.gn(group_x * x_h.sigmoid() * x_w.permute(0, 1, 3, 2).sigmoid()) 26 | x2 = self.conv3x3(group_x) 27 | x11 = self.softmax(self.agp(x1).reshape(b * self.groups, -1, 1).permute(0, 2, 1)) 28 | x12 = x2.reshape(b * self.groups, c // self.groups, -1) # b*g, c//g, hw 29 | x21 = self.softmax(self.agp(x2).reshape(b * self.groups, -1, 1).permute(0, 2, 1)) 30 | x22 = x1.reshape(b * self.groups, c // self.groups, -1) # b*g, c//g, hw 31 | weights = (torch.matmul(x11, x12) + torch.matmul(x21, x22)).reshape(b * self.groups, 1, h, w) 32 | return (group_x * weights.sigmoid()).reshape(b, c, h, w) 33 | 34 | 35 | # 输入 B C H W, 输出 B C H W 36 | if __name__ == '__main__': 37 | block = EMA(64).cuda() 38 | input = torch.rand(1, 64, 64, 64).cuda() 39 | output = block(input) 40 | print(input.size(), output.size()) 41 | -------------------------------------------------------------------------------- /GAU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class TA(nn.Module): 5 | def __init__(self, T,ratio=2): 6 | 7 | super(TA, self).__init__() 8 | 9 | self.avg_pool = nn.AdaptiveAvgPool3d(1) 10 | self.max_pool = nn.AdaptiveMaxPool3d(1) 11 | self.sharedMLP = nn.Sequential( 12 | nn.Conv3d(T, T // ratio, 1, bias=False), 13 | nn.ReLU(), 14 | nn.Conv3d(T // ratio, T, 1,bias=False), 15 | ) 16 | self.sigmoid = nn.Sigmoid() 17 | 18 | def forward(self, x): 19 | avg = self.avg_pool(x) 20 | # B,T,C 21 | out1 = self.sharedMLP(avg) 22 | max = self.max_pool(x) 23 | # B,T,C 24 | out2 = self.sharedMLP(max) 25 | out = out1+out2 26 | 27 | return out 28 | 29 | # task classifictaion or generation 30 | class SCA(nn.Module): 31 | def __init__(self, in_planes, kerenel_size,ratio = 1): 32 | super(SCA, self).__init__() 33 | self.sharedMLP = nn.Sequential( 34 | nn.Conv2d(in_planes, in_planes // ratio, kerenel_size, padding='same', bias=False), 35 | nn.ReLU(), 36 | nn.Conv2d(in_planes // ratio, in_planes, kerenel_size, padding='same', bias=False),) 37 | def forward(self, x): 38 | b,t, c, h, w = x.shape 39 | x = x.flatten(0,1) 40 | x = self.sharedMLP(x) 41 | out = x.reshape(b,t, c, h, w) 42 | return out 43 | if __name__ == '__main__': 44 | 45 | block1 = TA(T=10) # 假设输入有10个时间步长 46 | print("TA模型结构:\n", block1) 47 | 48 | # 创建SCA模型 49 | block2 = SCA(in_planes=64, kerenel_size=3) # 假设输入通道数为64 50 | print("\nSCA模型结构:\n", block2) 51 | 52 | # 创建随机输入数据 53 | batch_size = 4 54 | time_steps = 10 55 | channels = 64 56 | height = 32 57 | width = 32 58 | input = torch.randn(batch_size, time_steps, channels, height, width) 59 | print("\n输入数据形状:", input.size()) 60 | 61 | # 测试TA模型 62 | output = block1(input) 63 | print("TA模型输出形状:", output.shape) 64 | 65 | # 测试SCA模型 66 | output2 = block2(input) 67 | print("SCA模型输出形状:", output2.shape) -------------------------------------------------------------------------------- /卷积/(ICCV 2021)CTR-GC(图卷积).py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | def conv_init(conv): 4 | if conv.weight is not None: 5 | nn.init.kaiming_normal_(conv.weight, mode='fan_out') 6 | if conv.bias is not None: 7 | nn.init.constant_(conv.bias, 0) 8 | # 论文:Channel-wise Topology Refinement Graph Convolution for Skeleton-Based Action Recognition 9 | # 
论文地址:https://ieeexplore.ieee.org/document/9710007 10 | 11 | class CTRGC(nn.Module): 12 | def __init__(self, in_channels, out_channels, rel_reduction=8, mid_reduction=1): 13 | super(CTRGC, self).__init__() 14 | self.in_channels = in_channels 15 | self.out_channels = out_channels 16 | if in_channels == 3 or in_channels == 9: 17 | self.rel_channels = 8 18 | self.mid_channels = 16 19 | else: 20 | self.rel_channels = in_channels // rel_reduction 21 | self.mid_channels = in_channels // mid_reduction 22 | self.conv1 = nn.Conv2d(self.in_channels, self.rel_channels, kernel_size=1) 23 | self.conv2 = nn.Conv2d(self.in_channels, self.rel_channels, kernel_size=1) 24 | self.conv3 = nn.Conv2d(self.in_channels, self.out_channels, kernel_size=1) 25 | self.conv4 = nn.Conv2d(self.rel_channels, self.out_channels, kernel_size=1) 26 | self.tanh = nn.Tanh() 27 | for m in self.modules(): 28 | if isinstance(m, nn.Conv2d): 29 | conv_init(m) 30 | elif isinstance(m, nn.BatchNorm2d): 31 | bn_init(m, 1) 32 | 33 | def forward(self, x, A=None, alpha=1): 34 | x1, x2, x3 = self.conv1(x).mean(-2), self.conv2(x).mean(-2), self.conv3(x) 35 | x1 = self.tanh(x1.unsqueeze(-1) - x2.unsqueeze(-2)) 36 | x1 = self.conv4(x1) * alpha + (A.unsqueeze(0).unsqueeze(0) if A is not None else 0) # N,C,V,V 37 | x1 = torch.einsum('ncuv,nctv->nctu', x1, x3) 38 | return x1 39 | 40 | 41 | 42 | if __name__ == '__main__': 43 | block = CTRGC(in_channels=64, out_channels=64) 44 | input = torch.rand(32, 64, 9, 9) 45 | output = block(input) 46 | print(input.size()) 47 | print(output.size()) 48 | -------------------------------------------------------------------------------- /1D模块/(ICCV 2023)EAA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import einops 4 | 5 | # 论文:SwiftFormer: Efficient Additive Attention for Transformer-based Real-time Mobile Vision Applications[ICCV'23] 6 | # 论文:https://openaccess.thecvf.com/content/ICCV2023/papers/Shaker_SwiftFormer_Efficient_Additive_Attention_for_Transformer-based_Real-time_Mobile_Vision_Applications_ICCV_2023_paper.pdf 7 | 8 | class EfficientAdditiveAttention(nn.Module): 9 | 10 | 11 | def __init__(self, in_dims, token_dim, num_heads=1): 12 | super().__init__() 13 | 14 | self.to_query = nn.Linear(in_dims, token_dim * num_heads) 15 | self.to_key = nn.Linear(in_dims, token_dim * num_heads) 16 | 17 | self.w_g = nn.Parameter(torch.randn(token_dim * num_heads, 1)) 18 | self.scale_factor = token_dim ** -0.5 19 | self.Proj = nn.Linear(token_dim * num_heads, token_dim * num_heads) 20 | self.final = nn.Linear(token_dim * num_heads, token_dim) 21 | 22 | def forward(self, x): 23 | query = self.to_query(x) 24 | key = self.to_key(x) 25 | 26 | query = torch.nn.functional.normalize(query, dim=-1) #BxNxD 27 | key = torch.nn.functional.normalize(key, dim=-1) #BxNxD 28 | 29 | query_weight = query @ self.w_g # BxNx1 (BxNxD @ Dx1) 30 | A = query_weight * self.scale_factor # BxNx1 31 | 32 | A = torch.nn.functional.normalize(A, dim=1) # BxNx1 33 | 34 | G = torch.sum(A * query, dim=1) # BxD 35 | 36 | G = einops.repeat( 37 | G, "b d -> b repeat d", repeat=key.shape[1] 38 | ) # BxNxD 39 | 40 | out = self.Proj(G * key) + query #BxNxD 41 | 42 | out = self.final(out) # BxNxD 43 | 44 | return out 45 | 46 | 47 | if __name__ == '__main__': 48 | # 假设输入维度为512,token维度为512,头数为1 49 | attention_layer = EfficientAdditiveAttention(in_dims=512, token_dim=512) 50 | 51 | # 创建一个随机输入张量,形状为[B, N, D] 52 | B, N, D = 1, 10, 512 53 | x = torch.randn(B, N, D) 54 | 55 | # 
通过注意力层传递输入 56 | output = attention_layer(x) 57 | 58 | # 打印输入和输出的形状 59 | print("输入形状:", x.shape) 60 | print("输出形状:", output.shape) -------------------------------------------------------------------------------- /1D模块/(KDD 2020)CorNet(NLP).py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # 论文:Correlation Networks for Extreme Multi-label Text Classification 5 | 6 | 7 | ACT2FN = {'elu': F.elu, 'relu': F.relu, 'sigmoid': torch.sigmoid, 'tanh': torch.tanh} 8 | 9 | 10 | class CorNetBlock(nn.Module): 11 | def __init__(self, context_size, output_size, cornet_act='sigmoid', **kwargs): 12 | super(CorNetBlock, self).__init__() 13 | self.dstbn2cntxt = nn.Linear(output_size, context_size) 14 | self.cntxt2dstbn = nn.Linear(context_size, output_size) 15 | self.act_fn = ACT2FN[cornet_act] 16 | 17 | def forward(self, output_dstrbtn): 18 | identity_logits = output_dstrbtn 19 | output_dstrbtn = self.act_fn(output_dstrbtn) 20 | context_vector = self.dstbn2cntxt(output_dstrbtn) 21 | context_vector = F.elu(context_vector) 22 | output_dstrbtn = self.cntxt2dstbn(context_vector) 23 | output_dstrbtn = output_dstrbtn + identity_logits 24 | return output_dstrbtn 25 | 26 | 27 | class CorNet(nn.Module): 28 | def __init__(self, output_size, cornet_dim=100, n_cornet_blocks=2, **kwargs): 29 | super(CorNet, self).__init__() 30 | self.intlv_layers = nn.ModuleList( 31 | [CorNetBlock(cornet_dim, output_size, **kwargs) for _ in range(n_cornet_blocks)]) 32 | for layer in self.intlv_layers: 33 | nn.init.xavier_uniform_(layer.dstbn2cntxt.weight) 34 | nn.init.xavier_uniform_(layer.cntxt2dstbn.weight) 35 | 36 | def forward(self, logits): 37 | for layer in self.intlv_layers: 38 | logits = layer(logits) 39 | return logits 40 | 41 | 42 | if __name__ == '__main__': 43 | output_size = 10 44 | cornet_dim = 100 45 | n_cornet_blocks = 2 46 | cornet_act = 'relu' 47 | 48 | model = CorNet(output_size=output_size, cornet_dim=cornet_dim, n_cornet_blocks=n_cornet_blocks) 49 | 50 | input_tensor = torch.rand(4, output_size) 51 | 52 | output = model(input_tensor) 53 | 54 | # 打印输入和输出的尺寸 55 | print("Input size :", input_tensor.size()) 56 | print("Output size:", output.size()) 57 | -------------------------------------------------------------------------------- /(arXiv 2021) AFT.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/2105.14103v1 7 | # 论文:An Attention Free Transformer 8 | 9 | 10 | class AFT_FULL(nn.Module): 11 | 12 | def __init__(self, d_model,n=49,simple=False): 13 | 14 | super(AFT_FULL, self).__init__() 15 | self.fc_q = nn.Linear(d_model, d_model) 16 | self.fc_k = nn.Linear(d_model, d_model) 17 | self.fc_v = nn.Linear(d_model,d_model) 18 | if(simple): 19 | self.position_biases=torch.zeros((n,n)) 20 | else: 21 | self.position_biases=nn.Parameter(torch.ones((n,n))) 22 | self.d_model = d_model 23 | self.n=n 24 | self.sigmoid=nn.Sigmoid() 25 | 26 | self.init_weights() 27 | 28 | 29 | def init_weights(self): 30 | for m in self.modules(): 31 | if isinstance(m, nn.Conv2d): 32 | init.kaiming_normal_(m.weight, mode='fan_out') 33 | if m.bias is not None: 34 | init.constant_(m.bias, 0) 35 | elif isinstance(m, nn.BatchNorm2d): 36 | init.constant_(m.weight, 1) 37 | init.constant_(m.bias, 0) 38 | elif isinstance(m, nn.Linear): 39 | init.normal_(m.weight, std=0.001) 40 | if 
m.bias is not None: 41 | init.constant_(m.bias, 0) 42 | 43 | def forward(self, input): 44 | 45 | bs, n,dim = input.shape 46 | 47 | q = self.fc_q(input) #bs,n,dim 48 | k = self.fc_k(input).view(1,bs,n,dim) #1,bs,n,dim 49 | v = self.fc_v(input).view(1,bs,n,dim) #1,bs,n,dim 50 | 51 | numerator=torch.sum(torch.exp(k+self.position_biases.view(n,1,-1,1))*v,dim=2) #n,bs,dim 52 | denominator=torch.sum(torch.exp(k+self.position_biases.view(n,1,-1,1)),dim=2) #n,bs,dim 53 | 54 | out=(numerator/denominator) #n,bs,dim 55 | out=self.sigmoid(q)*(out.permute(1,0,2)) #bs,n,dim 56 | 57 | return out 58 | 59 | 60 | if __name__ == '__main__': 61 | input=torch.randn(50,49,512) 62 | block = AFT_FULL(d_model=512, n=49) 63 | output=block(input) 64 | print(output.shape) 65 | 66 | -------------------------------------------------------------------------------- /(ECCV2024)SMFA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | #GitHub地址: https://github.com/Zheng-MJ/SMFANet 5 | #论文地址:https://openaccess.thecvf.com/content/ICCV2023/papers/Sun_Spatially-Adaptive_Feature_Modulation_for_Efficient_Image_Super-Resolution_ICCV_2023_paper.pdf 6 | class DMlp(nn.Module): 7 | def __init__(self, dim, growth_rate=2.0): 8 | super().__init__() 9 | hidden_dim = int(dim * growth_rate) 10 | self.conv_0 = nn.Sequential( 11 | nn.Conv2d(dim, hidden_dim, 3, 1, 1, groups=dim), 12 | nn.Conv2d(hidden_dim, hidden_dim, 1, 1, 0) 13 | ) 14 | self.act = nn.GELU() 15 | self.conv_1 = nn.Conv2d(hidden_dim, dim, 1, 1, 0) 16 | 17 | def forward(self, x): 18 | x = self.conv_0(x) 19 | x = self.act(x) 20 | x = self.conv_1(x) 21 | return x 22 | 23 | 24 | class SMFA(nn.Module): 25 | def __init__(self, dim=36): 26 | super(SMFA, self).__init__() 27 | self.linear_0 = nn.Conv2d(dim, dim * 2, 1, 1, 0) 28 | self.linear_1 = nn.Conv2d(dim, dim, 1, 1, 0) 29 | self.linear_2 = nn.Conv2d(dim, dim, 1, 1, 0) 30 | 31 | self.lde = DMlp(dim, 2) 32 | 33 | self.dw_conv = nn.Conv2d(dim, dim, 3, 1, 1, groups=dim) 34 | 35 | self.gelu = nn.GELU() 36 | self.down_scale = 8 37 | 38 | self.alpha = nn.Parameter(torch.ones((1, dim, 1, 1))) 39 | self.belt = nn.Parameter(torch.zeros((1, dim, 1, 1))) 40 | 41 | def forward(self, f): 42 | _, _, h, w = f.shape 43 | y, x = self.linear_0(f).chunk(2, dim=1) 44 | x_s = self.dw_conv(F.adaptive_max_pool2d(x, (h // self.down_scale, w // self.down_scale))) 45 | x_v = torch.var(x, dim=(-2, -1), keepdim=True) 46 | x_l = x * F.interpolate(self.gelu(self.linear_1(x_s * self.alpha + x_v * self.belt)), size=(h, w), 47 | mode='nearest') 48 | y_d = self.lde(y) 49 | return self.linear_2(x_l + y_d) 50 | 51 | 52 | if __name__ == '__main__': 53 | input = torch.randn(3, 36, 64, 64) # 输入b c h w 54 | 55 | block = SMFA(dim=36) 56 | output = block(input) 57 | print(output.size()) 58 | -------------------------------------------------------------------------------- /(TPAMI 2022) ViP.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | # 论文地址:https://arxiv.org/pdf/2106.12368 5 | # 论文:Vision Permutator: A Permutable MLP-Like Architecture for Visual Recognition 6 | 7 | 8 | class MLP(nn.Module): 9 | def __init__(self,in_features,hidden_features,out_features,act_layer=nn.GELU,drop=0.1): 10 | super().__init__() 11 | self.fc1=nn.Linear(in_features,hidden_features) 12 | self.act=act_layer() 13 | self.fc2=nn.Linear(hidden_features,out_features) 14 | self.drop=nn.Dropout(drop) 15 | 
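    # Shape note (added): WeightedPermuteMLP below expects channels-last input of shape
    # (B, H, W, C); it splits C into `seg_dim` segments, mixes along height and width,
    # and the reweighting MLP (C -> C/4 -> 3C) produces one softmax weight per branch
    # (channel / width / height) before the final projection.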
16 | def forward(self, x) : 17 | return self.drop(self.fc2(self.drop(self.act(self.fc1(x))))) 18 | 19 | class WeightedPermuteMLP(nn.Module): 20 | def __init__(self,dim,seg_dim=8, qkv_bias=False, proj_drop=0.): 21 | super().__init__() 22 | self.seg_dim=seg_dim 23 | 24 | self.mlp_c=nn.Linear(dim,dim,bias=qkv_bias) 25 | self.mlp_h=nn.Linear(dim,dim,bias=qkv_bias) 26 | self.mlp_w=nn.Linear(dim,dim,bias=qkv_bias) 27 | 28 | self.reweighting=MLP(dim,dim//4,dim*3) 29 | 30 | self.proj=nn.Linear(dim,dim) 31 | self.proj_drop=nn.Dropout(proj_drop) 32 | 33 | def forward(self,x) : 34 | B,H,W,C=x.shape 35 | 36 | c_embed=self.mlp_c(x) 37 | 38 | S=C//self.seg_dim 39 | h_embed=x.reshape(B,H,W,self.seg_dim,S).permute(0,3,2,1,4).reshape(B,self.seg_dim,W,H*S) 40 | h_embed=self.mlp_h(h_embed).reshape(B,self.seg_dim,W,H,S).permute(0,3,2,1,4).reshape(B,H,W,C) 41 | 42 | w_embed=x.reshape(B,H,W,self.seg_dim,S).permute(0,3,1,2,4).reshape(B,self.seg_dim,H,W*S) 43 | w_embed=self.mlp_w(w_embed).reshape(B,self.seg_dim,H,W,S).permute(0,2,3,1,4).reshape(B,H,W,C) 44 | 45 | weight=(c_embed+h_embed+w_embed).permute(0,3,1,2).flatten(2).mean(2) 46 | weight=self.reweighting(weight).reshape(B,C,3).permute(2,0,1).softmax(0).unsqueeze(2).unsqueeze(2) 47 | 48 | x=c_embed*weight[0]+w_embed*weight[1]+h_embed*weight[2] 49 | 50 | x=self.proj_drop(self.proj(x)) 51 | 52 | return x 53 | 54 | 55 | 56 | if __name__ == '__main__': 57 | input=torch.randn(64,8,8,512) 58 | seg_dim=8 59 | block=WeightedPermuteMLP(512,seg_dim) 60 | out=block(input) 61 | print(out.shape) 62 | -------------------------------------------------------------------------------- /MLAttention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # 论文:AttentionXML: Label Tree-based Attention-Aware Deep Model for High-Performance Extreme Multi-Label Text Classification 5 | # 论文地址:https://arxiv.org/pdf/1811.01727.pdf 6 | 7 | class MLAttention(nn.Module): 8 | def __init__(self, hidden_size): 9 | super(MLAttention, self).__init__() 10 | self.attention = nn.Linear(hidden_size, hidden_size, bias=False) 11 | nn.init.xavier_uniform_(self.attention.weight) 12 | 13 | def forward(self, inputs, masks): 14 | masks = torch.unsqueeze(masks, 2) # N, L, 1 15 | attention_scores = self.attention(inputs) # N, L, hidden_size 16 | attention = F.softmax(attention_scores, dim=1) # N, L, hidden_size 17 | attention_masked = attention * masks # apply the mask 18 | return attention_masked 19 | 20 | class FastMLAttention(nn.Module): 21 | def __init__(self, hidden_size): 22 | super(FastMLAttention, self).__init__() 23 | self.attention_dim = hidden_size # Make sure this is same as your inputs dimension 24 | self.attention = nn.Linear(self.attention_dim, self.attention_dim) 25 | nn.init.xavier_uniform_(self.attention.weight) 26 | 27 | def forward(self, inputs, masks, attn_weights: nn.Module): 28 | masks = masks.unsqueeze(2) # N, L, 1 29 | attention_scores = self.attention(inputs) # N, L, hidden_size 30 | attention = F.softmax(attention_scores, dim=1) # Softmax over L dimension 31 | attention = attention * masks # Apply mask 32 | attention_masked = attention_scores * attention # Apply attention 33 | return attention_masked 34 | 35 | if __name__ == '__main__': 36 | 37 | batch_size = 8 38 | seq_len = 10 39 | hidden_size = 8 40 | inputs = torch.randn(batch_size, seq_len, hidden_size) 41 | masks = torch.ones(batch_size, seq_len) 42 | 43 | ml_attention = MLAttention(hidden_size) 44 | outputs_ml = 
ml_attention(inputs, masks) 45 | print(outputs_ml.size()) 46 | 47 | fast_ml_attention = FastMLAttention(hidden_size) 48 | outputs_fastml = fast_ml_attention(inputs, masks, None) 49 | print(outputs_fastml.size()) 50 | -------------------------------------------------------------------------------- /SWA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | #论文:DAU-Net: Dual attention-aided U-Net for segmenting tumor in breast ultrasound images 5 | #论文地址:https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0303670 6 | class SWA(nn.Module): 7 | def __init__(self, in_channels, n_heads=8, window_size=7): 8 | super(SWA, self).__init__() 9 | self.in_channels = in_channels 10 | self.n_heads = n_heads 11 | self.window_size = window_size 12 | 13 | self.query_conv = nn.Conv2d(in_channels, in_channels, kernel_size=1) 14 | self.key_conv = nn.Conv2d(in_channels, in_channels, kernel_size=1) 15 | self.value_conv = nn.Conv2d(in_channels, in_channels, kernel_size=1) 16 | self.gamma = nn.Parameter(torch.zeros(1)) 17 | self.softmax = nn.Softmax(dim=-1) 18 | 19 | def forward(self, x): 20 | batch_size, C, height, width = x.size() 21 | padded_x = F.pad(x, [self.window_size // 2, self.window_size // 2, self.window_size // 2, self.window_size // 2], mode='reflect') 22 | 23 | proj_query = self.query_conv(x).view(batch_size, self.n_heads, C // self.n_heads, height * width) 24 | proj_key = self.key_conv(padded_x).unfold(2, self.window_size, 1).unfold(3, self.window_size, 1) 25 | proj_key = proj_key.permute(0, 1, 4, 5, 2, 3).contiguous().view(batch_size, self.n_heads, C // self.n_heads, -1) 26 | proj_value = self.value_conv(padded_x).unfold(2, self.window_size, 1).unfold(3, self.window_size, 1) 27 | proj_value = proj_value.permute(0, 1, 4, 5, 2, 3).contiguous().view(batch_size, self.n_heads, C // self.n_heads, -1) 28 | 29 | energy = torch.matmul(proj_query.permute(0, 1, 3, 2), proj_key) 30 | attention = self.softmax(energy) 31 | 32 | out_window = torch.matmul(attention, proj_value.permute(0, 1, 3, 2)) 33 | out_window = out_window.permute(0, 1, 3, 2).contiguous().view(batch_size, C, height, width) 34 | 35 | out = self.gamma * out_window + x 36 | return out 37 | 38 | if __name__ == '__main__': 39 | 40 | input = torch.randn(1, 64, 32, 32) 41 | block = SWA(in_channels=64) 42 | print(input.size()) 43 | output = block(input) 44 | print(output.size()) 45 | -------------------------------------------------------------------------------- /(arXiv 2021) MobileViTv2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/2206.02680 7 | # 论文:Separable Self-attention for Mobile Vision Transformers 8 | 9 | 10 | class MobileViTv2Attention(nn.Module): 11 | ''' 12 | Scaled dot-product attention 13 | ''' 14 | 15 | def __init__(self, d_model): 16 | ''' 17 | :param d_model: Output dimensionality of the model 18 | :param d_k: Dimensionality of queries and keys 19 | :param d_v: Dimensionality of values 20 | :param h: Number of heads 21 | ''' 22 | super(MobileViTv2Attention, self).__init__() 23 | self.fc_i = nn.Linear(d_model,1) 24 | self.fc_k = nn.Linear(d_model, d_model) 25 | self.fc_v = nn.Linear(d_model, d_model) 26 | self.fc_o = nn.Linear(d_model, d_model) 27 | 28 | self.d_model = d_model 29 | self.init_weights() 30 | 31 | 32 | def init_weights(self): 33 
| for m in self.modules(): 34 | if isinstance(m, nn.Conv2d): 35 | init.kaiming_normal_(m.weight, mode='fan_out') 36 | if m.bias is not None: 37 | init.constant_(m.bias, 0) 38 | elif isinstance(m, nn.BatchNorm2d): 39 | init.constant_(m.weight, 1) 40 | init.constant_(m.bias, 0) 41 | elif isinstance(m, nn.Linear): 42 | init.normal_(m.weight, std=0.001) 43 | if m.bias is not None: 44 | init.constant_(m.bias, 0) 45 | 46 | def forward(self, input): 47 | ''' 48 | Computes 49 | :param queries: Queries (b_s, nq, d_model) 50 | :return: 51 | ''' 52 | i = self.fc_i(input) #(bs,nq,1) 53 | weight_i = torch.softmax(i, dim=1) #bs,nq,1 54 | context_score = weight_i * self.fc_k(input) #bs,nq,d_model 55 | context_vector = torch.sum(context_score,dim=1,keepdim=True) #bs,1,d_model 56 | v = self.fc_v(input) * context_vector #bs,nq,d_model 57 | out = self.fc_o(v) #bs,nq,d_model 58 | 59 | return out 60 | 61 | 62 | if __name__ == '__main__': 63 | input=torch.randn(50,49,512) 64 | block = MobileViTv2Attention(d_model=512) 65 | output=block(input) 66 | print(output.size()) 67 | 68 | -------------------------------------------------------------------------------- /FCA.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import nn 4 | 5 | #论文:Unsupervised Bidirectional Contrastive Reconstruction and Adaptive Fine-Grained Channel Attention Networks for image dehazing 6 | #论文地址:https://www.sciencedirect.com/science/article/abs/pii/S0893608024002387 7 | 8 | class Mix(nn.Module): 9 | def __init__(self, m=-0.80): 10 | super(Mix, self).__init__() 11 | w = torch.nn.Parameter(torch.FloatTensor([m]), requires_grad=True) 12 | w = torch.nn.Parameter(w, requires_grad=True) 13 | self.w = w 14 | self.mix_block = nn.Sigmoid() 15 | 16 | def forward(self, fea1, fea2): 17 | mix_factor = self.mix_block(self.w) 18 | out = fea1 * mix_factor.expand_as(fea1) + fea2 * (1 - mix_factor.expand_as(fea2)) 19 | return out 20 | 21 | #Adaptive Fine-Grained Channel Attention (FCA) 22 | class FCAttention(nn.Module): 23 | def __init__(self,channel,b=1, gamma=2): 24 | super(FCAttention, self).__init__() 25 | self.avg_pool = nn.AdaptiveAvgPool2d(1)#全局平均池化 26 | #一维卷积 27 | t = int(abs((math.log(channel, 2) + b) / gamma)) 28 | k = t if t % 2 else t + 1 29 | self.conv1 = nn.Conv1d(1, 1, kernel_size=k, padding=int(k / 2), bias=False) 30 | self.fc = nn.Conv2d(channel, channel, 1, padding=0, bias=True) 31 | self.sigmoid = nn.Sigmoid() 32 | self.mix = Mix() 33 | 34 | 35 | def forward(self, input): 36 | x = self.avg_pool(input) 37 | x1 = self.conv1(x.squeeze(-1).transpose(-1, -2)).transpose(-1, -2)#(1,64,1) 38 | x2 = self.fc(x).squeeze(-1).transpose(-1, -2)#(1,1,64) 39 | out1 = torch.sum(torch.matmul(x1,x2),dim=1).unsqueeze(-1).unsqueeze(-1)#(1,64,1,1) 40 | out1 = self.sigmoid(out1) 41 | out2 = torch.sum(torch.matmul(x2.transpose(-1, -2),x1.transpose(-1, -2)),dim=1).unsqueeze(-1).unsqueeze(-1) 42 | 43 | out2 = self.sigmoid(out2) 44 | out = self.mix(out1,out2) 45 | out = self.conv1(out.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 46 | out = self.sigmoid(out) 47 | 48 | return input*out 49 | 50 | if __name__ == '__main__': 51 | input = torch.rand(1,64,256,256) 52 | block = FCAttention(channel=64) 53 | output = block(input) 54 | print(output.size()) 55 | 56 | -------------------------------------------------------------------------------- /DPTAM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 
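# Usage note (added): DPTAM is a temporal attention block; frames are stacked along the
# batch axis, so the expected input shape is (N*T, C, H, W) with T == n_segment, and the
# output keeps that shape (see the __main__ example: n_segment=16, input (16, 4, 16, 16)).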
3 | import torch.nn.functional as F 4 | import torchvision 5 | 6 | 7 | 8 | class DPTAM(nn.Module): 9 | def __init__(self, 10 | in_channels, 11 | n_segment, 12 | kernel_size=3, 13 | stride=1, 14 | padding=1): 15 | super(DPTAM, self).__init__() 16 | self.in_channels = in_channels 17 | self.n_segment = n_segment 18 | self.kernel_size = kernel_size 19 | self.stride = stride 20 | self.padding = padding 21 | print('DPTAM with kernel_size {}.'.format(kernel_size)) 22 | 23 | self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=3)#context Modeling 24 | self.softmax = nn.Softmax(dim=2) 25 | self.p1_conv1= nn.Conv1d(in_channels , in_channels, 1, bias=False) 26 | 27 | 28 | self.dptam = nn.Sequential( 29 | nn.Conv1d(in_channels, 30 | in_channels // 4, 31 | kernel_size, 32 | stride=1, 33 | padding=kernel_size // 2, 34 | bias=False), nn.BatchNorm1d(in_channels // 4), 35 | nn.ReLU(inplace=True), 36 | nn.Conv1d(in_channels // 4, in_channels, 1, bias=False), 37 | nn.Sigmoid()) 38 | 39 | def forward(self, x): 40 | nt, c, h, w = x.size() 41 | 42 | t = self.n_segment 43 | n_batch = nt // t 44 | new_x = x.view(n_batch, t, c, h, w).permute(0, 2, 1, 3,4).contiguous() 45 | out = F.adaptive_avg_pool2d(new_x.view(n_batch * c, t, h, w), (1, 1)) 46 | 47 | x_22=out.view(-1,c,t) 48 | x22_c_t = self.p1_conv1(x_22) 49 | x22 =x_22.mean(2,keepdim=True) 50 | x22 = self.p1_conv1(x22) 51 | x22 = x22_c_t * x22 52 | x22= x_22+x22 53 | 54 | local_activation = self.dptam(x22).view(n_batch, c, t, 1, 1) 55 | new_x = new_x * local_activation 56 | 57 | out = new_x.view(n_batch, c, t, h, w) #光local 58 | out = out.permute(0, 2, 1, 3, 4).contiguous().view(nt, c, h, w) 59 | 60 | return out 61 | 62 | 63 | 64 | if __name__ == '__main__': 65 | n_segment = 16 66 | 67 | block = DPTAM(in_channels=4, n_segment=n_segment) 68 | input = torch.rand(16, 4, 16, 16) 69 | output = block(input) 70 | print(input.size()) 71 | print(output.size()) 72 | 73 | -------------------------------------------------------------------------------- /(ICLR 2023)ContraNorm(对比归一化层).py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | # 论文:ContraNorm: A Contrastive Learning Perspective on Oversmoothing and Beyond 4 | # 论文地址:https://ar5iv.labs.arxiv.org/html/2303.06562 5 | 6 | class ContraNorm(nn.Module): 7 | def __init__(self, dim, scale=0.1, dual_norm=False, pre_norm=False, temp=1.0, learnable=False, positive=False, identity=False): 8 | super().__init__() 9 | if learnable and scale > 0: 10 | import math 11 | if positive: 12 | scale_init = math.log(scale) 13 | else: 14 | scale_init = scale 15 | self.scale_param = nn.Parameter(torch.empty(dim).fill_(scale_init)) 16 | self.dual_norm = dual_norm 17 | self.scale = scale 18 | self.pre_norm = pre_norm 19 | self.temp = temp 20 | self.learnable = learnable 21 | self.positive = positive 22 | self.identity = identity 23 | 24 | self.layernorm = nn.LayerNorm(dim, eps=1e-6) 25 | 26 | def forward(self, x): 27 | if self.scale > 0.0: 28 | xn = nn.functional.normalize(x, dim=2) 29 | if self.pre_norm: 30 | x = xn 31 | sim = torch.bmm(xn, xn.transpose(1,2)) / self.temp 32 | if self.dual_norm: 33 | sim = nn.functional.softmax(sim, dim=2) + nn.functional.softmax(sim, dim=1) 34 | else: 35 | sim = nn.functional.softmax(sim, dim=2) 36 | x_neg = torch.bmm(sim, x) 37 | if not self.learnable: 38 | if self.identity: 39 | x = (1+self.scale) * x - self.scale * x_neg 40 | else: 41 | x = x - self.scale * x_neg 42 | else: 43 | scale = torch.exp(self.scale_param) if 
self.positive else self.scale_param 44 | scale = scale.view(1, 1, -1) 45 | if self.identity: 46 | x = scale * x - scale * x_neg 47 | else: 48 | x = x - scale * x_neg 49 | x = self.layernorm(x) 50 | return x 51 | 52 | 53 | if __name__ == '__main__': 54 | block = ContraNorm(dim=128, scale=0.1, dual_norm=False, pre_norm=False, temp=1.0, learnable=False, positive=False, identity=False) 55 | input = torch.rand(32, 784, 128) 56 | output = block(input) 57 | print("Input size:", input.size()) 58 | print("Output size:", output.size()) 59 | -------------------------------------------------------------------------------- /LGAG.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | # 论文:EMCAD: Efficient Multi-scale Convolutional Attention Decoding for Medical Image Segmentation, CVPR2024 4 | # 论文地址:https://arxiv.org/pdf/2405.06880 5 | 6 | 7 | def act_layer(act, inplace=False, neg_slope=0.2, n_prelu=1): 8 | # activation layer 9 | act = act.lower() 10 | if act == 'relu': 11 | layer = nn.ReLU(inplace) 12 | elif act == 'relu6': 13 | layer = nn.ReLU6(inplace) 14 | elif act == 'leakyrelu': 15 | layer = nn.LeakyReLU(neg_slope, inplace) 16 | elif act == 'prelu': 17 | layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope) 18 | elif act == 'gelu': 19 | layer = nn.GELU() 20 | elif act == 'hswish': 21 | layer = nn.Hardswish(inplace) 22 | else: 23 | raise NotImplementedError('activation layer [%s] is not found' % act) 24 | return layer 25 | 26 | class LGAG(nn.Module): 27 | def __init__(self, F_g, F_l, F_int=16, kernel_size=3, groups=1, activation='relu'): 28 | super(LGAG, self).__init__() 29 | 30 | if kernel_size == 1: 31 | groups = 1 32 | self.W_g = nn.Sequential( 33 | nn.Conv2d(F_g, F_int, kernel_size=kernel_size, stride=1, padding=kernel_size // 2, groups=groups, 34 | bias=True), 35 | nn.BatchNorm2d(F_int) 36 | ) 37 | self.W_x = nn.Sequential( 38 | nn.Conv2d(F_l, F_int, kernel_size=kernel_size, stride=1, padding=kernel_size // 2, groups=groups, 39 | bias=True), 40 | nn.BatchNorm2d(F_int) 41 | ) 42 | self.psi = nn.Sequential( 43 | nn.Conv2d(F_int, 1, kernel_size=1, stride=1, padding=0, bias=True), 44 | nn.BatchNorm2d(1), 45 | nn.Sigmoid() 46 | ) 47 | self.activation = act_layer(activation, inplace=True) 48 | 49 | 50 | def forward(self, g, x): 51 | g1 = self.W_g(g) 52 | x1 = self.W_x(x) 53 | psi = self.activation(g1 + x1) 54 | psi = self.psi(psi) 55 | 56 | return x * psi 57 | 58 | 59 | if __name__ == '__main__': 60 | # 示例输入 61 | g = torch.randn(1, 32, 64, 64) 62 | x = torch.randn(1, 64, 64, 64) 63 | 64 | # 实例化LGAG 65 | lgag = LGAG(F_g=32, F_l=64) 66 | 67 | # 打印输入的shape 68 | print("输入 g 的 shape:", g.shape) 69 | print("输入 x 的 shape:", x.shape) 70 | 71 | # 前向传播并打印输出的shape 72 | output = lgag(g, x) 73 | print("输出的 shape:", output.shape) -------------------------------------------------------------------------------- /特征融合/(ICMR 2022)CMF_Block(多模态融合).py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.fft 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | # 论文:M2TR: Multi-modal Multi-scale Transformers for Deepfake Detection 8 | # 论文地址:https://arxiv.org/pdf/2104.09770 9 | class CMA_Block(nn.Module): 10 | def __init__(self, in_channel, hidden_channel, out_channel): 11 | super(CMA_Block, self).__init__() 12 | 13 | self.conv1 = nn.Conv2d( 14 | in_channel, hidden_channel, kernel_size=1, stride=1, padding=0 15 | ) 16 | self.conv2 = nn.Conv2d( 17 
| in_channel, hidden_channel, kernel_size=1, stride=1, padding=0 18 | ) 19 | self.conv3 = nn.Conv2d( 20 | in_channel, hidden_channel, kernel_size=1, stride=1, padding=0 21 | ) 22 | 23 | self.scale = hidden_channel ** -0.5 24 | 25 | self.conv4 = nn.Sequential( 26 | nn.Conv2d( 27 | hidden_channel, out_channel, kernel_size=1, stride=1, padding=0 28 | ), 29 | nn.BatchNorm2d(out_channel), 30 | nn.LeakyReLU(0.2, inplace=True), 31 | ) 32 | 33 | def forward(self, rgb, freq): 34 | _, _, h, w = rgb.size() 35 | 36 | q = self.conv1(rgb) 37 | k = self.conv2(freq) 38 | v = self.conv3(freq) 39 | 40 | q = q.view(q.size(0), q.size(1), q.size(2) * q.size(3)).transpose( 41 | -2, -1 42 | ) 43 | k = k.view(k.size(0), k.size(1), k.size(2) * k.size(3)) 44 | 45 | attn = torch.matmul(q, k) * self.scale 46 | m = attn.softmax(dim=-1) 47 | 48 | v = v.view(v.size(0), v.size(1), v.size(2) * v.size(3)).transpose( 49 | -2, -1 50 | ) 51 | z = torch.matmul(m, v) 52 | z = z.view(z.size(0), h, w, -1) 53 | z = z.permute(0, 3, 1, 2).contiguous() 54 | 55 | output = rgb + self.conv4(z) 56 | 57 | return output 58 | 59 | 60 | if __name__ == '__main__': 61 | in_channel = 64 62 | hidden_channel = 32 63 | out_channel = 64 64 | h = 64 65 | w = 64 66 | 67 | block = CMA_Block(in_channel, hidden_channel, out_channel) 68 | 69 | rgb_input = torch.rand(1, in_channel, h, w) 70 | freq_input = torch.rand(1, in_channel, h, w) 71 | 72 | output = block(rgb_input, freq_input) 73 | 74 | print("RGB Input size:", rgb_input.size()) 75 | print("Freq Input size:", freq_input.size()) 76 | print("Output size:", output.size()) 77 | -------------------------------------------------------------------------------- /图像超分/SGFN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | # 论文地址:https://arxiv.org/pdf/2308.03364 4 | # 论文:Dual Aggregation Transformer for Image Super-Resolution, ICCV 2023 5 | class SpatialGate(nn.Module): 6 | 7 | def __init__(self, dim): 8 | super().__init__() 9 | self.norm = nn.LayerNorm(dim) 10 | self.conv = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, groups=dim) # DW Conv 11 | 12 | def forward(self, x, H, W): 13 | # Split 14 | x1, x2 = x.chunk(2, dim = -1) 15 | B, N, C = x.shape 16 | x2 = self.conv(self.norm(x2).transpose(1, 2).contiguous().view(B, C//2, H, W)).flatten(2).transpose(-1, -2).contiguous() 17 | 18 | return x1 * x2 19 | 20 | class SGFN(nn.Module): 21 | """ Spatial-Gate Feed-Forward Network. 22 | Args: 23 | in_features (int): Number of input channels. 24 | hidden_features (int | None): Number of hidden channels. Default: None 25 | out_features (int | None): Number of output channels. Default: None 26 | act_layer (nn.Module): Activation layer. Default: nn.GELU 27 | drop (float): Dropout rate. 
Default: 0.0 28 | """ 29 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): 30 | super().__init__() 31 | out_features = out_features or in_features 32 | hidden_features = hidden_features or in_features 33 | self.fc1 = nn.Linear(in_features, hidden_features) 34 | self.act = act_layer() 35 | self.sg = SpatialGate(hidden_features//2) 36 | self.fc2 = nn.Linear(hidden_features//2, out_features) 37 | self.drop = nn.Dropout(drop) 38 | 39 | def forward(self, x, H, W): 40 | """ 41 | Input: x: (B, H*W, C), H, W 42 | Output: x: (B, H*W, C) 43 | """ 44 | x = self.fc1(x) 45 | x = self.act(x) 46 | x = self.drop(x) 47 | 48 | x = self.sg(x, H, W) 49 | x = self.drop(x) 50 | 51 | x = self.fc2(x) 52 | x = self.drop(x) 53 | return x 54 | 55 | 56 | if __name__ == '__main__': 57 | # 定义输入参数 58 | batch_size = 1 59 | height = 32 # 假设图像高度为32 60 | width = 32 # 假设图像宽度为32 61 | channels = 64 # 输入通道数 62 | 63 | block = SGFN(in_features=channels) 64 | 65 | # 创建随机输入数据 (B, H*W, C) 66 | x = torch.randn(batch_size, height * width, channels) 67 | 68 | # 前向传播并打印输入输出的形状 69 | output = block(x, height, width) 70 | 71 | print(x.size()) 72 | print(output.size()) -------------------------------------------------------------------------------- /(arXiv 2021) S2Attention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/2108.01072 7 | # 论文:S2-MLPv2: Improved Spatial-Shift MLP Architecture for Vision 8 | 9 | 10 | def spatial_shift1(x): 11 | b,w,h,c = x.size() 12 | x[:,1:,:,:c//4] = x[:,:w-1,:,:c//4] 13 | x[:,:w-1,:,c//4:c//2] = x[:,1:,:,c//4:c//2] 14 | x[:,:,1:,c//2:c*3//4] = x[:,:,:h-1,c//2:c*3//4] 15 | x[:,:,:h-1,3*c//4:] = x[:,:,1:,3*c//4:] 16 | return x 17 | 18 | 19 | def spatial_shift2(x): 20 | b,w,h,c = x.size() 21 | x[:,:,1:,:c//4] = x[:,:,:h-1,:c//4] 22 | x[:,:,:h-1,c//4:c//2] = x[:,:,1:,c//4:c//2] 23 | x[:,1:,:,c//2:c*3//4] = x[:,:w-1,:,c//2:c*3//4] 24 | x[:,:w-1,:,3*c//4:] = x[:,1:,:,3*c//4:] 25 | return x 26 | 27 | 28 | class SplitAttention(nn.Module): 29 | def __init__(self,channel=512,k=3): 30 | super().__init__() 31 | self.channel=channel 32 | self.k=k 33 | self.mlp1=nn.Linear(channel,channel,bias=False) 34 | self.gelu=nn.GELU() 35 | self.mlp2=nn.Linear(channel,channel*k,bias=False) 36 | self.softmax=nn.Softmax(1) 37 | 38 | def forward(self,x_all): 39 | b,k,h,w,c=x_all.shape 40 | x_all=x_all.reshape(b,k,-1,c) #bs,k,n,c 41 | a=torch.sum(torch.sum(x_all,1),1) #bs,c 42 | hat_a=self.mlp2(self.gelu(self.mlp1(a))) #bs,kc 43 | hat_a=hat_a.reshape(b,self.k,c) #bs,k,c 44 | bar_a=self.softmax(hat_a) #bs,k,c 45 | attention=bar_a.unsqueeze(-2) # #bs,k,1,c 46 | out=attention*x_all # #bs,k,n,c 47 | out=torch.sum(out,1).reshape(b,h,w,c) 48 | return out 49 | 50 | 51 | class S2Attention(nn.Module): 52 | 53 | def __init__(self, channels=512 ): 54 | super().__init__() 55 | self.mlp1 = nn.Linear(channels,channels*3) 56 | self.mlp2 = nn.Linear(channels,channels) 57 | self.split_attention = SplitAttention() 58 | 59 | def forward(self, x): 60 | b,c,w,h = x.size() 61 | x=x.permute(0,2,3,1) 62 | x = self.mlp1(x) 63 | x1 = spatial_shift1(x[:,:,:,:c]) 64 | x2 = spatial_shift2(x[:,:,:,c:c*2]) 65 | x3 = x[:,:,:,c*2:] 66 | x_all=torch.stack([x1,x2,x3],1) 67 | a = self.split_attention(x_all) 68 | x = self.mlp2(a) 69 | x=x.permute(0,3,1,2) 70 | return x 71 | 72 | 73 | 74 | 75 | if __name__ == '__main__': 76 | input=torch.randn(50,512,7,7) 
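    # Shape note (added): S2Attention permutes to channels-last (B, H, W, C) internally for
    # the spatial shifts and the split attention, but its interface is B, C, H, W in and out,
    # so the (50, 512, 7, 7) input above comes back with the same shape.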
77 | block = S2Attention(channels=512) 78 | output=block(input) 79 | print(output.shape) 80 | 81 | -------------------------------------------------------------------------------- /cleegn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | #论文:CLEEGN: A Convolutional Neural Network for Plug-and-Play Automatic EEG Reconstruction 4 | #论文地址:https://arxiv.org/pdf/2210.05988v2.pdf 5 | 6 | class Permute2d(nn.Module): 7 | def __init__(self, shape): 8 | super(Permute2d, self).__init__() 9 | self.shape = shape 10 | 11 | def forward(self, x): 12 | return torch.permute(x, self.shape) 13 | 14 | 15 | class CLEEGN(nn.Module): 16 | def __init__(self, n_chan, fs, N_F=20, tem_kernelLen=0.1): 17 | super(CLEEGN, self).__init__() 18 | self.n_chan = n_chan 19 | self.N_F = N_F 20 | self.fs = fs 21 | self.conv1 = nn.Sequential( 22 | nn.Conv2d(1, n_chan, (n_chan, 1), padding="valid", bias=True), 23 | Permute2d((0, 2, 1, 3)), 24 | nn.BatchNorm2d(1, eps=1e-3, momentum=0.99) 25 | ) 26 | self.conv2 = nn.Sequential( 27 | nn.Conv2d(1, N_F, (1, int(fs * tem_kernelLen)), padding="same", bias=True), 28 | nn.BatchNorm2d(N_F, eps=1e-3, momentum=0.99) 29 | ) 30 | 31 | self.conv3 = nn.Sequential( 32 | nn.Conv2d(N_F, N_F, (1, int(fs * tem_kernelLen)), padding="same", bias=True), 33 | nn.BatchNorm2d(N_F, eps=1e-3, momentum=0.99) 34 | ) 35 | self.conv4 = nn.Sequential( 36 | nn.Conv2d(N_F, n_chan, (n_chan, 1), padding="same", bias=True), 37 | nn.BatchNorm2d(n_chan, eps=1e-3, momentum=0.99) 38 | ) 39 | self.conv5 = nn.Conv2d(n_chan, 1, (n_chan, 1), padding="same", bias=True) 40 | 41 | def forward(self, x): 42 | # encoder 43 | x = self.conv1(x) 44 | x = self.conv2(x) 45 | # decoder 46 | x = self.conv3(x) 47 | x = self.conv4(x) 48 | 49 | x = self.conv5(x) 50 | return x 51 | 52 | 53 | if __name__ == '__main__': 54 | 55 | # 定义输入张量的参数 56 | batch_size = 1 # 批次大小,表示处理一个样本 57 | n_channels = 56 # EEG信号的通道数 58 | sampling_rate = 128.0 # 信号采样频率,单位为Hz 59 | time_length = int(sampling_rate) # 时间长度(宽度),即一个时间序列周期内的数据点数 60 | 61 | # 初始化模型 62 | model = CLEEGN(n_chan=n_channels, fs=sampling_rate, N_F=20, tem_kernelLen=0.1) 63 | 64 | # 生成随机输入张量,模拟EEG数据 65 | input_tensor = torch.randn(batch_size, 1, n_channels, time_length) # (batch_size, channels, height, width) 66 | 67 | # 执行前向传播 68 | output = model(input_tensor) 69 | 70 | # 输出输入和输出张量的形状 71 | print(f'输入张量形状: {input_tensor.shape}') 72 | print(f'输出张量形状: {output.shape}') 73 | 74 | -------------------------------------------------------------------------------- /采样/EUCB.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | # 论文:EMCAD: Efficient Multi-scale Convolutional Attention Decoding for Medical Image Segmentation, CVPR2024 4 | # 论文地址:https://arxiv.org/pdf/2405.06880 5 | 6 | def channel_shuffle(x, groups): 7 | batchsize, num_channels, height, width = x.data.size() 8 | channels_per_group = num_channels // groups 9 | # reshape 10 | x = x.view(batchsize, groups, 11 | channels_per_group, height, width) 12 | x = torch.transpose(x, 1, 2).contiguous() 13 | # flatten 14 | x = x.view(batchsize, -1, height, width) 15 | return x 16 | 17 | def act_layer(act, inplace=False, neg_slope=0.2, n_prelu=1): 18 | # activation layer 19 | act = act.lower() 20 | if act == 'relu': 21 | layer = nn.ReLU(inplace) 22 | elif act == 'relu6': 23 | layer = nn.ReLU6(inplace) 24 | elif act == 'leakyrelu': 25 | layer = nn.LeakyReLU(neg_slope, inplace) 26 | elif act == 'prelu': 27 
| layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope) 28 | elif act == 'gelu': 29 | layer = nn.GELU() 30 | elif act == 'hswish': 31 | layer = nn.Hardswish(inplace) 32 | else: 33 | raise NotImplementedError('activation layer [%s] is not found' % act) 34 | return layer 35 | 36 | # Efficient up-convolution block (EUCB) 37 | class EUCB(nn.Module): 38 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, activation='relu'): 39 | super(EUCB, self).__init__() 40 | 41 | self.in_channels = in_channels 42 | self.out_channels = out_channels 43 | self.up_dwc = nn.Sequential( 44 | nn.Upsample(scale_factor=2), 45 | nn.Conv2d(self.in_channels, self.in_channels, kernel_size=kernel_size, stride=stride, 46 | padding=kernel_size // 2, groups=self.in_channels, bias=False), 47 | nn.BatchNorm2d(self.in_channels), 48 | act_layer(activation, inplace=True) 49 | ) 50 | self.pwc = nn.Sequential( 51 | nn.Conv2d(self.in_channels, self.out_channels, kernel_size=1, stride=1, padding=0, bias=True) 52 | ) 53 | 54 | def forward(self, x): 55 | x = self.up_dwc(x) 56 | x = channel_shuffle(x, self.in_channels) 57 | x = self.pwc(x) 58 | return x 59 | 60 | 61 | if __name__ == '__main__': 62 | input = torch.randn(1, 32, 64, 64) #B C H W 63 | 64 | block = EUCB(in_channels=32, out_channels=64) 65 | 66 | print(input.size()) 67 | 68 | output = block(input) 69 | print(output.size()) -------------------------------------------------------------------------------- /MCM.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | # 论文:MAGNet: Multi-scale Awareness and Global fusion Network for RGB-D salient object detection | KBS 5 | # 论文地址:https://www.sciencedirect.com/science/article/abs/pii/S0950705124007603 6 | # github地址:https://github.com/mingyu6346/MAGNet 7 | 8 | TRAIN_SIZE = 384 9 | 10 | class MCM(nn.Module): 11 | def __init__(self, inc, outc): 12 | super().__init__() 13 | self.upsample2 = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) 14 | self.rc = nn.Sequential( 15 | nn.Conv2d(in_channels=inc, out_channels=inc, kernel_size=3, padding=1, stride=1, groups=inc), 16 | nn.BatchNorm2d(inc), 17 | nn.GELU(), 18 | nn.Conv2d(in_channels=inc, out_channels=outc, kernel_size=1, stride=1), 19 | nn.BatchNorm2d(outc), 20 | nn.GELU() 21 | ) 22 | self.predtrans = nn.Sequential( 23 | nn.Conv2d(in_channels=outc, out_channels=outc, kernel_size=3, padding=1, groups=outc), 24 | nn.BatchNorm2d(outc), 25 | nn.GELU(), 26 | nn.Conv2d(in_channels=outc, out_channels=1, kernel_size=1) 27 | ) 28 | 29 | self.rc2 = nn.Sequential( 30 | nn.Conv2d(in_channels=outc * 2, out_channels=outc * 2, kernel_size=3, padding=1, groups=outc * 2), 31 | nn.BatchNorm2d(outc * 2), 32 | nn.GELU(), 33 | nn.Conv2d(in_channels=outc * 2, out_channels=outc, kernel_size=1, stride=1), 34 | nn.BatchNorm2d(outc), 35 | nn.GELU() 36 | ) 37 | 38 | def forward(self, x1, x2): 39 | x2_upsample = self.upsample2(x2) # 上采样 40 | x2_rc = self.rc(x2_upsample) # 减少通道数 41 | shortcut = x2_rc 42 | 43 | x_cat = torch.cat((x1, x2_rc), dim=1) # 拼接 44 | x_forward = self.rc2(x_cat) # 减少通道数2 45 | x_forward = x_forward + shortcut 46 | pred = F.interpolate(self.predtrans(x_forward), TRAIN_SIZE, mode="bilinear", align_corners=True) # 预测图 47 | 48 | return pred, x_forward 49 | 50 | 51 | if __name__ == '__main__': 52 | 53 | inc = 64 # 输入通道数 54 | outc = 32 # 输出通道数 55 | mcm = MCM(inc=inc, outc=outc) 56 | 57 | x1 = torch.randn(1, outc, 96, 96) # Batch size=1, 
Channels=outc, Height=96, Width=96 58 | x2 = torch.randn(1, inc, 48, 48) # Batch size=1, Channels=inc, Height=48, Width=48 59 | 60 | pred, x_forward = mcm(x1, x2) 61 | 62 | print(x1.size()) 63 | print(x2.size()) 64 | print(pred.size()) 65 | print(x_forward.size()) -------------------------------------------------------------------------------- /LAE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from einops import rearrange 4 | # 论文地址:https://arxiv.org/pdf/2408.14087 5 | # 论文:LSM-YOLO: A Compact and Effective ROI Detector for Medical Detection 6 | 7 | 8 | def autopad(k, p=None, d=1): # kernel, padding, dilation 9 | """Pad to 'same' shape outputs.""" 10 | if d > 1: 11 | k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size 12 | if p is None: 13 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 14 | return p 15 | 16 | class Conv(nn.Module): 17 | """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation).""" 18 | default_act = nn.SiLU() # default activation 19 | 20 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True): 21 | """Initialize Conv layer with given arguments including activation.""" 22 | super().__init__() 23 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False) 24 | self.bn = nn.BatchNorm2d(c2) 25 | self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() 26 | 27 | def forward(self, x): 28 | """Apply convolution, batch normalization and activation to input tensor.""" 29 | return self.act(self.bn(self.conv(x))) 30 | 31 | def forward_fuse(self, x): 32 | """Perform transposed convolution of 2D data.""" 33 | return self.act(self.conv(x)) 34 | 35 | class LAE(nn.Module): 36 | # Light-weight Adaptive Extraction 37 | def __init__(self, ch, group=16) -> None: 38 | super().__init__() 39 | 40 | self.softmax = nn.Softmax(dim=-1) 41 | self.attention = nn.Sequential( 42 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 43 | Conv(ch, ch, k=1) 44 | ) 45 | 46 | self.ds_conv = Conv(ch, ch * 4, k=3, s=2, g=(ch // group)) 47 | 48 | def forward(self, x): 49 | # bs, ch, 2*h, 2*w => bs, ch, h, w, 4 50 | att = rearrange(self.attention(x), 'bs ch (s1 h) (s2 w) -> bs ch h w (s1 s2)', s1=2, s2=2) 51 | att = self.softmax(att) 52 | 53 | # bs, 4 * ch, h, w => bs, ch, h, w, 4 54 | x = rearrange(self.ds_conv(x), 'bs (s ch) h w -> bs ch h w s', s=4) 55 | x = torch.sum(x * att, dim=-1) 56 | return x 57 | 58 | 59 | if __name__ == '__main__': 60 | 61 | input = torch.randn(1, 16, 64, 64) # B C H W 62 | block = LAE(ch=16) 63 | output = block(input) 64 | print(input.size()) 65 | print(output.size()) -------------------------------------------------------------------------------- /tfcm.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | #论文:Multi-Scale Temporal Frequency Convolutional Network With Axial Attention for Speech Enhancement(ICASSP 2022) 4 | #论文地址:https://ieeexplore.ieee.org/document/9746610 5 | 6 | class TFCM_Block(nn.Module): 7 | def __init__(self, 8 | cin=24, 9 | K=(3, 3), 10 | dila=1, 11 | causal=True, 12 | ): 13 | super(TFCM_Block, self).__init__() 14 | self.pconv1 = nn.Sequential( 15 | nn.Conv2d(cin, cin, kernel_size=(1, 1)), 16 | nn.BatchNorm2d(cin), 17 | nn.PReLU(cin), 18 | ) 19 | dila_pad = dila * (K[1] - 1) 20 | if causal: 21 | 
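            # Causal padding (added note): pad dila_pad = dila*(K[1]-1) steps on the left of the
            # time axis only, plus one frequency bin on each side, so the dilated depthwise conv
            # never sees future frames; the non-causal branch below splits the same padding
            # symmetrically in time.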
self.dila_conv = nn.Sequential( 22 | nn.ConstantPad2d((dila_pad, 0, 1, 1), 0.0), 23 | nn.Conv2d(cin, cin, K, 1, dilation=(1, dila), groups=cin), 24 | nn.BatchNorm2d(cin), 25 | nn.PReLU(cin) 26 | ) 27 | else: 28 | # update 22/06/21, add groups for non-casual 29 | self.dila_conv = nn.Sequential( 30 | nn.ConstantPad2d((dila_pad//2, dila_pad//2, 1, 1), 0.0), 31 | nn.Conv2d(cin, cin, K, 1, dilation=(1, dila), groups=cin), 32 | nn.BatchNorm2d(cin), 33 | nn.PReLU(cin) 34 | ) 35 | self.pconv2 = nn.Conv2d(cin, cin, kernel_size=(1, 1)) 36 | self.causal = causal 37 | self.dila_pad = dila_pad 38 | 39 | def forward(self, inps): 40 | """ 41 | inp: B x C x F x T 42 | """ 43 | outs = self.pconv1(inps) 44 | outs = self.dila_conv(outs) 45 | outs = self.pconv2(outs) 46 | return outs + inps 47 | 48 | 49 | class TFCM(nn.Module): 50 | def __init__(self, 51 | cin=24, 52 | K=(3, 3), 53 | tfcm_layer=6, 54 | causal=True, 55 | ): 56 | super(TFCM, self).__init__() 57 | self.tfcm = nn.ModuleList() 58 | for idx in range(tfcm_layer): 59 | self.tfcm.append( 60 | TFCM_Block(cin, K, 2**idx, causal=causal) 61 | ) 62 | 63 | def forward(self, inp): 64 | out = inp 65 | for idx in range(len(self.tfcm)): 66 | out = self.tfcm[idx](out) 67 | return out 68 | 69 | 70 | 71 | 72 | 73 | if __name__ == "__main__": 74 | block = TFCM(24) 75 | input = th.randn(2, 24, 256, 101)# B C H W 76 | out = block(input) 77 | print(out.size()) -------------------------------------------------------------------------------- /(arXiv 2021) PSA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/2105.14447 7 | # 论文:EPSANet: An Efficient Pyramid Squeeze Attention Block on Convolutional Neural Network 8 | 9 | 10 | 11 | class PSA(nn.Module): 12 | 13 | def __init__(self, channel=512,reduction=4,S=4): 14 | super().__init__() 15 | self.S=S 16 | 17 | self.convs=[] 18 | for i in range(S): 19 | self.convs.append(nn.Conv2d(channel//S,channel//S,kernel_size=2*(i+1)+1,padding=i+1)) 20 | 21 | self.se_blocks=[] 22 | for i in range(S): 23 | self.se_blocks.append(nn.Sequential( 24 | nn.AdaptiveAvgPool2d(1), 25 | nn.Conv2d(channel//S, channel // (S*reduction),kernel_size=1, bias=False), 26 | nn.ReLU(inplace=True), 27 | nn.Conv2d(channel // (S*reduction), channel//S,kernel_size=1, bias=False), 28 | nn.Sigmoid() 29 | )) 30 | 31 | self.softmax=nn.Softmax(dim=1) 32 | 33 | 34 | def init_weights(self): 35 | for m in self.modules(): 36 | if isinstance(m, nn.Conv2d): 37 | init.kaiming_normal_(m.weight, mode='fan_out') 38 | if m.bias is not None: 39 | init.constant_(m.bias, 0) 40 | elif isinstance(m, nn.BatchNorm2d): 41 | init.constant_(m.weight, 1) 42 | init.constant_(m.bias, 0) 43 | elif isinstance(m, nn.Linear): 44 | init.normal_(m.weight, std=0.001) 45 | if m.bias is not None: 46 | init.constant_(m.bias, 0) 47 | 48 | def forward(self, x): 49 | b, c, h, w = x.size() 50 | 51 | #Step1:SPC module 52 | SPC_out=x.view(b,self.S,c//self.S,h,w) #bs,s,ci,h,w 53 | for idx,conv in enumerate(self.convs): 54 | SPC_out[:,idx,:,:,:]=conv(SPC_out[:,idx,:,:,:]) 55 | 56 | #Step2:SE weight 57 | se_out=[] 58 | for idx,se in enumerate(self.se_blocks): 59 | se_out.append(se(SPC_out[:,idx,:,:,:])) 60 | SE_out=torch.stack(se_out,dim=1) 61 | SE_out=SE_out.expand_as(SPC_out) 62 | 63 | #Step3:Softmax 64 | softmax_out=self.softmax(SE_out) 65 | 66 | #Step4:SPA 67 | PSA_out=SPC_out*softmax_out 68 | PSA_out=PSA_out.view(b,-1,h,w) 69 | 70 | return 
PSA_out 71 | 72 | 73 | if __name__ == '__main__': 74 | input=torch.randn(50,512,7,7) 75 | block = PSA(channel=512,reduction=8) 76 | output=block(input) 77 | a=output.view(-1).sum() 78 | a.backward() 79 | print(output.shape) 80 | 81 | -------------------------------------------------------------------------------- /(ICPR 2021)CAN(人群计数,CV2维任务通用).py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings("ignore") 3 | import torch.nn as nn 4 | import torch 5 | from torch.nn import functional as F 6 | from torchvision import models 7 | 8 | # 论文:Encoder-Decoder Based Convolutional Neural Networks with Multi-Scale-Aware Modules for Crowd Counting 9 | # 论文地址:https://ieeexplore.ieee.org/document/9413286 10 | 11 | class ContextualModule(nn.Module): 12 | def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)): 13 | super(ContextualModule, self).__init__() 14 | self.scales = [] 15 | self.scales = nn.ModuleList([self._make_scale(features, size) for size in sizes]) 16 | self.bottleneck = nn.Conv2d(features * 2, out_features, kernel_size=1) 17 | self.relu = nn.ReLU() 18 | self.weight_net = nn.Conv2d(features, features, kernel_size=1) 19 | self._initialize_weights() 20 | 21 | def __make_weight(self, feature, scale_feature): 22 | weight_feature = feature - scale_feature 23 | return F.sigmoid(self.weight_net(weight_feature)) 24 | 25 | def _make_scale(self, features, size): 26 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) 27 | conv = nn.Conv2d(features, features, kernel_size=1, bias=False) 28 | return nn.Sequential(prior, conv) 29 | 30 | def forward(self, feats): 31 | h, w = feats.size(2), feats.size(3) 32 | multi_scales = [F.upsample(input=stage(feats), size=(h, w), mode='bilinear') for stage in self.scales] 33 | weights = [self.__make_weight(feats, scale_feature) for scale_feature in multi_scales] 34 | overall_features = [(multi_scales[0] * weights[0] + multi_scales[1] * weights[1] + multi_scales[2] * weights[ 35 | 2] + multi_scales[3] * weights[3]) / (weights[0] + weights[1] + weights[2] + weights[3])] + [feats] 36 | bottle = self.bottleneck(torch.cat(overall_features, 1)) 37 | return self.relu(bottle) 38 | 39 | def _initialize_weights(self): 40 | for m in self.modules(): 41 | if isinstance(m, nn.Conv2d): 42 | nn.init.normal_(m.weight, std=0.01) 43 | if m.bias is not None: 44 | nn.init.constant_(m.bias, 0) 45 | elif isinstance(m, nn.BatchNorm2d): 46 | nn.init.constant_(m.weight, 1) 47 | nn.init.constant_(m.bias, 0) 48 | 49 | 50 | 51 | 52 | if __name__ == '__main__': 53 | block = ContextualModule(features=64, out_features=64) 54 | input_tensor = torch.rand(1, 64, 128, 128) 55 | output = block(input_tensor) 56 | print("Input size:", input_tensor.size()) 57 | print("Output size:", output.size()) -------------------------------------------------------------------------------- /SPConv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | 6 | class SPConv_3x3(nn.Module): 7 | def __init__(self, inplanes, outplanes, stride=1, ratio=0.5, reduction=16): 8 | super(SPConv_3x3, self).__init__() 9 | self.inplanes_3x3 = int(inplanes*ratio) 10 | self.inplanes_1x1 = inplanes - self.inplanes_3x3 11 | self.outplanes_3x3 = int(outplanes*ratio) 12 | self.outplanes_1x1 = outplanes - self.outplanes_3x3 13 | self.outplanes = outplanes 14 | self.stride = stride 15 | 16 | self.gwc = nn.Conv2d(self.inplanes_3x3, self.outplanes, kernel_size=3, 
stride=self.stride, 17 | padding=1, groups=2, bias=False) 18 | self.pwc = nn.Conv2d(self.inplanes_3x3, self.outplanes, kernel_size=1, bias=False) 19 | 20 | self.conv1x1 = nn.Conv2d(self.inplanes_1x1, self.outplanes,kernel_size=1) 21 | self.avgpool_s2_1 = nn.AvgPool2d(kernel_size=2,stride=2) 22 | self.avgpool_s2_3 = nn.AvgPool2d(kernel_size=2, stride=2) 23 | self.avgpool_add_1 = nn.AdaptiveAvgPool2d(1) 24 | self.avgpool_add_3 = nn.AdaptiveAvgPool2d(1) 25 | self.bn1 = nn.BatchNorm2d(self.outplanes) 26 | self.bn2 = nn.BatchNorm2d(self.outplanes) 27 | self.ratio = ratio 28 | self.groups = int(1/self.ratio) 29 | def forward(self, x): 30 | b, c, _, _ = x.size() 31 | 32 | 33 | x_3x3 = x[:,:int(c*self.ratio),:,:] 34 | x_1x1 = x[:,int(c*self.ratio):,:,:] 35 | out_3x3_gwc = self.gwc(x_3x3) 36 | if self.stride ==2: 37 | x_3x3 = self.avgpool_s2_3(x_3x3) 38 | out_3x3_pwc = self.pwc(x_3x3) 39 | out_3x3 = out_3x3_gwc + out_3x3_pwc 40 | out_3x3 = self.bn1(out_3x3) 41 | out_3x3_ratio = self.avgpool_add_3(out_3x3).squeeze() 42 | 43 | # use avgpool first to reduce information lost 44 | if self.stride == 2: 45 | x_1x1 = self.avgpool_s2_1(x_1x1) 46 | 47 | out_1x1 = self.conv1x1(x_1x1) 48 | out_1x1 = self.bn2(out_1x1) 49 | out_1x1_ratio = self.avgpool_add_1(out_1x1).squeeze() 50 | 51 | out_31_ratio = torch.stack((out_3x3_ratio, out_1x1_ratio), 2) 52 | out_31_ratio = nn.Softmax(dim=2)(out_31_ratio) 53 | out = out_1x1 * (out_31_ratio[:,:,1].view(b, self.outplanes, 1, 1).expand_as(out_1x1))\ 54 | + out_3x3 * (out_31_ratio[:,:,0].view(b, self.outplanes, 1, 1).expand_as(out_3x3)) 55 | 56 | return out 57 | if __name__ == '__main__': 58 | 59 | 60 | input = torch.randn(3, 64, 64, 64) 61 | 62 | block = SPConv_3x3(64, 32) 63 | output = block(input) 64 | print(input.size()) 65 | print(output.size()) -------------------------------------------------------------------------------- /LPA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | #论文:SwinPA-Net: Swin Transformer-Based Multiscale Feature Pyramid Aggregation Network for Medical Image Segmentation 4 | #论文地址:https://ieeexplore.ieee.org/document/9895210 5 | 6 | class ChannelAttention(nn.Module): 7 | def __init__(self, in_planes): 8 | super(ChannelAttention, self).__init__() 9 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 10 | self.max_pool = nn.AdaptiveMaxPool2d(1) 11 | 12 | self.fc1 = nn.Conv2d(in_planes, in_planes // 8, 1, bias=False) 13 | self.relu1 = nn.ReLU() 14 | self.fc2 = nn.Conv2d(in_planes // 8, in_planes, 1, bias=False) 15 | 16 | self.sigmoid = nn.Sigmoid() 17 | 18 | def forward(self, x): 19 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 20 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 21 | out = avg_out + max_out 22 | return self.sigmoid(out) 23 | 24 | 25 | class SpatialAttention(nn.Module): 26 | def __init__(self, kernel_size=3): 27 | super(SpatialAttention, self).__init__() 28 | 29 | assert kernel_size in (3, 7), 'kernel size must be 3 or 7' 30 | padding = 3 if kernel_size == 7 else 1 31 | 32 | self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) 33 | self.sigmoid = nn.Sigmoid() 34 | 35 | def forward(self, x): 36 | avg_out = torch.mean(x, dim=1, keepdim=True) 37 | max_out, _ = torch.max(x, dim=1, keepdim=True) 38 | x = torch.cat([avg_out, max_out], dim=1) 39 | x = self.conv1(x) 40 | return self.sigmoid(x) 41 | 42 | 43 | class LPA(nn.Module): 44 | def __init__(self, in_channel): 45 | super(LPA, self).__init__() 46 | self.ca = 
ChannelAttention(in_channel) 47 | self.sa = SpatialAttention() 48 | 49 | def forward(self, x): 50 | x0, x1 = x.chunk(2, dim=2) 51 | x0 = x0.chunk(2, dim=3) 52 | x1 = x1.chunk(2, dim=3) 53 | x0 = [self.ca(x0[-2]) * x0[-2], self.ca(x0[-1]) * x0[-1]] 54 | x0 = [self.sa(x0[-2]) * x0[-2], self.sa(x0[-1]) * x0[-1]] 55 | 56 | x1 = [self.ca(x1[-2]) * x1[-2], self.ca(x1[-1]) * x1[-1]] 57 | x1 = [self.sa(x1[-2]) * x1[-2], self.sa(x1[-1]) * x1[-1]] 58 | 59 | x0 = torch.cat(x0, dim=3) 60 | x1 = torch.cat(x1, dim=3) 61 | x3 = torch.cat((x0, x1), dim=2) 62 | 63 | x4 = self.ca(x) * x 64 | x4 = self.sa(x4) * x4 65 | x = x3 + x4 66 | return x 67 | 68 | 69 | if __name__ == '__main__': 70 | 71 | input = torch.rand(1, 28, 64, 64) 72 | block = LPA(in_channel=28) 73 | output = block(input) 74 | 75 | print(input.size()) 76 | print(output.size()) -------------------------------------------------------------------------------- /DA.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | class DoubleAttentionLayer(nn.Module): 7 | """ 8 | Implementation of Double Attention Network. NIPS 2018 9 | """ 10 | 11 | def __init__(self, in_channels: int, c_m: int, c_n: int, reconstruct=False): 12 | """ 13 | Parameters 14 | ---------- 15 | in_channels 16 | c_m 17 | c_n 18 | reconstruct: `bool` whether to re-construct output to have shape (B, in_channels, L, R) 19 | """ 20 | super(DoubleAttentionLayer, self).__init__() 21 | self.c_m = c_m 22 | self.c_n = c_n 23 | self.in_channels = in_channels 24 | self.reconstruct = reconstruct 25 | self.convA = nn.Conv2d(in_channels, c_m, kernel_size=1) 26 | self.convB = nn.Conv2d(in_channels, c_n, kernel_size=1) 27 | self.convV = nn.Conv2d(in_channels, c_n, kernel_size=1) 28 | if self.reconstruct: 29 | self.conv_reconstruct = nn.Conv2d(c_m, in_channels, kernel_size=1) 30 | 31 | def forward(self, x: torch.Tensor): 32 | """ 33 | Parameters 34 | ---------- 35 | x: `torch.Tensor` of shape (B, C, H, W) 36 | Returns 37 | ------- 38 | """ 39 | batch_size, c, h, w = x.size() 40 | assert c == self.in_channels, 'input channel not equal!' 
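        # (added) Double attention in two steps: convB yields c_n attention maps that gather
        # the c_m features from convA into a (B, c_m, c_n) set of global descriptors, and convV
        # yields c_n attention vectors that distribute those descriptors back to every position.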
41 | A = self.convA(x) # (B, c_m, h, w) because kernel size is 1 42 | 43 | B = self.convB(x) # (B, c_n, h, w) 44 | V = self.convV(x) # (B, c_n, h, w) 45 | 46 | tmpA = A.view(batch_size, self.c_m, h * w) 47 | 48 | attention_maps = B.view(batch_size, self.c_n, h * w) 49 | attention_vectors = V.view(batch_size, self.c_n, h * w) 50 | 51 | # softmax on the last dimension to create attention maps 52 | attention_maps = F.softmax(attention_maps, dim=-1) # 对hxw维度进行softmax 53 | 54 | # step 1: feature gathering 55 | global_descriptors = torch.bmm( # attention map(V)和tmpA进行 56 | tmpA, attention_maps.permute(0, 2, 1)) # (B, c_m, c_n) 57 | 58 | # step 2: feature distribution 59 | # (B, c_n, h * w) attention on c_n dimension - channel wise 60 | attention_vectors = F.softmax(attention_vectors, dim=1) 61 | 62 | tmpZ = global_descriptors.matmul( 63 | attention_vectors) # B, self.c_m, h * w 64 | 65 | tmpZ = tmpZ.view(batch_size, self.c_m, h, w) 66 | if self.reconstruct: 67 | tmpZ = self.conv_reconstruct(tmpZ) 68 | return tmpZ 69 | 70 | 71 | if __name__ == "__main__": 72 | input = torch.zeros(3, 12, 8, 8) 73 | block = DoubleAttentionLayer(12, 24, 4) 74 | output=block(input) 75 | print(output.size()) -------------------------------------------------------------------------------- /频域/(CVPR 2024)FRFN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from einops import rearrange 4 | import math 5 | 6 | # 论文:Adapt or Perish: Adaptive Sparse Transformer with Attentive Feature Refinement for Image Restoration, CVPR 2024. 7 | # 论文地址:https://openaccess.thecvf.com/content/CVPR2024/papers/Zhou_Adapt_or_Perish_Adaptive_Sparse_Transformer_with_Attentive_Feature_Refinement_CVPR_2024_paper.pdf 8 | # 全网最全100➕即插即用模块GitHub地址:https://github.com/ai-dawang/PlugNPlay-Modules 9 | class FRFN(nn.Module): 10 | def __init__(self, dim=32, hidden_dim=128, act_layer=nn.GELU): 11 | super().__init__() 12 | self.linear1 = nn.Sequential(nn.Linear(dim, hidden_dim * 2), 13 | act_layer()) 14 | self.dwconv = nn.Sequential( 15 | nn.Conv2d(hidden_dim, hidden_dim, groups=hidden_dim, kernel_size=3, stride=1, padding=1), 16 | act_layer()) 17 | self.linear2 = nn.Sequential(nn.Linear(hidden_dim, dim)) 18 | self.dim = dim 19 | self.hidden_dim = hidden_dim 20 | 21 | self.dim_conv = self.dim // 4 22 | self.dim_untouched = self.dim - self.dim_conv 23 | self.partial_conv3 = nn.Conv2d(self.dim_conv, self.dim_conv, 3, 1, 1, bias=False) 24 | 25 | def forward(self, x): 26 | x_init = x 27 | # bs x hw x c 28 | bs, hw, c = x.size() 29 | hh = int(math.sqrt(hw)) 30 | 31 | # spatial restore 32 | x = rearrange(x, ' b (h w) (c) -> b c h w ', h=hh, w=hh) 33 | 34 | x1, x2, = torch.split(x, [self.dim_conv, self.dim_untouched], dim=1) 35 | x1 = self.partial_conv3(x1) 36 | x = torch.cat((x1, x2), 1) 37 | 38 | # flaten 39 | x = rearrange(x, ' b c h w -> b (h w) c', h=hh, w=hh) 40 | 41 | x = self.linear1(x) 42 | # gate mechanism 43 | x_1, x_2 = x.chunk(2, dim=-1) 44 | 45 | x_1 = rearrange(x_1, ' b (h w) (c) -> b c h w ', h=hh, w=hh) 46 | x_1 = self.dwconv(x_1) 47 | x_1 = rearrange(x_1, ' b c h w -> b (h w) c', h=hh, w=hh) 48 | x = x_1 * x_2 49 | 50 | x = self.linear2(x) 51 | 52 | return x + x_init 53 | 54 | 55 | if __name__ == '__main__': 56 | # Instantiate the FRFN class 57 | dim = 64 # Dimension of input features 58 | 59 | 60 | # Create an instance of the FRFN module 61 | frfn = FRFN(dim) 62 | 63 | # Generate a random input tensor 64 | B = 1 # Batch size 65 | H = 64 # Height of the feature map 
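    # (added) FRFN.forward recovers the spatial size with hh = int(sqrt(hw)), so the flattened
    # (B, H*W, C) input must come from a square feature map (H == W), as in this example.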
66 | W = 64 # Width of the feature map 67 | C = dim # Number of channels 68 | 69 | input = torch.randn(B, H * W, C) 70 | 71 | # Forward pass 72 | output = frfn(input) 73 | 74 | # Print input and output shapes 75 | print(input.size()) 76 | print(output.size()) 77 | -------------------------------------------------------------------------------- /FECAttention.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import numpy as np 3 | import torch 4 | #论文:FECAM: Frequency Enhanced Channel Attention Mechanism for Time Series Forecasting 5 | #论文地址:https://arxiv.org/abs/2212.01209 6 | 7 | try: 8 | from torch import irfft 9 | from torch import rfft 10 | except ImportError: 11 | def rfft(x, d): 12 | t = torch.fft.fft(x, dim=(-d)) 13 | r = torch.stack((t.real, t.imag), -1) 14 | return r 15 | 16 | 17 | def irfft(x, d): 18 | t = torch.fft.ifft(torch.complex(x[:, :, 0], x[:, :, 1]), dim=(-d)) 19 | return t.real 20 | 21 | 22 | def dct(x, norm=None): 23 | """ 24 | Discrete Cosine Transform, Type II (a.k.a. the DCT) 25 | 26 | For the meaning of the parameter `norm`, see: 27 | https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html 28 | 29 | :param x: the input signal 30 | :param norm: the normalization, None or 'ortho' 31 | :return: the DCT-II of the signal over the last dimension 32 | """ 33 | x_shape = x.shape 34 | N = x_shape[-1] 35 | x = x.contiguous().view(-1, N) 36 | 37 | v = torch.cat([x[:, ::2], x[:, 1::2].flip([1])], dim=1) 38 | 39 | Vc = rfft(v, 1) 40 | 41 | k = - torch.arange(N, dtype=x.dtype, device=x.device)[None, :] * np.pi / (2 * N) 42 | W_r = torch.cos(k) 43 | W_i = torch.sin(k) 44 | 45 | V = Vc[:, :, 0] * W_r - Vc[:, :, 1] * W_i 46 | 47 | if norm == 'ortho': 48 | V[:, 0] /= np.sqrt(N) * 2 49 | V[:, 1:] /= np.sqrt(N / 2) * 2 50 | 51 | V = 2 * V.view(*x_shape) 52 | 53 | return V 54 | 55 | 56 | class dct_channel_block(nn.Module): 57 | def __init__(self, channel): 58 | super(dct_channel_block, self).__init__() 59 | self.fc = nn.Sequential( 60 | nn.Linear(channel, channel * 2, bias=False), 61 | nn.Dropout(p=0.1), 62 | nn.ReLU(inplace=True), 63 | nn.Linear(channel * 2, channel, bias=False), 64 | nn.Sigmoid() 65 | ) 66 | 67 | self.dct_norm = nn.LayerNorm([96], eps=1e-6) # for lstm on length-wise 68 | 69 | def forward(self, x): 70 | b, c, l = x.size() # (B,C,L) (32,96,512) 71 | list = [] 72 | for i in range(c): 73 | freq = dct(x[:, i, :]) 74 | list.append(freq) 75 | 76 | stack_dct = torch.stack(list, dim=1) 77 | 78 | lr_weight = self.dct_norm(stack_dct) 79 | lr_weight = self.fc(lr_weight) 80 | lr_weight = self.dct_norm(lr_weight) 81 | 82 | return x * lr_weight # result 83 | 84 | 85 | if __name__ == '__main__': 86 | input = torch.rand(8, 7, 96) 87 | block = dct_channel_block(96) 88 | result = block(input) 89 | print("input_tensor.shape:", input.shape) 90 | print("result.shape:", result.shape) -------------------------------------------------------------------------------- /ULSAM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | torch.set_default_tensor_type(torch.cuda.FloatTensor) 5 | #ULSAM: Ultra-Lightweight Subspace Attention Module for Compact Convolutional Neural Networks(WACV20) 6 | 7 | class SubSpace(nn.Module): 8 | 9 | 10 | def __init__(self, nin: int) -> None: 11 | super(SubSpace, self).__init__() 12 | self.conv_dws = nn.Conv2d( 13 | nin, nin, kernel_size=1, stride=1, padding=0, groups=nin 14 | ) 15 | self.bn_dws = 
nn.BatchNorm2d(nin, momentum=0.9) 16 | self.relu_dws = nn.ReLU(inplace=False) 17 | 18 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) 19 | 20 | self.conv_point = nn.Conv2d( 21 | nin, 1, kernel_size=1, stride=1, padding=0, groups=1 22 | ) 23 | self.bn_point = nn.BatchNorm2d(1, momentum=0.9) 24 | self.relu_point = nn.ReLU(inplace=False) 25 | 26 | self.softmax = nn.Softmax(dim=2) 27 | 28 | def forward(self, x: torch.Tensor) -> torch.Tensor: 29 | out = self.conv_dws(x) 30 | out = self.bn_dws(out) 31 | out = self.relu_dws(out) 32 | 33 | out = self.maxpool(out) 34 | 35 | out = self.conv_point(out) 36 | out = self.bn_point(out) 37 | out = self.relu_point(out) 38 | 39 | m, n, p, q = out.shape 40 | out = self.softmax(out.view(m, n, -1)) 41 | out = out.view(m, n, p, q) 42 | 43 | out = out.expand(x.shape[0], x.shape[1], x.shape[2], x.shape[3]) 44 | 45 | out = torch.mul(out, x) 46 | 47 | out = out + x 48 | 49 | return out 50 | 51 | 52 | class ULSAM(nn.Module): 53 | 54 | 55 | def __init__(self, nin: int, nout: int, h: int, w: int, num_splits: int) -> None: 56 | super(ULSAM, self).__init__() 57 | 58 | assert nin % num_splits == 0 59 | 60 | self.nin = nin 61 | self.nout = nout 62 | self.h = h 63 | self.w = w 64 | self.num_splits = num_splits 65 | 66 | self.subspaces = nn.ModuleList( 67 | [SubSpace(int(self.nin / self.num_splits)) for i in range(self.num_splits)] 68 | ) 69 | 70 | def forward(self, x: torch.Tensor) -> torch.Tensor: 71 | group_size = int(self.nin / self.num_splits) 72 | 73 | # split at batch dimension 74 | sub_feat = torch.chunk(x, self.num_splits, dim=1) 75 | 76 | out = [] 77 | for idx, l in enumerate(self.subspaces): 78 | out.append(self.subspaces[idx](sub_feat[idx])) 79 | 80 | out = torch.cat(out, dim=1) 81 | 82 | return out 83 | 84 | 85 | if __name__ == '__main__': 86 | 87 | input = torch.randn(1, 32, 112, 112) 88 | 89 | block = ULSAM(32, 64, 112, 112, 4) 90 | print(input.size()) 91 | output = block(input) 92 | 93 | print(output.size()) -------------------------------------------------------------------------------- /注意力/(WACV 2021)TripletAttention.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------- 2 | # 论文: Rotate to Attend: Convolutional Triplet Attention Module (WACV 2021) 3 | # Github地址: https://github.com/landskape-ai/triplet-attention 4 | # --------------------------------------- 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | class BasicConv(nn.Module): 11 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, 12 | bn=True, bias=False): 13 | super(BasicConv, self).__init__() 14 | self.out_channels = out_planes 15 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, 16 | dilation=dilation, groups=groups, bias=bias) 17 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None 18 | self.relu = nn.ReLU() if relu else None 19 | 20 | def forward(self, x): 21 | x = self.conv(x) 22 | if self.bn is not None: 23 | x = self.bn(x) 24 | if self.relu is not None: 25 | x = self.relu(x) 26 | return x 27 | 28 | 29 | class ZPool(nn.Module): 30 | def forward(self, x): 31 | return torch.cat((torch.max(x, 1)[0].unsqueeze(1), torch.mean(x, 1).unsqueeze(1)), dim=1) 32 | 33 | 34 | class AttentionGate(nn.Module): 35 | def __init__(self): 36 | super(AttentionGate, self).__init__() 37 | kernel_size = 7 38 | self.compress = ZPool() 39 | self.conv = 
BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size - 1) // 2, relu=False) 40 | 41 | def forward(self, x): 42 | x_compress = self.compress(x) 43 | x_out = self.conv(x_compress) 44 | scale = torch.sigmoid_(x_out) 45 | return x * scale 46 | 47 | 48 | class TripletAttention(nn.Module): 49 | def __init__(self, no_spatial=False): 50 | super(TripletAttention, self).__init__() 51 | self.cw = AttentionGate() 52 | self.hc = AttentionGate() 53 | self.no_spatial = no_spatial 54 | if not no_spatial: 55 | self.hw = AttentionGate() 56 | 57 | def forward(self, x): 58 | x_perm1 = x.permute(0, 2, 1, 3).contiguous() 59 | x_out1 = self.cw(x_perm1) 60 | x_out11 = x_out1.permute(0, 2, 1, 3).contiguous() 61 | x_perm2 = x.permute(0, 3, 2, 1).contiguous() 62 | x_out2 = self.hc(x_perm2) 63 | x_out21 = x_out2.permute(0, 3, 2, 1).contiguous() 64 | if not self.no_spatial: 65 | x_out = self.hw(x) 66 | x_out = 1 / 3 * (x_out + x_out11 + x_out21) 67 | else: 68 | x_out = 1 / 2 * (x_out11 + x_out21) 69 | return x_out 70 | 71 | 72 | # 输入 B C H W, 输出 B C H W 73 | if __name__ == '__main__': 74 | input = torch.randn(3, 32, 64, 64) 75 | triplet = TripletAttention() 76 | output = triplet(input) 77 | print(output.shape) 78 | -------------------------------------------------------------------------------- /注意力/(TPAMI 2021)OutlookAttention.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------- 2 | # 论文: VOLO: Vision Outlooker for Visual Recognition (TPAMI 2021) 3 | # Github地址: https://github.com/sail-sg/volo 4 | # --------------------------------------- 5 | import torch 6 | from torch import nn 7 | import math 8 | from torch.nn import functional as F 9 | 10 | 11 | class OutlookAttention(nn.Module): 12 | """ 13 | Implementation of outlook attention 14 | --dim: hidden dim 15 | --num_heads: number of heads 16 | --kernel_size: kernel size in each window for outlook attention 17 | return: token features after outlook attention 18 | """ 19 | 20 | def __init__(self, dim, num_heads, kernel_size=3, padding=1, stride=1, 21 | qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): 22 | super().__init__() 23 | head_dim = dim // num_heads 24 | self.num_heads = num_heads 25 | self.kernel_size = kernel_size 26 | self.padding = padding 27 | self.stride = stride 28 | self.scale = qk_scale or head_dim**-0.5 29 | 30 | self.v = nn.Linear(dim, dim, bias=qkv_bias) 31 | self.attn = nn.Linear(dim, kernel_size**4 * num_heads) 32 | 33 | self.attn_drop = nn.Dropout(attn_drop) 34 | self.proj = nn.Linear(dim, dim) 35 | self.proj_drop = nn.Dropout(proj_drop) 36 | 37 | self.unfold = nn.Unfold(kernel_size=kernel_size, padding=padding, stride=stride) 38 | self.pool = nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True) 39 | 40 | def forward(self, x): 41 | B, H, W, C = x.shape 42 | 43 | v = self.v(x).permute(0, 3, 1, 2) # B, C, H, W 44 | 45 | h, w = math.ceil(H / self.stride), math.ceil(W / self.stride) 46 | v = self.unfold(v).reshape(B, self.num_heads, C // self.num_heads, 47 | self.kernel_size * self.kernel_size, 48 | h * w).permute(0, 1, 4, 3, 2) # B,H,N,kxk,C/H 49 | 50 | attn = self.pool(x.permute(0, 3, 1, 2)).permute(0, 2, 3, 1) 51 | attn = self.attn(attn).reshape( 52 | B, h * w, self.num_heads, self.kernel_size * self.kernel_size, 53 | self.kernel_size * self.kernel_size).permute(0, 2, 1, 3, 4) # B,H,N,kxk,kxk 54 | attn = attn * self.scale 55 | attn = attn.softmax(dim=-1) 56 | attn = self.attn_drop(attn) 57 | 58 | x = (attn @ v).permute(0, 1, 4, 3, 2).reshape( 
59 | B, C * self.kernel_size * self.kernel_size, h * w) 60 | x = F.fold(x, output_size=(H, W), kernel_size=self.kernel_size, 61 | padding=self.padding, stride=self.stride) 62 | 63 | x = self.proj(x.permute(0, 2, 3, 1)) 64 | x = self.proj_drop(x) 65 | 66 | return x 67 | 68 | 69 | # 输入 B, H, W, C, 输出 B, H, W, C 70 | if __name__ == '__main__': 71 | block = OutlookAttention(dim=32,num_heads=8).cuda() 72 | input = torch.rand(3, 64, 64, 32).cuda() 73 | output = block(input) 74 | print(input.size(), output.size()) 75 | -------------------------------------------------------------------------------- /点云/Attention.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | #论文:Point MixSwap: Attentional Point Cloud Mixing via Swapping Matched Structural Divisions 6 | #论文地址:https://vllab.cs.nycu.edu.tw/images/paper/eccv_umam22.pdf 7 | class Attention(nn.Module): 8 | def __init__(self, dim_Q, dim_K, dim_LIN, num_heads, n_pts=1024, ln=False): 9 | super(Attention, self).__init__() 10 | self.n_pts = n_pts 11 | self.dim_LIN = dim_LIN 12 | self.num_heads = num_heads 13 | self.fc_q = nn.Linear(dim_Q, dim_LIN) 14 | self.fc_k = nn.Linear(dim_K, dim_LIN) 15 | self.fc_v = nn.Linear(dim_K, dim_LIN) 16 | if ln: 17 | self.ln0 = nn.LayerNorm(dim_LIN) 18 | self.ln1 = nn.LayerNorm(dim_LIN) 19 | self.fc_o = nn.Linear(dim_LIN, dim_LIN) 20 | 21 | def forward(self, Q, K, return_attn=True): # Q = [BS, 1, emb_dim = dim_Q]; K = [BS, n_pts, emb_dim= dim_K] 22 | Q = self.fc_q(Q) # [BS=6, n_div=3, dim_V=1024] 23 | K, V = self.fc_k(K), self.fc_v(K) # K = [BS=6, n_pts=1024, emb_dim = dim_V = 1024]; V_dim= K_dim 24 | dim_split = self.dim_LIN // self.num_heads 25 | Q_ = torch.cat(Q.split(dim_split, 2), 0) #[BS*n_head=6*n_head,n_div,dim_split=1024/1=1024] --> every n_div here & below can be n_pts 26 | K_ = torch.cat(K.split(dim_split, 2), 0) #[BS*n_head=6*n_head,n_pts,dim_split=1024/1=1024] 27 | V_ = torch.cat(V.split(dim_split, 2), 0) #[BS*n_head=6*n_head,n_pts,dim_split=1024/1=1024] 28 | A = torch.softmax(Q_.bmm(K_.transpose(1,2)) / math.sqrt(self.dim_LIN), 2) #[BS*n_head=6*n_head,n_div,dim_split=1024/1=1024] 29 | temp = (Q_ + A.bmm(V_)).split(Q.size(0), 0) #tupple of n_head, @[BS=6,n_div=3,dim_split=1024] 30 | O = torch.cat(temp, 2) #[BS=6,n_div=3,dim_split*n_head=emb=1024] 31 | O = O if getattr(self, 'ln0', None) is None else self.ln0(O) 32 | O = O + F.relu(self.fc_o(O)) 33 | O = O if getattr(self, 'ln1', None) is None else self.ln1(O) 34 | if self.num_heads >= 2: 35 | A = A.split(Q.size(0),dim=0) #tupple of n_head, @[BS=6,n_div=3,dim_split=1024] 36 | A = torch.stack([tensor_ for tensor_ in A], dim=0) #[n_head,BS,n_div=3,emb=1024] 37 | A = torch.mean(A, dim=0) #[BS,n_div=3,emb=1024] 38 | if return_attn: 39 | if A.size(-1) == self.n_pts: 40 | A = A.permute(0, 2, 1) #[BS, n_pts, n_div] 41 | return O, A 42 | else: 43 | return O 44 | 45 | 46 | if __name__ == '__main__': 47 | # 定义注意力机制 48 | block = Attention(dim_Q=1024, dim_K=1024, dim_LIN=1024, num_heads=8, n_pts=1024, ln=True) 49 | 50 | # 创建模拟输入数据 51 | batch_size = 6 52 | Q = torch.randn(batch_size, 1, 1024) # Query 张量 53 | K = torch.randn(batch_size, 1024, 1024) # Key 张量 54 | 55 | # 执行前向传播 56 | output, attention_scores = block(Q, K, return_attn=True) 57 | 58 | print(output.size()) 59 | print(attention_scores.size()) 60 | -------------------------------------------------------------------------------- /特征融合/(TIP2024)CGA特征融合模块.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # 论文:DEA-Net: Single image dehazing based on detail enhanced convolution and content-guided attention 3 | # GitHub地址:https://github.com/cecret3350/DEA-Net/tree/main 4 | # -------------------------------------------------------- 5 | 6 | import torch 7 | from torch import nn 8 | from einops.layers.torch import Rearrange 9 | 10 | 11 | class SpatialAttention(nn.Module): 12 | def __init__(self): 13 | super(SpatialAttention, self).__init__() 14 | self.sa = nn.Conv2d(2, 1, 7, padding=3, padding_mode='reflect', bias=True) 15 | 16 | def forward(self, x): 17 | x_avg = torch.mean(x, dim=1, keepdim=True) 18 | x_max, _ = torch.max(x, dim=1, keepdim=True) 19 | x2 = torch.cat([x_avg, x_max], dim=1) 20 | sattn = self.sa(x2) 21 | return sattn 22 | 23 | 24 | class ChannelAttention(nn.Module): 25 | def __init__(self, dim, reduction=8): 26 | super(ChannelAttention, self).__init__() 27 | self.gap = nn.AdaptiveAvgPool2d(1) 28 | self.ca = nn.Sequential( 29 | nn.Conv2d(dim, dim // reduction, 1, padding=0, bias=True), 30 | nn.ReLU(inplace=True), 31 | nn.Conv2d(dim // reduction, dim, 1, padding=0, bias=True), 32 | ) 33 | 34 | def forward(self, x): 35 | x_gap = self.gap(x) 36 | cattn = self.ca(x_gap) 37 | return cattn 38 | 39 | 40 | class PixelAttention(nn.Module): 41 | def __init__(self, dim): 42 | super(PixelAttention, self).__init__() 43 | self.pa2 = nn.Conv2d(2 * dim, dim, 7, padding=3, padding_mode='reflect', groups=dim, bias=True) 44 | self.sigmoid = nn.Sigmoid() 45 | 46 | def forward(self, x, pattn1): 47 | B, C, H, W = x.shape 48 | x = x.unsqueeze(dim=2) # B, C, 1, H, W 49 | pattn1 = pattn1.unsqueeze(dim=2) # B, C, 1, H, W 50 | x2 = torch.cat([x, pattn1], dim=2) # B, C, 2, H, W 51 | x2 = Rearrange('b c t h w -> b (c t) h w')(x2) 52 | pattn2 = self.pa2(x2) 53 | pattn2 = self.sigmoid(pattn2) 54 | return pattn2 55 | 56 | 57 | class CGAFusion(nn.Module): 58 | def __init__(self, dim, reduction=8): 59 | super(CGAFusion, self).__init__() 60 | self.sa = SpatialAttention() 61 | self.ca = ChannelAttention(dim, reduction) 62 | self.pa = PixelAttention(dim) 63 | self.conv = nn.Conv2d(dim, dim, 1, bias=True) 64 | self.sigmoid = nn.Sigmoid() 65 | 66 | def forward(self, x, y): 67 | initial = x + y 68 | cattn = self.ca(initial) 69 | sattn = self.sa(initial) 70 | pattn1 = sattn + cattn 71 | pattn2 = self.sigmoid(self.pa(initial, pattn1)) 72 | result = initial + pattn2 * x + (1 - pattn2) * y 73 | result = self.conv(result) 74 | return result 75 | 76 | 77 | # 特征融合 78 | if __name__ == '__main__': 79 | block = CGAFusion(32) 80 | input1 = torch.rand(3, 32, 64, 64) # 输入 N C H W 81 | input2 = torch.rand(3, 32, 64, 64) 82 | output = block(input1, input2) 83 | print(output.size()) 84 | -------------------------------------------------------------------------------- /注意力/(CVPR 2024)SHSA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # 论文:SHViT: Single-Head Vision Transformer with Memory Efficient Macro Design, CVPR 2024 4 | # 论文地址:https://arxiv.org/pdf/2401.16456 5 | # Github地址:https://github.com/ysj9909/SHViT 6 | # 全网最全100➕即插即用模块GitHub地址:https://github.com/ai-dawang/PlugNPlay-Modules 7 | class GroupNorm(torch.nn.GroupNorm): 8 | """ 9 | Group Normalization with 1 group. 
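With a single group, normalization runs over all of (C, H, W) per sample, so this behaves like a LayerNorm applied to the whole feature map.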
10 | Input: tensor in shape [B, C, H, W] 11 | """ 12 | def __init__(self, num_channels, **kwargs): 13 | super().__init__(1, num_channels, **kwargs) 14 | 15 | 16 | class Conv2d_BN(torch.nn.Sequential): 17 | def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, 18 | groups=1, bn_weight_init=1): 19 | super().__init__() 20 | self.add_module('c', torch.nn.Conv2d( 21 | a, b, ks, stride, pad, dilation, groups, bias=False)) 22 | self.add_module('bn', torch.nn.BatchNorm2d(b)) 23 | torch.nn.init.constant_(self.bn.weight, bn_weight_init) 24 | torch.nn.init.constant_(self.bn.bias, 0) 25 | 26 | @torch.no_grad() 27 | def fuse(self): 28 | c, bn = self._modules.values() 29 | w = bn.weight / (bn.running_var + bn.eps)**0.5 30 | w = c.weight * w[:, None, None, None] 31 | b = bn.bias - bn.running_mean * bn.weight / \ 32 | (bn.running_var + bn.eps)**0.5 33 | m = torch.nn.Conv2d(w.size(1) * self.c.groups, w.size( 34 | 0), w.shape[2:], stride=self.c.stride, padding=self.c.padding, dilation=self.c.dilation, groups=self.c.groups, 35 | device=c.weight.device) 36 | m.weight.data.copy_(w) 37 | m.bias.data.copy_(b) 38 | return m 39 | 40 | class SHSA(torch.nn.Module): 41 | """Single-Head Self-Attention""" 42 | 43 | def __init__(self, dim, qk_dim=16, pdim=32): 44 | super().__init__() 45 | self.scale = qk_dim ** -0.5 46 | self.qk_dim = qk_dim 47 | self.dim = dim 48 | self.pdim = pdim 49 | 50 | self.pre_norm = GroupNorm(pdim) 51 | 52 | self.qkv = Conv2d_BN(pdim, qk_dim * 2 + pdim) 53 | self.proj = torch.nn.Sequential(torch.nn.ReLU(), Conv2d_BN( 54 | dim, dim, bn_weight_init=0)) 55 | 56 | def forward(self, x): 57 | B, C, H, W = x.shape 58 | x1, x2 = torch.split(x, [self.pdim, self.dim - self.pdim], dim=1) 59 | x1 = self.pre_norm(x1) 60 | qkv = self.qkv(x1) 61 | q, k, v = qkv.split([self.qk_dim, self.qk_dim, self.pdim], dim=1) 62 | q, k, v = q.flatten(2), k.flatten(2), v.flatten(2) 63 | 64 | attn = (q.transpose(-2, -1) @ k) * self.scale 65 | attn = attn.softmax(dim=-1) 66 | x1 = (v @ attn.transpose(-2, -1)).reshape(B, self.pdim, H, W) 67 | x = self.proj(torch.cat([x1, x2], dim=1)) 68 | 69 | return x 70 | 71 | 72 | if __name__ == '__main__': 73 | 74 | 75 | block = SHSA(64) #输入 C 76 | 77 | input = torch.randn(1, 64, 32, 32) # 输入 B C H W 78 | 79 | # Print input shape 80 | print(input.size()) 81 | 82 | # Forward pass through the SHSA module 83 | output = block(input) 84 | 85 | # Print output shape 86 | print(output.size()) 87 | -------------------------------------------------------------------------------- /MixStructure.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | #论文地址:https://arxiv.org/abs/2305.17654 4 | #论文:Mix Structure Block contains multi-scale parallel large convolution kernel module and enhanced parallel attention module 5 | 6 | class MixStructureBlock(nn.Module): 7 | def __init__(self, dim): 8 | super().__init__() 9 | 10 | self.norm1 = nn.BatchNorm2d(dim) 11 | self.norm2 = nn.BatchNorm2d(dim) 12 | 13 | self.conv1 = nn.Conv2d(dim, dim, kernel_size=1) 14 | self.conv2 = nn.Conv2d(dim, dim, kernel_size=5, padding=2, padding_mode='reflect') 15 | self.conv3_19 = nn.Conv2d(dim, dim, kernel_size=7, padding=9, groups=dim, dilation=3, padding_mode='reflect') 16 | self.conv3_13 = nn.Conv2d(dim, dim, kernel_size=5, padding=6, groups=dim, dilation=3, padding_mode='reflect') 17 | self.conv3_7 = nn.Conv2d(dim, dim, kernel_size=3, padding=3, groups=dim, dilation=3, padding_mode='reflect') 18 | 19 | # Simple Pixel Attention 20 | self.Wv = 
nn.Sequential( 21 | nn.Conv2d(dim, dim, 1), 22 | nn.Conv2d(dim, dim, kernel_size=3, padding=3 // 2, groups=dim, padding_mode='reflect') 23 | ) 24 | self.Wg = nn.Sequential( 25 | nn.AdaptiveAvgPool2d(1), 26 | nn.Conv2d(dim, dim, 1), 27 | nn.Sigmoid() 28 | ) 29 | 30 | # Channel Attention 31 | self.ca = nn.Sequential( 32 | nn.AdaptiveAvgPool2d(1), 33 | nn.Conv2d(dim, dim, 1, padding=0, bias=True), 34 | nn.GELU(), 35 | # nn.ReLU(True), 36 | nn.Conv2d(dim, dim, 1, padding=0, bias=True), 37 | nn.Sigmoid() 38 | ) 39 | 40 | # Pixel Attention 41 | self.pa = nn.Sequential( 42 | nn.Conv2d(dim, dim // 8, 1, padding=0, bias=True), 43 | nn.GELU(), 44 | # nn.ReLU(True), 45 | nn.Conv2d(dim // 8, 1, 1, padding=0, bias=True), 46 | nn.Sigmoid() 47 | ) 48 | 49 | self.mlp = nn.Sequential( 50 | nn.Conv2d(dim * 3, dim * 4, 1), 51 | nn.GELU(), 52 | # nn.ReLU(True), 53 | nn.Conv2d(dim * 4, dim, 1) 54 | ) 55 | self.mlp2 = nn.Sequential( 56 | nn.Conv2d(dim * 3, dim * 4, 1), 57 | nn.GELU(), 58 | # nn.ReLU(True), 59 | nn.Conv2d(dim * 4, dim, 1) 60 | ) 61 | 62 | def forward(self, x): 63 | identity = x 64 | x = self.norm1(x) 65 | x = self.conv1(x) 66 | x = self.conv2(x) 67 | x = torch.cat([self.conv3_19(x), self.conv3_13(x), self.conv3_7(x)], dim=1) 68 | x = self.mlp(x) 69 | x = identity + x 70 | 71 | identity = x 72 | x = self.norm2(x) 73 | x = torch.cat([self.Wv(x) * self.Wg(x), self.ca(x) * x, self.pa(x) * x], dim=1) 74 | x = self.mlp2(x) 75 | x = identity + x 76 | return x 77 | 78 | 79 | if __name__ == '__main__': 80 | 81 | 82 | block = MixStructureBlock(dim=64) 83 | 84 | 85 | input = torch.rand(1, 64, 128, 128) # B C H W 86 | 87 | 88 | output = block(input) 89 | 90 | print(input.size()) 91 | print(output.size()) 92 | -------------------------------------------------------------------------------- /CPAM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | #论文:ASF-YOLO: A Novel YOLO Model with Attentional Scale Sequence Fusion for Cell Instance Segmentation(IMAVIS) 5 | #论文地址:https://arxiv.org/abs/2312.06458 6 | 7 | class channel_att(nn.Module): 8 | def __init__(self, channel, b=1, gamma=2): 9 | super(channel_att, self).__init__() 10 | kernel_size = int(abs((math.log(channel, 2) + b) / gamma)) 11 | kernel_size = kernel_size if kernel_size % 2 else kernel_size + 1 12 | 13 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 14 | self.conv = nn.Conv1d(1, 1, kernel_size=kernel_size, padding=(kernel_size - 1) // 2, bias=False) 15 | self.sigmoid = nn.Sigmoid() 16 | 17 | def forward(self, x): 18 | y = self.avg_pool(x) # 自适应平均池化 19 | y = y.squeeze(-1) 20 | y = y.transpose(-1, -2) 21 | y = self.conv(y).transpose(-1, -2).unsqueeze(-1) # 1D卷积 22 | y = self.sigmoid(y) # Sigmoid激活 23 | return x * y.expand_as(x) # 通道逐元素相乘 24 | 25 | 26 | class local_att(nn.Module): 27 | def __init__(self, channel, reduction=16): 28 | super(local_att, self).__init__() 29 | 30 | self.conv_1x1 = nn.Conv2d(in_channels=channel, out_channels=channel // reduction, kernel_size=1, stride=1, 31 | bias=False) 32 | 33 | self.relu = nn.ReLU() 34 | self.bn = nn.BatchNorm2d(channel // reduction) 35 | 36 | self.F_h = nn.Conv2d(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, 37 | bias=False) 38 | self.F_w = nn.Conv2d(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, 39 | bias=False) 40 | 41 | self.sigmoid_h = nn.Sigmoid() 42 | self.sigmoid_w = nn.Sigmoid() 43 | 44 | def forward(self, x): 45 | _, _, h, w = x.size() 46 | 
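# Coordinate-style attention: pool along W to get a (B, C, H, 1) descriptor (transposed to (B, C, 1, H))
# and along H to get (B, C, 1, W); both pass through a shared 1x1 bottleneck, are split back,
# and become per-direction gates s_h / s_w that rescale the input.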
47 | x_h = torch.mean(x, dim=3, keepdim=True).permute(0, 1, 3, 2) 48 | x_w = torch.mean(x, dim=2, keepdim=True) 49 | 50 | x_cat_conv_relu = self.relu(self.bn(self.conv_1x1(torch.cat((x_h, x_w), 3)))) 51 | 52 | x_cat_conv_split_h, x_cat_conv_split_w = x_cat_conv_relu.split([h, w], 3) 53 | 54 | s_h = self.sigmoid_h(self.F_h(x_cat_conv_split_h.permute(0, 1, 3, 2))) 55 | s_w = self.sigmoid_w(self.F_w(x_cat_conv_split_w)) 56 | 57 | out = x * s_h.expand_as(x) * s_w.expand_as(x) 58 | return out 59 | 60 | #Channel and Position Attention Mechanism (CPAM) 61 | class CPAM(nn.Module): 62 | def __init__(self, ch): 63 | super().__init__() 64 | self.channel_att = channel_att(ch) 65 | self.local_att = local_att(ch) 66 | def forward(self, x): 67 | input1,input2 = x[0],x[1] 68 | input1 = self.channel_att(input1) 69 | x = input1 + input2 70 | x = self.local_att(x) 71 | return x 72 | 73 | 74 | if __name__ == '__main__': 75 | 76 | block = CPAM(128) 77 | 78 | input1 = torch.randn(1, 128, 32, 32) # B C H W 79 | input2 = torch.randn(1, 128, 32, 32) 80 | inputs = [input1, input2] 81 | output = block(inputs) 82 | 83 | 84 | print(input1.size()) 85 | print(input2.size()) 86 | print(output.size()) -------------------------------------------------------------------------------- /(arXiv 2020 ) SSAN.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/2005.10463 7 | # 论文:Simplified Self-Attention for Transformer-Based end-to-end Speech Recognition 8 | 9 | 10 | class SimplifiedScaledDotProductAttention(nn.Module): 11 | ''' 12 | Scaled dot-product attention 13 | ''' 14 | 15 | def __init__(self, d_model, h,dropout=.1): 16 | ''' 17 | :param d_model: Output dimensionality of the model 18 | :param d_k: Dimensionality of queries and keys 19 | :param d_v: Dimensionality of values 20 | :param h: Number of heads 21 | ''' 22 | super(SimplifiedScaledDotProductAttention, self).__init__() 23 | 24 | self.d_model = d_model 25 | self.d_k = d_model//h 26 | self.d_v = d_model//h 27 | self.h = h 28 | 29 | self.fc_o = nn.Linear(h * self.d_v, d_model) 30 | self.dropout=nn.Dropout(dropout) 31 | 32 | 33 | 34 | self.init_weights() 35 | 36 | 37 | def init_weights(self): 38 | for m in self.modules(): 39 | if isinstance(m, nn.Conv2d): 40 | init.kaiming_normal_(m.weight, mode='fan_out') 41 | if m.bias is not None: 42 | init.constant_(m.bias, 0) 43 | elif isinstance(m, nn.BatchNorm2d): 44 | init.constant_(m.weight, 1) 45 | init.constant_(m.bias, 0) 46 | elif isinstance(m, nn.Linear): 47 | init.normal_(m.weight, std=0.001) 48 | if m.bias is not None: 49 | init.constant_(m.bias, 0) 50 | 51 | def forward(self, queries, keys, values, attention_mask=None, attention_weights=None): 52 | ''' 53 | Computes 54 | :param queries: Queries (b_s, nq, d_model) 55 | :param keys: Keys (b_s, nk, d_model) 56 | :param values: Values (b_s, nk, d_model) 57 | :param attention_mask: Mask over attention values (b_s, h, nq, nk). True indicates masking. 58 | :param attention_weights: Multiplicative weights for attention values (b_s, h, nq, nk). 
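Note: this "simplified" variant drops the separate Q/K/V projection layers and splits the raw inputs into heads directly; only the output projection fc_o is learned.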
59 | :return: 60 | ''' 61 | b_s, nq = queries.shape[:2] 62 | nk = keys.shape[1] 63 | 64 | q = queries.view(b_s, nq, self.h, self.d_k).permute(0, 2, 1, 3) # (b_s, h, nq, d_k) 65 | k = keys.view(b_s, nk, self.h, self.d_k).permute(0, 2, 3, 1) # (b_s, h, d_k, nk) 66 | v = values.view(b_s, nk, self.h, self.d_v).permute(0, 2, 1, 3) # (b_s, h, nk, d_v) 67 | 68 | att = torch.matmul(q, k) / np.sqrt(self.d_k) # (b_s, h, nq, nk) 69 | if attention_weights is not None: 70 | att = att * attention_weights 71 | if attention_mask is not None: 72 | att = att.masked_fill(attention_mask, -np.inf) 73 | att = torch.softmax(att, -1) 74 | att=self.dropout(att) 75 | 76 | out = torch.matmul(att, v).permute(0, 2, 1, 3).contiguous().view(b_s, nq, self.h * self.d_v) # (b_s, nq, h*d_v) 77 | out = self.fc_o(out) # (b_s, nq, d_model) 78 | return out 79 | 80 | 81 | if __name__ == '__main__': 82 | input=torch.randn(50,49,512) 83 | block = SimplifiedScaledDotProductAttention(d_model=512, h=8) 84 | output=block(input,input,input) 85 | print(output.shape) 86 | 87 | -------------------------------------------------------------------------------- /3D/(CVPR 2022)DFE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | # 论文:MonoDTR: Monocular 3D Object Detection with Depth-Aware Transformer 6 | # 论文地址:https://arxiv.org/pdf/2203.13310 7 | class DepthAwareFE(nn.Module): 8 | def __init__(self, output_channel_num): 9 | super(DepthAwareFE, self).__init__() 10 | self.output_channel_num = output_channel_num 11 | self.depth_output = nn.Sequential( 12 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True), 13 | nn.Conv2d(self.output_channel_num, int(self.output_channel_num / 2), 3, padding=1), 14 | nn.BatchNorm2d(int(self.output_channel_num / 2)), 15 | nn.ReLU(), 16 | nn.Conv2d(int(self.output_channel_num / 2), 96, 1), 17 | ) 18 | self.depth_down = nn.Conv2d(96, 12, 3, stride=1, padding=1, groups=12) 19 | self.acf = dfe_module(256, 256) 20 | 21 | def forward(self, x): 22 | depth = self.depth_output(x) 23 | N, C, H, W = x.shape 24 | depth_guide = F.interpolate(depth, size=x.size()[2:], mode='bilinear', align_corners=False) 25 | depth_guide = self.depth_down(depth_guide) 26 | x = x + self.acf(x, depth_guide) 27 | 28 | return depth, depth_guide, x 29 | 30 | 31 | class dfe_module(nn.Module): 32 | 33 | def __init__(self, in_channels, out_channels): 34 | super(dfe_module, self).__init__() 35 | self.softmax = nn.Softmax(dim=-1) 36 | self.conv1 = nn.Sequential(nn.Conv2d(in_channels, out_channels, 1, bias=False), 37 | nn.BatchNorm2d(out_channels), 38 | nn.ReLU(True), 39 | nn.Dropout2d(0.2, False)) 40 | self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=1, stride=1, padding=0) 41 | 42 | def forward(self, feat_ffm, coarse_x): 43 | N, D, H, W = coarse_x.size() 44 | 45 | # depth prototype 46 | feat_ffm = self.conv1(feat_ffm) 47 | _, C, _, _ = feat_ffm.size() 48 | 49 | proj_query = coarse_x.view(N, D, -1) 50 | proj_key = feat_ffm.view(N, C, -1).permute(0, 2, 1) 51 | energy = torch.bmm(proj_query, proj_key) 52 | energy_new = torch.max(energy, -1, keepdim=True)[0].expand_as(energy) - energy 53 | attention = self.softmax(energy_new) 54 | 55 | # depth enhancement 56 | attention = attention.permute(0, 2, 1) 57 | proj_value = coarse_x.view(N, D, -1) 58 | out = torch.bmm(attention, proj_value) 59 | out = out.view(N, C, H, W) 60 | out = self.conv2(out) 61 | 62 | return out 63 | 64 | if __name__ == '__main__': 65 | 66 | 
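# Quick sanity check for dfe_module on its own: build a feature map plus a coarse depth distribution,
# attend over the D depth bins, and confirm the output keeps the feature-map shape.
# (DepthAwareFE above wires this same module behind its depth head via self.acf.)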
# 假定输入特征图的尺寸为 [N, C, H, W] = [1, 256, 64, 64] 67 | # 假定粗糙深度图的尺寸为 [N, D, H, W] = [1, 12, 64, 64] 68 | 69 | N, C, H, W = 1, 256, 64, 64 70 | D = 12 71 | 72 | # 初始化输入特征图和粗糙深度图 73 | feat_ffm = torch.rand(N, C, H, W) # 输入特征图 74 | coarse_x = torch.rand(N, D, H, W) # 粗糙深度图 75 | 76 | # 初始化dfe_module 77 | dfe = dfe_module(in_channels=C, out_channels=C) # 使用相同的通道数作为示例 78 | 79 | # 前向传播 80 | output = dfe(feat_ffm, coarse_x) 81 | 82 | # 打印输入和输出尺寸 83 | print("Input feat_ffm size:", feat_ffm.size()) 84 | print(" Output size:", output.size()) 85 | -------------------------------------------------------------------------------- /LMFLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | #论文:LMFLOSS: A HYBRID LOSS FOR IMBALANCED MEDICAL IMAGE CLASSIFICATION 6 | 7 | 8 | class FocalLoss(nn.Module): 9 | 10 | def __init__(self, alpha, gamma=2): 11 | super().__init__() 12 | self.alpha = alpha 13 | self.gamma = gamma 14 | 15 | def forward(self, output, target): 16 | num_classes = output.size(1) 17 | assert len(self.alpha) == num_classes, \ 18 | 'Length of weight tensor must match the number of classes' 19 | logp = F.cross_entropy(output, target, self.alpha) 20 | p = torch.exp(-logp) 21 | focal_loss = (1 - p) ** self.gamma * logp 22 | 23 | return torch.mean(focal_loss) 24 | 25 | 26 | class LDAMLoss(nn.Module): 27 | 28 | def __init__(self, cls_num_list, max_m=0.5, weight=None, s=30): 29 | """ 30 | max_m: The appropriate value for max_m depends on the specific dataset and the severity of the class imbalance. 31 | You can start with a small value and gradually increase it to observe the impact on the model's performance. 32 | If the model struggles with class separation or experiences underfitting, increasing max_m might help. However, 33 | be cautious not to set it too high, as it can cause overfitting or make the model too conservative. 34 | 35 | s: The choice of s depends on the desired scale of the logits and the specific requirements of your problem. 36 | It can be used to adjust the balance between the margin and the original logits. A larger s value amplifies 37 | the impact of the logits and can be useful when dealing with highly imbalanced datasets. 38 | You can experiment with different values of s to find the one that works best for your dataset and model. 
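A minimal usage sketch (hypothetical class counts; a CUDA device is assumed because the margins
and the one-hot index are built with torch.cuda.FloatTensor):

    criterion = LDAMLoss(cls_num_list=[500, 50, 5], max_m=0.5, s=30)
    loss = criterion(logits, targets)   # logits: (B, 3) on GPU, targets: (B,) long

Margins follow the LDAM rule m_j ∝ n_j^(-1/4), rescaled so the rarest class receives margin max_m.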
39 | 40 | """ 41 | super(LDAMLoss, self).__init__() 42 | m_list = 1.0 / np.sqrt(np.sqrt(cls_num_list)) 43 | m_list = m_list * (max_m / np.max(m_list)) 44 | m_list = torch.cuda.FloatTensor(m_list) 45 | self.m_list = m_list 46 | assert s > 0 47 | self.s = s 48 | self.weight = weight 49 | 50 | def forward(self, x, target): 51 | index = torch.zeros_like(x, dtype=torch.uint8) 52 | index.scatter_(1, target.data.view(-1, 1), 1) 53 | 54 | index_float = index.type(torch.cuda.FloatTensor) 55 | batch_m = torch.matmul(self.m_list[None, :], index_float.transpose(0, 1)) 56 | batch_m = batch_m.view((-1, 1)) 57 | x_m = x - batch_m 58 | 59 | output = torch.where(index, x_m, x) 60 | return F.cross_entropy(self.s * output, target, weight=self.weight) 61 | 62 | 63 | class LMFLoss(nn.Module): 64 | def __init__(self, cls_num_list, weight, alpha=1, beta=1, gamma=2, max_m=0.5, s=30): 65 | super().__init__() 66 | self.focal_loss = FocalLoss(weight, gamma) 67 | self.ldam_loss = LDAMLoss(cls_num_list, max_m, weight, s) 68 | self.alpha = alpha 69 | self.beta = beta 70 | 71 | def forward(self, output, target): 72 | focal_loss_output = self.focal_loss(output, target) 73 | ldam_loss_output = self.ldam_loss(output, target) 74 | total_loss = self.alpha * focal_loss_output + self.beta * ldam_loss_output 75 | return total_loss -------------------------------------------------------------------------------- /(Elsevier 2024)CF_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | # 论文:CF-Loss: Clinically-relevant feature optimised loss function for retinal multi-class vessel segmentation and vascular feature measurement 5 | def encode_mask_3d(ground_truth, num_classes=4): 6 | batch_size, _, depth, height, width = ground_truth.size() 7 | one_hot = torch.zeros((batch_size, num_classes, depth, height, width), device=ground_truth.device) 8 | ground_truth = ground_truth.long() 9 | one_hot = one_hot.scatter_(1, ground_truth, 1) 10 | return one_hot 11 | 12 | class CF_Loss_3D(nn.Module): 13 | def __init__(self, img_depth, beta, alpha, gamma): 14 | super(CF_Loss_3D, self).__init__() 15 | self.beta = beta 16 | self.alpha = alpha 17 | self.gamma = gamma 18 | self.img_depth = img_depth 19 | self.CE = nn.CrossEntropyLoss() 20 | self.p = torch.tensor([img_depth], dtype=torch.float, device='cuda') 21 | self.n = torch.log(self.p) / torch.log(torch.tensor([2.0], device='cuda')) 22 | self.n = torch.floor(self.n) 23 | self.sizes = 2 ** torch.arange(self.n.item(), 1, -1, device='cuda').to(dtype=torch.int) 24 | 25 | def get_count_3d(self, sizes, p, masks_pred_softmax): 26 | counts = torch.zeros((masks_pred_softmax.shape[0], len(sizes), 2), device='cuda') 27 | index = 0 28 | 29 | for size in sizes: 30 | # 对3D数据使用3D池化 31 | stride = (1, size, size) # 保持深度方向不变 32 | pool = nn.AvgPool3d(kernel_size=(1, size, size), stride=stride) 33 | 34 | S = pool(masks_pred_softmax) 35 | S = S * ((S > 0) & (S < (size * size))) 36 | counts[..., index, 0] = (S[:, 0, ...] - S[:, 2, ...]).abs().sum() / (S[:, 2, ...] > 0).sum() 37 | counts[..., index, 1] = (S[:, 1, ...] - S[:, 3, ...]).abs().sum() / (S[:, 3, ...] 
> 0).sum() 38 | 39 | index += 1 40 | 41 | return counts 42 | 43 | def forward(self, prediction, ground_truth): 44 | # 假设ground_truth已经是适当格式 45 | ground_truth_encoded = encode_mask_3d(ground_truth) # 需要定义适用于3D数据的encode_mask_3d 46 | prediction_softmax = F.softmax(prediction, dim=1) 47 | 48 | loss_CE = self.CE(prediction_softmax, ground_truth.squeeze(1).long()) 49 | 50 | Loss_vd = (torch.abs(prediction_softmax[:, 1, ...].sum() - ground_truth_encoded[:, 1, ...].sum()) + torch.abs(prediction_softmax[:, 2, ...].sum() - ground_truth_encoded[:, 2, ...].sum())) / (prediction_softmax.shape[0] * prediction_softmax.shape[2] * prediction_softmax.shape[3] * prediction_softmax.shape[4]) 51 | 52 | prediction_softmax = prediction_softmax[:, 1:3, ...] 53 | ground_truth_encoded = ground_truth_encoded[:, 1:3, ...] 54 | combined = torch.cat((prediction_softmax, ground_truth_encoded), 1) 55 | counts = self.get_count_3d(self.sizes, self.p, combined) 56 | 57 | artery_ = torch.sqrt(torch.sum(self.sizes * ((counts[..., 0]) ** 2))) 58 | vein_ = torch.sqrt(torch.sum(self.sizes * ((counts[..., 1]) ** 2))) 59 | size_t = torch.sqrt(torch.sum(self.sizes ** 2)) 60 | loss_FD = (artery_ + vein_) / size_t / prediction_softmax.shape[0] 61 | 62 | loss_value = self.beta * loss_CE + self.alpha * loss_FD + self.gamma * Loss_vd 63 | 64 | return loss_value 65 | 66 | 67 | -------------------------------------------------------------------------------- /(arXiv 2023) ScaledDotProductAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/1706.03762 7 | # 论文:Attention Is All You Need 8 | 9 | 10 | class ScaledDotProductAttention(nn.Module): 11 | ''' 12 | Scaled dot-product attention 13 | ''' 14 | 15 | def __init__(self, d_model, d_k, d_v, h,dropout=.1): 16 | ''' 17 | :param d_model: Output dimensionality of the model 18 | :param d_k: Dimensionality of queries and keys 19 | :param d_v: Dimensionality of values 20 | :param h: Number of heads 21 | ''' 22 | super(ScaledDotProductAttention, self).__init__() 23 | self.fc_q = nn.Linear(d_model, h * d_k) 24 | self.fc_k = nn.Linear(d_model, h * d_k) 25 | self.fc_v = nn.Linear(d_model, h * d_v) 26 | self.fc_o = nn.Linear(h * d_v, d_model) 27 | self.dropout=nn.Dropout(dropout) 28 | 29 | self.d_model = d_model 30 | self.d_k = d_k 31 | self.d_v = d_v 32 | self.h = h 33 | 34 | self.init_weights() 35 | 36 | 37 | def init_weights(self): 38 | for m in self.modules(): 39 | if isinstance(m, nn.Conv2d): 40 | init.kaiming_normal_(m.weight, mode='fan_out') 41 | if m.bias is not None: 42 | init.constant_(m.bias, 0) 43 | elif isinstance(m, nn.BatchNorm2d): 44 | init.constant_(m.weight, 1) 45 | init.constant_(m.bias, 0) 46 | elif isinstance(m, nn.Linear): 47 | init.normal_(m.weight, std=0.001) 48 | if m.bias is not None: 49 | init.constant_(m.bias, 0) 50 | 51 | def forward(self, queries, keys, values, attention_mask=None, attention_weights=None): 52 | ''' 53 | Computes 54 | :param queries: Queries (b_s, nq, d_model) 55 | :param keys: Keys (b_s, nk, d_model) 56 | :param values: Values (b_s, nk, d_model) 57 | :param attention_mask: Mask over attention values (b_s, h, nq, nk). True indicates masking. 58 | :param attention_weights: Multiplicative weights for attention values (b_s, h, nq, nk). 
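Internally, q/k/v are first projected to h heads of width d_k (d_v for values), scores are scaled by 1/sqrt(d_k), and the concatenated heads are mapped back to d_model by fc_o.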
59 | :return: 60 | ''' 61 | b_s, nq = queries.shape[:2] 62 | nk = keys.shape[1] 63 | 64 | q = self.fc_q(queries).view(b_s, nq, self.h, self.d_k).permute(0, 2, 1, 3) # (b_s, h, nq, d_k) 65 | k = self.fc_k(keys).view(b_s, nk, self.h, self.d_k).permute(0, 2, 3, 1) # (b_s, h, d_k, nk) 66 | v = self.fc_v(values).view(b_s, nk, self.h, self.d_v).permute(0, 2, 1, 3) # (b_s, h, nk, d_v) 67 | 68 | att = torch.matmul(q, k) / np.sqrt(self.d_k) # (b_s, h, nq, nk) 69 | if attention_weights is not None: 70 | att = att * attention_weights 71 | if attention_mask is not None: 72 | att = att.masked_fill(attention_mask, -np.inf) 73 | att = torch.softmax(att, -1) 74 | att=self.dropout(att) 75 | 76 | out = torch.matmul(att, v).permute(0, 2, 1, 3).contiguous().view(b_s, nq, self.h * self.d_v) # (b_s, nq, h*d_v) 77 | out = self.fc_o(out) # (b_s, nq, d_model) 78 | return out 79 | 80 | 81 | if __name__ == '__main__': 82 | input=torch.randn(50,49,512) 83 | sa = ScaledDotProductAttention(d_model=512, d_k=512, d_v=512, h=8) 84 | output=sa(input,input,input) 85 | print(output.shape) 86 | 87 | -------------------------------------------------------------------------------- /(CVPR 2019) DCNv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | from torchvision.ops import deform_conv2d 5 | 6 | # 论文地址:https://openaccess.thecvf.com/content_CVPR_2019/papers/Zhu_Deformable_ConvNets_V2_More_Deformable_Better_Results_CVPR_2019_paper.pdf 7 | # 论文:Deformable ConvNets v2: More Deformable, Better Results 8 | 9 | 10 | # 自动填充padding的函数 11 | def autopad(kernel_size, padding): 12 | # 默认返回的padding让卷积层输入输出大小相同(保持原大小) 13 | return padding if padding is not None else kernel_size // 2 14 | 15 | class DCNv2(nn.Module): 16 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, 17 | padding=1, groups=1, act=True, dilation=1, deformable_groups=1): 18 | super(DCNv2, self).__init__() 19 | 20 | self.in_channels = in_channels 21 | self.out_channels = out_channels 22 | self.kernel_size = (kernel_size, kernel_size) 23 | self.stride = (stride, stride) 24 | self.padding = (autopad(kernel_size, padding), autopad(kernel_size, padding)) 25 | self.dilation = (dilation, dilation) 26 | self.groups = groups 27 | self.deformable_groups = deformable_groups 28 | 29 | self.weight = nn.Parameter( 30 | torch.empty(out_channels, in_channels, *self.kernel_size) 31 | ) 32 | self.bias = nn.Parameter(torch.empty(out_channels)) 33 | 34 | out_channels_offset_mask = (self.deformable_groups * 3 * 35 | self.kernel_size[0] * self.kernel_size[1]) 36 | self.conv_offset_mask = nn.Conv2d( 37 | self.in_channels, 38 | out_channels_offset_mask, 39 | kernel_size=self.kernel_size, 40 | stride=self.stride, 41 | padding=self.padding, 42 | bias=True, 43 | ) 44 | self.bn = nn.BatchNorm2d(out_channels) 45 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 46 | self.reset_parameters() 47 | 48 | def forward(self, x): 49 | offset_mask = self.conv_offset_mask(x) 50 | o1, o2, mask = torch.chunk(offset_mask, 3, dim=1) 51 | offset = torch.cat((o1, o2), dim=1) 52 | mask = torch.sigmoid(mask) 53 | x = torch.ops.torchvision.deform_conv2d( 54 | x, 55 | self.weight, 56 | offset, 57 | mask, 58 | self.bias, 59 | self.stride[0], self.stride[1], 60 | self.padding[0], self.padding[1], 61 | self.dilation[0], self.dilation[1], 62 | self.groups, 63 | self.deformable_groups, 64 | True 65 | ) 66 | x = self.bn(x) 67 | x = self.act(x) 68 | return x 69 | 70 
| def reset_parameters(self): 71 | n = self.in_channels 72 | for k in self.kernel_size: 73 | n *= k 74 | std = 1. / math.sqrt(n) 75 | self.weight.data.uniform_(-std, std) 76 | self.bias.data.zero_() 77 | self.conv_offset_mask.weight.data.zero_() 78 | self.conv_offset_mask.bias.data.zero_() 79 | 80 | 81 | def main(): 82 | input_tensor = torch.randn(4, 3, 64, 64) 83 | block = DCNv2(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1) 84 | output_tensor = block(input_tensor) 85 | print(output_tensor.size()) 86 | 87 | 88 | if __name__ == "__main__": 89 | main() -------------------------------------------------------------------------------- /FEM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | #论文:FFCA-YOLO for Small Object Detection in Remote Sensing Images[TGRS] 5 | #论文地址:https://ieeexplore.ieee.org/document/10423050 6 | 7 | class FEM(nn.Module): 8 | def __init__(self, in_planes, out_planes, stride=1, scale=0.1, map_reduce=8): 9 | super(FEM, self).__init__() 10 | self.scale = scale 11 | self.out_channels = out_planes 12 | inter_planes = in_planes // map_reduce 13 | self.branch0 = nn.Sequential( 14 | BasicConv(in_planes, 2 * inter_planes, kernel_size=1, stride=stride), 15 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=1, relu=False) 16 | ) 17 | self.branch1 = nn.Sequential( 18 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), 19 | BasicConv(inter_planes, (inter_planes // 2) * 3, kernel_size=(1, 3), stride=stride, padding=(0, 1)), 20 | BasicConv((inter_planes // 2) * 3, 2 * inter_planes, kernel_size=(3, 1), stride=stride, padding=(1, 0)), 21 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=5, dilation=5, relu=False) 22 | ) 23 | self.branch2 = nn.Sequential( 24 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), 25 | BasicConv(inter_planes, (inter_planes // 2) * 3, kernel_size=(3, 1), stride=stride, padding=(1, 0)), 26 | BasicConv((inter_planes // 2) * 3, 2 * inter_planes, kernel_size=(1, 3), stride=stride, padding=(0, 1)), 27 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=5, dilation=5, relu=False) 28 | ) 29 | 30 | self.ConvLinear = BasicConv(6 * inter_planes, out_planes, kernel_size=1, stride=1, relu=False) 31 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) 32 | self.relu = nn.ReLU(inplace=False) 33 | 34 | def forward(self, x): 35 | x0 = self.branch0(x) 36 | x1 = self.branch1(x) 37 | x2 = self.branch2(x) 38 | 39 | out = torch.cat((x0, x1, x2), 1) 40 | out = self.ConvLinear(out) 41 | short = self.shortcut(x) 42 | out = out * self.scale + short 43 | out = self.relu(out) 44 | 45 | return out 46 | 47 | class BasicConv(nn.Module): 48 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, 49 | bn=True, bias=False): 50 | super(BasicConv, self).__init__() 51 | self.out_channels = out_planes 52 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, 53 | dilation=dilation, groups=groups, bias=bias) 54 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None 55 | self.relu = nn.ReLU(inplace=True) if relu else None 56 | 57 | def forward(self, x): 58 | x = self.conv(x) 59 | if self.bn is not None: 60 | x = self.bn(x) 61 | if self.relu is not None: 62 | x = self.relu(x) 63 | return x 64 | 65 | 66 | if 
__name__ == '__main__': 67 | 68 | input = torch.randn(1, 64, 128, 128) 69 | block = FEM(in_planes=64, out_planes=64) 70 | print(input.size()) 71 | output = block(input) 72 | # 打印输出的形状 73 | print(output.size()) -------------------------------------------------------------------------------- /缝合代码示例/DilateFormer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class DilateAttention(nn.Module): 6 | "Implementation of Dilate-attention" 7 | 8 | def __init__(self, head_dim, qk_scale=None, attn_drop=0, kernel_size=3, dilation=1): 9 | super().__init__() 10 | self.head_dim = head_dim 11 | self.scale = qk_scale or head_dim ** -0.5 12 | self.kernel_size = kernel_size 13 | self.unfold = nn.Unfold(kernel_size, dilation, dilation * (kernel_size - 1) // 2, 1) 14 | self.attn_drop = nn.Dropout(attn_drop) 15 | 16 | def forward(self, q, k, v): 17 | # B, C//3, H, W 18 | B, d, H, W = q.shape 19 | q = q.reshape([B, d // self.head_dim, self.head_dim, 1, H * W]).permute(0, 1, 4, 3, 2) # B,h,N,1,d 20 | k = self.unfold(k).reshape( 21 | [B, d // self.head_dim, self.head_dim, self.kernel_size * self.kernel_size, H * W]).permute(0, 1, 4, 2, 22 | 3) # B,h,N,d,k*k 23 | attn = (q @ k) * self.scale # B,h,N,1,k*k 24 | attn = attn.softmax(dim=-1) 25 | attn = self.attn_drop(attn) 26 | v = self.unfold(v).reshape( 27 | [B, d // self.head_dim, self.head_dim, self.kernel_size * self.kernel_size, H * W]).permute(0, 1, 4, 3, 28 | 2) # B,h,N,k*k,d 29 | x = (attn @ v).transpose(1, 2).reshape(B, H, W, d) 30 | return x 31 | 32 | 33 | class MultiDilatelocalAttention(nn.Module): 34 | "Implementation of Dilate-attention" 35 | 36 | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, 37 | attn_drop=0., proj_drop=0., kernel_size=3, dilation=[2, 3]): 38 | super().__init__() 39 | self.dim = dim 40 | self.num_heads = num_heads 41 | head_dim = dim // num_heads 42 | self.dilation = dilation 43 | self.kernel_size = kernel_size 44 | self.scale = qk_scale or head_dim ** -0.5 45 | self.num_dilation = len(dilation) 46 | assert num_heads % self.num_dilation == 0, f"num_heads{num_heads} must be the times of num_dilation{self.num_dilation}!!" 
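# One 1x1 conv produces Q, K and V for every head; the heads are divided evenly across the dilation
# rates, and each group runs the sliding-window DilateAttention defined above with its own dilation.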
47 | self.qkv = nn.Conv2d(dim, dim * 3, 1, bias=qkv_bias) 48 | self.dilate_attention = nn.ModuleList( 49 | [DilateAttention(head_dim, qk_scale, attn_drop, kernel_size, dilation[i]) 50 | for i in range(self.num_dilation)]) 51 | self.proj = nn.Linear(dim, dim) 52 | self.proj_drop = nn.Dropout(proj_drop) 53 | 54 | def forward(self, x): 55 | B, H, W, C = x.shape 56 | x = x.permute(0, 3, 1, 2) # B, C, H, W 57 | qkv = self.qkv(x).reshape(B, 3, self.num_dilation, C // self.num_dilation, H, W).permute(2, 1, 0, 3, 4, 5) 58 | # num_dilation,3,B,C//num_dilation,H,W 59 | x = x.reshape(B, self.num_dilation, C // self.num_dilation, H, W).permute(1, 0, 3, 4, 2) 60 | # num_dilation, B, H, W, C//num_dilation 61 | for i in range(self.num_dilation): 62 | x[i] = self.dilate_attention[i](qkv[i][0], qkv[i][1], qkv[i][2]) # B, H, W,C//num_dilation 63 | x = x.permute(1, 2, 3, 0, 4).reshape(B, H, W, C) 64 | x = self.proj(x) 65 | x = self.proj_drop(x) 66 | return x 67 | 68 | 69 | # 输入 B H W C, 输出 B H W C 70 | if __name__ == "__main__": 71 | x = torch.rand([3, 64, 64, 32]) 72 | m = MultiDilatelocalAttention(32) 73 | y = m(x) 74 | print(y.shape) 75 | -------------------------------------------------------------------------------- /BFAM.py: -------------------------------------------------------------------------------- 1 | #论文:B2CNet: A Progressive Change Boundary-to-Center Refinement Network for Multitemporal Remote Sensing Images Change Detection 2 | #论文地址:https://ieeexplore.ieee.org/document/10547405 3 | import torch 4 | import torch.nn as nn 5 | 6 | #Simam: A simple, parameter-free attention module for convolutional neural networks (ICML 2021) 7 | class simam_module(torch.nn.Module): 8 | def __init__(self, e_lambda=1e-4): 9 | super(simam_module, self).__init__() 10 | 11 | self.activaton = nn.Sigmoid() 12 | self.e_lambda = e_lambda 13 | 14 | def forward(self, x): 15 | b, c, h, w = x.size() 16 | 17 | n = w * h - 1 18 | 19 | x_minus_mu_square = (x - x.mean(dim=[2, 3], keepdim=True)).pow(2) 20 | y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(dim=[2, 3], keepdim=True) / n + self.e_lambda)) + 0.5 21 | 22 | return x * self.activaton(y) 23 | 24 | #bitemporal feature aggregation module (BFAM) 25 | class BFAM(nn.Module): 26 | def __init__(self,inp,out): 27 | super(BFAM, self).__init__() 28 | 29 | self.pre_siam = simam_module() 30 | self.lat_siam = simam_module() 31 | 32 | 33 | out_1 = int(inp/2) 34 | 35 | self.conv_1 = nn.Conv2d(inp, out_1 , padding=1, kernel_size=3,groups=out_1, 36 | dilation=1) 37 | self.conv_2 = nn.Conv2d(inp, out_1, padding=2, kernel_size=3,groups=out_1, 38 | dilation=2) 39 | self.conv_3 = nn.Conv2d(inp, out_1, padding=3, kernel_size=3,groups=out_1, 40 | dilation=3) 41 | self.conv_4 = nn.Conv2d(inp, out_1, padding=4, kernel_size=3,groups=out_1, 42 | dilation=4) 43 | 44 | self.fuse = nn.Sequential( 45 | nn.Conv2d(out_1 * 4, out_1, kernel_size=1, padding=0), 46 | nn.BatchNorm2d(out_1), 47 | nn.ReLU(inplace=True) 48 | ) 49 | 50 | self.fuse_siam = simam_module() 51 | 52 | self.out = nn.Sequential( 53 | nn.Conv2d(out_1, out, kernel_size=3, padding=1), 54 | nn.BatchNorm2d(out), 55 | nn.ReLU(inplace=True) 56 | ) 57 | 58 | def forward(self,inp1,inp2,last_feature=None): 59 | x = torch.cat([inp1,inp2],dim=1) 60 | c1 = self.conv_1(x) 61 | c2 = self.conv_2(x) 62 | c3 = self.conv_3(x) 63 | c4 = self.conv_4(x) 64 | cat = torch.cat([c1,c2,c3,c4],dim=1) 65 | fuse = self.fuse(cat) 66 | inp1_siam = self.pre_siam(inp1) 67 | inp2_siam = self.lat_siam(inp2) 68 | 69 | 70 | inp1_mul = torch.mul(inp1_siam,fuse) 71 | 
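# Modulate each SimAM-refined input by the fused multi-dilation response; everything is then
# summed residually (optionally with the previous-stage feature) before the 3x3 output conv.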
inp2_mul = torch.mul(inp2_siam,fuse) 72 | fuse = self.fuse_siam(fuse) 73 | if last_feature is None: 74 | out = self.out(fuse + inp1 + inp2 + inp2_mul + inp1_mul) 75 | else: 76 | out = self.out(fuse+inp2_mul+inp1_mul+last_feature+inp1+inp2) 77 | out = self.fuse_siam(out) 78 | 79 | return out 80 | 81 | 82 | if __name__ == '__main__': 83 | 84 | block = BFAM(inp=128, out=256) 85 | 86 | inp1 = torch.rand(1, 128 // 2, 16, 16) # B C H W 87 | inp2 = torch.rand(1, 128 // 2, 16, 16)# B C H W 88 | last_feature = torch.rand(1, 128 // 2, 16, 16)# B C H W 89 | 90 | # 通过BFAM模块,这里没有提供last_feature的话,可以为None 91 | output = block(inp1, inp2, last_feature) 92 | # output = bfam(inp1, inp2) 93 | 94 | # 打印输入和输出的shape 95 | print(inp1.size()) 96 | print(inp2.size()) 97 | print(output.size()) -------------------------------------------------------------------------------- /(ACM MM 2023)Deepfake(深度伪造检测).py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | import torch.nn.functional as F 5 | 6 | # 论文:Locate and Verify: A Two-Stream Network for Improved Deepfake Detection 7 | # 论文地址:https://arxiv.org/pdf/2309.11131 8 | 9 | class CMCE(nn.Module): # Contrastive Multimodal Contrastive Enhancement 增强模型对特征的关注度,提高模型的性能 10 | def __init__(self, in_channel=3): 11 | super(CMCE, self).__init__() 12 | self.relu = nn.ReLU() 13 | self.bn = nn.BatchNorm2d(in_channel) 14 | self.stage1 = nn.Sequential( 15 | nn.Conv2d(in_channel, in_channel, 3, 1, bias=False), 16 | nn.BatchNorm2d(in_channel), 17 | nn.ReLU() 18 | ) 19 | self.stage2 = nn.Sequential( 20 | nn.Conv2d(in_channel, in_channel, 3, 1, bias=False), 21 | nn.BatchNorm2d(in_channel), 22 | nn.ReLU() 23 | ) 24 | 25 | def forward(self, fa, fb): 26 | (b1, c1, h1, w1), (b2, c2, h2, w2) = fa.size(), fb.size() 27 | assert c1 == c2 28 | cos_sim = F.cosine_similarity(fa, fb, dim=1) 29 | cos_sim = cos_sim.unsqueeze(1) 30 | fa = fa + fb * cos_sim 31 | fb = fb + fa * cos_sim 32 | fa = self.relu(fa) 33 | fb = self.relu(fb) 34 | 35 | return fa, fb 36 | 37 | if __name__ == '__main__': 38 | block = CMCE() 39 | fa = torch.rand(16, 3, 32, 32) 40 | fb = torch.rand(16, 3, 32, 32) 41 | 42 | fa1, fb1 = block(fa, fb) 43 | print(fa.size()) 44 | print(fb.size()) 45 | print(fa1.size()) 46 | print(fb1.size()) 47 | 48 | 49 | class LFGA(nn.Module): # Local Feature Guidance Attention 旨在引导特征图的注意力以更好地聚焦在局部特征上 50 | def __init__(self, in_channel=3, out_channel=None, ratio=4): 51 | super(LFGA, self).__init__() 52 | self.chanel_in = in_channel 53 | 54 | if out_channel is None: 55 | out_channel = in_channel // ratio if in_channel // ratio > 0 else 1 56 | 57 | self.query_conv = nn.Conv2d( 58 | in_channels=in_channel, out_channels=out_channel, kernel_size=1) 59 | self.key_conv = nn.Conv2d( 60 | in_channels=in_channel, out_channels=out_channel, kernel_size=1) 61 | self.value_conv = nn.Conv2d( 62 | in_channels=in_channel, out_channels=in_channel, kernel_size=1) 63 | self.gamma = nn.Parameter(torch.zeros(1)) 64 | 65 | self.softmax = nn.Softmax(dim=-1) 66 | self.relu = nn.ReLU() 67 | self.bn = nn.BatchNorm2d(self.chanel_in) 68 | 69 | def forward(self, fa, fb): 70 | B, C, H, W = fa.size() 71 | proj_query = self.query_conv(fb).view( 72 | B, -1, H * W).permute(0, 2, 1) # B , HW, C 73 | proj_key = self.key_conv(fb).view( 74 | B, -1, H * W) # B X C x (*W*H) 75 | energy = torch.bmm(proj_query, proj_key) # B, HW, HW 76 | attention = self.softmax(energy) # BX (N) X (N) 77 | # attention = F.normalize(energy, dim=-1) 78 | 79 | 
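# fb alone decides the attention map (query and key both come from fb); fa only supplies the values,
# so the guiding stream controls where fa is aggregated. The learnable gamma blends the attended
# result back into fa as a residual.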
proj_value = self.value_conv(fa).view( 80 | B, -1, H * W) # B , C , HW 81 | 82 | out = torch.bmm(proj_value, attention.permute(0, 2, 1)) 83 | out = out.view(B, C, H, W) 84 | 85 | out = self.gamma * out + fa 86 | 87 | return self.relu(out) 88 | 89 | 90 | if __name__ == '__main__': 91 | block = LFGA(in_channel=3, ratio=4) 92 | fa = torch.rand(16, 3, 32, 32) 93 | fb = torch.rand(16, 3, 32, 32) 94 | 95 | output = block(fa, fb) 96 | print(fa.size()) 97 | print(fb.size()) 98 | print(output.size()) 99 | -------------------------------------------------------------------------------- /目标检测/FACMA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import math 4 | 5 | # 论文:FCMNet: Frequency-aware cross-modality attention networks for RGB-D salient object detection 6 | # 论文地址:https://www.sciencedirect.com/science/article/abs/pii/S0925231222003848 7 | def get_1d_dct(i, freq, L): 8 | result = math.cos(math.pi * freq * (i+0.5)/L) / math.sqrt(L) 9 | if freq == 0: 10 | return result 11 | else: 12 | return result * math.sqrt(2) 13 | def get_dct_weights(width,height,channel,fidx_u,fidx_v): 14 | dct_weights = torch.zeros(1, channel, width, height) 15 | c_part = channel // len(fidx_u) 16 | for i, (u_x, v_y) in enumerate(zip(fidx_u, fidx_v)): 17 | for t_x in range(width): 18 | for t_y in range(height): 19 | dct_weights[:, i*c_part: (i+1)*c_part, t_x, t_y] = get_1d_dct(t_x, u_x, width) * get_1d_dct(t_y, v_y, height) 20 | return dct_weights 21 | class FCABlock(nn.Module): 22 | 23 | def __init__(self, channel,width,height,fidx_u, fidx_v, reduction=16): 24 | super(FCABlock, self).__init__() 25 | mid_channel = channel // reduction 26 | self.register_buffer('pre_computed_dct_weights', get_dct_weights(width,height,channel,fidx_u,fidx_v)) 27 | self.excitation = nn.Sequential( 28 | nn.Linear(channel, mid_channel, bias=False), 29 | nn.ReLU(inplace=True), 30 | nn.Linear(mid_channel, channel, bias=False), 31 | nn.Sigmoid() 32 | ) 33 | def forward(self, x): 34 | b, c, _, _ = x.size() 35 | y = torch.sum(x * self.pre_computed_dct_weights, dim=[2,3]) 36 | z = self.excitation(y).view(b, c, 1, 1) 37 | return x * z.expand_as(x) 38 | class SFCA(nn.Module): 39 | def __init__(self, in_channel,width,height,fidx_u,fidx_v): 40 | super(SFCA, self).__init__() 41 | 42 | fidx_u = [temp_u * (width // 8) for temp_u in fidx_u] 43 | fidx_v = [temp_v * (width // 8) for temp_v in fidx_v] 44 | self.FCA = FCABlock(in_channel, width, height, fidx_u, fidx_v) 45 | self.conv1 = nn.Conv2d(in_channel, 1, kernel_size=1, bias=False) 46 | self.norm = nn.Sigmoid() 47 | def forward(self, x): 48 | # FCA 49 | F_fca = self.FCA(x) 50 | #context attention 51 | con = self.conv1(x) # c,h,w -> 1,h,w 52 | con = self.norm(con) 53 | F_con = x * con 54 | return F_fca + F_con 55 | class FACMA(nn.Module): 56 | def __init__(self,in_channel,width,height,fidx_u,fidx_v): 57 | super(FACMA, self).__init__() 58 | self.sfca_depth = SFCA(in_channel, width, height, fidx_u, fidx_v) 59 | self.sfca_rgb = SFCA(in_channel, width, height, fidx_u, fidx_v) 60 | def forward(self, rgb, depth): 61 | out_d = self.sfca_depth(depth) 62 | out_d = rgb * out_d 63 | 64 | out_rgb = self.sfca_rgb(rgb) 65 | out_rgb = depth * out_rgb 66 | return out_rgb, out_d 67 | 68 | if __name__ == '__main__': 69 | 70 | # 定义输入参数 71 | in_channel = 64 72 | width = 224 73 | height = 224 74 | fidx_u = [0, 1] 75 | fidx_v = [0, 1] 76 | 77 | block = FACMA(in_channel, width, height, fidx_u, fidx_v) 78 | 79 | # 假设的RGB和深度输入 80 | rgb_input = 
torch.randn(1, in_channel, width, height) # Batch size为1 81 | depth_input = torch.randn(1, in_channel, width, height) # Batch size为1 82 | 83 | # 通过FACMA 84 | out_rgb, out_d = block(rgb_input, depth_input) 85 | 86 | # 打印输入输出形状 87 | print("RGB 输入形状:", rgb_input.shape) 88 | print("深度 输入形状:", depth_input.shape) 89 | print("RGB 输出形状:", out_rgb.shape) 90 | print("深度 输出形状:", out_d.shape) -------------------------------------------------------------------------------- /注意力/(tmm2023)多尺度膨胀注意力机制.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | # Github地址:https://github.com/JIAOJIAYUASD/dilateformer 6 | # 论文地址:https://arxiv.org/abs/2302.01791 7 | class DilateAttention(nn.Module): 8 | "Implementation of Dilate-attention" 9 | 10 | def __init__(self, head_dim, qk_scale=None, attn_drop=0, kernel_size=3, dilation=1): 11 | super().__init__() 12 | self.head_dim = head_dim 13 | self.scale = qk_scale or head_dim ** -0.5 14 | self.kernel_size = kernel_size 15 | self.unfold = nn.Unfold(kernel_size, dilation, dilation * (kernel_size - 1) // 2, 1) 16 | self.attn_drop = nn.Dropout(attn_drop) 17 | 18 | def forward(self, q, k, v): 19 | # B, C//3, H, W 20 | B, d, H, W = q.shape 21 | q = q.reshape([B, d // self.head_dim, self.head_dim, 1, H * W]).permute(0, 1, 4, 3, 2) # B,h,N,1,d 22 | k = self.unfold(k).reshape( 23 | [B, d // self.head_dim, self.head_dim, self.kernel_size * self.kernel_size, H * W]).permute(0, 1, 4, 2, 24 | 3) # B,h,N,d,k*k 25 | attn = (q @ k) * self.scale # B,h,N,1,k*k 26 | attn = attn.softmax(dim=-1) 27 | attn = self.attn_drop(attn) 28 | v = self.unfold(v).reshape( 29 | [B, d // self.head_dim, self.head_dim, self.kernel_size * self.kernel_size, H * W]).permute(0, 1, 4, 3, 30 | 2) # B,h,N,k*k,d 31 | x = (attn @ v).transpose(1, 2).reshape(B, H, W, d) 32 | return x 33 | 34 | 35 | class MultiDilatelocalAttention(nn.Module): 36 | "Implementation of Dilate-attention" 37 | 38 | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, 39 | attn_drop=0., proj_drop=0., kernel_size=3, dilation=[2, 3]): 40 | super().__init__() 41 | self.dim = dim 42 | self.num_heads = num_heads 43 | head_dim = dim // num_heads 44 | self.dilation = dilation 45 | self.kernel_size = kernel_size 46 | self.scale = qk_scale or head_dim ** -0.5 47 | self.num_dilation = len(dilation) 48 | assert num_heads % self.num_dilation == 0, f"num_heads{num_heads} must be the times of num_dilation{self.num_dilation}!!" 
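# num_heads must be divisible by the number of dilation rates so the channel split in forward() is
# exact; every dilation group shares the single qkv projection defined below.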
49 | self.qkv = nn.Conv2d(dim, dim * 3, 1, bias=qkv_bias) 50 | self.dilate_attention = nn.ModuleList( 51 | [DilateAttention(head_dim, qk_scale, attn_drop, kernel_size, dilation[i]) 52 | for i in range(self.num_dilation)]) 53 | self.proj = nn.Linear(dim, dim) 54 | self.proj_drop = nn.Dropout(proj_drop) 55 | 56 | def forward(self, x): 57 | B, H, W, C = x.shape 58 | x = x.permute(0, 3, 1, 2) # B, C, H, W 59 | qkv = self.qkv(x).reshape(B, 3, self.num_dilation, C // self.num_dilation, H, W).permute(2, 1, 0, 3, 4, 5) 60 | # num_dilation,3,B,C//num_dilation,H,W 61 | x = x.reshape(B, self.num_dilation, C // self.num_dilation, H, W).permute(1, 0, 3, 4, 2) 62 | # num_dilation, B, H, W, C//num_dilation 63 | for i in range(self.num_dilation): 64 | x[i] = self.dilate_attention[i](qkv[i][0], qkv[i][1], qkv[i][2]) # B, H, W,C//num_dilation 65 | x = x.permute(1, 2, 3, 0, 4).reshape(B, H, W, C) 66 | x = self.proj(x) 67 | x = self.proj_drop(x) 68 | return x 69 | 70 | 71 | if __name__ == "__main__": 72 | x = torch.rand([3, 64, 64, 64]).cuda() #输入B C H W 73 | m = MultiDilatelocalAttention(64).cuda() 74 | y = m(x) 75 | print(y.shape) 76 | -------------------------------------------------------------------------------- /MHIASA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import einops 4 | # https://ieeexplore.ieee.org/abstract/document/10632582/ 5 | # MHIAIFormer: Multi-Head Interacted and Adaptive Integrated Transformer with Spatial-Spectral Attention for Hyperspectral Image Classification, JSTARS2024 6 | # https://github.com/Delon1364/MHIAIFormer 7 | # Multi-Head Interacted Additive Self-Attention(MHIASA) 8 | 9 | # Efficient Head-Interacted Additive Attention: 10 | class EHIAAttention(nn.Module): 11 | def __init__(self, num_patches, dim, num_heads = 2): 12 | super(EHIAAttention, self).__init__() 13 | self.num_heads = num_heads 14 | self.in_dims = dim // num_heads 15 | 16 | # ==================添加两个linear 17 | self.fc_q = nn.Linear(dim, dim) 18 | self.fc_k = nn.Linear(dim, dim) 19 | 20 | # w_g ->: [N, 1] 21 | self.w_g = nn.Parameter(torch.randn(num_patches, 1)) 22 | self.scale_factor = num_patches ** -0.5 23 | self.Proj = nn.Linear(dim, dim) 24 | self.norm = nn.LayerNorm(dim) 25 | # ===================== 添加Avg分支 26 | self.d_avg = nn.AdaptiveAvgPool2d((None, 1)) 27 | self.fc = nn.Linear(self.in_dims, dim) 28 | self.gelu = nn.GELU() 29 | self.fc2 = nn.Linear(num_heads, num_heads) 30 | self.d_avg2 = nn.AdaptiveAvgPool2d((None, 1)) 31 | self.sigmoid = nn.Sigmoid() 32 | 33 | def forward(self, x): 34 | # x ->: [bs, num_patches, num_heads*in_dims] 35 | bs = x.shape[0] 36 | 37 | # ==================添加两个linear 38 | q = self.fc_q(x) 39 | x = self.fc_k(x) 40 | x_t = q.transpose(1, 2) 41 | 42 | # x_T ->: [bs, D, N] 43 | # x_t = x.transpose(1, 2) 44 | 45 | # query_weight ->: [bs, D, 1] ->: [bs, 1, D] 46 | query_weight = (x_t @ self.w_g).transpose(1, 2) 47 | 48 | A = query_weight * self.scale_factor 49 | A = A.softmax(dim=-1) 50 | 51 | # A * x_T ->: [bs, N, D] 52 | # G ->: [bs, D] 53 | G = torch.sum(A * x, dim=1) 54 | 55 | # ===================== 添加Avg分支 56 | d_avg = self.d_avg(x_t) # [bs, D, 1] 57 | d_avg = torch.squeeze(d_avg, 2) # [bs, D] 58 | d_avg = d_avg.reshape(bs, self.num_heads, self.in_dims) # [bs, h, d] 59 | d_avg = self.gelu(self.fc(d_avg)) # [bs, h, D] 60 | d_avg = d_avg.reshape(bs, -1, self.num_heads) # [bs, D, h] 61 | d_avg = self.fc2(d_avg) # [bs, D, h] 62 | d_avg = self.sigmoid(self.d_avg2(d_avg)) # [bs, D, 1] 63 | d_avg = 
torch.squeeze(d_avg, 2) # [bs, D] 64 | G = G * d_avg 65 | # ===================== 66 | 67 | # G ->: [bs, N, D] 68 | # key.shape[1] = N 69 | G = einops.repeat( 70 | G, "b d -> b repeat d", repeat=x.shape[1] 71 | ) 72 | 73 | # out :-> [bs, N, D] 74 | out = self.Proj(G * x) + self.norm(x) 75 | # out = self.Proj(out) 76 | 77 | return out 78 | 79 | 80 | if __name__ == '__main__': 81 | patch_size = 16 82 | num_patches = patch_size * patch_size 83 | dim = 128 # Typically dim is a multiple of num_heads 84 | 85 | # Instantiate the EHIAAttention 86 | model = EHIAAttention(num_patches=num_patches, dim=dim) 87 | 88 | # Create a random input tensor with shape (batch_size, num_patches, num_heads * in_dims) 89 | batch_size = 1 90 | input_tensor = torch.randn(batch_size, num_patches, dim) 91 | 92 | # Forward pass through the model 93 | output = model(input_tensor) 94 | 95 | # Print the shapes 96 | print("Input shape:", input_tensor.shape) 97 | print("Output shape:", output.shape) -------------------------------------------------------------------------------- /(CVPR2020)strip_pooling.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------- 2 | # 论文: Strip Pooling: Rethinking spatial pooling for scene parsing (CVPR2020) 3 | # Github地址: https://github.com/houqb/SPNet 4 | # --------------------------------------- 5 | 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class StripPooling(nn.Module): 12 | """ 13 | Reference: 14 | """ 15 | def __init__(self, in_channels, pool_size, norm_layer, up_kwargs): 16 | super(StripPooling, self).__init__() 17 | self.pool1 = nn.AdaptiveAvgPool2d(pool_size[0]) 18 | self.pool2 = nn.AdaptiveAvgPool2d(pool_size[1]) 19 | self.pool3 = nn.AdaptiveAvgPool2d((1, None)) 20 | self.pool4 = nn.AdaptiveAvgPool2d((None, 1)) 21 | 22 | inter_channels = int(in_channels/4) 23 | self.conv1_1 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 1, bias=False), 24 | norm_layer(inter_channels), 25 | nn.ReLU(True)) 26 | self.conv1_2 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 1, bias=False), 27 | norm_layer(inter_channels), 28 | nn.ReLU(True)) 29 | self.conv2_0 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), 30 | norm_layer(inter_channels)) 31 | self.conv2_1 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), 32 | norm_layer(inter_channels)) 33 | self.conv2_2 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), 34 | norm_layer(inter_channels)) 35 | self.conv2_3 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, (1, 3), 1, (0, 1), bias=False), 36 | norm_layer(inter_channels)) 37 | self.conv2_4 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, (3, 1), 1, (1, 0), bias=False), 38 | norm_layer(inter_channels)) 39 | self.conv2_5 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), 40 | norm_layer(inter_channels), 41 | nn.ReLU(True)) 42 | self.conv2_6 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), 43 | norm_layer(inter_channels), 44 | nn.ReLU(True)) 45 | self.conv3 = nn.Sequential(nn.Conv2d(inter_channels*2, in_channels, 1, bias=False), 46 | norm_layer(in_channels)) 47 | # bilinear interpolate options 48 | self._up_kwargs = up_kwargs 49 | 50 | def forward(self, x): 51 | _, _, h, w = x.size() 52 | x1 = self.conv1_1(x) 53 | x2 = self.conv1_2(x) 54 | x2_1 = self.conv2_0(x1) 55 | x2_2 = 
F.interpolate(self.conv2_1(self.pool1(x1)), (h, w), **self._up_kwargs) 56 | x2_3 = F.interpolate(self.conv2_2(self.pool2(x1)), (h, w), **self._up_kwargs) 57 | x2_4 = F.interpolate(self.conv2_3(self.pool3(x2)), (h, w), **self._up_kwargs) 58 | x2_5 = F.interpolate(self.conv2_4(self.pool4(x2)), (h, w), **self._up_kwargs) 59 | x1 = self.conv2_5(F.relu_(x2_1 + x2_2 + x2_3)) 60 | x2 = self.conv2_6(F.relu_(x2_5 + x2_4)) 61 | out = self.conv3(torch.cat([x1, x2], dim=1)) 62 | return F.relu_(x + out) 63 | 64 | 65 | # 输入 B C H W, 输出 B C H W 66 | if __name__ == '__main__': 67 | block = StripPooling(64, (20, 12), nn.BatchNorm2d, {'mode': 'bilinear', 'align_corners': True}) 68 | input = torch.rand(3, 64, 32, 32) 69 | output = block(input) 70 | print(input.size(), output.size()) 71 | -------------------------------------------------------------------------------- /CPCA2d.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | import torch.nn.functional 4 | import torch.nn.functional as F 5 | # 论文:Channel prior convolutional attention for medical image segmentation 6 | # 论文地址:https://arxiv.org/pdf/2306.05196 7 | 8 | 9 | class ChannelAttention(nn.Module): 10 | 11 | def __init__(self, input_channels, internal_neurons): 12 | super(ChannelAttention, self).__init__() 13 | self.fc1 = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1, bias=True) 14 | self.fc2 = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1, bias=True) 15 | self.input_channels = input_channels 16 | 17 | def forward(self, inputs): 18 | x1 = F.adaptive_avg_pool2d(inputs, output_size=(1, 1)) 19 | # print('x:', x.shape) 20 | x1 = self.fc1(x1) 21 | x1 = F.relu(x1, inplace=True) 22 | x1 = self.fc2(x1) 23 | x1 = torch.sigmoid(x1) 24 | x2 = F.adaptive_max_pool2d(inputs, output_size=(1, 1)) 25 | # print('x:', x.shape) 26 | x2 = self.fc1(x2) 27 | x2 = F.relu(x2, inplace=True) 28 | x2 = self.fc2(x2) 29 | x2 = torch.sigmoid(x2) 30 | x = x1 + x2 31 | x = x.view(-1, self.input_channels, 1, 1) 32 | return x 33 | 34 | class CPCABlock(nn.Module): 35 | 36 | def __init__(self, in_channels, out_channels, 37 | channelAttention_reduce=4): 38 | super().__init__() 39 | 40 | self.C = in_channels 41 | self.O = out_channels 42 | 43 | assert in_channels == out_channels 44 | self.ca = ChannelAttention(input_channels=in_channels, internal_neurons=in_channels // channelAttention_reduce) 45 | self.dconv5_5 = nn.Conv2d(in_channels, in_channels, kernel_size=5, padding=2, groups=in_channels) 46 | self.dconv1_7 = nn.Conv2d(in_channels, in_channels, kernel_size=(1, 7), padding=(0, 3), groups=in_channels) 47 | self.dconv7_1 = nn.Conv2d(in_channels, in_channels, kernel_size=(7, 1), padding=(3, 0), groups=in_channels) 48 | self.dconv1_11 = nn.Conv2d(in_channels, in_channels, kernel_size=(1, 11), padding=(0, 5), groups=in_channels) 49 | self.dconv11_1 = nn.Conv2d(in_channels, in_channels, kernel_size=(11, 1), padding=(5, 0), groups=in_channels) 50 | self.dconv1_21 = nn.Conv2d(in_channels, in_channels, kernel_size=(1, 21), padding=(0, 10), groups=in_channels) 51 | self.dconv21_1 = nn.Conv2d(in_channels, in_channels, kernel_size=(21, 1), padding=(10, 0), groups=in_channels) 52 | self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=(1, 1), padding=0) 53 | self.act = nn.GELU() 54 | 55 | def forward(self, inputs): 56 | # Global Perceptron 57 | inputs = self.conv(inputs) 58 | inputs = self.act(inputs) 59 | 60 | 
channel_att_vec = self.ca(inputs) 61 | inputs = channel_att_vec * inputs 62 | 63 | x_init = self.dconv5_5(inputs) 64 | x_1 = self.dconv1_7(x_init) 65 | x_1 = self.dconv7_1(x_1) 66 | x_2 = self.dconv1_11(x_init) 67 | x_2 = self.dconv11_1(x_2) 68 | x_3 = self.dconv1_21(x_init) 69 | x_3 = self.dconv21_1(x_3) 70 | x = x_1 + x_2 + x_3 + x_init 71 | spatial_att = self.conv(x) 72 | out = spatial_att * inputs 73 | out = self.conv(out) 74 | return out 75 | 76 | 77 | if __name__ == '__main__': 78 | 79 | input = torch.randn(4, 16, 64, 64) 80 | 81 | print(input.size()) 82 | 83 | block = CPCABlock(in_channels=16, out_channels=16, channelAttention_reduce=4) 84 | 85 | # 通过CPCABlock模块处理输入 86 | output = block(input) 87 | 88 | # 打印输出张量的形状 89 | print(output.size()) -------------------------------------------------------------------------------- /卷积/(CVPR 2022)dgcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def knn(x, k): 6 | inner = -2 * torch.matmul(x.transpose(2, 1).contiguous(), x) 7 | xx = torch.sum(x ** 2, dim=1, keepdim=True) 8 | pairwise_distance = -xx - inner - xx.transpose(2, 1).contiguous() 9 | 10 | idx = pairwise_distance.topk(k=k, dim=-1)[1] # (batch_size, num_points, k) 11 | return idx 12 | 13 | 14 | def get_graph_feature(x, k=20): 15 | # x = x.squeeze() 16 | idx = knn(x, k=k) # (batch_size, num_points, k) 17 | batch_size, num_points, _ = idx.size() 18 | 19 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") 20 | 21 | idx_base = torch.arange(0, batch_size, device=device).view(-1, 1, 1) * num_points 22 | 23 | idx = idx + idx_base 24 | 25 | idx = idx.view(-1) 26 | 27 | _, num_dims, _ = x.size() 28 | 29 | # (batch_size, num_points, num_dims) -> (batch_size*num_points, num_dims) # batch_size * num_points * k + range(0, batch_size*num_points) 30 | x = x.transpose(2, 1).contiguous() 31 | 32 | feature = x.view(batch_size * num_points, -1)[idx, :] 33 | feature = feature.view(batch_size, num_points, k, num_dims) 34 | x = x.view(batch_size, num_points, 1, num_dims).repeat(1, 1, k, 1) 35 | 36 | feature = torch.cat((feature, x), dim=3).permute(0, 3, 1, 2) 37 | 38 | return feature 39 | 40 | 41 | class DGCNN(torch.nn.Module): 42 | def __init__(self, emb_dims=1024, input_shape="bnc"): 43 | super(DGCNN, self).__init__() 44 | if input_shape not in ["bcn", "bnc"]: 45 | raise ValueError("Allowed shapes are 'bcn' (batch * channels * num_in_points), 'bnc' ") 46 | self.input_shape = input_shape 47 | self.emb_dims = emb_dims 48 | 49 | self.conv1 = torch.nn.Conv2d(6, 64, kernel_size=1, bias=False) 50 | self.conv2 = torch.nn.Conv2d(64, 64, kernel_size=1, bias=False) 51 | self.conv3 = torch.nn.Conv2d(64, 128, kernel_size=1, bias=False) 52 | self.conv4 = torch.nn.Conv2d(128, 256, kernel_size=1, bias=False) 53 | self.conv5 = torch.nn.Conv2d(512, emb_dims, kernel_size=1, bias=False) 54 | self.bn1 = torch.nn.BatchNorm2d(64) 55 | self.bn2 = torch.nn.BatchNorm2d(64) 56 | self.bn3 = torch.nn.BatchNorm2d(128) 57 | self.bn4 = torch.nn.BatchNorm2d(256) 58 | self.bn5 = torch.nn.BatchNorm2d(emb_dims) 59 | 60 | def forward(self, input_data): 61 | if self.input_shape == "bnc": 62 | input_data = input_data.permute(0, 2, 1) 63 | if input_data.shape[1] != 3: 64 | raise RuntimeError("shape of x must be of [Batch x 3 x NumInPoints]") 65 | 66 | batch_size, num_dims, num_points = input_data.size() 67 | output = get_graph_feature(input_data) 68 | 69 | output = F.relu(self.bn1(self.conv1(output))) 70 | 
output1 = output.max(dim=-1, keepdim=True)[0] 71 | 72 | output = F.relu(self.bn2(self.conv2(output))) 73 | output2 = output.max(dim=-1, keepdim=True)[0] 74 | 75 | output = F.relu(self.bn3(self.conv3(output))) 76 | output3 = output.max(dim=-1, keepdim=True)[0] 77 | 78 | output = F.relu(self.bn4(self.conv4(output))) 79 | output4 = output.max(dim=-1, keepdim=True)[0] 80 | 81 | output = torch.cat((output1, output2, output3, output4), dim=1) 82 | 83 | output = F.relu(self.bn5(self.conv5(output))).view(batch_size, -1, num_points) 84 | return output 85 | 86 | 87 | if __name__ == '__main__': 88 | # Test the code. 89 | x = torch.rand((10, 1024, 3)).cuda() 90 | 91 | dgcnn = DGCNN().cuda() 92 | y = dgcnn(x) 93 | print("\nInput Shape of DGCNN: ", x.shape, "\nOutput Shape of DGCNN: ", y.shape) 94 | -------------------------------------------------------------------------------- /PCBAM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | #论文:DAU-Net: Dual attention-aided U-Net for segmenting tumor in breast ultrasound images 4 | #论文:https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0303670 5 | class ChannelAttentionModule(nn.Module): 6 | def __init__(self, in_channels, ratio=8): 7 | super(ChannelAttentionModule, self).__init__() 8 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 9 | self.max_pool = nn.AdaptiveMaxPool2d(1) 10 | 11 | self.fc1 = nn.Conv2d(in_channels, in_channels // ratio, kernel_size=1, bias=False) 12 | self.relu1 = nn.ReLU() 13 | self.fc2 = nn.Conv2d(in_channels // ratio, in_channels, kernel_size=1, bias=False) 14 | 15 | def forward(self, x): 16 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 17 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 18 | out = avg_out + max_out 19 | return x * torch.sigmoid(out) 20 | 21 | 22 | class SpatialAttentionModule(nn.Module): 23 | def __init__(self): 24 | super(SpatialAttentionModule, self).__init__() 25 | self.conv1 = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False) 26 | 27 | def forward(self, x): 28 | avg_out = torch.mean(x, dim=1, keepdim=True) 29 | max_out, _ = torch.max(x, dim=1, keepdim=True) 30 | out = torch.cat([avg_out, max_out], dim=1) 31 | out = self.conv1(out) 32 | return x * torch.sigmoid(out) 33 | 34 | 35 | class CBAM(nn.Module): 36 | def __init__(self, in_channels, ratio=8): 37 | super(CBAM, self).__init__() 38 | self.channel_attention = ChannelAttentionModule(in_channels, ratio) 39 | self.spatial_attention = SpatialAttentionModule() 40 | 41 | def forward(self, x): 42 | x = self.channel_attention(x) 43 | x = self.spatial_attention(x) 44 | return x 45 | 46 | 47 | class PAM(nn.Module): 48 | def __init__(self, in_channels): 49 | super(PAM, self).__init__() 50 | self.query_conv = nn.Conv2d(in_channels, in_channels // 8, kernel_size=1) 51 | self.key_conv = nn.Conv2d(in_channels, in_channels // 8, kernel_size=1) 52 | self.value_conv = nn.Conv2d(in_channels, in_channels, kernel_size=1) 53 | self.gamma = nn.Parameter(torch.zeros(1)) 54 | self.softmax = nn.Softmax(dim=-1) 55 | 56 | def forward(self, x): 57 | batch_size, C, height, width = x.size() 58 | proj_query = self.query_conv(x).view(batch_size, -1, height * width).permute(0, 2, 1) 59 | proj_key = self.key_conv(x).view(batch_size, -1, height * width) 60 | energy = torch.bmm(proj_query, proj_key) 61 | attention = self.softmax(energy) 62 | proj_value = self.value_conv(x).view(batch_size, -1, height * width) 63 | out = torch.bmm(proj_value, attention.permute(0, 2, 1)) 64 | out = 
out.view(batch_size, C, height, width) 65 | out = self.gamma * out + x 66 | return out 67 | 68 | 69 | class PCBAM(nn.Module): 70 | def __init__(self, in_channels, ratio=8): 71 | super(PCBAM, self).__init__() 72 | self.channel_attention = ChannelAttentionModule(in_channels, ratio) 73 | self.spatial_attention = SpatialAttentionModule() 74 | self.position_attention = PAM(in_channels) 75 | 76 | def forward(self, x): 77 | x_c = self.channel_attention(x) 78 | x_s = self.spatial_attention(x_c) 79 | x_p = self.position_attention(x) 80 | out = x_s + x_p 81 | return out 82 | 83 | 84 | if __name__ == '__main__': 85 | 86 | input = torch.randn(1, 64,32, 32) 87 | block = PCBAM(in_channels=64) 88 | print(input.size()) 89 | output = block(input) 90 | print(output.size()) 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /遥感/MSAA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | # 论文:CM-UNet: Hybrid CNN-Mamba UNet for Remote Sensing Image Semantic Segmentation 5 | # 论文地址:https://arxiv.org/pdf/2405.10530 6 | 7 | 8 | class ChannelAttentionModule(nn.Module): 9 | def __init__(self, in_channels, reduction=4): 10 | super(ChannelAttentionModule, self).__init__() 11 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 12 | self.max_pool = nn.AdaptiveMaxPool2d(1) 13 | self.fc = nn.Sequential( 14 | nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False), 15 | nn.ReLU(inplace=True), 16 | nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False) 17 | ) 18 | self.sigmoid = nn.Sigmoid() 19 | 20 | def forward(self, x): 21 | avg_out = self.fc(self.avg_pool(x)) 22 | max_out = self.fc(self.max_pool(x)) 23 | out = avg_out + max_out 24 | return self.sigmoid(out) 25 | 26 | class SpatialAttentionModule(nn.Module): 27 | def __init__(self, kernel_size=7): 28 | super(SpatialAttentionModule, self).__init__() 29 | self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=kernel_size//2, bias=False) 30 | self.sigmoid = nn.Sigmoid() 31 | 32 | def forward(self, x): 33 | avg_out = torch.mean(x, dim=1, keepdim=True) 34 | max_out, _ = torch.max(x, dim=1, keepdim=True) 35 | x = torch.cat([avg_out, max_out], dim=1) 36 | x = self.conv1(x) 37 | return self.sigmoid(x) 38 | 39 | class FusionConv(nn.Module): 40 | def __init__(self, in_channels, out_channels, factor=4.0): 41 | super(FusionConv, self).__init__() 42 | dim = int(out_channels // factor) 43 | self.down = nn.Conv2d(in_channels, dim, kernel_size=1, stride=1) 44 | self.conv_3x3 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1) 45 | self.conv_5x5 = nn.Conv2d(dim, dim, kernel_size=5, stride=1, padding=2) 46 | self.conv_7x7 = nn.Conv2d(dim, dim, kernel_size=7, stride=1, padding=3) 47 | self.spatial_attention = SpatialAttentionModule() 48 | self.channel_attention = ChannelAttentionModule(dim) 49 | self.up = nn.Conv2d(dim, out_channels, kernel_size=1, stride=1) 50 | self.down_2 = nn.Conv2d(in_channels, dim, kernel_size=1, stride=1) 51 | 52 | def forward(self, x1, x2, x4): 53 | x_fused = torch.cat([x1, x2, x4], dim=1) 54 | x_fused = self.down(x_fused) 55 | x_fused_c = x_fused * self.channel_attention(x_fused) 56 | x_3x3 = self.conv_3x3(x_fused) 57 | x_5x5 = self.conv_5x5(x_fused) 58 | x_7x7 = self.conv_7x7(x_fused) 59 | x_fused_s = x_3x3 + x_5x5 + x_7x7 60 | x_fused_s = x_fused_s * self.spatial_attention(x_fused_s) 61 | 62 | x_out = self.up(x_fused_s + x_fused_c) 63 | 64 | return x_out 65 | 66 | class MSAA(nn.Module): 67 | def 
__init__(self, in_channels, out_channels): 68 | super(MSAA, self).__init__() 69 | self.fusion_conv = FusionConv(in_channels * 3, out_channels) 70 | 71 | def forward(self, x1, x2, x4, last=False): 72 | # # x2 comes from the low-to-high path and x4 from the high-to-low path: x2 carries semantic information, while x4 supplements edge and detail features 73 | # x_1_2_fusion = self.fusion_1x2(x1, x2) 74 | # x_1_4_fusion = self.fusion_1x4(x1, x4) 75 | # x_fused = x_1_2_fusion + x_1_4_fusion 76 | x_fused = self.fusion_conv(x1, x2, x4) 77 | return x_fused 78 | 79 | 80 | if __name__ == '__main__': 81 | 82 | block = MSAA(in_channels=64, out_channels=128) 83 | x1 = torch.randn(1, 64, 64, 64) 84 | x2 = torch.randn(1, 64, 64, 64) 85 | x4 = torch.randn(1, 64, 64, 64) 86 | 87 | output = block(x1, x2, x4) 88 | 89 | # Print the shapes of the inputs and the output 90 | print(x1.size()) 91 | print(x2.size()) 92 | print(x4.size()) 93 | print(output.size()) -------------------------------------------------------------------------------- /目标检测/GFM.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | # 论文:MAGNet: Multi-scale Awareness and Global fusion Network for RGB-D salient object detection | KBS 4 | # 论文地址:https://www.sciencedirect.com/science/article/abs/pii/S0950705124007603 5 | class DWPWConv(nn.Module): 6 | def __init__(self, inc, outc): 7 | super().__init__() 8 | self.conv = nn.Sequential( 9 | nn.Conv2d(in_channels=inc, out_channels=inc, kernel_size=3, padding=1, stride=1, groups=inc), 10 | nn.BatchNorm2d(inc), 11 | nn.GELU(), 12 | nn.Conv2d(in_channels=inc, out_channels=outc, kernel_size=1, stride=1), 13 | nn.BatchNorm2d(outc), 14 | nn.GELU() 15 | ) 16 | 17 | def forward(self, x): 18 | return self.conv(x) 19 | 20 | class SAttention(nn.Module): 21 | def __init__(self, dim, sa_num_heads=8, qkv_bias=True, qk_scale=None, 22 | attn_drop=0., proj_drop=0.): 23 | super().__init__() 24 | 25 | self.dim = dim 26 | self.sa_num_heads = sa_num_heads 27 | 28 | assert dim % sa_num_heads == 0, f"dim {dim} should be divided by num_heads {sa_num_heads}."
29 | 30 | self.act = nn.GELU() 31 | self.proj = nn.Linear(dim, dim) 32 | self.proj_drop = nn.Dropout(proj_drop) 33 | 34 | head_dim = dim // sa_num_heads 35 | self.scale = qk_scale or head_dim ** -0.5 36 | self.q = nn.Linear(dim, dim, bias=qkv_bias) 37 | self.attn_drop = nn.Dropout(attn_drop) 38 | self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias) 39 | self.local_conv = nn.Conv2d(dim, dim, kernel_size=3, padding=1, stride=1, groups=dim) 40 | 41 | def forward(self, x, H, W): 42 | B, N, C = x.shape 43 | 44 | q = self.q(x).reshape(B, N, self.sa_num_heads, C // self.sa_num_heads).permute(0, 2, 1, 3) 45 | kv = self.kv(x).reshape(B, -1, 2, self.sa_num_heads, C // self.sa_num_heads).permute(2, 0, 3, 1, 4) 46 | k, v = kv[0], kv[1] 47 | attn = (q @ k.transpose(-2, -1)) * self.scale 48 | attn = attn.softmax(dim=-1) 49 | attn = self.attn_drop(attn) 50 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) + \ 51 | self.local_conv(v.transpose(1, 2).reshape(B, N, C).transpose(1, 2).view(B, C, H, W)).view(B, C, 52 | N).transpose(1, 2) 53 | 54 | x = self.proj(x) 55 | x = self.proj_drop(x) 56 | 57 | return x.permute(0, 2, 1).reshape(B, C, H, W) 58 | 59 | # Global Fusion Module 60 | class GFM(nn.Module): 61 | def __init__(self, inc, expend_ratio=2): 62 | super().__init__() 63 | self.expend_ratio = expend_ratio 64 | assert expend_ratio in [2, 3], f"expend_ratio {expend_ratio} mismatch" 65 | 66 | self.sa = SAttention(dim=inc) 67 | self.dw_pw = DWPWConv(inc * expend_ratio, inc) 68 | self.act = nn.GELU() 69 | 70 | def forward(self, x, d): 71 | B, C, H, W = x.shape 72 | if self.expend_ratio == 2: 73 | cat = torch.cat((x, d), dim=1) 74 | else: 75 | multi = x * d 76 | cat = torch.cat((x, d, multi), dim=1) 77 | x_rc = self.dw_pw(cat).flatten(2).permute(0, 2, 1) 78 | x_ = self.sa(x_rc, H, W) 79 | x_ = x_ + x 80 | return self.act(x_) 81 | 82 | 83 | if __name__ == '__main__': 84 | # Instantiate the GFM module 85 | inc = 64 # number of input channels 86 | block = GFM(inc=inc, expend_ratio=2) 87 | 88 | x = torch.randn(1, inc, 32, 32) # B C H W 89 | d = torch.randn(1, inc, 32, 32) # depth map with the same shape as x 90 | 91 | # Forward pass to compute the output 92 | output = block(x, d) 93 | 94 | # Print the input and output shapes 95 | print(f"Input x shape: {x.size()}") 96 | print(f"Input d shape: {d.size()}") 97 | print(f"Output shape: {output.size()}") -------------------------------------------------------------------------------- /注意力/(CVPR 2024)CAA.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import torch.nn as nn 3 | import torch 4 | # 论文地址:https://arxiv.org/pdf/2403.06258 5 | # 论文:Poly Kernel Inception Network for Remote Sensing Detection(CVPR 2024) 6 | # Github地址:https://github.com/NUST-Machine-Intelligence-Laboratory/PKINet 7 | # Full collection of 100+ plug-and-play modules on GitHub: https://github.com/ai-dawang/PlugNPlay-Modules 8 | # Context Anchor Attention (CAA) module 9 | class ConvModule(nn.Module): 10 | def __init__( 11 | self, 12 | in_channels: int, 13 | out_channels: int, 14 | kernel_size: int, 15 | stride: int = 1, 16 | padding: int = 0, 17 | groups: int = 1, 18 | norm_cfg: Optional[dict] = None, 19 | act_cfg: Optional[dict] = None): 20 | super().__init__() 21 | layers = [] 22 | # Convolution Layer 23 | layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, groups=groups, bias=(norm_cfg is None))) 24 | # Normalization Layer 25 | if norm_cfg: 26 | norm_layer = self._get_norm_layer(out_channels, norm_cfg) 27 | layers.append(norm_layer) 28 | # Activation Layer 29 | if act_cfg: 30 | act_layer = self._get_act_layer(act_cfg) 31 | 
layers.append(act_layer) 32 | # Combine all layers 33 | self.block = nn.Sequential(*layers) 34 | 35 | def forward(self, x): 36 | return self.block(x) 37 | 38 | def _get_norm_layer(self, num_features, norm_cfg): 39 | if norm_cfg['type'] == 'BN': 40 | return nn.BatchNorm2d(num_features, momentum=norm_cfg.get('momentum', 0.1), eps=norm_cfg.get('eps', 1e-5)) 41 | # Add more normalization types if needed 42 | raise NotImplementedError(f"Normalization layer '{norm_cfg['type']}' is not implemented.") 43 | 44 | def _get_act_layer(self, act_cfg): 45 | if act_cfg['type'] == 'ReLU': 46 | return nn.ReLU(inplace=True) 47 | if act_cfg['type'] == 'SiLU': 48 | return nn.SiLU(inplace=True) 49 | # Add more activation types if needed 50 | raise NotImplementedError(f"Activation layer '{act_cfg['type']}' is not implemented.") 51 | 52 | class CAA(nn.Module): 53 | """Context Anchor Attention""" 54 | def __init__( 55 | self, 56 | channels: int, 57 | h_kernel_size: int = 11, 58 | v_kernel_size: int = 11, 59 | norm_cfg: Optional[dict] = dict(type='BN', momentum=0.03, eps=0.001), 60 | act_cfg: Optional[dict] = dict(type='SiLU')): 61 | super().__init__() 62 | self.avg_pool = nn.AvgPool2d(7, 1, 3) 63 | self.conv1 = ConvModule(channels, channels, 1, 1, 0, 64 | norm_cfg=norm_cfg, act_cfg=act_cfg) 65 | self.h_conv = ConvModule(channels, channels, (1, h_kernel_size), 1, 66 | (0, h_kernel_size // 2), groups=channels, 67 | norm_cfg=None, act_cfg=None) 68 | self.v_conv = ConvModule(channels, channels, (v_kernel_size, 1), 1, 69 | (v_kernel_size // 2, 0), groups=channels, 70 | norm_cfg=None, act_cfg=None) 71 | self.conv2 = ConvModule(channels, channels, 1, 1, 0, 72 | norm_cfg=norm_cfg, act_cfg=act_cfg) 73 | self.act = nn.Sigmoid() 74 | 75 | def forward(self, x): 76 | attn_factor = self.act(self.conv2(self.v_conv(self.h_conv(self.conv1(self.avg_pool(x)))))) 77 | return attn_factor 78 | 79 | # Example usage to print input and output shapes 80 | if __name__ == "__main__": 81 | input = torch.randn(1, 64, 128, 128) #输入 B C H W 82 | block = CAA(64) 83 | output = block(input) 84 | print(input.size()) 85 | print(output.size()) 86 | -------------------------------------------------------------------------------- /图像超分/FMM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | #论文:Spatially-Adaptive Feature Modulation for Efficient Image Super-Resolution (ICCV 2023) 5 | #论文地址:https://openaccess.thecvf.com/content/ICCV2023/papers/Sun_Spatially-Adaptive_Feature_Modulation_for_Efficient_Image_Super-Resolution_ICCV_2023_paper.pdf 6 | 7 | # Layer Norm 8 | class LayerNorm(nn.Module): 9 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_first"): 10 | super().__init__() 11 | self.weight = nn.Parameter(torch.ones(normalized_shape)) 12 | self.bias = nn.Parameter(torch.zeros(normalized_shape)) 13 | self.eps = eps 14 | self.data_format = data_format 15 | if self.data_format not in ["channels_last", "channels_first"]: 16 | raise NotImplementedError 17 | self.normalized_shape = (normalized_shape, ) 18 | 19 | def forward(self, x): 20 | if self.data_format == "channels_last": 21 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 22 | elif self.data_format == "channels_first": 23 | u = x.mean(1, keepdim=True) 24 | s = (x - u).pow(2).mean(1, keepdim=True) 25 | x = (x - u) / torch.sqrt(s + self.eps) 26 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 27 | return x 28 | 29 | # 
convolutional channel mixer (CCM) 30 | class CCM(nn.Module): 31 | def __init__(self, dim, growth_rate=2.0): 32 | super().__init__() 33 | hidden_dim = int(dim * growth_rate) 34 | 35 | self.ccm = nn.Sequential( 36 | nn.Conv2d(dim, hidden_dim, 3, 1, 1), 37 | nn.GELU(), 38 | nn.Conv2d(hidden_dim, dim, 1, 1, 0) 39 | ) 40 | 41 | def forward(self, x): 42 | return self.ccm(x) 43 | 44 | 45 | # spatially-adaptive feature modulation (SAFM) 46 | class SAFM(nn.Module): 47 | def __init__(self, dim, n_levels=4): 48 | super().__init__() 49 | self.n_levels = n_levels 50 | chunk_dim = dim // n_levels 51 | 52 | # Spatial Weighting 53 | self.mfr = nn.ModuleList( 54 | [nn.Conv2d(chunk_dim, chunk_dim, 3, 1, 1, groups=chunk_dim) for i in range(self.n_levels)]) 55 | 56 | # # Feature Aggregation 57 | self.aggr = nn.Conv2d(dim, dim, 1, 1, 0) 58 | 59 | # Activation 60 | self.act = nn.GELU() 61 | 62 | def forward(self, x): 63 | h, w = x.size()[-2:] 64 | 65 | xc = x.chunk(self.n_levels, dim=1) 66 | out = [] 67 | for i in range(self.n_levels): 68 | if i > 0: 69 | p_size = (h // 2 ** i, w // 2 ** i) 70 | s = F.adaptive_max_pool2d(xc[i], p_size) 71 | s = self.mfr[i](s) 72 | s = F.interpolate(s, size=(h, w), mode='nearest') 73 | else: 74 | s = self.mfr[i](xc[i]) 75 | out.append(s) 76 | 77 | out = self.aggr(torch.cat(out, dim=1)) 78 | out = self.act(out) * x 79 | return out 80 | 81 | # feature mixing module(FMM) 82 | class FMM(nn.Module): 83 | def __init__(self, dim, ffn_scale=2.0): 84 | super().__init__() 85 | 86 | self.norm1 = LayerNorm(dim) 87 | self.norm2 = LayerNorm(dim) 88 | 89 | # Multiscale Block 90 | self.safm = SAFM(dim) 91 | # Feedforward layer 92 | self.ccm = CCM(dim, ffn_scale) 93 | 94 | def forward(self, x): 95 | x = self.safm(self.norm1(x)) + x 96 | x = self.ccm(self.norm2(x)) + x 97 | return x 98 | 99 | if __name__ == '__main__': 100 | dim = 64 101 | block = FMM(dim) 102 | input = torch.randn(1, dim, 32, 32) 103 | output = block(input) 104 | 105 | # 打印输入和输出的形状 106 | print(input.size()) 107 | print(output.size()) --------------------------------------------------------------------------------
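Stitching note: every 2-D module above takes a (B, C, H, W) tensor and returns one of the same shape, so each block can be dropped between the convolution stages of an existing backbone without other changes. The sketch below is a minimal, illustrative example of that pattern and is not taken from any of the cited papers; the class name ConvStageWithPCBAM and the channel sizes are assumptions made for the demo, and the import assumes the script is run from the repository root where PCBAM.py lives. The same recipe applies to the other blocks, with one wrinkle: CAA returns only the attention factor, so its output has to be multiplied back onto the features (x = x * caa(x)) rather than used directly.

import torch
import torch.nn as nn

from PCBAM import PCBAM  # PCBAM.py sits at the repository root


class ConvStageWithPCBAM(nn.Module):
    """Illustrative sketch: a plain 3x3 conv stage followed by a plug-in PCBAM block."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )
        # PCBAM keeps the (B, C, H, W) shape, so it can be appended to any conv stage
        self.attn = PCBAM(in_channels=out_channels)

    def forward(self, x):
        x = self.conv(x)   # B, out_channels, H, W
        x = self.attn(x)   # position + channel + spatial attention, shape unchanged
        return x


if __name__ == '__main__':
    block = ConvStageWithPCBAM(3, 64)
    input = torch.rand(2, 3, 32, 32)
    output = block(input)
    print(input.size())
    print(output.size())  # torch.Size([2, 64, 32, 32])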