├── 特征融合
│   ├── 1
│   ├── (ICMR 2022)CMF_Block(多模态融合).py
│   └── (TIP2024)CGA特征融合模块.py
├── 频域
│   ├── 1
│   └── (CVPR 2024)FRFN.py
├── 目标检测
│   ├── WCMFandFACMA.pdf
│   ├── WCMF.py
│   ├── FACMA.py
│   └── GFM.py
├── assets
│   ├── image-20240906165609184.png
│   ├── image-20240906165641092.png
│   ├── data-20241122T035935Z-001.zip
│   ├── 18b0c599180d157e714daf7f21b1fdc.jpg
│   └── aae37e8f13bc88eb6aca66535d49a7e.jpg
├── 特征维度转换.py
├── 缝合代码示例
│   ├── 维度转换.py
│   ├── HWD小波下采样.py
│   ├── LSK.py
│   ├── 部分卷积.py
│   ├── MobileViTv2Attention.py
│   └── DilateFormer.py
├── (arxiv)Arelu.py
├── README.md
├── (ICCV 2021) RA.py
├── 采样
│   ├── (PR2023) 小波下采样.py
│   └── EUCB.py
├── 注意力
│   ├── (ICML 2021)SimAM.py
│   ├── (IEEE 2023)AGCA.py
│   ├── (arxiv2023)ema.py
│   ├── (WACV 2021)TripletAttention.py
│   ├── (TPAMI 2021)OutlookAttention.py
│   ├── (CVPR 2024)SHSA.py
│   ├── (tmm2023)多尺度膨胀注意力机制.py
│   └── (CVPR 2024)CAA.py
├── GhostModule.py
├── DFF2d.py
├── 卷积
│   ├── (CVPR 2023) 部分卷积.py
│   ├── (ICCV 2023)大核选择模块LSK.py
│   ├── (ICCV 2021)CTR-GC(图卷积).py
│   └── (CVPR 2022)dgcnn.py
├── (ICCV2023)SAFM.py
├── (CVPR 2024)IDC.py
├── UCDC.py
├── (arXiv 2021) EA.py
├── GCTattention.py
├── scSE.py
├── (arXiv 2019) ECA.py
├── PGM.py
├── f_sampling.py
├── 3D
│   ├── (CVPR 2024)IDC3d.py
│   ├── (IEEE 2024)SFFusion3d特征融合.py
│   └── (CVPR 2022)DFE.py
├── MDTA.py
├── (ACCV 2024) LIA.py
├── GAU.py
├── 1D模块
│   ├── (ICCV 2023)EAA.py
│   └── (KDD 2020)CorNet(NLP).py
├── (arXiv 2021) AFT.py
├── (ECCV2024)SMFA.py
├── (TPAMI 2022) ViP.py
├── MLAttention.py
├── SWA.py
├── (arXiv 2021) MobileViTv2.py
├── FCA.py
├── DPTAM.py
├── (ICLR 2023)ContraNorm(对比归一化层).py
├── LGAG.py
├── 图像超分
│   ├── SGFN.py
│   └── FMM.py
├── (arXiv 2021) S2Attention.py
├── cleegn.py
├── MCM.py
├── LAE.py
├── tfcm.py
├── (arXiv 2021) PSA.py
├── (ICPR 2021)CAN(人群计数,CV2维任务通用).py
├── SPConv.py
├── LPA.py
├── DA.py
├── FECAttention.py
├── ULSAM.py
├── 点云
│   └── Attention.py
├── MixStructure.py
├── CPAM.py
├── (arXiv 2020 ) SSAN.py
├── LMFLoss.py
├── (Elsevier 2024)CF_loss.py
├── (arXiv 2023) ScaledDotProductAttention.py
├── (CVPR 2019) DCNv2.py
├── FEM.py
├── BFAM.py
├── (ACM MM 2023)Deepfake(深度伪造检测).py
├── MHIASA.py
├── (CVPR2020)strip_pooling.py
├── CPCA2d.py
├── PCBAM.py
└── 遥感
    └── MSAA.py

/特征融合/1:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/频域/1:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/目标检测/WCMFandFACMA.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-dawang/PlugNPlay-Modules/HEAD/目标检测/WCMFandFACMA.pdf
--------------------------------------------------------------------------------
/assets/image-20240906165609184.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-dawang/PlugNPlay-Modules/HEAD/assets/image-20240906165609184.png
--------------------------------------------------------------------------------
/assets/image-20240906165641092.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-dawang/PlugNPlay-Modules/HEAD/assets/image-20240906165641092.png
--------------------------------------------------------------------------------
/assets/data-20241122T035935Z-001.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-dawang/PlugNPlay-Modules/HEAD/assets/data-20241122T035935Z-001.zip -------------------------------------------------------------------------------- /assets/18b0c599180d157e714daf7f21b1fdc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-dawang/PlugNPlay-Modules/HEAD/assets/18b0c599180d157e714daf7f21b1fdc.jpg -------------------------------------------------------------------------------- /assets/aae37e8f13bc88eb6aca66535d49a7e.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-dawang/PlugNPlay-Modules/HEAD/assets/aae37e8f13bc88eb6aca66535d49a7e.jpg -------------------------------------------------------------------------------- /特征维度转换.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from einops import rearrange 3 | 4 | 5 | def to_3d(x): 6 | return rearrange(x, 'b c h w -> b (h w) c') 7 | 8 | 9 | def to_4d(x, h, w): 10 | return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w) 11 | 12 | 13 | if __name__ == '__main__': 14 | input = torch.randn(3, 32, 64, 64) # 假设输入tensor B C H W 15 | 16 | output = to_3d(input) 17 | print(output.size()) #输出shape b n c 18 | 19 | output1 =to_4d(output, 64, 64) # 指定高宽 h*w =n 20 | print(output1.size()) 21 | -------------------------------------------------------------------------------- /缝合代码示例/维度转换.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from einops import rearrange 3 | 4 | 5 | def to_3d(x): 6 | return rearrange(x, 'b c h w -> b (h w) c') 7 | 8 | 9 | def to_4d(x, h, w): 10 | return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w) 11 | 12 | 13 | # x = x.permute(0, 2, 3, 1) # 【B, C, H, W】 -> 【B, H, W, C】 14 | # x= x.permute(0, 3, 1, 2) # 【B, H, W, C】 -> 【B, C, H, W】 15 | 16 | if __name__ == '__main__': 17 | input = torch.randn(3, 32, 64, 64) # 假设输入tensor B C H W 18 | 19 | output = to_3d(input) 20 | print(output.size()) #输出shape b n c 21 | 22 | output1 =to_4d(output, 64, 64) # 指定高宽 h*w =n 23 | print(output1.size()) -------------------------------------------------------------------------------- /(arxiv)Arelu.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | # github地址:https://github.com/densechen/AReLU/blob/master/activations/arelu.py 7 | # 论文:ARELU: ATTENTION-BASED RECTIFIED LINEAR UNIT 8 | class AReLU(nn.Module): 9 | def __init__(self, alpha=0.90, beta=2.0): 10 | super().__init__() 11 | self.alpha = nn.Parameter(torch.tensor([alpha])) 12 | self.beta = nn.Parameter(torch.tensor([beta])) 13 | 14 | def forward(self, input): 15 | alpha = torch.clamp(self.alpha, min=0.01, max=0.99) 16 | beta = 1 + torch.sigmoid(self.beta) 17 | 18 | return F.relu(input) * beta - F.relu(-input) * alpha 19 | -------------------------------------------------------------------------------- /缝合代码示例/HWD小波下采样.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from pytorch_wavelets import DWTForward 4 | 5 | 6 | class Down_wt(nn.Module): 7 | def __init__(self, in_ch, out_ch): 8 | super(Down_wt, self).__init__() 9 | self.wt = DWTForward(J=1, mode='zero', wave='haar') 10 | self.conv_bn_relu = nn.Sequential( 11 | nn.Conv2d(in_ch * 4, out_ch, kernel_size=1, stride=1), 12 | nn.BatchNorm2d(out_ch), 13 
| nn.ReLU(inplace=True), 14 | ) 15 | 16 | def forward(self, x): 17 | yL, yH = self.wt(x) 18 | y_HL = yH[0][:, :, 0, ::] 19 | y_LH = yH[0][:, :, 1, ::] 20 | y_HH = yH[0][:, :, 2, ::] 21 | x = torch.cat([yL, y_HL, y_LH, y_HH], dim=1) 22 | x = self.conv_bn_relu(x) 23 | return x 24 | 25 | 26 | # 输入 B C H W, 输出 B C H W 27 | if __name__ == '__main__': 28 | block = Down_wt(64, 96) # 输入通道数,输出通道数 29 | input = torch.rand(3, 64, 64, 64) 30 | output = block(input) 31 | print(output.size()) 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 全网最全最新的即插即用模块:目前进度70% 2 | 包括卷积 注意力机制 下采样 特征融合模块等 3 | 持续更新~ 4 | 详细论文讲解关注公众号【ai缝合大王】和B站【ai缝合大王】 5 | 模块分享、缝合交流进q群: 6 | 994264161 7 | 更多细分方向群:① 目标检测 ② 图像分类 ③ 语义分割 ④ 人脸识别 ⑤ 三维重建 ⑥ 多模态融合 ⑦ 姿态估计 ⑧ 超分辨率⑨ 自动驾驶 ⑩ 图像生成 ⑪ 遥感影像 ⑫ 医学图像 ⑬ 底层视觉 ⑭ YOLO 系列 ⑮ Mamba 等新架构⑯ 视频处理 ⑰ 3D ⑱ 大模型 ⑲ 重识别(ReID)⑳ 图像去雨/去噪/去模糊 8 | 细分方向群为微信群,扫描二维码添加微信,扣1-20拉你进群。 9 | 10 | ![8fe957e64594b1526077b0f75c6f496](https://github.com/user-attachments/assets/392ad630-081a-454d-ad38-40d24c4a8990) 11 | 12 | 目前主要更新二维图像模块,所有二维图像都可以用,图像分类、分割、目标检测、超分辨率重建、图像去雾、暗光增强等所有图像任务都可以用 3d模块 和1d模块 后续会陆续更新。 13 | 14 | ![...](assets/18b0c599180d157e714daf7f21b1fdc.jpg) 15 | 16 | 17 | 18 | 这里我介绍一下,加入深度学习论文指南: 19 | 第一,如果你是新手小白,代码论文都看不懂,我们会给出最优的学习路线,让你少走弯路,节省很多时间。 20 | 21 | 第二,这里提供缝合模块的技巧,让你轻松使用github上的模块即插即用,插入自己的模型中涨点。 22 | 23 | 第三,如果你缝合了很多模块发现没有效果,不妨来这里看看,这里提供了高阶缝合技巧,结构缝合,串并联交互缝合,创新点缝合等,并且还有自制即插即用模块分享。 24 | 25 | 第四,如果缝合好模块之后,不知道怎么编故事,怎么写论文的思路,这里也会进行提供。 26 | 27 | 第五,每个成员都可以发布主题,我们也会针对主题进行提问,随着人数的增多,你想知道的任何这方面的问题几乎都有模板答案。 28 | 29 | 第六,这是一个长期项目,不是说就几个视频加pdf,是一年的时间内所有内容。 30 | 31 | 32 | -------------------------------------------------------------------------------- /(ICCV 2021) RA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/2108.02456 7 | # 论文:Residual Attention: A Simple but Effective Method for Multi-Label Recognition 8 | 9 | 10 | 11 | class ResidualAttention(nn.Module): 12 | 13 | def __init__(self, channel=512 , num_class=1000,la=0.2): 14 | super().__init__() 15 | self.la=la 16 | self.fc=nn.Conv2d(in_channels=channel,out_channels=num_class,kernel_size=1,stride=1,bias=False) 17 | 18 | def forward(self, x): 19 | b,c,h,w=x.shape 20 | y_raw=self.fc(x).flatten(2) #b,num_class,hxw 21 | y_avg=torch.mean(y_raw,dim=2) #b,num_class 22 | y_max=torch.max(y_raw,dim=2)[0] #b,num_class 23 | score=y_avg+self.la*y_max 24 | return score 25 | 26 | 27 | 28 | 29 | if __name__ == '__main__': 30 | input=torch.randn(50,512,7,7) 31 | resatt = ResidualAttention(channel=512,num_class=1000,la=0.2) 32 | output=resatt(input) 33 | print(output.shape) 34 | 35 | 36 | -------------------------------------------------------------------------------- /采样/(PR2023) 小波下采样.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from pytorch_wavelets import DWTForward 4 | # GitHub地址 :https://github.com/apple1986/HWD 5 | # 论文地址:https://www.sciencedirect.com/science/article/pii/S0031320323005174 6 | class Down_wt(nn.Module): 7 | def __init__(self, in_ch, out_ch): 8 | super(Down_wt, self).__init__() 9 | self.wt = DWTForward(J=1, mode='zero', wave='haar') 10 | self.conv_bn_relu = nn.Sequential( 11 | nn.Conv2d(in_ch * 4, out_ch, kernel_size=1, stride=1), 12 | nn.BatchNorm2d(out_ch), 13 | 
nn.ReLU(inplace=True), 14 | ) 15 | 16 | def forward(self, x): 17 | yL, yH = self.wt(x) 18 | y_HL = yH[0][:, :, 0, ::] 19 | y_LH = yH[0][:, :, 1, ::] 20 | y_HH = yH[0][:, :, 2, ::] 21 | x = torch.cat([yL, y_HL, y_LH, y_HH], dim=1) 22 | x = self.conv_bn_relu(x) 23 | return x 24 | 25 | 26 | if __name__ == '__main__': 27 | block = Down_wt(64, 64) # 输入通道数,输出通道数 28 | input = torch.rand(3, 64, 64, 64) # 输入B C H W 29 | output = block(input) 30 | print(output.size()) 31 | -------------------------------------------------------------------------------- /注意力/(ICML 2021)SimAM.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------- 2 | # Simam: A simple, parameter-free attention module for convolutional neural networks (ICML 2021) 3 | # Github:https://github.com/ZjjConan/SimAM 4 | # --------------------------------------- 5 | import torch 6 | import torch.nn as nn 7 | from thop import profile 8 | 9 | 10 | class Simam_module(torch.nn.Module): 11 | def __init__(self, e_lambda=1e-4): 12 | super(Simam_module, self).__init__() 13 | self.act = nn.Sigmoid() 14 | self.e_lambda = e_lambda 15 | 16 | def forward(self, x): 17 | b, c, h, w = x.size() 18 | n = w * h - 1 19 | x_minus_mu_square = (x - x.mean(dim=[2, 3], keepdim=True)).pow(2) 20 | y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(dim=[2, 3], keepdim=True) / n + self.e_lambda)) + 0.5 21 | 22 | return x * self.act(y) 23 | 24 | 25 | # 无参注意力机制 输入 N C H W, 输出 N C H W 26 | if __name__ == '__main__': 27 | model = Simam_module().cuda() 28 | x = torch.randn(1, 3, 64, 64).cuda() 29 | y = model(x) 30 | print(y.size()) 31 | flops, params = profile(model, (x,)) 32 | print(flops / 1e9) 33 | print(params) 34 | -------------------------------------------------------------------------------- /缝合代码示例/LSK.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LSKblock(nn.Module): 6 | def __init__(self, dim): 7 | super().__init__() 8 | self.conv0 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim) 9 | self.conv_spatial = nn.Conv2d(dim, dim, 7, stride=1, padding=9, groups=dim, dilation=3) 10 | self.conv1 = nn.Conv2d(dim, dim // 2, 1) 11 | self.conv2 = nn.Conv2d(dim, dim // 2, 1) 12 | self.conv_squeeze = nn.Conv2d(2, 2, 7, padding=3) 13 | self.conv = nn.Conv2d(dim // 2, dim, 1) 14 | 15 | def forward(self, x): 16 | attn1 = self.conv0(x) 17 | attn2 = self.conv_spatial(attn1) 18 | 19 | attn1 = self.conv1(attn1) 20 | attn2 = self.conv2(attn2) 21 | 22 | attn = torch.cat([attn1, attn2], dim=1) 23 | avg_attn = torch.mean(attn, dim=1, keepdim=True) 24 | max_attn, _ = torch.max(attn, dim=1, keepdim=True) 25 | agg = torch.cat([avg_attn, max_attn], dim=1) 26 | sig = self.conv_squeeze(agg).sigmoid() 27 | attn = attn1 * sig[:, 0, :, :].unsqueeze(1) + attn2 * sig[:, 1, :, :].unsqueeze(1) 28 | attn = self.conv(attn) 29 | return x * attn 30 | 31 | 32 | # 输入 B C H W, 输出 B C H W 33 | if __name__ == '__main__': 34 | block = LSKblock(64) 35 | input = torch.rand(1, 64, 64, 64) 36 | output = block(input) 37 | print(input.size(), output.size()) 38 | -------------------------------------------------------------------------------- /缝合代码示例/部分卷积.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | 4 | 5 | class Partial_conv3(nn.Module): 6 | 7 | def __init__(self, dim, n_div, forward): 8 | super().__init__() 9 | self.dim_conv3 = dim // n_div 10 | 
self.dim_untouched = dim - self.dim_conv3 11 | self.partial_conv3 = nn.Conv2d(self.dim_conv3, self.dim_conv3, 3, 1, 1, bias=False) 12 | 13 | if forward == 'slicing': 14 | self.forward = self.forward_slicing 15 | elif forward == 'split_cat': 16 | self.forward = self.forward_split_cat 17 | else: 18 | raise NotImplementedError 19 | 20 | def forward_slicing(self, x): 21 | # only for inference 22 | x = x.clone() # !!! Keep the original input intact for the residual connection later 23 | x[:, :self.dim_conv3, :, :] = self.partial_conv3(x[:, :self.dim_conv3, :, :]) 24 | 25 | return x 26 | 27 | def forward_split_cat(self, x): 28 | # for training/inference 29 | x1, x2 = torch.split(x, [self.dim_conv3, self.dim_untouched], dim=1) 30 | x1 = self.partial_conv3(x1) 31 | x = torch.cat((x1, x2), 1) 32 | 33 | return x 34 | 35 | # 输入 B C H W, 输出 B C H W 36 | if __name__ == '__main__': 37 | block = Partial_conv3(64, 2, 'split_cat') 38 | input = torch.rand(1, 64, 64, 64) 39 | output = block(input) 40 | print(input.size(), output.size()) 41 | -------------------------------------------------------------------------------- /GhostModule.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch 4 | #GhostNet: More Features from Cheap Operations 5 | 6 | class GhostModule(nn.Module): 7 | def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True): 8 | super(GhostModule, self).__init__() 9 | self.oup = oup 10 | init_channels = math.ceil(oup / ratio) 11 | new_channels = init_channels*(ratio-1) 12 | 13 | self.primary_conv = nn.Sequential( 14 | nn.Conv2d(inp, init_channels, kernel_size, 15 | stride, kernel_size//2, bias=False), 16 | nn.BatchNorm2d(init_channels), 17 | nn.ReLU(inplace=True) if relu else nn.Sequential(), 18 | ) 19 | 20 | self.cheap_operation = nn.Sequential( 21 | nn.Conv2d(init_channels, new_channels, dw_size, 1, 22 | dw_size//2, groups=init_channels, bias=False), 23 | nn.BatchNorm2d(new_channels), 24 | nn.ReLU(inplace=True) if relu else nn.Sequential(), 25 | ) 26 | 27 | def forward(self, x): 28 | x1 = self.primary_conv(x) 29 | x2 = self.cheap_operation(x1) 30 | out = torch.cat([x1, x2], dim=1) 31 | return out[:, :self.oup, :, :] 32 | 33 | 34 | if __name__ == "__main__": 35 | block = GhostModule(128, 256) 36 | 37 | input = torch.zeros((2, 128, 64, 64)) 38 | 39 | output = block(input) 40 | 41 | print(output.size()) -------------------------------------------------------------------------------- /DFF2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | #论文:D-Net: Dynamic Large Kernel with Dynamic Feature Fusion for Volumetric Medical Image Segmentation 4 | #论文地址:https://arxiv.org/abs/2403.10674 5 | 6 | class DFF(nn.Module): 7 | def __init__(self, dim): 8 | super().__init__() 9 | 10 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 11 | self.conv_atten = nn.Sequential( 12 | nn.Conv2d(dim * 2, dim * 2, kernel_size=1, bias=False), 13 | nn.Sigmoid() 14 | ) 15 | self.conv_redu = nn.Conv2d(dim * 2, dim, kernel_size=1, bias=False) 16 | 17 | self.conv1 = nn.Conv2d(dim, 1, kernel_size=1, stride=1, bias=True) 18 | self.conv2 = nn.Conv2d(dim, 1, kernel_size=1, stride=1, bias=True) 19 | self.nonlin = nn.Sigmoid() 20 | 21 | def forward(self, x, skip): 22 | output = torch.cat([x, skip], dim=1) 23 | 24 | att = self.conv_atten(self.avg_pool(output)) 25 | output = output * att 26 | output = self.conv_redu(output) 27 | 28 | att = 
self.conv1(x) + self.conv2(skip) 29 | att = self.nonlin(att) 30 | output = output * att 31 | return output 32 | 33 | if __name__ == '__main__': 34 | 35 | x = torch.randn(1, 48, 128, 128) 36 | skip = torch.randn(1, 48, 128, 128) 37 | 38 | block = DFF(48) 39 | 40 | output = block(x, skip) 41 | 42 | print("Input shape (x):", x.size()) 43 | print("Input shape (skip):", skip.size()) 44 | print("Output shape:", output.size()) 45 | -------------------------------------------------------------------------------- /卷积/(CVPR 2023) 部分卷积.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | #论文地址:https://arxiv.org/pdf/2303.03667 4 | #GitHub地址:https://github.com/JierunChen/FasterNet 5 | 6 | class Partial_conv3(nn.Module): 7 | 8 | def __init__(self, dim, n_div, forward): 9 | super().__init__() 10 | self.dim_conv3 = dim // n_div 11 | self.dim_untouched = dim - self.dim_conv3 12 | self.partial_conv3 = nn.Conv2d(self.dim_conv3, self.dim_conv3, 3, 1, 1, bias=False) 13 | 14 | if forward == 'slicing': 15 | self.forward = self.forward_slicing 16 | elif forward == 'split_cat': 17 | self.forward = self.forward_split_cat 18 | else: 19 | raise NotImplementedError 20 | 21 | def forward_slicing(self, x): 22 | # only for inference 23 | x = x.clone() # !!! Keep the original input intact for the residual connection later 24 | x[:, :self.dim_conv3, :, :] = self.partial_conv3(x[:, :self.dim_conv3, :, :]) 25 | 26 | return x 27 | 28 | def forward_split_cat(self, x): 29 | # for training/inference 30 | x1, x2 = torch.split(x, [self.dim_conv3, self.dim_untouched], dim=1) 31 | x1 = self.partial_conv3(x1) 32 | x = torch.cat((x1, x2), 1) 33 | 34 | return x 35 | 36 | 37 | if __name__ == '__main__': 38 | block = Partial_conv3(64, 2, 'split_cat').cuda() 39 | input = torch.rand(3, 64, 64, 64).cuda() #输入shape b c h w 40 | output = block(input) 41 | print(input.size(), output.size()) 42 | -------------------------------------------------------------------------------- /(ICCV2023)SAFM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | #https://github.com/sunny2109/SAFMN 5 | #论文:https://arxiv.org/pdf/2302.13800 6 | class SAFM(nn.Module): 7 | def __init__(self, dim, n_levels=4): 8 | super().__init__() 9 | self.n_levels = n_levels 10 | chunk_dim = dim // n_levels 11 | 12 | # Spatial Weighting 13 | self.mfr = nn.ModuleList( 14 | [nn.Conv2d(chunk_dim, chunk_dim, 3, 1, 1, groups=chunk_dim) for i in range(self.n_levels)]) 15 | 16 | # # Feature Aggregation 17 | self.aggr = nn.Conv2d(dim, dim, 1, 1, 0) 18 | 19 | # Activation 20 | self.act = nn.GELU() 21 | 22 | def forward(self, x): 23 | h, w = x.size()[-2:] 24 | 25 | xc = x.chunk(self.n_levels, dim=1) 26 | out = [] 27 | for i in range(self.n_levels): 28 | if i > 0: 29 | p_size = (h // 2 ** i, w // 2 ** i) 30 | s = F.adaptive_max_pool2d(xc[i], p_size) 31 | s = self.mfr[i](s) 32 | s = F.interpolate(s, size=(h, w), mode='nearest') 33 | else: 34 | s = self.mfr[i](xc[i]) 35 | out.append(s) 36 | 37 | out = self.aggr(torch.cat(out, dim=1)) 38 | out = self.act(out) * x 39 | return out 40 | 41 | 42 | if __name__ == '__main__': 43 | input = torch.randn(3,36,64,64) #输入b c h w 44 | 45 | block = SAFM(dim=36) 46 | output =block(input) 47 | print(output.size()) 48 | -------------------------------------------------------------------------------- /缝合代码示例/MobileViTv2Attention.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import init 4 | 5 | 6 | class MobileViTv2Attention(nn.Module): 7 | ''' 8 | Scaled dot-product attention 9 | ''' 10 | 11 | def __init__(self, d_model): 12 | ''' 13 | :param d_model: Output dimensionality of the model 14 | :param d_k: Dimensionality of queries and keys 15 | :param d_v: Dimensionality of values 16 | :param h: Number of heads 17 | ''' 18 | super(MobileViTv2Attention, self).__init__() 19 | self.fc_i = nn.Linear(d_model, 1) 20 | self.fc_k = nn.Linear(d_model, d_model) 21 | self.fc_v = nn.Linear(d_model, d_model) 22 | self.fc_o = nn.Linear(d_model, d_model) 23 | 24 | self.d_model = d_model 25 | 26 | def forward(self, input): 27 | ''' 28 | Computes 29 | :param queries: Queries (b_s, nq, d_model) 30 | :return: 31 | ''' 32 | i = self.fc_i(input) # (bs,nq,1) 33 | weight_i = torch.softmax(i, dim=1) # bs,nq,1 34 | context_score = weight_i * self.fc_k(input) # bs,nq,d_model 35 | context_vector = torch.sum(context_score, dim=1, keepdim=True) # bs,1,d_model 36 | v = self.fc_v(input) * context_vector # bs,nq,d_model 37 | out = self.fc_o(v) # bs,nq,d_model 38 | 39 | return out 40 | 41 | 42 | # 输入 B N C 输出 B N C 43 | if __name__ == '__main__': 44 | block = MobileViTv2Attention(d_model=31) 45 | input = torch.rand(64, 61, 31) 46 | output = block(input) 47 | print(input.size(), output.size()) 48 | -------------------------------------------------------------------------------- /(CVPR 2024)IDC.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | # 论文地址:https://arxiv.org/pdf/2303.16900 4 | # 论文:InceptionNeXt: When Inception Meets ConvNeXt (CVPR 2024) 5 | # 全网最全100➕即插即用模块GitHub地址:https://github.com/ai-dawang/PlugNPlay-Modules 6 | class InceptionDWConv2d(nn.Module): 7 | """ Inception depthweise convolution 8 | """ 9 | 10 | def __init__(self, in_channels, square_kernel_size=3, band_kernel_size=11, branch_ratio=0.125): 11 | super().__init__() 12 | 13 | gc = int(in_channels * branch_ratio) # channel numbers of a convolution branch 14 | self.dwconv_hw = nn.Conv2d(gc, gc, square_kernel_size, padding=square_kernel_size // 2, groups=gc) 15 | self.dwconv_w = nn.Conv2d(gc, gc, kernel_size=(1, band_kernel_size), padding=(0, band_kernel_size // 2), 16 | groups=gc) 17 | self.dwconv_h = nn.Conv2d(gc, gc, kernel_size=(band_kernel_size, 1), padding=(band_kernel_size // 2, 0), 18 | groups=gc) 19 | self.split_indexes = (in_channels - 3 * gc, gc, gc, gc) 20 | 21 | def forward(self, x): 22 | x_id, x_hw, x_w, x_h = torch.split(x, self.split_indexes, dim=1) 23 | return torch.cat( 24 | (x_id, self.dwconv_hw(x_hw), self.dwconv_w(x_w), self.dwconv_h(x_h)), 25 | dim=1, 26 | ) 27 | 28 | 29 | if __name__ == '__main__': 30 | 31 | block = InceptionDWConv2d(64) #输入C 32 | input = torch.randn(1, 64, 224, 224) #输入 B C H W 33 | output = block(input) 34 | print(input.size()) 35 | print(output.size()) 36 | -------------------------------------------------------------------------------- /UCDC.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.utils.data 3 | import torch 4 | #论文:ABC: Attention with Bilinear Correlation for Infrared Small Target Detection ICME2023 5 | #论文地址:https://arxiv.org/pdf/2303.10321 6 | 7 | def conv_relu_bn(in_channel, out_channel, dirate): 8 | return nn.Sequential( 9 | nn.Conv2d(in_channels=in_channel, 
out_channels=out_channel, kernel_size=3, stride=1, padding=dirate, 10 | dilation=dirate), 11 | nn.BatchNorm2d(out_channel), 12 | nn.ReLU(inplace=True) 13 | ) 14 | 15 | 16 | #u-shaped convolution-dilated convolution (UCDC) 17 | class UCDC(nn.Module): 18 | """ 19 | Convolution Block 20 | """ 21 | 22 | def __init__(self, in_ch, out_ch): 23 | super(UCDC, self).__init__() 24 | self.conv1 = conv_relu_bn(in_ch, out_ch, 1) 25 | self.dconv1 = conv_relu_bn(out_ch, out_ch // 2, 2) 26 | self.dconv2 = conv_relu_bn(out_ch // 2, out_ch // 2, 4) 27 | self.dconv3 = conv_relu_bn(out_ch, out_ch, 2) 28 | self.conv2 = conv_relu_bn(out_ch * 2, out_ch, 1) 29 | 30 | def forward(self, x): 31 | x1 = self.conv1(x) 32 | dx1 = self.dconv1(x1) 33 | dx2 = self.dconv2(dx1) 34 | dx3 = self.dconv3(torch.cat((dx1, dx2), dim=1)) 35 | out = self.conv2(torch.cat((x1, dx3), dim=1)) 36 | return out 37 | 38 | 39 | if __name__ == '__main__': 40 | 41 | block = UCDC(64, 64) 42 | 43 | 44 | input = torch.randn(1, 64, 32, 32) 45 | 46 | print(input.size()) 47 | 48 | output = block(input) 49 | 50 | print(output.size()) 51 | -------------------------------------------------------------------------------- /(arXiv 2021) EA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/abs/2105.02358 7 | # 论文:Beyond Self-attention: External Attention using Two Linear Layers for Visual Tasks 8 | 9 | 10 | class ExternalAttention(nn.Module): 11 | 12 | def __init__(self, d_model,S=64): 13 | super().__init__() 14 | self.mk=nn.Linear(d_model,S,bias=False) 15 | self.mv=nn.Linear(S,d_model,bias=False) 16 | self.softmax=nn.Softmax(dim=1) 17 | self.init_weights() 18 | 19 | 20 | def init_weights(self): 21 | for m in self.modules(): 22 | if isinstance(m, nn.Conv2d): 23 | init.kaiming_normal_(m.weight, mode='fan_out') 24 | if m.bias is not None: 25 | init.constant_(m.bias, 0) 26 | elif isinstance(m, nn.BatchNorm2d): 27 | init.constant_(m.weight, 1) 28 | init.constant_(m.bias, 0) 29 | elif isinstance(m, nn.Linear): 30 | init.normal_(m.weight, std=0.001) 31 | if m.bias is not None: 32 | init.constant_(m.bias, 0) 33 | 34 | def forward(self, queries): 35 | attn=self.mk(queries) #bs,n,S 36 | attn=self.softmax(attn) #bs,n,S 37 | attn=attn/torch.sum(attn,dim=2,keepdim=True) #bs,n,S 38 | out=self.mv(attn) #bs,n,d_model 39 | 40 | return out 41 | 42 | 43 | if __name__ == '__main__': 44 | input=torch.randn(50,49,512) 45 | block = ExternalAttention(d_model=512,S=8) 46 | output=block(input) 47 | print(output.shape) 48 | 49 | -------------------------------------------------------------------------------- /GCTattention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | #论文:title:Gated Channel Transformation for Visual Recognition 4 | #论文地址:https://arxiv.org/abs/1909.11519 5 | 6 | # 定义 GCT 模块 7 | class GCT(nn.Module): 8 | def __init__(self, num_channels, epsilon=1e-5, mode='l2', after_relu=False): 9 | super(GCT, self).__init__() 10 | self.alpha = nn.Parameter(torch.ones(1, num_channels, 1, 1)) 11 | self.gamma = nn.Parameter(torch.zeros(1, num_channels, 1, 1)) 12 | self.beta = nn.Parameter(torch.zeros(1, num_channels, 1, 1)) 13 | self.epsilon = epsilon 14 | self.mode = mode 15 | self.after_relu = after_relu 16 | 17 | def forward(self, x): 18 | if self.mode == 'l2': 19 | embedding = (x.pow(2).sum((2, 3), keepdim=True) + self.epsilon).pow(0.5) * 
self.alpha 20 | norm = self.gamma / (embedding.pow(2).mean(dim=1, keepdim=True) + self.epsilon).pow(0.5) 21 | elif self.mode == 'l1': 22 | _x = torch.abs(x) if not self.after_relu else x 23 | embedding = _x.sum((2, 3), keepdim=True) * self.alpha 24 | norm = self.gamma / (torch.abs(embedding).mean(dim=1, keepdim=True) + self.epsilon) 25 | else: 26 | raise ValueError('Unknown mode: {}'.format(self.mode)) 27 | 28 | gate = 1. + torch.tanh(embedding * norm + self.beta) 29 | return x * gate 30 | 31 | 32 | if __name__ == '__main__': 33 | 34 | 35 | input = torch.randn(1, 16, 32, 32) 36 | 37 | print(input.size()) 38 | 39 | block = GCT(num_channels=16) 40 | 41 | output = block(input) 42 | 43 | print(output.size()) -------------------------------------------------------------------------------- /scSE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | #Concurrent Spatial and Channel ‘Squeeze & Excitation’ in Fully Convolutional Networks 4 | 5 | 6 | class cSE(nn.Module): 7 | 8 | def __init__(self, channel, reduction=2): 9 | super().__init__() 10 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 11 | self.fc = nn.Sequential( 12 | nn.Conv2d(channel, channel // reduction, kernel_size=1, bias=False), 13 | nn.ReLU(inplace=True), 14 | nn.Conv2d(channel // reduction, channel, kernel_size=1, bias=False), 15 | nn.Sigmoid() 16 | ) 17 | 18 | def forward(self, x): 19 | y = self.avg_pool(x) 20 | y = self.fc(y) 21 | return x * y.expand_as(x) 22 | 23 | class sSE(nn.Module): 24 | def __init__(self, in_channel): 25 | super().__init__() 26 | self.Conv1x1 = nn.Conv2d(in_channel, 1, kernel_size=1, bias=False) 27 | self.norm = nn.Sigmoid() 28 | 29 | def forward(self, x): 30 | y = self.Conv1x1(x) 31 | y = self.norm(y) 32 | return x * y 33 | 34 | class scSE(nn.Module): 35 | def __init__(self, in_channel): 36 | super().__init__() 37 | self.cSE = cSE(in_channel) 38 | self.sSE = sSE(in_channel) 39 | 40 | def forward(self, U): 41 | U_sse = self.sSE(U) 42 | U_cse = self.cSE(U) 43 | return torch.max(U_cse, U_sse) # Taking the element-wise maximum 44 | 45 | 46 | if __name__ == '__main__': 47 | input = torch.randn(3, 32, 64, 64) #B C H W 48 | block = scSE(in_channel=32) 49 | output = block(input) 50 | print(output.size()) -------------------------------------------------------------------------------- /(arXiv 2019) ECA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | from collections import OrderedDict 6 | 7 | # 论文地址:https://arxiv.org/pdf/1910.03151 8 | # 论文:ECA-Net: Efficient Channel Attention for Deep Convolutional Neural Networks 9 | 10 | 11 | 12 | 13 | class ECAAttention(nn.Module): 14 | 15 | def __init__(self, kernel_size=3): 16 | super().__init__() 17 | self.gap=nn.AdaptiveAvgPool2d(1) 18 | self.conv=nn.Conv1d(1,1,kernel_size=kernel_size,padding=(kernel_size-1)//2) 19 | self.sigmoid=nn.Sigmoid() 20 | 21 | def init_weights(self): 22 | for m in self.modules(): 23 | if isinstance(m, nn.Conv2d): 24 | init.kaiming_normal_(m.weight, mode='fan_out') 25 | if m.bias is not None: 26 | init.constant_(m.bias, 0) 27 | elif isinstance(m, nn.BatchNorm2d): 28 | init.constant_(m.weight, 1) 29 | init.constant_(m.bias, 0) 30 | elif isinstance(m, nn.Linear): 31 | init.normal_(m.weight, std=0.001) 32 | if m.bias is not None: 33 | init.constant_(m.bias, 0) 34 | 35 | def forward(self, x): 36 | y=self.gap(x) #bs,c,1,1 37 | 
y=y.squeeze(-1).permute(0,2,1) #bs,1,c 38 | y=self.conv(y) #bs,1,c 39 | y=self.sigmoid(y) #bs,1,c 40 | y=y.permute(0,2,1).unsqueeze(-1) #bs,c,1,1 41 | return x*y.expand_as(x) 42 | 43 | 44 | 45 | 46 | 47 | 48 | if __name__ == '__main__': 49 | input=torch.randn(50,512,7,7) 50 | block = ECAAttention(kernel_size=3) 51 | output=block(input) 52 | print(output.shape) 53 | 54 | -------------------------------------------------------------------------------- /PGM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class PromptGenBlock(nn.Module): 7 | def __init__(self, prompt_dim=128, prompt_len=5, prompt_size=96, lin_dim=192): 8 | super(PromptGenBlock, self).__init__() 9 | self.prompt_param = nn.Parameter(torch.rand(1, prompt_len, prompt_dim, prompt_size, prompt_size)) 10 | self.linear_layer = nn.Linear(lin_dim, prompt_len) 11 | self.conv3x3 = nn.Conv2d(prompt_dim, prompt_dim, kernel_size=3, stride=1, padding=1, bias=False) 12 | 13 | def forward(self, x): 14 | B, C, H, W = x.shape 15 | emb = x.mean(dim=(-2, -1)) 16 | prompt_weights = F.softmax(self.linear_layer(emb), dim=1) 17 | prompt = prompt_weights.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) * self.prompt_param.unsqueeze(0).repeat(B, 1, 18 | 1, 1, 19 | 1, 20 | 1).squeeze( 21 | 1) 22 | prompt = torch.sum(prompt, dim=1) 23 | prompt = F.interpolate(prompt, (H, W), mode="bilinear") 24 | prompt = self.conv3x3(prompt) 25 | 26 | return prompt 27 | if __name__ == '__main__': 28 | 29 | block = PromptGenBlock(prompt_dim=3, prompt_len=4, prompt_size=96, lin_dim=3)#修改这里来对齐 30 | input = torch.randn(4, 3, 64, 64) # B C H W 31 | output = block(input) 32 | print(input.size()) 33 | print(output.size()) 34 | -------------------------------------------------------------------------------- /卷积/(ICCV 2023)大核选择模块LSK.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | #Github地址:https://github.com/zcablii/Large-Selective-Kernel-Network 4 | #论文地址:https://openaccess.thecvf.com/content/ICCV2023/papers/Li_Large_Selective_Kernel_Network_for_Remote_Sensing_Object_Detection_ICCV_2023_paper.pdf 5 | 6 | class LSKblock(nn.Module): 7 | def __init__(self, dim): 8 | super().__init__() 9 | self.conv0 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim) 10 | self.conv_spatial = nn.Conv2d(dim, dim, 7, stride=1, 11 | padding=9, groups=dim, dilation=3) 12 | self.conv1 = nn.Conv2d(dim, dim // 2, 1) 13 | self.conv2 = nn.Conv2d(dim, dim // 2, 1) 14 | self.conv_squeeze = nn.Conv2d(2, 2, 7, padding=3) 15 | self.conv = nn.Conv2d(dim // 2, dim, 1) 16 | 17 | def forward(self, x): 18 | attn1 = self.conv0(x) 19 | attn2 = self.conv_spatial(attn1) 20 | 21 | attn1 = self.conv1(attn1) 22 | attn2 = self.conv2(attn2) 23 | 24 | attn = torch.cat([attn1, attn2], dim=1) 25 | avg_attn = torch.mean(attn, dim=1, keepdim=True) 26 | max_attn, _ = torch.max(attn, dim=1, keepdim=True) 27 | agg = torch.cat([avg_attn, max_attn], dim=1) 28 | sig = self.conv_squeeze(agg).sigmoid() 29 | attn = attn1 * sig[:, 0, :, :].unsqueeze(1) + \ 30 | attn2 * sig[:, 1, :, :].unsqueeze(1) 31 | attn = self.conv(attn) 32 | return x * attn 33 | 34 | 35 | if __name__ == '__main__': 36 | block = LSKblock(64).cuda() 37 | input = torch.rand(3, 64, 32, 32).cuda() #输入B C H W 38 | output = block(input) 39 | print(input.size(), output.size()) 40 | -------------------------------------------------------------------------------- /f_sampling.py: 
-------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | #论文:Multi-Scale Temporal Frequency Convolutional Network With Axial Attention for Speech Enhancement (ICASSP 2022) 4 | #论文地址:https://ieeexplore.ieee.org/document/9746610 5 | 6 | class FD(nn.Module): 7 | def __init__(self, cin, cout, K=(7, 1), S=(4, 1), P=(2, 0)): 8 | super(FD, self).__init__() 9 | self.fd = nn.Sequential( 10 | nn.Conv2d(cin, cout, K, S, P, groups=2), 11 | nn.BatchNorm2d(cout), 12 | nn.PReLU(cout) 13 | ) 14 | 15 | def forward(self, x): 16 | return self.fd(x) 17 | 18 | 19 | class FU(nn.Module): 20 | def __init__(self, cin, cout, K=(7, 1), S=(4, 1), P=(2, 0), O=(1, 0)): 21 | super(FU, self).__init__() 22 | self.pconv1 = nn.Sequential( 23 | nn.Conv2d(cin*2, cin, (1, 1)), 24 | nn.BatchNorm2d(cin), 25 | nn.Tanh(), 26 | ) 27 | self.pconv2 = nn.Sequential( 28 | nn.Conv2d(cin, cout, (1, 1)), 29 | nn.BatchNorm2d(cout), 30 | nn.PReLU(cout), 31 | ) 32 | # 22/06/13 update, add groups = 2 33 | self.conv3 = nn.Sequential( 34 | nn.ConvTranspose2d(cout, cout, K, S, P, O, groups=2), 35 | nn.BatchNorm2d(cout), 36 | nn.PReLU(cout) 37 | ) 38 | 39 | def forward(self, fu, fd): 40 | """ 41 | fu, fd: B C F T 42 | """ 43 | outs = self.pconv1(th.cat([fu, fd], dim=1))*fd 44 | outs = self.pconv2(outs) 45 | outs = self.conv3(outs) 46 | return outs 47 | 48 | 49 | def test_fd(): 50 | net = FD(4, 8) 51 | inps = th.randn(3, 4, 256, 101) 52 | print(net(inps).shape) 53 | 54 | 55 | if __name__ == "__main__": 56 | test_fd() -------------------------------------------------------------------------------- /3D/(CVPR 2024)IDC3d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | # 论文地址:https://arxiv.org/pdf/2303.16900 4 | # 论文:InceptionNeXt: When Inception Meets ConvNeXt (CVPR 2024) 5 | # 全网最全100➕即插即用模块GitHub地址:https://github.com/ai-dawang/PlugNPlay-Modules 6 | class InceptionDWConv3d(nn.Module): 7 | """ Inception depthwise convolution for 3D data 8 | """ 9 | 10 | def __init__(self, in_channels, cube_kernel_size=3, band_kernel_size=11, branch_ratio=0.125): 11 | super().__init__() 12 | 13 | gc = int(in_channels * branch_ratio) # channel numbers of a convolution branch 14 | self.dwconv_hwd = nn.Conv3d(gc, gc, cube_kernel_size, padding=cube_kernel_size // 2, groups=gc) 15 | self.dwconv_wd = nn.Conv3d(gc, gc, kernel_size=(1, 1, band_kernel_size), padding=(0, 0, band_kernel_size // 2), 16 | groups=gc) 17 | self.dwconv_hd = nn.Conv3d(gc, gc, kernel_size=(1, band_kernel_size, 1), padding=(0, band_kernel_size // 2, 0), 18 | groups=gc) 19 | self.dwconv_hw = nn.Conv3d(gc, gc, kernel_size=(band_kernel_size, 1, 1), padding=(band_kernel_size // 2, 0, 0), 20 | groups=gc) 21 | self.split_indexes = (in_channels - 4 * gc, gc, gc, gc, gc) 22 | 23 | def forward(self, x): 24 | x_id, x_hwd, x_wd, x_hd, x_hw = torch.split(x, self.split_indexes, dim=1) 25 | return torch.cat( 26 | (x_id, self.dwconv_hwd(x_hwd), self.dwconv_wd(x_wd), self.dwconv_hd(x_hd), self.dwconv_hw(x_hw)), 27 | dim=1, 28 | ) 29 | 30 | 31 | if __name__ == '__main__': 32 | block = InceptionDWConv3d(64) # 输入 C 33 | input = torch.randn(1, 64, 16, 224, 224) # 输入B C D H W 34 | output = block(input) 35 | print(input.size()) 36 | print(output.size()) 37 | -------------------------------------------------------------------------------- /目标检测/WCMF.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn 
as nn 3 | # 论文:FCMNet: Frequency-aware cross-modality attention networks for RGB-D salient object detection 4 | # 论文地址:https://www.sciencedirect.com/science/article/abs/pii/S0925231222003848 5 | class WCMF(nn.Module): 6 | def __init__(self,channel=256): 7 | super(WCMF, self).__init__() 8 | self.conv_r1 = nn.Sequential(nn.Conv2d(channel, channel, 1, 1, 0), nn.BatchNorm2d(channel), nn.ReLU()) 9 | self.conv_d1 = nn.Sequential(nn.Conv2d(channel, channel, 1, 1, 0), nn.BatchNorm2d(channel), nn.ReLU()) 10 | 11 | self.conv_c1 = nn.Sequential(nn.Conv2d(2*channel, channel, 3, 1, 1), nn.BatchNorm2d(channel), nn.ReLU()) 12 | self.conv_c2 = nn.Sequential(nn.Conv2d(channel, 2, 3, 1, 1), nn.BatchNorm2d(2), nn.ReLU()) 13 | self.avgpool = nn.AdaptiveAvgPool2d((1,1)) 14 | def fusion(self,f1,f2,f_vec): 15 | 16 | w1 = f_vec[:, 0, :, :].unsqueeze(1) 17 | w2 = f_vec[:, 1, :, :].unsqueeze(1) 18 | out1 = (w1 * f1) + (w2 * f2) 19 | out2 = (w1 * f1) * (w2 * f2) 20 | return out1 + out2 21 | def forward(self,rgb,depth): 22 | Fr = self.conv_r1(rgb) 23 | Fd = self.conv_d1(depth) 24 | f = torch.cat([Fr, Fd],dim=1) 25 | f = self.conv_c1(f) 26 | f = self.conv_c2(f) 27 | # f = self.avgpool(f) 28 | Fo = self.fusion(Fr, Fd, f) 29 | return Fo 30 | 31 | 32 | if __name__ == '__main__': 33 | 34 | block = WCMF(channel=256) 35 | 36 | # 创建RGB和深度输入的假设张量 37 | rgb_input = torch.randn(1, 256, 224, 224) 38 | depth_input = torch.randn(1, 256, 224, 224) 39 | 40 | # 通过WCMF模型 41 | output = block(rgb_input, depth_input) 42 | 43 | # 打印输入和输出的shape 44 | print(rgb_input.size()) 45 | print(depth_input.size()) 46 | print(output.size()) -------------------------------------------------------------------------------- /MDTA.py: -------------------------------------------------------------------------------- 1 | ## Multi-DConv Head Transposed Self-Attention (MDTA) 2 | import torch 3 | from einops import rearrange 4 | from torch import nn 5 | 6 | 7 | class Attention(nn.Module): 8 | def __init__(self, dim, num_heads = 4, bias = True): 9 | super(Attention, self).__init__() 10 | self.num_heads = num_heads 11 | self.temperature = nn.Parameter(torch.ones(num_heads, 1, 1)) 12 | 13 | self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1, bias=bias) 14 | self.qkv_dwconv = nn.Conv2d(dim * 3, dim * 3, kernel_size=3, stride=1, padding=1, groups=dim * 3, bias=bias) 15 | self.project_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias) 16 | 17 | def forward(self, x): 18 | b, c, h, w = x.shape 19 | 20 | 21 | qkv = self.qkv_dwconv(self.qkv(x)) 22 | q, k, v = qkv.chunk(3, dim=1) 23 | 24 | q = rearrange(q, 'b (head c) h w -> b head c (h w)', head=self.num_heads) 25 | k = rearrange(k, 'b (head c) h w -> b head c (h w)', head=self.num_heads) 26 | v = rearrange(v, 'b (head c) h w -> b head c (h w)', head=self.num_heads) 27 | 28 | q = torch.nn.functional.normalize(q, dim=-1) 29 | k = torch.nn.functional.normalize(k, dim=-1) 30 | 31 | # [B, head, C/head, HW] * [B, head, HW, C/head] * [head, 1, 1] ==> [B, head, C/head, C/head] 32 | attn = (q @ k.transpose(-2, -1)) * self.temperature 33 | attn = attn.softmax(dim=-1) 34 | 35 | # [B, head, C/head, C/head] * [B, head, C/head, HW] ==> [B, head, C/head, HW] 36 | out = (attn @ v) 37 | 38 | # [B, head, C/head, HW] ==> [B, head, C/head, H, W] 39 | out = rearrange(out, 'b head c (h w) -> b (head c) h w', head=self.num_heads, h=h, w=w) 40 | 41 | out = self.project_out(out) 42 | return out 43 | 44 | if __name__ == '__main__': 45 | block = Attention(64) 46 | input = torch.rand(3, 64, 128, 128) 47 | output = block(input) 48 | 49 | 
print(input.size()) 50 | print(output.size()) 51 | -------------------------------------------------------------------------------- /注意力/(IEEE 2023)AGCA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | # 论文:AGCA: An Adaptive Graph Channel Attention Module for Steel Surface Defect Detection 5 | # 论文地址:https://ieeexplore.ieee.org/document/10050536 6 | 7 | class AGCA(nn.Module): 8 | def __init__(self, in_channel, ratio): 9 | super(AGCA, self).__init__() 10 | hide_channel = in_channel // ratio 11 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 12 | self.conv1 = nn.Conv2d(in_channel, hide_channel, kernel_size=1, bias=False) 13 | self.softmax = nn.Softmax(2) 14 | # Choose to deploy A0 on GPU or CPU according to your needs 15 | self.A0 = torch.eye(hide_channel).to('cuda') 16 | # self.A0 = torch.eye(hide_channel) 17 | # A2 is initialized to 1e-6 18 | self.A2 = nn.Parameter(torch.FloatTensor(torch.zeros((hide_channel, hide_channel))), requires_grad=True) 19 | init.constant_(self.A2, 1e-6) 20 | self.conv2 = nn.Conv1d(1, 1, kernel_size=1, bias=False) 21 | self.conv3 = nn.Conv1d(1, 1, kernel_size=1, bias=False) 22 | self.relu = nn.ReLU(inplace=True) 23 | self.conv4 = nn.Conv2d(hide_channel, in_channel, kernel_size=1, bias=False) 24 | self.sigmoid = nn.Sigmoid() 25 | 26 | def forward(self, x): 27 | y = self.avg_pool(x) 28 | y = self.conv1(y) 29 | B, C, _, _ = y.size() 30 | y = y.flatten(2).transpose(1, 2) 31 | A1 = self.softmax(self.conv2(y)) 32 | A1 = A1.expand(B, C, C) 33 | A = (self.A0 * A1) + self.A2 34 | y = torch.matmul(y, A) 35 | y = self.relu(self.conv3(y)) 36 | y = y.transpose(1, 2).view(-1, C, 1, 1) 37 | y = self.sigmoid(self.conv4(y)) 38 | 39 | return x * y 40 | 41 | if __name__ == '__main__': 42 | block = AGCA(in_channel=64, ratio=4).to('cuda') 43 | input = torch.rand(1, 64, 32, 32).to('cuda') 44 | output = block(input) 45 | print(input.size()) 46 | print(output.size()) 47 | -------------------------------------------------------------------------------- /(ACCV 2024) LIA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | # 论文题目:PlainUSR: Chasing Faster ConvNet for Efficient Super-Resolution 6 | # 论文地址:https://openaccess.thecvf.com/content/ACCV2024/papers/Wang_PlainUSR_Chasing_Faster_ConvNet_for_Efficient_Super-Resolution_ACCV_2024_paper.pdf 7 | 8 | class SoftPooling2D(torch.nn.Module): 9 | def __init__(self,kernel_size,stride=None,padding=0): 10 | super(SoftPooling2D, self).__init__() 11 | self.avgpool = torch.nn.AvgPool2d(kernel_size,stride,padding, count_include_pad=False) 12 | def forward(self, x): 13 | x_exp = torch.exp(x) 14 | x_exp_pool = self.avgpool(x_exp) 15 | x = self.avgpool(x_exp*x) 16 | return x/x_exp_pool 17 | 18 | class LocalAttention(nn.Module): 19 | ''' attention based on local importance''' 20 | def __init__(self, channels, f=16): 21 | super().__init__() 22 | self.body = nn.Sequential( 23 | # sample importance 24 | nn.Conv2d(channels, f, 1), 25 | SoftPooling2D(7, stride=3), 26 | nn.Conv2d(f, f, kernel_size=3, stride=2, padding=1), 27 | nn.Conv2d(f, channels, 3, padding=1), 28 | # to heatmap 29 | nn.Sigmoid(), 30 | ) 31 | self.gate = nn.Sequential( 32 | nn.Sigmoid(), 33 | ) 34 | def forward(self, x): 35 | ''' forward ''' 36 | # interpolate the heat map 37 | g = self.gate(x[:,:1].clone()) 38 | w = F.interpolate(self.body(x), (x.size(2), x.size(3)), 
mode='bilinear', align_corners=False) 39 | 40 | return x * w * g #(w + g) #self.gate(x, w) 41 | 42 | if __name__ == '__main__': 43 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 44 | print(f"Using device: {device}") 45 | 46 | block = LocalAttention(channels=32).to(device) 47 | input = torch.rand(1, 32, 256, 256).to(device) 48 | 49 | output = block(input) 50 | print(input.shape) 51 | print(output.shape) 52 | -------------------------------------------------------------------------------- /3D/(IEEE 2024)SFFusion3d特征融合.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # 论文:A Multilevel Multimodal Fusion Transformer for Remote Sensing Semantic Segmentation 5 | # 全网最全100➕即插即用模块GitHub地址:https://github.com/ai-dawang/PlugNPlay-Modules 6 | class SqueezeAndExcitation3D(nn.Module): 7 | def __init__(self, channel, reduction=16, activation=nn.ReLU(inplace=True)): 8 | super(SqueezeAndExcitation3D, self).__init__() 9 | self.fc = nn.Sequential( 10 | nn.Conv3d(channel, channel // reduction, kernel_size=1), 11 | activation, 12 | nn.Conv3d(channel // reduction, channel, kernel_size=1), 13 | nn.Sigmoid() 14 | ) 15 | 16 | def forward(self, x): 17 | weighting = F.adaptive_avg_pool3d(x, 1) 18 | weighting = self.fc(weighting) 19 | y = x * weighting 20 | return y 21 | 22 | class SqueezeAndExciteFusionAdd3D(nn.Module): 23 | def __init__(self, channels_in, activation=nn.ReLU(inplace=True)): 24 | super(SqueezeAndExciteFusionAdd3D, self).__init__() 25 | 26 | self.se_1 = SqueezeAndExcitation3D(channels_in, activation=activation) 27 | self.se_2 = SqueezeAndExcitation3D(channels_in, activation=activation) 28 | 29 | def forward(self, se1, se2): 30 | se1 = self.se_1(se1) 31 | se2 = self.se_2(se2) 32 | out = se1 + se2 33 | return out 34 | 35 | # 示例用法 36 | if __name__ == "__main__": 37 | # 假设的输入数据 38 | input_1 = torch.randn(32, 64, 16, 128, 128) # 输入 B C D H W 39 | input_2 = torch.randn(32, 64, 16, 128, 128) # 同上 40 | 41 | # 打印输入数据的形状 42 | print(input_1.size()) # 输出: (32, 64, 16, 128, 128) 43 | print(input_2.size()) # 输出: (32, 64, 16, 128, 128) 44 | 45 | # 创建SqueezeAndExciteFusionAdd3D模块的实例 46 | block = SqueezeAndExciteFusionAdd3D(channels_in=64) 47 | 48 | # 将输入通过SqueezeAndExciteFusionAdd3D模块获得输出 49 | output = block(input_1, input_2) 50 | 51 | # 打印输出数据的形状 52 | print(output.size()) # 输出应该和输入形状相同: (32, 64, 16, 128, 128) 53 | -------------------------------------------------------------------------------- /注意力/(arxiv2023)ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | #GitHub地址:https://github.com/YOLOonMe/EMA-attention-module 4 | #论文地址:https://arxiv.org/abs/2305.13563v2 5 | class EMA(nn.Module): 6 | def __init__(self, channels, factor=8): 7 | super(EMA, self).__init__() 8 | self.groups = factor 9 | assert channels // self.groups > 0 10 | self.softmax = nn.Softmax(-1) 11 | self.agp = nn.AdaptiveAvgPool2d((1, 1)) 12 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 13 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 14 | self.gn = nn.GroupNorm(channels // self.groups, channels // self.groups) 15 | self.conv1x1 = nn.Conv2d(channels // self.groups, channels // self.groups, kernel_size=1, stride=1, padding=0) 16 | self.conv3x3 = nn.Conv2d(channels // self.groups, channels // self.groups, kernel_size=3, stride=1, padding=1) 17 | 18 | def forward(self, x): 19 | b, c, h, w = x.size() 20 | group_x = x.reshape(b * 
self.groups, -1, h, w) # b*g,c//g,h,w 21 | x_h = self.pool_h(group_x) 22 | x_w = self.pool_w(group_x).permute(0, 1, 3, 2) 23 | hw = self.conv1x1(torch.cat([x_h, x_w], dim=2)) 24 | x_h, x_w = torch.split(hw, [h, w], dim=2) 25 | x1 = self.gn(group_x * x_h.sigmoid() * x_w.permute(0, 1, 3, 2).sigmoid()) 26 | x2 = self.conv3x3(group_x) 27 | x11 = self.softmax(self.agp(x1).reshape(b * self.groups, -1, 1).permute(0, 2, 1)) 28 | x12 = x2.reshape(b * self.groups, c // self.groups, -1) # b*g, c//g, hw 29 | x21 = self.softmax(self.agp(x2).reshape(b * self.groups, -1, 1).permute(0, 2, 1)) 30 | x22 = x1.reshape(b * self.groups, c // self.groups, -1) # b*g, c//g, hw 31 | weights = (torch.matmul(x11, x12) + torch.matmul(x21, x22)).reshape(b * self.groups, 1, h, w) 32 | return (group_x * weights.sigmoid()).reshape(b, c, h, w) 33 | 34 | 35 | # 输入 B C H W, 输出 B C H W 36 | if __name__ == '__main__': 37 | block = EMA(64).cuda() 38 | input = torch.rand(1, 64, 64, 64).cuda() 39 | output = block(input) 40 | print(input.size(), output.size()) 41 | -------------------------------------------------------------------------------- /GAU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class TA(nn.Module): 5 | def __init__(self, T,ratio=2): 6 | 7 | super(TA, self).__init__() 8 | 9 | self.avg_pool = nn.AdaptiveAvgPool3d(1) 10 | self.max_pool = nn.AdaptiveMaxPool3d(1) 11 | self.sharedMLP = nn.Sequential( 12 | nn.Conv3d(T, T // ratio, 1, bias=False), 13 | nn.ReLU(), 14 | nn.Conv3d(T // ratio, T, 1,bias=False), 15 | ) 16 | self.sigmoid = nn.Sigmoid() 17 | 18 | def forward(self, x): 19 | avg = self.avg_pool(x) 20 | # B,T,C 21 | out1 = self.sharedMLP(avg) 22 | max = self.max_pool(x) 23 | # B,T,C 24 | out2 = self.sharedMLP(max) 25 | out = out1+out2 26 | 27 | return out 28 | 29 | # task classifictaion or generation 30 | class SCA(nn.Module): 31 | def __init__(self, in_planes, kerenel_size,ratio = 1): 32 | super(SCA, self).__init__() 33 | self.sharedMLP = nn.Sequential( 34 | nn.Conv2d(in_planes, in_planes // ratio, kerenel_size, padding='same', bias=False), 35 | nn.ReLU(), 36 | nn.Conv2d(in_planes // ratio, in_planes, kerenel_size, padding='same', bias=False),) 37 | def forward(self, x): 38 | b,t, c, h, w = x.shape 39 | x = x.flatten(0,1) 40 | x = self.sharedMLP(x) 41 | out = x.reshape(b,t, c, h, w) 42 | return out 43 | if __name__ == '__main__': 44 | 45 | block1 = TA(T=10) # 假设输入有10个时间步长 46 | print("TA模型结构:\n", block1) 47 | 48 | # 创建SCA模型 49 | block2 = SCA(in_planes=64, kerenel_size=3) # 假设输入通道数为64 50 | print("\nSCA模型结构:\n", block2) 51 | 52 | # 创建随机输入数据 53 | batch_size = 4 54 | time_steps = 10 55 | channels = 64 56 | height = 32 57 | width = 32 58 | input = torch.randn(batch_size, time_steps, channels, height, width) 59 | print("\n输入数据形状:", input.size()) 60 | 61 | # 测试TA模型 62 | output = block1(input) 63 | print("TA模型输出形状:", output.shape) 64 | 65 | # 测试SCA模型 66 | output2 = block2(input) 67 | print("SCA模型输出形状:", output2.shape) -------------------------------------------------------------------------------- /卷积/(ICCV 2021)CTR-GC(图卷积).py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | def conv_init(conv): 4 | if conv.weight is not None: 5 | nn.init.kaiming_normal_(conv.weight, mode='fan_out') 6 | if conv.bias is not None: 7 | nn.init.constant_(conv.bias, 0) 8 | # 论文:Channel-wise Topology Refinement Graph Convolution for Skeleton-Based Action Recognition 9 | # 
论文地址:https://ieeexplore.ieee.org/document/9710007 10 | 11 | class CTRGC(nn.Module): 12 | def __init__(self, in_channels, out_channels, rel_reduction=8, mid_reduction=1): 13 | super(CTRGC, self).__init__() 14 | self.in_channels = in_channels 15 | self.out_channels = out_channels 16 | if in_channels == 3 or in_channels == 9: 17 | self.rel_channels = 8 18 | self.mid_channels = 16 19 | else: 20 | self.rel_channels = in_channels // rel_reduction 21 | self.mid_channels = in_channels // mid_reduction 22 | self.conv1 = nn.Conv2d(self.in_channels, self.rel_channels, kernel_size=1) 23 | self.conv2 = nn.Conv2d(self.in_channels, self.rel_channels, kernel_size=1) 24 | self.conv3 = nn.Conv2d(self.in_channels, self.out_channels, kernel_size=1) 25 | self.conv4 = nn.Conv2d(self.rel_channels, self.out_channels, kernel_size=1) 26 | self.tanh = nn.Tanh() 27 | for m in self.modules(): 28 | if isinstance(m, nn.Conv2d): 29 | conv_init(m) 30 | elif isinstance(m, nn.BatchNorm2d): 31 | bn_init(m, 1) 32 | 33 | def forward(self, x, A=None, alpha=1): 34 | x1, x2, x3 = self.conv1(x).mean(-2), self.conv2(x).mean(-2), self.conv3(x) 35 | x1 = self.tanh(x1.unsqueeze(-1) - x2.unsqueeze(-2)) 36 | x1 = self.conv4(x1) * alpha + (A.unsqueeze(0).unsqueeze(0) if A is not None else 0) # N,C,V,V 37 | x1 = torch.einsum('ncuv,nctv->nctu', x1, x3) 38 | return x1 39 | 40 | 41 | 42 | if __name__ == '__main__': 43 | block = CTRGC(in_channels=64, out_channels=64) 44 | input = torch.rand(32, 64, 9, 9) 45 | output = block(input) 46 | print(input.size()) 47 | print(output.size()) 48 | -------------------------------------------------------------------------------- /1D模块/(ICCV 2023)EAA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import einops 4 | 5 | # 论文:SwiftFormer: Efficient Additive Attention for Transformer-based Real-time Mobile Vision Applications[ICCV'23] 6 | # 论文:https://openaccess.thecvf.com/content/ICCV2023/papers/Shaker_SwiftFormer_Efficient_Additive_Attention_for_Transformer-based_Real-time_Mobile_Vision_Applications_ICCV_2023_paper.pdf 7 | 8 | class EfficientAdditiveAttention(nn.Module): 9 | 10 | 11 | def __init__(self, in_dims, token_dim, num_heads=1): 12 | super().__init__() 13 | 14 | self.to_query = nn.Linear(in_dims, token_dim * num_heads) 15 | self.to_key = nn.Linear(in_dims, token_dim * num_heads) 16 | 17 | self.w_g = nn.Parameter(torch.randn(token_dim * num_heads, 1)) 18 | self.scale_factor = token_dim ** -0.5 19 | self.Proj = nn.Linear(token_dim * num_heads, token_dim * num_heads) 20 | self.final = nn.Linear(token_dim * num_heads, token_dim) 21 | 22 | def forward(self, x): 23 | query = self.to_query(x) 24 | key = self.to_key(x) 25 | 26 | query = torch.nn.functional.normalize(query, dim=-1) #BxNxD 27 | key = torch.nn.functional.normalize(key, dim=-1) #BxNxD 28 | 29 | query_weight = query @ self.w_g # BxNx1 (BxNxD @ Dx1) 30 | A = query_weight * self.scale_factor # BxNx1 31 | 32 | A = torch.nn.functional.normalize(A, dim=1) # BxNx1 33 | 34 | G = torch.sum(A * query, dim=1) # BxD 35 | 36 | G = einops.repeat( 37 | G, "b d -> b repeat d", repeat=key.shape[1] 38 | ) # BxNxD 39 | 40 | out = self.Proj(G * key) + query #BxNxD 41 | 42 | out = self.final(out) # BxNxD 43 | 44 | return out 45 | 46 | 47 | if __name__ == '__main__': 48 | # 假设输入维度为512,token维度为512,头数为1 49 | attention_layer = EfficientAdditiveAttention(in_dims=512, token_dim=512) 50 | 51 | # 创建一个随机输入张量,形状为[B, N, D] 52 | B, N, D = 1, 10, 512 53 | x = torch.randn(B, N, D) 54 | 55 | # 
通过注意力层传递输入 56 | output = attention_layer(x) 57 | 58 | # 打印输入和输出的形状 59 | print("输入形状:", x.shape) 60 | print("输出形状:", output.shape) -------------------------------------------------------------------------------- /1D模块/(KDD 2020)CorNet(NLP).py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # 论文:Correlation Networks for Extreme Multi-label Text Classification 5 | 6 | 7 | ACT2FN = {'elu': F.elu, 'relu': F.relu, 'sigmoid': torch.sigmoid, 'tanh': torch.tanh} 8 | 9 | 10 | class CorNetBlock(nn.Module): 11 | def __init__(self, context_size, output_size, cornet_act='sigmoid', **kwargs): 12 | super(CorNetBlock, self).__init__() 13 | self.dstbn2cntxt = nn.Linear(output_size, context_size) 14 | self.cntxt2dstbn = nn.Linear(context_size, output_size) 15 | self.act_fn = ACT2FN[cornet_act] 16 | 17 | def forward(self, output_dstrbtn): 18 | identity_logits = output_dstrbtn 19 | output_dstrbtn = self.act_fn(output_dstrbtn) 20 | context_vector = self.dstbn2cntxt(output_dstrbtn) 21 | context_vector = F.elu(context_vector) 22 | output_dstrbtn = self.cntxt2dstbn(context_vector) 23 | output_dstrbtn = output_dstrbtn + identity_logits 24 | return output_dstrbtn 25 | 26 | 27 | class CorNet(nn.Module): 28 | def __init__(self, output_size, cornet_dim=100, n_cornet_blocks=2, **kwargs): 29 | super(CorNet, self).__init__() 30 | self.intlv_layers = nn.ModuleList( 31 | [CorNetBlock(cornet_dim, output_size, **kwargs) for _ in range(n_cornet_blocks)]) 32 | for layer in self.intlv_layers: 33 | nn.init.xavier_uniform_(layer.dstbn2cntxt.weight) 34 | nn.init.xavier_uniform_(layer.cntxt2dstbn.weight) 35 | 36 | def forward(self, logits): 37 | for layer in self.intlv_layers: 38 | logits = layer(logits) 39 | return logits 40 | 41 | 42 | if __name__ == '__main__': 43 | output_size = 10 44 | cornet_dim = 100 45 | n_cornet_blocks = 2 46 | cornet_act = 'relu' 47 | 48 | model = CorNet(output_size=output_size, cornet_dim=cornet_dim, n_cornet_blocks=n_cornet_blocks) 49 | 50 | input_tensor = torch.rand(4, output_size) 51 | 52 | output = model(input_tensor) 53 | 54 | # 打印输入和输出的尺寸 55 | print("Input size :", input_tensor.size()) 56 | print("Output size:", output.size()) 57 | -------------------------------------------------------------------------------- /(arXiv 2021) AFT.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/2105.14103v1 7 | # 论文:An Attention Free Transformer 8 | 9 | 10 | class AFT_FULL(nn.Module): 11 | 12 | def __init__(self, d_model,n=49,simple=False): 13 | 14 | super(AFT_FULL, self).__init__() 15 | self.fc_q = nn.Linear(d_model, d_model) 16 | self.fc_k = nn.Linear(d_model, d_model) 17 | self.fc_v = nn.Linear(d_model,d_model) 18 | if(simple): 19 | self.position_biases=torch.zeros((n,n)) 20 | else: 21 | self.position_biases=nn.Parameter(torch.ones((n,n))) 22 | self.d_model = d_model 23 | self.n=n 24 | self.sigmoid=nn.Sigmoid() 25 | 26 | self.init_weights() 27 | 28 | 29 | def init_weights(self): 30 | for m in self.modules(): 31 | if isinstance(m, nn.Conv2d): 32 | init.kaiming_normal_(m.weight, mode='fan_out') 33 | if m.bias is not None: 34 | init.constant_(m.bias, 0) 35 | elif isinstance(m, nn.BatchNorm2d): 36 | init.constant_(m.weight, 1) 37 | init.constant_(m.bias, 0) 38 | elif isinstance(m, nn.Linear): 39 | init.normal_(m.weight, std=0.001) 40 | if 
m.bias is not None: 41 | init.constant_(m.bias, 0) 42 | 43 | def forward(self, input): 44 | 45 | bs, n,dim = input.shape 46 | 47 | q = self.fc_q(input) #bs,n,dim 48 | k = self.fc_k(input).view(1,bs,n,dim) #1,bs,n,dim 49 | v = self.fc_v(input).view(1,bs,n,dim) #1,bs,n,dim 50 | 51 | numerator=torch.sum(torch.exp(k+self.position_biases.view(n,1,-1,1))*v,dim=2) #n,bs,dim 52 | denominator=torch.sum(torch.exp(k+self.position_biases.view(n,1,-1,1)),dim=2) #n,bs,dim 53 | 54 | out=(numerator/denominator) #n,bs,dim 55 | out=self.sigmoid(q)*(out.permute(1,0,2)) #bs,n,dim 56 | 57 | return out 58 | 59 | 60 | if __name__ == '__main__': 61 | input=torch.randn(50,49,512) 62 | block = AFT_FULL(d_model=512, n=49) 63 | output=block(input) 64 | print(output.shape) 65 | 66 | -------------------------------------------------------------------------------- /(ECCV2024)SMFA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | #GitHub地址: https://github.com/Zheng-MJ/SMFANet 5 | #论文地址:https://openaccess.thecvf.com/content/ICCV2023/papers/Sun_Spatially-Adaptive_Feature_Modulation_for_Efficient_Image_Super-Resolution_ICCV_2023_paper.pdf 6 | class DMlp(nn.Module): 7 | def __init__(self, dim, growth_rate=2.0): 8 | super().__init__() 9 | hidden_dim = int(dim * growth_rate) 10 | self.conv_0 = nn.Sequential( 11 | nn.Conv2d(dim, hidden_dim, 3, 1, 1, groups=dim), 12 | nn.Conv2d(hidden_dim, hidden_dim, 1, 1, 0) 13 | ) 14 | self.act = nn.GELU() 15 | self.conv_1 = nn.Conv2d(hidden_dim, dim, 1, 1, 0) 16 | 17 | def forward(self, x): 18 | x = self.conv_0(x) 19 | x = self.act(x) 20 | x = self.conv_1(x) 21 | return x 22 | 23 | 24 | class SMFA(nn.Module): 25 | def __init__(self, dim=36): 26 | super(SMFA, self).__init__() 27 | self.linear_0 = nn.Conv2d(dim, dim * 2, 1, 1, 0) 28 | self.linear_1 = nn.Conv2d(dim, dim, 1, 1, 0) 29 | self.linear_2 = nn.Conv2d(dim, dim, 1, 1, 0) 30 | 31 | self.lde = DMlp(dim, 2) 32 | 33 | self.dw_conv = nn.Conv2d(dim, dim, 3, 1, 1, groups=dim) 34 | 35 | self.gelu = nn.GELU() 36 | self.down_scale = 8 37 | 38 | self.alpha = nn.Parameter(torch.ones((1, dim, 1, 1))) 39 | self.belt = nn.Parameter(torch.zeros((1, dim, 1, 1))) 40 | 41 | def forward(self, f): 42 | _, _, h, w = f.shape 43 | y, x = self.linear_0(f).chunk(2, dim=1) 44 | x_s = self.dw_conv(F.adaptive_max_pool2d(x, (h // self.down_scale, w // self.down_scale))) 45 | x_v = torch.var(x, dim=(-2, -1), keepdim=True) 46 | x_l = x * F.interpolate(self.gelu(self.linear_1(x_s * self.alpha + x_v * self.belt)), size=(h, w), 47 | mode='nearest') 48 | y_d = self.lde(y) 49 | return self.linear_2(x_l + y_d) 50 | 51 | 52 | if __name__ == '__main__': 53 | input = torch.randn(3, 36, 64, 64) # 输入b c h w 54 | 55 | block = SMFA(dim=36) 56 | output = block(input) 57 | print(output.size()) 58 | -------------------------------------------------------------------------------- /(TPAMI 2022) ViP.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | # 论文地址:https://arxiv.org/pdf/2106.12368 5 | # 论文:Vision Permutator: A Permutable MLP-Like Architecture for Visual Recognition 6 | 7 | 8 | class MLP(nn.Module): 9 | def __init__(self,in_features,hidden_features,out_features,act_layer=nn.GELU,drop=0.1): 10 | super().__init__() 11 | self.fc1=nn.Linear(in_features,hidden_features) 12 | self.act=act_layer() 13 | self.fc2=nn.Linear(hidden_features,out_features) 14 | self.drop=nn.Dropout(drop) 15 | 
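    # Shape note (added): WeightedPermuteMLP below expects channels-last input of shape
    # (B, H, W, C); it splits C into `seg_dim` segments, mixes along height and width,
    # and the reweighting MLP (C -> C/4 -> 3C) produces one softmax weight per branch
    # (channel / width / height) before the final projection.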
16 | def forward(self, x) : 17 | return self.drop(self.fc2(self.drop(self.act(self.fc1(x))))) 18 | 19 | class WeightedPermuteMLP(nn.Module): 20 | def __init__(self,dim,seg_dim=8, qkv_bias=False, proj_drop=0.): 21 | super().__init__() 22 | self.seg_dim=seg_dim 23 | 24 | self.mlp_c=nn.Linear(dim,dim,bias=qkv_bias) 25 | self.mlp_h=nn.Linear(dim,dim,bias=qkv_bias) 26 | self.mlp_w=nn.Linear(dim,dim,bias=qkv_bias) 27 | 28 | self.reweighting=MLP(dim,dim//4,dim*3) 29 | 30 | self.proj=nn.Linear(dim,dim) 31 | self.proj_drop=nn.Dropout(proj_drop) 32 | 33 | def forward(self,x) : 34 | B,H,W,C=x.shape 35 | 36 | c_embed=self.mlp_c(x) 37 | 38 | S=C//self.seg_dim 39 | h_embed=x.reshape(B,H,W,self.seg_dim,S).permute(0,3,2,1,4).reshape(B,self.seg_dim,W,H*S) 40 | h_embed=self.mlp_h(h_embed).reshape(B,self.seg_dim,W,H,S).permute(0,3,2,1,4).reshape(B,H,W,C) 41 | 42 | w_embed=x.reshape(B,H,W,self.seg_dim,S).permute(0,3,1,2,4).reshape(B,self.seg_dim,H,W*S) 43 | w_embed=self.mlp_w(w_embed).reshape(B,self.seg_dim,H,W,S).permute(0,2,3,1,4).reshape(B,H,W,C) 44 | 45 | weight=(c_embed+h_embed+w_embed).permute(0,3,1,2).flatten(2).mean(2) 46 | weight=self.reweighting(weight).reshape(B,C,3).permute(2,0,1).softmax(0).unsqueeze(2).unsqueeze(2) 47 | 48 | x=c_embed*weight[0]+w_embed*weight[1]+h_embed*weight[2] 49 | 50 | x=self.proj_drop(self.proj(x)) 51 | 52 | return x 53 | 54 | 55 | 56 | if __name__ == '__main__': 57 | input=torch.randn(64,8,8,512) 58 | seg_dim=8 59 | block=WeightedPermuteMLP(512,seg_dim) 60 | out=block(input) 61 | print(out.shape) 62 | -------------------------------------------------------------------------------- /MLAttention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # 论文:AttentionXML: Label Tree-based Attention-Aware Deep Model for High-Performance Extreme Multi-Label Text Classification 5 | # 论文地址:https://arxiv.org/pdf/1811.01727.pdf 6 | 7 | class MLAttention(nn.Module): 8 | def __init__(self, hidden_size): 9 | super(MLAttention, self).__init__() 10 | self.attention = nn.Linear(hidden_size, hidden_size, bias=False) 11 | nn.init.xavier_uniform_(self.attention.weight) 12 | 13 | def forward(self, inputs, masks): 14 | masks = torch.unsqueeze(masks, 2) # N, L, 1 15 | attention_scores = self.attention(inputs) # N, L, hidden_size 16 | attention = F.softmax(attention_scores, dim=1) # N, L, hidden_size 17 | attention_masked = attention * masks # apply the mask 18 | return attention_masked 19 | 20 | class FastMLAttention(nn.Module): 21 | def __init__(self, hidden_size): 22 | super(FastMLAttention, self).__init__() 23 | self.attention_dim = hidden_size # Make sure this is same as your inputs dimension 24 | self.attention = nn.Linear(self.attention_dim, self.attention_dim) 25 | nn.init.xavier_uniform_(self.attention.weight) 26 | 27 | def forward(self, inputs, masks, attn_weights: nn.Module): 28 | masks = masks.unsqueeze(2) # N, L, 1 29 | attention_scores = self.attention(inputs) # N, L, hidden_size 30 | attention = F.softmax(attention_scores, dim=1) # Softmax over L dimension 31 | attention = attention * masks # Apply mask 32 | attention_masked = attention_scores * attention # Apply attention 33 | return attention_masked 34 | 35 | if __name__ == '__main__': 36 | 37 | batch_size = 8 38 | seq_len = 10 39 | hidden_size = 8 40 | inputs = torch.randn(batch_size, seq_len, hidden_size) 41 | masks = torch.ones(batch_size, seq_len) 42 | 43 | ml_attention = MLAttention(hidden_size) 44 | outputs_ml = 
ml_attention(inputs, masks) 45 | print(outputs_ml.size()) 46 | 47 | fast_ml_attention = FastMLAttention(hidden_size) 48 | outputs_fastml = fast_ml_attention(inputs, masks, None) 49 | print(outputs_fastml.size()) 50 | -------------------------------------------------------------------------------- /SWA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | #论文:DAU-Net: Dual attention-aided U-Net for segmenting tumor in breast ultrasound images 5 | #论文地址:https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0303670 6 | class SWA(nn.Module): 7 | def __init__(self, in_channels, n_heads=8, window_size=7): 8 | super(SWA, self).__init__() 9 | self.in_channels = in_channels 10 | self.n_heads = n_heads 11 | self.window_size = window_size 12 | 13 | self.query_conv = nn.Conv2d(in_channels, in_channels, kernel_size=1) 14 | self.key_conv = nn.Conv2d(in_channels, in_channels, kernel_size=1) 15 | self.value_conv = nn.Conv2d(in_channels, in_channels, kernel_size=1) 16 | self.gamma = nn.Parameter(torch.zeros(1)) 17 | self.softmax = nn.Softmax(dim=-1) 18 | 19 | def forward(self, x): 20 | batch_size, C, height, width = x.size() 21 | padded_x = F.pad(x, [self.window_size // 2, self.window_size // 2, self.window_size // 2, self.window_size // 2], mode='reflect') 22 | 23 | proj_query = self.query_conv(x).view(batch_size, self.n_heads, C // self.n_heads, height * width) 24 | proj_key = self.key_conv(padded_x).unfold(2, self.window_size, 1).unfold(3, self.window_size, 1) 25 | proj_key = proj_key.permute(0, 1, 4, 5, 2, 3).contiguous().view(batch_size, self.n_heads, C // self.n_heads, -1) 26 | proj_value = self.value_conv(padded_x).unfold(2, self.window_size, 1).unfold(3, self.window_size, 1) 27 | proj_value = proj_value.permute(0, 1, 4, 5, 2, 3).contiguous().view(batch_size, self.n_heads, C // self.n_heads, -1) 28 | 29 | energy = torch.matmul(proj_query.permute(0, 1, 3, 2), proj_key) 30 | attention = self.softmax(energy) 31 | 32 | out_window = torch.matmul(attention, proj_value.permute(0, 1, 3, 2)) 33 | out_window = out_window.permute(0, 1, 3, 2).contiguous().view(batch_size, C, height, width) 34 | 35 | out = self.gamma * out_window + x 36 | return out 37 | 38 | if __name__ == '__main__': 39 | 40 | input = torch.randn(1, 64, 32, 32) 41 | block = SWA(in_channels=64) 42 | print(input.size()) 43 | output = block(input) 44 | print(output.size()) 45 | -------------------------------------------------------------------------------- /(arXiv 2021) MobileViTv2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/2206.02680 7 | # 论文:Separable Self-attention for Mobile Vision Transformers 8 | 9 | 10 | class MobileViTv2Attention(nn.Module): 11 | ''' 12 | Scaled dot-product attention 13 | ''' 14 | 15 | def __init__(self, d_model): 16 | ''' 17 | :param d_model: Output dimensionality of the model 18 | :param d_k: Dimensionality of queries and keys 19 | :param d_v: Dimensionality of values 20 | :param h: Number of heads 21 | ''' 22 | super(MobileViTv2Attention, self).__init__() 23 | self.fc_i = nn.Linear(d_model,1) 24 | self.fc_k = nn.Linear(d_model, d_model) 25 | self.fc_v = nn.Linear(d_model, d_model) 26 | self.fc_o = nn.Linear(d_model, d_model) 27 | 28 | self.d_model = d_model 29 | self.init_weights() 30 | 31 | 32 | def init_weights(self): 33 
| for m in self.modules(): 34 | if isinstance(m, nn.Conv2d): 35 | init.kaiming_normal_(m.weight, mode='fan_out') 36 | if m.bias is not None: 37 | init.constant_(m.bias, 0) 38 | elif isinstance(m, nn.BatchNorm2d): 39 | init.constant_(m.weight, 1) 40 | init.constant_(m.bias, 0) 41 | elif isinstance(m, nn.Linear): 42 | init.normal_(m.weight, std=0.001) 43 | if m.bias is not None: 44 | init.constant_(m.bias, 0) 45 | 46 | def forward(self, input): 47 | ''' 48 | Computes 49 | :param queries: Queries (b_s, nq, d_model) 50 | :return: 51 | ''' 52 | i = self.fc_i(input) #(bs,nq,1) 53 | weight_i = torch.softmax(i, dim=1) #bs,nq,1 54 | context_score = weight_i * self.fc_k(input) #bs,nq,d_model 55 | context_vector = torch.sum(context_score,dim=1,keepdim=True) #bs,1,d_model 56 | v = self.fc_v(input) * context_vector #bs,nq,d_model 57 | out = self.fc_o(v) #bs,nq,d_model 58 | 59 | return out 60 | 61 | 62 | if __name__ == '__main__': 63 | input=torch.randn(50,49,512) 64 | block = MobileViTv2Attention(d_model=512) 65 | output=block(input) 66 | print(output.size()) 67 | 68 | -------------------------------------------------------------------------------- /FCA.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import nn 4 | 5 | #论文:Unsupervised Bidirectional Contrastive Reconstruction and Adaptive Fine-Grained Channel Attention Networks for image dehazing 6 | #论文地址:https://www.sciencedirect.com/science/article/abs/pii/S0893608024002387 7 | 8 | class Mix(nn.Module): 9 | def __init__(self, m=-0.80): 10 | super(Mix, self).__init__() 11 | w = torch.nn.Parameter(torch.FloatTensor([m]), requires_grad=True) 12 | w = torch.nn.Parameter(w, requires_grad=True) 13 | self.w = w 14 | self.mix_block = nn.Sigmoid() 15 | 16 | def forward(self, fea1, fea2): 17 | mix_factor = self.mix_block(self.w) 18 | out = fea1 * mix_factor.expand_as(fea1) + fea2 * (1 - mix_factor.expand_as(fea2)) 19 | return out 20 | 21 | #Adaptive Fine-Grained Channel Attention (FCA) 22 | class FCAttention(nn.Module): 23 | def __init__(self,channel,b=1, gamma=2): 24 | super(FCAttention, self).__init__() 25 | self.avg_pool = nn.AdaptiveAvgPool2d(1)#全局平均池化 26 | #一维卷积 27 | t = int(abs((math.log(channel, 2) + b) / gamma)) 28 | k = t if t % 2 else t + 1 29 | self.conv1 = nn.Conv1d(1, 1, kernel_size=k, padding=int(k / 2), bias=False) 30 | self.fc = nn.Conv2d(channel, channel, 1, padding=0, bias=True) 31 | self.sigmoid = nn.Sigmoid() 32 | self.mix = Mix() 33 | 34 | 35 | def forward(self, input): 36 | x = self.avg_pool(input) 37 | x1 = self.conv1(x.squeeze(-1).transpose(-1, -2)).transpose(-1, -2)#(1,64,1) 38 | x2 = self.fc(x).squeeze(-1).transpose(-1, -2)#(1,1,64) 39 | out1 = torch.sum(torch.matmul(x1,x2),dim=1).unsqueeze(-1).unsqueeze(-1)#(1,64,1,1) 40 | out1 = self.sigmoid(out1) 41 | out2 = torch.sum(torch.matmul(x2.transpose(-1, -2),x1.transpose(-1, -2)),dim=1).unsqueeze(-1).unsqueeze(-1) 42 | 43 | out2 = self.sigmoid(out2) 44 | out = self.mix(out1,out2) 45 | out = self.conv1(out.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 46 | out = self.sigmoid(out) 47 | 48 | return input*out 49 | 50 | if __name__ == '__main__': 51 | input = torch.rand(1,64,256,256) 52 | block = FCAttention(channel=64) 53 | output = block(input) 54 | print(output.size()) 55 | 56 | -------------------------------------------------------------------------------- /DPTAM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 
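# Usage note (added): DPTAM is a temporal attention block; frames are stacked along the
# batch axis, so the expected input shape is (N*T, C, H, W) with T == n_segment, and the
# output keeps that shape (see the __main__ example: n_segment=16, input (16, 4, 16, 16)).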
3 | import torch.nn.functional as F 4 | import torchvision 5 | 6 | 7 | 8 | class DPTAM(nn.Module): 9 | def __init__(self, 10 | in_channels, 11 | n_segment, 12 | kernel_size=3, 13 | stride=1, 14 | padding=1): 15 | super(DPTAM, self).__init__() 16 | self.in_channels = in_channels 17 | self.n_segment = n_segment 18 | self.kernel_size = kernel_size 19 | self.stride = stride 20 | self.padding = padding 21 | print('DPTAM with kernel_size {}.'.format(kernel_size)) 22 | 23 | self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=3)#context Modeling 24 | self.softmax = nn.Softmax(dim=2) 25 | self.p1_conv1= nn.Conv1d(in_channels , in_channels, 1, bias=False) 26 | 27 | 28 | self.dptam = nn.Sequential( 29 | nn.Conv1d(in_channels, 30 | in_channels // 4, 31 | kernel_size, 32 | stride=1, 33 | padding=kernel_size // 2, 34 | bias=False), nn.BatchNorm1d(in_channels // 4), 35 | nn.ReLU(inplace=True), 36 | nn.Conv1d(in_channels // 4, in_channels, 1, bias=False), 37 | nn.Sigmoid()) 38 | 39 | def forward(self, x): 40 | nt, c, h, w = x.size() 41 | 42 | t = self.n_segment 43 | n_batch = nt // t 44 | new_x = x.view(n_batch, t, c, h, w).permute(0, 2, 1, 3,4).contiguous() 45 | out = F.adaptive_avg_pool2d(new_x.view(n_batch * c, t, h, w), (1, 1)) 46 | 47 | x_22=out.view(-1,c,t) 48 | x22_c_t = self.p1_conv1(x_22) 49 | x22 =x_22.mean(2,keepdim=True) 50 | x22 = self.p1_conv1(x22) 51 | x22 = x22_c_t * x22 52 | x22= x_22+x22 53 | 54 | local_activation = self.dptam(x22).view(n_batch, c, t, 1, 1) 55 | new_x = new_x * local_activation 56 | 57 | out = new_x.view(n_batch, c, t, h, w) #光local 58 | out = out.permute(0, 2, 1, 3, 4).contiguous().view(nt, c, h, w) 59 | 60 | return out 61 | 62 | 63 | 64 | if __name__ == '__main__': 65 | n_segment = 16 66 | 67 | block = DPTAM(in_channels=4, n_segment=n_segment) 68 | input = torch.rand(16, 4, 16, 16) 69 | output = block(input) 70 | print(input.size()) 71 | print(output.size()) 72 | 73 | -------------------------------------------------------------------------------- /(ICLR 2023)ContraNorm(对比归一化层).py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | # 论文:ContraNorm: A Contrastive Learning Perspective on Oversmoothing and Beyond 4 | # 论文地址:https://ar5iv.labs.arxiv.org/html/2303.06562 5 | 6 | class ContraNorm(nn.Module): 7 | def __init__(self, dim, scale=0.1, dual_norm=False, pre_norm=False, temp=1.0, learnable=False, positive=False, identity=False): 8 | super().__init__() 9 | if learnable and scale > 0: 10 | import math 11 | if positive: 12 | scale_init = math.log(scale) 13 | else: 14 | scale_init = scale 15 | self.scale_param = nn.Parameter(torch.empty(dim).fill_(scale_init)) 16 | self.dual_norm = dual_norm 17 | self.scale = scale 18 | self.pre_norm = pre_norm 19 | self.temp = temp 20 | self.learnable = learnable 21 | self.positive = positive 22 | self.identity = identity 23 | 24 | self.layernorm = nn.LayerNorm(dim, eps=1e-6) 25 | 26 | def forward(self, x): 27 | if self.scale > 0.0: 28 | xn = nn.functional.normalize(x, dim=2) 29 | if self.pre_norm: 30 | x = xn 31 | sim = torch.bmm(xn, xn.transpose(1,2)) / self.temp 32 | if self.dual_norm: 33 | sim = nn.functional.softmax(sim, dim=2) + nn.functional.softmax(sim, dim=1) 34 | else: 35 | sim = nn.functional.softmax(sim, dim=2) 36 | x_neg = torch.bmm(sim, x) 37 | if not self.learnable: 38 | if self.identity: 39 | x = (1+self.scale) * x - self.scale * x_neg 40 | else: 41 | x = x - self.scale * x_neg 42 | else: 43 | scale = torch.exp(self.scale_param) if 
self.positive else self.scale_param 44 | scale = scale.view(1, 1, -1) 45 | if self.identity: 46 | x = scale * x - scale * x_neg 47 | else: 48 | x = x - scale * x_neg 49 | x = self.layernorm(x) 50 | return x 51 | 52 | 53 | if __name__ == '__main__': 54 | block = ContraNorm(dim=128, scale=0.1, dual_norm=False, pre_norm=False, temp=1.0, learnable=False, positive=False, identity=False) 55 | input = torch.rand(32, 784, 128) 56 | output = block(input) 57 | print("Input size:", input.size()) 58 | print("Output size:", output.size()) 59 | -------------------------------------------------------------------------------- /LGAG.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | # 论文:EMCAD: Efficient Multi-scale Convolutional Attention Decoding for Medical Image Segmentation, CVPR2024 4 | # 论文地址:https://arxiv.org/pdf/2405.06880 5 | 6 | 7 | def act_layer(act, inplace=False, neg_slope=0.2, n_prelu=1): 8 | # activation layer 9 | act = act.lower() 10 | if act == 'relu': 11 | layer = nn.ReLU(inplace) 12 | elif act == 'relu6': 13 | layer = nn.ReLU6(inplace) 14 | elif act == 'leakyrelu': 15 | layer = nn.LeakyReLU(neg_slope, inplace) 16 | elif act == 'prelu': 17 | layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope) 18 | elif act == 'gelu': 19 | layer = nn.GELU() 20 | elif act == 'hswish': 21 | layer = nn.Hardswish(inplace) 22 | else: 23 | raise NotImplementedError('activation layer [%s] is not found' % act) 24 | return layer 25 | 26 | class LGAG(nn.Module): 27 | def __init__(self, F_g, F_l, F_int=16, kernel_size=3, groups=1, activation='relu'): 28 | super(LGAG, self).__init__() 29 | 30 | if kernel_size == 1: 31 | groups = 1 32 | self.W_g = nn.Sequential( 33 | nn.Conv2d(F_g, F_int, kernel_size=kernel_size, stride=1, padding=kernel_size // 2, groups=groups, 34 | bias=True), 35 | nn.BatchNorm2d(F_int) 36 | ) 37 | self.W_x = nn.Sequential( 38 | nn.Conv2d(F_l, F_int, kernel_size=kernel_size, stride=1, padding=kernel_size // 2, groups=groups, 39 | bias=True), 40 | nn.BatchNorm2d(F_int) 41 | ) 42 | self.psi = nn.Sequential( 43 | nn.Conv2d(F_int, 1, kernel_size=1, stride=1, padding=0, bias=True), 44 | nn.BatchNorm2d(1), 45 | nn.Sigmoid() 46 | ) 47 | self.activation = act_layer(activation, inplace=True) 48 | 49 | 50 | def forward(self, g, x): 51 | g1 = self.W_g(g) 52 | x1 = self.W_x(x) 53 | psi = self.activation(g1 + x1) 54 | psi = self.psi(psi) 55 | 56 | return x * psi 57 | 58 | 59 | if __name__ == '__main__': 60 | # 示例输入 61 | g = torch.randn(1, 32, 64, 64) 62 | x = torch.randn(1, 64, 64, 64) 63 | 64 | # 实例化LGAG 65 | lgag = LGAG(F_g=32, F_l=64) 66 | 67 | # 打印输入的shape 68 | print("输入 g 的 shape:", g.shape) 69 | print("输入 x 的 shape:", x.shape) 70 | 71 | # 前向传播并打印输出的shape 72 | output = lgag(g, x) 73 | print("输出的 shape:", output.shape) -------------------------------------------------------------------------------- /特征融合/(ICMR 2022)CMF_Block(多模态融合).py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.fft 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | # 论文:M2TR: Multi-modal Multi-scale Transformers for Deepfake Detection 8 | # 论文地址:https://arxiv.org/pdf/2104.09770 9 | class CMA_Block(nn.Module): 10 | def __init__(self, in_channel, hidden_channel, out_channel): 11 | super(CMA_Block, self).__init__() 12 | 13 | self.conv1 = nn.Conv2d( 14 | in_channel, hidden_channel, kernel_size=1, stride=1, padding=0 15 | ) 16 | self.conv2 = nn.Conv2d( 17 
| in_channel, hidden_channel, kernel_size=1, stride=1, padding=0 18 | ) 19 | self.conv3 = nn.Conv2d( 20 | in_channel, hidden_channel, kernel_size=1, stride=1, padding=0 21 | ) 22 | 23 | self.scale = hidden_channel ** -0.5 24 | 25 | self.conv4 = nn.Sequential( 26 | nn.Conv2d( 27 | hidden_channel, out_channel, kernel_size=1, stride=1, padding=0 28 | ), 29 | nn.BatchNorm2d(out_channel), 30 | nn.LeakyReLU(0.2, inplace=True), 31 | ) 32 | 33 | def forward(self, rgb, freq): 34 | _, _, h, w = rgb.size() 35 | 36 | q = self.conv1(rgb) 37 | k = self.conv2(freq) 38 | v = self.conv3(freq) 39 | 40 | q = q.view(q.size(0), q.size(1), q.size(2) * q.size(3)).transpose( 41 | -2, -1 42 | ) 43 | k = k.view(k.size(0), k.size(1), k.size(2) * k.size(3)) 44 | 45 | attn = torch.matmul(q, k) * self.scale 46 | m = attn.softmax(dim=-1) 47 | 48 | v = v.view(v.size(0), v.size(1), v.size(2) * v.size(3)).transpose( 49 | -2, -1 50 | ) 51 | z = torch.matmul(m, v) 52 | z = z.view(z.size(0), h, w, -1) 53 | z = z.permute(0, 3, 1, 2).contiguous() 54 | 55 | output = rgb + self.conv4(z) 56 | 57 | return output 58 | 59 | 60 | if __name__ == '__main__': 61 | in_channel = 64 62 | hidden_channel = 32 63 | out_channel = 64 64 | h = 64 65 | w = 64 66 | 67 | block = CMA_Block(in_channel, hidden_channel, out_channel) 68 | 69 | rgb_input = torch.rand(1, in_channel, h, w) 70 | freq_input = torch.rand(1, in_channel, h, w) 71 | 72 | output = block(rgb_input, freq_input) 73 | 74 | print("RGB Input size:", rgb_input.size()) 75 | print("Freq Input size:", freq_input.size()) 76 | print("Output size:", output.size()) 77 | -------------------------------------------------------------------------------- /图像超分/SGFN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | # 论文地址:https://arxiv.org/pdf/2308.03364 4 | # 论文:Dual Aggregation Transformer for Image Super-Resolution, ICCV 2023 5 | class SpatialGate(nn.Module): 6 | 7 | def __init__(self, dim): 8 | super().__init__() 9 | self.norm = nn.LayerNorm(dim) 10 | self.conv = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, groups=dim) # DW Conv 11 | 12 | def forward(self, x, H, W): 13 | # Split 14 | x1, x2 = x.chunk(2, dim = -1) 15 | B, N, C = x.shape 16 | x2 = self.conv(self.norm(x2).transpose(1, 2).contiguous().view(B, C//2, H, W)).flatten(2).transpose(-1, -2).contiguous() 17 | 18 | return x1 * x2 19 | 20 | class SGFN(nn.Module): 21 | """ Spatial-Gate Feed-Forward Network. 22 | Args: 23 | in_features (int): Number of input channels. 24 | hidden_features (int | None): Number of hidden channels. Default: None 25 | out_features (int | None): Number of output channels. Default: None 26 | act_layer (nn.Module): Activation layer. Default: nn.GELU 27 | drop (float): Dropout rate. 
Default: 0.0 28 | """ 29 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): 30 | super().__init__() 31 | out_features = out_features or in_features 32 | hidden_features = hidden_features or in_features 33 | self.fc1 = nn.Linear(in_features, hidden_features) 34 | self.act = act_layer() 35 | self.sg = SpatialGate(hidden_features//2) 36 | self.fc2 = nn.Linear(hidden_features//2, out_features) 37 | self.drop = nn.Dropout(drop) 38 | 39 | def forward(self, x, H, W): 40 | """ 41 | Input: x: (B, H*W, C), H, W 42 | Output: x: (B, H*W, C) 43 | """ 44 | x = self.fc1(x) 45 | x = self.act(x) 46 | x = self.drop(x) 47 | 48 | x = self.sg(x, H, W) 49 | x = self.drop(x) 50 | 51 | x = self.fc2(x) 52 | x = self.drop(x) 53 | return x 54 | 55 | 56 | if __name__ == '__main__': 57 | # 定义输入参数 58 | batch_size = 1 59 | height = 32 # 假设图像高度为32 60 | width = 32 # 假设图像宽度为32 61 | channels = 64 # 输入通道数 62 | 63 | block = SGFN(in_features=channels) 64 | 65 | # 创建随机输入数据 (B, H*W, C) 66 | x = torch.randn(batch_size, height * width, channels) 67 | 68 | # 前向传播并打印输入输出的形状 69 | output = block(x, height, width) 70 | 71 | print(x.size()) 72 | print(output.size()) -------------------------------------------------------------------------------- /(arXiv 2021) S2Attention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/2108.01072 7 | # 论文:S2-MLPv2: Improved Spatial-Shift MLP Architecture for Vision 8 | 9 | 10 | def spatial_shift1(x): 11 | b,w,h,c = x.size() 12 | x[:,1:,:,:c//4] = x[:,:w-1,:,:c//4] 13 | x[:,:w-1,:,c//4:c//2] = x[:,1:,:,c//4:c//2] 14 | x[:,:,1:,c//2:c*3//4] = x[:,:,:h-1,c//2:c*3//4] 15 | x[:,:,:h-1,3*c//4:] = x[:,:,1:,3*c//4:] 16 | return x 17 | 18 | 19 | def spatial_shift2(x): 20 | b,w,h,c = x.size() 21 | x[:,:,1:,:c//4] = x[:,:,:h-1,:c//4] 22 | x[:,:,:h-1,c//4:c//2] = x[:,:,1:,c//4:c//2] 23 | x[:,1:,:,c//2:c*3//4] = x[:,:w-1,:,c//2:c*3//4] 24 | x[:,:w-1,:,3*c//4:] = x[:,1:,:,3*c//4:] 25 | return x 26 | 27 | 28 | class SplitAttention(nn.Module): 29 | def __init__(self,channel=512,k=3): 30 | super().__init__() 31 | self.channel=channel 32 | self.k=k 33 | self.mlp1=nn.Linear(channel,channel,bias=False) 34 | self.gelu=nn.GELU() 35 | self.mlp2=nn.Linear(channel,channel*k,bias=False) 36 | self.softmax=nn.Softmax(1) 37 | 38 | def forward(self,x_all): 39 | b,k,h,w,c=x_all.shape 40 | x_all=x_all.reshape(b,k,-1,c) #bs,k,n,c 41 | a=torch.sum(torch.sum(x_all,1),1) #bs,c 42 | hat_a=self.mlp2(self.gelu(self.mlp1(a))) #bs,kc 43 | hat_a=hat_a.reshape(b,self.k,c) #bs,k,c 44 | bar_a=self.softmax(hat_a) #bs,k,c 45 | attention=bar_a.unsqueeze(-2) # #bs,k,1,c 46 | out=attention*x_all # #bs,k,n,c 47 | out=torch.sum(out,1).reshape(b,h,w,c) 48 | return out 49 | 50 | 51 | class S2Attention(nn.Module): 52 | 53 | def __init__(self, channels=512 ): 54 | super().__init__() 55 | self.mlp1 = nn.Linear(channels,channels*3) 56 | self.mlp2 = nn.Linear(channels,channels) 57 | self.split_attention = SplitAttention() 58 | 59 | def forward(self, x): 60 | b,c,w,h = x.size() 61 | x=x.permute(0,2,3,1) 62 | x = self.mlp1(x) 63 | x1 = spatial_shift1(x[:,:,:,:c]) 64 | x2 = spatial_shift2(x[:,:,:,c:c*2]) 65 | x3 = x[:,:,:,c*2:] 66 | x_all=torch.stack([x1,x2,x3],1) 67 | a = self.split_attention(x_all) 68 | x = self.mlp2(a) 69 | x=x.permute(0,3,1,2) 70 | return x 71 | 72 | 73 | 74 | 75 | if __name__ == '__main__': 76 | input=torch.randn(50,512,7,7) 
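    # Shape note (added): S2Attention permutes to channels-last (B, H, W, C) internally for
    # the spatial shifts and the split attention, but its interface is B, C, H, W in and out,
    # so the (50, 512, 7, 7) input above comes back with the same shape.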
77 | block = S2Attention(channels=512) 78 | output=block(input) 79 | print(output.shape) 80 | 81 | -------------------------------------------------------------------------------- /cleegn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | #论文:CLEEGN: A Convolutional Neural Network for Plug-and-Play Automatic EEG Reconstruction 4 | #论文地址:https://arxiv.org/pdf/2210.05988v2.pdf 5 | 6 | class Permute2d(nn.Module): 7 | def __init__(self, shape): 8 | super(Permute2d, self).__init__() 9 | self.shape = shape 10 | 11 | def forward(self, x): 12 | return torch.permute(x, self.shape) 13 | 14 | 15 | class CLEEGN(nn.Module): 16 | def __init__(self, n_chan, fs, N_F=20, tem_kernelLen=0.1): 17 | super(CLEEGN, self).__init__() 18 | self.n_chan = n_chan 19 | self.N_F = N_F 20 | self.fs = fs 21 | self.conv1 = nn.Sequential( 22 | nn.Conv2d(1, n_chan, (n_chan, 1), padding="valid", bias=True), 23 | Permute2d((0, 2, 1, 3)), 24 | nn.BatchNorm2d(1, eps=1e-3, momentum=0.99) 25 | ) 26 | self.conv2 = nn.Sequential( 27 | nn.Conv2d(1, N_F, (1, int(fs * tem_kernelLen)), padding="same", bias=True), 28 | nn.BatchNorm2d(N_F, eps=1e-3, momentum=0.99) 29 | ) 30 | 31 | self.conv3 = nn.Sequential( 32 | nn.Conv2d(N_F, N_F, (1, int(fs * tem_kernelLen)), padding="same", bias=True), 33 | nn.BatchNorm2d(N_F, eps=1e-3, momentum=0.99) 34 | ) 35 | self.conv4 = nn.Sequential( 36 | nn.Conv2d(N_F, n_chan, (n_chan, 1), padding="same", bias=True), 37 | nn.BatchNorm2d(n_chan, eps=1e-3, momentum=0.99) 38 | ) 39 | self.conv5 = nn.Conv2d(n_chan, 1, (n_chan, 1), padding="same", bias=True) 40 | 41 | def forward(self, x): 42 | # encoder 43 | x = self.conv1(x) 44 | x = self.conv2(x) 45 | # decoder 46 | x = self.conv3(x) 47 | x = self.conv4(x) 48 | 49 | x = self.conv5(x) 50 | return x 51 | 52 | 53 | if __name__ == '__main__': 54 | 55 | # 定义输入张量的参数 56 | batch_size = 1 # 批次大小,表示处理一个样本 57 | n_channels = 56 # EEG信号的通道数 58 | sampling_rate = 128.0 # 信号采样频率,单位为Hz 59 | time_length = int(sampling_rate) # 时间长度(宽度),即一个时间序列周期内的数据点数 60 | 61 | # 初始化模型 62 | model = CLEEGN(n_chan=n_channels, fs=sampling_rate, N_F=20, tem_kernelLen=0.1) 63 | 64 | # 生成随机输入张量,模拟EEG数据 65 | input_tensor = torch.randn(batch_size, 1, n_channels, time_length) # (batch_size, channels, height, width) 66 | 67 | # 执行前向传播 68 | output = model(input_tensor) 69 | 70 | # 输出输入和输出张量的形状 71 | print(f'输入张量形状: {input_tensor.shape}') 72 | print(f'输出张量形状: {output.shape}') 73 | 74 | -------------------------------------------------------------------------------- /采样/EUCB.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | # 论文:EMCAD: Efficient Multi-scale Convolutional Attention Decoding for Medical Image Segmentation, CVPR2024 4 | # 论文地址:https://arxiv.org/pdf/2405.06880 5 | 6 | def channel_shuffle(x, groups): 7 | batchsize, num_channels, height, width = x.data.size() 8 | channels_per_group = num_channels // groups 9 | # reshape 10 | x = x.view(batchsize, groups, 11 | channels_per_group, height, width) 12 | x = torch.transpose(x, 1, 2).contiguous() 13 | # flatten 14 | x = x.view(batchsize, -1, height, width) 15 | return x 16 | 17 | def act_layer(act, inplace=False, neg_slope=0.2, n_prelu=1): 18 | # activation layer 19 | act = act.lower() 20 | if act == 'relu': 21 | layer = nn.ReLU(inplace) 22 | elif act == 'relu6': 23 | layer = nn.ReLU6(inplace) 24 | elif act == 'leakyrelu': 25 | layer = nn.LeakyReLU(neg_slope, inplace) 26 | elif act == 'prelu': 27 
| layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope) 28 | elif act == 'gelu': 29 | layer = nn.GELU() 30 | elif act == 'hswish': 31 | layer = nn.Hardswish(inplace) 32 | else: 33 | raise NotImplementedError('activation layer [%s] is not found' % act) 34 | return layer 35 | 36 | # Efficient up-convolution block (EUCB) 37 | class EUCB(nn.Module): 38 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, activation='relu'): 39 | super(EUCB, self).__init__() 40 | 41 | self.in_channels = in_channels 42 | self.out_channels = out_channels 43 | self.up_dwc = nn.Sequential( 44 | nn.Upsample(scale_factor=2), 45 | nn.Conv2d(self.in_channels, self.in_channels, kernel_size=kernel_size, stride=stride, 46 | padding=kernel_size // 2, groups=self.in_channels, bias=False), 47 | nn.BatchNorm2d(self.in_channels), 48 | act_layer(activation, inplace=True) 49 | ) 50 | self.pwc = nn.Sequential( 51 | nn.Conv2d(self.in_channels, self.out_channels, kernel_size=1, stride=1, padding=0, bias=True) 52 | ) 53 | 54 | def forward(self, x): 55 | x = self.up_dwc(x) 56 | x = channel_shuffle(x, self.in_channels) 57 | x = self.pwc(x) 58 | return x 59 | 60 | 61 | if __name__ == '__main__': 62 | input = torch.randn(1, 32, 64, 64) #B C H W 63 | 64 | block = EUCB(in_channels=32, out_channels=64) 65 | 66 | print(input.size()) 67 | 68 | output = block(input) 69 | print(output.size()) -------------------------------------------------------------------------------- /MCM.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | # 论文:MAGNet: Multi-scale Awareness and Global fusion Network for RGB-D salient object detection | KBS 5 | # 论文地址:https://www.sciencedirect.com/science/article/abs/pii/S0950705124007603 6 | # github地址:https://github.com/mingyu6346/MAGNet 7 | 8 | TRAIN_SIZE = 384 9 | 10 | class MCM(nn.Module): 11 | def __init__(self, inc, outc): 12 | super().__init__() 13 | self.upsample2 = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True) 14 | self.rc = nn.Sequential( 15 | nn.Conv2d(in_channels=inc, out_channels=inc, kernel_size=3, padding=1, stride=1, groups=inc), 16 | nn.BatchNorm2d(inc), 17 | nn.GELU(), 18 | nn.Conv2d(in_channels=inc, out_channels=outc, kernel_size=1, stride=1), 19 | nn.BatchNorm2d(outc), 20 | nn.GELU() 21 | ) 22 | self.predtrans = nn.Sequential( 23 | nn.Conv2d(in_channels=outc, out_channels=outc, kernel_size=3, padding=1, groups=outc), 24 | nn.BatchNorm2d(outc), 25 | nn.GELU(), 26 | nn.Conv2d(in_channels=outc, out_channels=1, kernel_size=1) 27 | ) 28 | 29 | self.rc2 = nn.Sequential( 30 | nn.Conv2d(in_channels=outc * 2, out_channels=outc * 2, kernel_size=3, padding=1, groups=outc * 2), 31 | nn.BatchNorm2d(outc * 2), 32 | nn.GELU(), 33 | nn.Conv2d(in_channels=outc * 2, out_channels=outc, kernel_size=1, stride=1), 34 | nn.BatchNorm2d(outc), 35 | nn.GELU() 36 | ) 37 | 38 | def forward(self, x1, x2): 39 | x2_upsample = self.upsample2(x2) # 上采样 40 | x2_rc = self.rc(x2_upsample) # 减少通道数 41 | shortcut = x2_rc 42 | 43 | x_cat = torch.cat((x1, x2_rc), dim=1) # 拼接 44 | x_forward = self.rc2(x_cat) # 减少通道数2 45 | x_forward = x_forward + shortcut 46 | pred = F.interpolate(self.predtrans(x_forward), TRAIN_SIZE, mode="bilinear", align_corners=True) # 预测图 47 | 48 | return pred, x_forward 49 | 50 | 51 | if __name__ == '__main__': 52 | 53 | inc = 64 # 输入通道数 54 | outc = 32 # 输出通道数 55 | mcm = MCM(inc=inc, outc=outc) 56 | 57 | x1 = torch.randn(1, outc, 96, 96) # Batch size=1, 
Channels=outc, Height=96, Width=96 58 | x2 = torch.randn(1, inc, 48, 48) # Batch size=1, Channels=inc, Height=48, Width=48 59 | 60 | pred, x_forward = mcm(x1, x2) 61 | 62 | print(x1.size()) 63 | print(x2.size()) 64 | print(pred.size()) 65 | print(x_forward.size()) -------------------------------------------------------------------------------- /LAE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from einops import rearrange 4 | # 论文地址:https://arxiv.org/pdf/2408.14087 5 | # 论文:LSM-YOLO: A Compact and Effective ROI Detector for Medical Detection 6 | 7 | 8 | def autopad(k, p=None, d=1): # kernel, padding, dilation 9 | """Pad to 'same' shape outputs.""" 10 | if d > 1: 11 | k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size 12 | if p is None: 13 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 14 | return p 15 | 16 | class Conv(nn.Module): 17 | """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation).""" 18 | default_act = nn.SiLU() # default activation 19 | 20 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True): 21 | """Initialize Conv layer with given arguments including activation.""" 22 | super().__init__() 23 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False) 24 | self.bn = nn.BatchNorm2d(c2) 25 | self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() 26 | 27 | def forward(self, x): 28 | """Apply convolution, batch normalization and activation to input tensor.""" 29 | return self.act(self.bn(self.conv(x))) 30 | 31 | def forward_fuse(self, x): 32 | """Perform transposed convolution of 2D data.""" 33 | return self.act(self.conv(x)) 34 | 35 | class LAE(nn.Module): 36 | # Light-weight Adaptive Extraction 37 | def __init__(self, ch, group=16) -> None: 38 | super().__init__() 39 | 40 | self.softmax = nn.Softmax(dim=-1) 41 | self.attention = nn.Sequential( 42 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 43 | Conv(ch, ch, k=1) 44 | ) 45 | 46 | self.ds_conv = Conv(ch, ch * 4, k=3, s=2, g=(ch // group)) 47 | 48 | def forward(self, x): 49 | # bs, ch, 2*h, 2*w => bs, ch, h, w, 4 50 | att = rearrange(self.attention(x), 'bs ch (s1 h) (s2 w) -> bs ch h w (s1 s2)', s1=2, s2=2) 51 | att = self.softmax(att) 52 | 53 | # bs, 4 * ch, h, w => bs, ch, h, w, 4 54 | x = rearrange(self.ds_conv(x), 'bs (s ch) h w -> bs ch h w s', s=4) 55 | x = torch.sum(x * att, dim=-1) 56 | return x 57 | 58 | 59 | if __name__ == '__main__': 60 | 61 | input = torch.randn(1, 16, 64, 64) # B C H W 62 | block = LAE(ch=16) 63 | output = block(input) 64 | print(input.size()) 65 | print(output.size()) -------------------------------------------------------------------------------- /tfcm.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | #论文:Multi-Scale Temporal Frequency Convolutional Network With Axial Attention for Speech Enhancement(ICASSP 2022) 4 | #论文地址:https://ieeexplore.ieee.org/document/9746610 5 | 6 | class TFCM_Block(nn.Module): 7 | def __init__(self, 8 | cin=24, 9 | K=(3, 3), 10 | dila=1, 11 | causal=True, 12 | ): 13 | super(TFCM_Block, self).__init__() 14 | self.pconv1 = nn.Sequential( 15 | nn.Conv2d(cin, cin, kernel_size=(1, 1)), 16 | nn.BatchNorm2d(cin), 17 | nn.PReLU(cin), 18 | ) 19 | dila_pad = dila * (K[1] - 1) 20 | if causal: 21 | 
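            # Causal padding (added note): pad dila_pad = dila*(K[1]-1) steps on the left of the
            # time axis only, plus one frequency bin on each side, so the dilated depthwise conv
            # never sees future frames; the non-causal branch below splits the same padding
            # symmetrically in time.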
self.dila_conv = nn.Sequential( 22 | nn.ConstantPad2d((dila_pad, 0, 1, 1), 0.0), 23 | nn.Conv2d(cin, cin, K, 1, dilation=(1, dila), groups=cin), 24 | nn.BatchNorm2d(cin), 25 | nn.PReLU(cin) 26 | ) 27 | else: 28 | # update 22/06/21, add groups for non-casual 29 | self.dila_conv = nn.Sequential( 30 | nn.ConstantPad2d((dila_pad//2, dila_pad//2, 1, 1), 0.0), 31 | nn.Conv2d(cin, cin, K, 1, dilation=(1, dila), groups=cin), 32 | nn.BatchNorm2d(cin), 33 | nn.PReLU(cin) 34 | ) 35 | self.pconv2 = nn.Conv2d(cin, cin, kernel_size=(1, 1)) 36 | self.causal = causal 37 | self.dila_pad = dila_pad 38 | 39 | def forward(self, inps): 40 | """ 41 | inp: B x C x F x T 42 | """ 43 | outs = self.pconv1(inps) 44 | outs = self.dila_conv(outs) 45 | outs = self.pconv2(outs) 46 | return outs + inps 47 | 48 | 49 | class TFCM(nn.Module): 50 | def __init__(self, 51 | cin=24, 52 | K=(3, 3), 53 | tfcm_layer=6, 54 | causal=True, 55 | ): 56 | super(TFCM, self).__init__() 57 | self.tfcm = nn.ModuleList() 58 | for idx in range(tfcm_layer): 59 | self.tfcm.append( 60 | TFCM_Block(cin, K, 2**idx, causal=causal) 61 | ) 62 | 63 | def forward(self, inp): 64 | out = inp 65 | for idx in range(len(self.tfcm)): 66 | out = self.tfcm[idx](out) 67 | return out 68 | 69 | 70 | 71 | 72 | 73 | if __name__ == "__main__": 74 | block = TFCM(24) 75 | input = th.randn(2, 24, 256, 101)# B C H W 76 | out = block(input) 77 | print(out.size()) -------------------------------------------------------------------------------- /(arXiv 2021) PSA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/2105.14447 7 | # 论文:EPSANet: An Efficient Pyramid Squeeze Attention Block on Convolutional Neural Network 8 | 9 | 10 | 11 | class PSA(nn.Module): 12 | 13 | def __init__(self, channel=512,reduction=4,S=4): 14 | super().__init__() 15 | self.S=S 16 | 17 | self.convs=[] 18 | for i in range(S): 19 | self.convs.append(nn.Conv2d(channel//S,channel//S,kernel_size=2*(i+1)+1,padding=i+1)) 20 | 21 | self.se_blocks=[] 22 | for i in range(S): 23 | self.se_blocks.append(nn.Sequential( 24 | nn.AdaptiveAvgPool2d(1), 25 | nn.Conv2d(channel//S, channel // (S*reduction),kernel_size=1, bias=False), 26 | nn.ReLU(inplace=True), 27 | nn.Conv2d(channel // (S*reduction), channel//S,kernel_size=1, bias=False), 28 | nn.Sigmoid() 29 | )) 30 | 31 | self.softmax=nn.Softmax(dim=1) 32 | 33 | 34 | def init_weights(self): 35 | for m in self.modules(): 36 | if isinstance(m, nn.Conv2d): 37 | init.kaiming_normal_(m.weight, mode='fan_out') 38 | if m.bias is not None: 39 | init.constant_(m.bias, 0) 40 | elif isinstance(m, nn.BatchNorm2d): 41 | init.constant_(m.weight, 1) 42 | init.constant_(m.bias, 0) 43 | elif isinstance(m, nn.Linear): 44 | init.normal_(m.weight, std=0.001) 45 | if m.bias is not None: 46 | init.constant_(m.bias, 0) 47 | 48 | def forward(self, x): 49 | b, c, h, w = x.size() 50 | 51 | #Step1:SPC module 52 | SPC_out=x.view(b,self.S,c//self.S,h,w) #bs,s,ci,h,w 53 | for idx,conv in enumerate(self.convs): 54 | SPC_out[:,idx,:,:,:]=conv(SPC_out[:,idx,:,:,:]) 55 | 56 | #Step2:SE weight 57 | se_out=[] 58 | for idx,se in enumerate(self.se_blocks): 59 | se_out.append(se(SPC_out[:,idx,:,:,:])) 60 | SE_out=torch.stack(se_out,dim=1) 61 | SE_out=SE_out.expand_as(SPC_out) 62 | 63 | #Step3:Softmax 64 | softmax_out=self.softmax(SE_out) 65 | 66 | #Step4:SPA 67 | PSA_out=SPC_out*softmax_out 68 | PSA_out=PSA_out.view(b,-1,h,w) 69 | 70 | return 
PSA_out 71 | 72 | 73 | if __name__ == '__main__': 74 | input=torch.randn(50,512,7,7) 75 | block = PSA(channel=512,reduction=8) 76 | output=block(input) 77 | a=output.view(-1).sum() 78 | a.backward() 79 | print(output.shape) 80 | 81 | -------------------------------------------------------------------------------- /(ICPR 2021)CAN(人群计数,CV2维任务通用).py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings("ignore") 3 | import torch.nn as nn 4 | import torch 5 | from torch.nn import functional as F 6 | from torchvision import models 7 | 8 | # 论文:Encoder-Decoder Based Convolutional Neural Networks with Multi-Scale-Aware Modules for Crowd Counting 9 | # 论文地址:https://ieeexplore.ieee.org/document/9413286 10 | 11 | class ContextualModule(nn.Module): 12 | def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)): 13 | super(ContextualModule, self).__init__() 14 | self.scales = [] 15 | self.scales = nn.ModuleList([self._make_scale(features, size) for size in sizes]) 16 | self.bottleneck = nn.Conv2d(features * 2, out_features, kernel_size=1) 17 | self.relu = nn.ReLU() 18 | self.weight_net = nn.Conv2d(features, features, kernel_size=1) 19 | self._initialize_weights() 20 | 21 | def __make_weight(self, feature, scale_feature): 22 | weight_feature = feature - scale_feature 23 | return F.sigmoid(self.weight_net(weight_feature)) 24 | 25 | def _make_scale(self, features, size): 26 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) 27 | conv = nn.Conv2d(features, features, kernel_size=1, bias=False) 28 | return nn.Sequential(prior, conv) 29 | 30 | def forward(self, feats): 31 | h, w = feats.size(2), feats.size(3) 32 | multi_scales = [F.upsample(input=stage(feats), size=(h, w), mode='bilinear') for stage in self.scales] 33 | weights = [self.__make_weight(feats, scale_feature) for scale_feature in multi_scales] 34 | overall_features = [(multi_scales[0] * weights[0] + multi_scales[1] * weights[1] + multi_scales[2] * weights[ 35 | 2] + multi_scales[3] * weights[3]) / (weights[0] + weights[1] + weights[2] + weights[3])] + [feats] 36 | bottle = self.bottleneck(torch.cat(overall_features, 1)) 37 | return self.relu(bottle) 38 | 39 | def _initialize_weights(self): 40 | for m in self.modules(): 41 | if isinstance(m, nn.Conv2d): 42 | nn.init.normal_(m.weight, std=0.01) 43 | if m.bias is not None: 44 | nn.init.constant_(m.bias, 0) 45 | elif isinstance(m, nn.BatchNorm2d): 46 | nn.init.constant_(m.weight, 1) 47 | nn.init.constant_(m.bias, 0) 48 | 49 | 50 | 51 | 52 | if __name__ == '__main__': 53 | block = ContextualModule(features=64, out_features=64) 54 | input_tensor = torch.rand(1, 64, 128, 128) 55 | output = block(input_tensor) 56 | print("Input size:", input_tensor.size()) 57 | print("Output size:", output.size()) -------------------------------------------------------------------------------- /SPConv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | 6 | class SPConv_3x3(nn.Module): 7 | def __init__(self, inplanes, outplanes, stride=1, ratio=0.5, reduction=16): 8 | super(SPConv_3x3, self).__init__() 9 | self.inplanes_3x3 = int(inplanes*ratio) 10 | self.inplanes_1x1 = inplanes - self.inplanes_3x3 11 | self.outplanes_3x3 = int(outplanes*ratio) 12 | self.outplanes_1x1 = outplanes - self.outplanes_3x3 13 | self.outplanes = outplanes 14 | self.stride = stride 15 | 16 | self.gwc = nn.Conv2d(self.inplanes_3x3, self.outplanes, kernel_size=3, 
stride=self.stride, 17 | padding=1, groups=2, bias=False) 18 | self.pwc = nn.Conv2d(self.inplanes_3x3, self.outplanes, kernel_size=1, bias=False) 19 | 20 | self.conv1x1 = nn.Conv2d(self.inplanes_1x1, self.outplanes,kernel_size=1) 21 | self.avgpool_s2_1 = nn.AvgPool2d(kernel_size=2,stride=2) 22 | self.avgpool_s2_3 = nn.AvgPool2d(kernel_size=2, stride=2) 23 | self.avgpool_add_1 = nn.AdaptiveAvgPool2d(1) 24 | self.avgpool_add_3 = nn.AdaptiveAvgPool2d(1) 25 | self.bn1 = nn.BatchNorm2d(self.outplanes) 26 | self.bn2 = nn.BatchNorm2d(self.outplanes) 27 | self.ratio = ratio 28 | self.groups = int(1/self.ratio) 29 | def forward(self, x): 30 | b, c, _, _ = x.size() 31 | 32 | 33 | x_3x3 = x[:,:int(c*self.ratio),:,:] 34 | x_1x1 = x[:,int(c*self.ratio):,:,:] 35 | out_3x3_gwc = self.gwc(x_3x3) 36 | if self.stride ==2: 37 | x_3x3 = self.avgpool_s2_3(x_3x3) 38 | out_3x3_pwc = self.pwc(x_3x3) 39 | out_3x3 = out_3x3_gwc + out_3x3_pwc 40 | out_3x3 = self.bn1(out_3x3) 41 | out_3x3_ratio = self.avgpool_add_3(out_3x3).squeeze() 42 | 43 | # use avgpool first to reduce information lost 44 | if self.stride == 2: 45 | x_1x1 = self.avgpool_s2_1(x_1x1) 46 | 47 | out_1x1 = self.conv1x1(x_1x1) 48 | out_1x1 = self.bn2(out_1x1) 49 | out_1x1_ratio = self.avgpool_add_1(out_1x1).squeeze() 50 | 51 | out_31_ratio = torch.stack((out_3x3_ratio, out_1x1_ratio), 2) 52 | out_31_ratio = nn.Softmax(dim=2)(out_31_ratio) 53 | out = out_1x1 * (out_31_ratio[:,:,1].view(b, self.outplanes, 1, 1).expand_as(out_1x1))\ 54 | + out_3x3 * (out_31_ratio[:,:,0].view(b, self.outplanes, 1, 1).expand_as(out_3x3)) 55 | 56 | return out 57 | if __name__ == '__main__': 58 | 59 | 60 | input = torch.randn(3, 64, 64, 64) 61 | 62 | block = SPConv_3x3(64, 32) 63 | output = block(input) 64 | print(input.size()) 65 | print(output.size()) -------------------------------------------------------------------------------- /LPA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | #论文:SwinPA-Net: Swin Transformer-Based Multiscale Feature Pyramid Aggregation Network for Medical Image Segmentation 4 | #论文地址:https://ieeexplore.ieee.org/document/9895210 5 | 6 | class ChannelAttention(nn.Module): 7 | def __init__(self, in_planes): 8 | super(ChannelAttention, self).__init__() 9 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 10 | self.max_pool = nn.AdaptiveMaxPool2d(1) 11 | 12 | self.fc1 = nn.Conv2d(in_planes, in_planes // 8, 1, bias=False) 13 | self.relu1 = nn.ReLU() 14 | self.fc2 = nn.Conv2d(in_planes // 8, in_planes, 1, bias=False) 15 | 16 | self.sigmoid = nn.Sigmoid() 17 | 18 | def forward(self, x): 19 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 20 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 21 | out = avg_out + max_out 22 | return self.sigmoid(out) 23 | 24 | 25 | class SpatialAttention(nn.Module): 26 | def __init__(self, kernel_size=3): 27 | super(SpatialAttention, self).__init__() 28 | 29 | assert kernel_size in (3, 7), 'kernel size must be 3 or 7' 30 | padding = 3 if kernel_size == 7 else 1 31 | 32 | self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) 33 | self.sigmoid = nn.Sigmoid() 34 | 35 | def forward(self, x): 36 | avg_out = torch.mean(x, dim=1, keepdim=True) 37 | max_out, _ = torch.max(x, dim=1, keepdim=True) 38 | x = torch.cat([avg_out, max_out], dim=1) 39 | x = self.conv1(x) 40 | return self.sigmoid(x) 41 | 42 | 43 | class LPA(nn.Module): 44 | def __init__(self, in_channel): 45 | super(LPA, self).__init__() 46 | self.ca = 
ChannelAttention(in_channel) 47 | self.sa = SpatialAttention() 48 | 49 | def forward(self, x): 50 | x0, x1 = x.chunk(2, dim=2) 51 | x0 = x0.chunk(2, dim=3) 52 | x1 = x1.chunk(2, dim=3) 53 | x0 = [self.ca(x0[-2]) * x0[-2], self.ca(x0[-1]) * x0[-1]] 54 | x0 = [self.sa(x0[-2]) * x0[-2], self.sa(x0[-1]) * x0[-1]] 55 | 56 | x1 = [self.ca(x1[-2]) * x1[-2], self.ca(x1[-1]) * x1[-1]] 57 | x1 = [self.sa(x1[-2]) * x1[-2], self.sa(x1[-1]) * x1[-1]] 58 | 59 | x0 = torch.cat(x0, dim=3) 60 | x1 = torch.cat(x1, dim=3) 61 | x3 = torch.cat((x0, x1), dim=2) 62 | 63 | x4 = self.ca(x) * x 64 | x4 = self.sa(x4) * x4 65 | x = x3 + x4 66 | return x 67 | 68 | 69 | if __name__ == '__main__': 70 | 71 | input = torch.rand(1, 28, 64, 64) 72 | block = LPA(in_channel=28) 73 | output = block(input) 74 | 75 | print(input.size()) 76 | print(output.size()) -------------------------------------------------------------------------------- /DA.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | class DoubleAttentionLayer(nn.Module): 7 | """ 8 | Implementation of Double Attention Network. NIPS 2018 9 | """ 10 | 11 | def __init__(self, in_channels: int, c_m: int, c_n: int, reconstruct=False): 12 | """ 13 | Parameters 14 | ---------- 15 | in_channels 16 | c_m 17 | c_n 18 | reconstruct: `bool` whether to re-construct output to have shape (B, in_channels, L, R) 19 | """ 20 | super(DoubleAttentionLayer, self).__init__() 21 | self.c_m = c_m 22 | self.c_n = c_n 23 | self.in_channels = in_channels 24 | self.reconstruct = reconstruct 25 | self.convA = nn.Conv2d(in_channels, c_m, kernel_size=1) 26 | self.convB = nn.Conv2d(in_channels, c_n, kernel_size=1) 27 | self.convV = nn.Conv2d(in_channels, c_n, kernel_size=1) 28 | if self.reconstruct: 29 | self.conv_reconstruct = nn.Conv2d(c_m, in_channels, kernel_size=1) 30 | 31 | def forward(self, x: torch.Tensor): 32 | """ 33 | Parameters 34 | ---------- 35 | x: `torch.Tensor` of shape (B, C, H, W) 36 | Returns 37 | ------- 38 | """ 39 | batch_size, c, h, w = x.size() 40 | assert c == self.in_channels, 'input channel not equal!' 
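        # (added) Double attention in two steps: convB yields c_n attention maps that gather
        # the c_m features from convA into a (B, c_m, c_n) set of global descriptors, and convV
        # yields c_n attention vectors that distribute those descriptors back to every position.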
41 | A = self.convA(x) # (B, c_m, h, w) because kernel size is 1 42 | 43 | B = self.convB(x) # (B, c_n, h, w) 44 | V = self.convV(x) # (B, c_n, h, w) 45 | 46 | tmpA = A.view(batch_size, self.c_m, h * w) 47 | 48 | attention_maps = B.view(batch_size, self.c_n, h * w) 49 | attention_vectors = V.view(batch_size, self.c_n, h * w) 50 | 51 | # softmax on the last dimension to create attention maps 52 | attention_maps = F.softmax(attention_maps, dim=-1) # 对hxw维度进行softmax 53 | 54 | # step 1: feature gathering 55 | global_descriptors = torch.bmm( # attention map(V)和tmpA进行 56 | tmpA, attention_maps.permute(0, 2, 1)) # (B, c_m, c_n) 57 | 58 | # step 2: feature distribution 59 | # (B, c_n, h * w) attention on c_n dimension - channel wise 60 | attention_vectors = F.softmax(attention_vectors, dim=1) 61 | 62 | tmpZ = global_descriptors.matmul( 63 | attention_vectors) # B, self.c_m, h * w 64 | 65 | tmpZ = tmpZ.view(batch_size, self.c_m, h, w) 66 | if self.reconstruct: 67 | tmpZ = self.conv_reconstruct(tmpZ) 68 | return tmpZ 69 | 70 | 71 | if __name__ == "__main__": 72 | input = torch.zeros(3, 12, 8, 8) 73 | block = DoubleAttentionLayer(12, 24, 4) 74 | output=block(input) 75 | print(output.size()) -------------------------------------------------------------------------------- /频域/(CVPR 2024)FRFN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from einops import rearrange 4 | import math 5 | 6 | # 论文:Adapt or Perish: Adaptive Sparse Transformer with Attentive Feature Refinement for Image Restoration, CVPR 2024. 7 | # 论文地址:https://openaccess.thecvf.com/content/CVPR2024/papers/Zhou_Adapt_or_Perish_Adaptive_Sparse_Transformer_with_Attentive_Feature_Refinement_CVPR_2024_paper.pdf 8 | # 全网最全100➕即插即用模块GitHub地址:https://github.com/ai-dawang/PlugNPlay-Modules 9 | class FRFN(nn.Module): 10 | def __init__(self, dim=32, hidden_dim=128, act_layer=nn.GELU): 11 | super().__init__() 12 | self.linear1 = nn.Sequential(nn.Linear(dim, hidden_dim * 2), 13 | act_layer()) 14 | self.dwconv = nn.Sequential( 15 | nn.Conv2d(hidden_dim, hidden_dim, groups=hidden_dim, kernel_size=3, stride=1, padding=1), 16 | act_layer()) 17 | self.linear2 = nn.Sequential(nn.Linear(hidden_dim, dim)) 18 | self.dim = dim 19 | self.hidden_dim = hidden_dim 20 | 21 | self.dim_conv = self.dim // 4 22 | self.dim_untouched = self.dim - self.dim_conv 23 | self.partial_conv3 = nn.Conv2d(self.dim_conv, self.dim_conv, 3, 1, 1, bias=False) 24 | 25 | def forward(self, x): 26 | x_init = x 27 | # bs x hw x c 28 | bs, hw, c = x.size() 29 | hh = int(math.sqrt(hw)) 30 | 31 | # spatial restore 32 | x = rearrange(x, ' b (h w) (c) -> b c h w ', h=hh, w=hh) 33 | 34 | x1, x2, = torch.split(x, [self.dim_conv, self.dim_untouched], dim=1) 35 | x1 = self.partial_conv3(x1) 36 | x = torch.cat((x1, x2), 1) 37 | 38 | # flaten 39 | x = rearrange(x, ' b c h w -> b (h w) c', h=hh, w=hh) 40 | 41 | x = self.linear1(x) 42 | # gate mechanism 43 | x_1, x_2 = x.chunk(2, dim=-1) 44 | 45 | x_1 = rearrange(x_1, ' b (h w) (c) -> b c h w ', h=hh, w=hh) 46 | x_1 = self.dwconv(x_1) 47 | x_1 = rearrange(x_1, ' b c h w -> b (h w) c', h=hh, w=hh) 48 | x = x_1 * x_2 49 | 50 | x = self.linear2(x) 51 | 52 | return x + x_init 53 | 54 | 55 | if __name__ == '__main__': 56 | # Instantiate the FRFN class 57 | dim = 64 # Dimension of input features 58 | 59 | 60 | # Create an instance of the FRFN module 61 | frfn = FRFN(dim) 62 | 63 | # Generate a random input tensor 64 | B = 1 # Batch size 65 | H = 64 # Height of the feature map 
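    # (added) FRFN.forward recovers the spatial size with hh = int(sqrt(hw)), so the flattened
    # (B, H*W, C) input must come from a square feature map (H == W), as in this example.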
66 | W = 64 # Width of the feature map 67 | C = dim # Number of channels 68 | 69 | input = torch.randn(B, H * W, C) 70 | 71 | # Forward pass 72 | output = frfn(input) 73 | 74 | # Print input and output shapes 75 | print(input.size()) 76 | print(output.size()) 77 | -------------------------------------------------------------------------------- /FECAttention.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import numpy as np 3 | import torch 4 | #论文:FECAM: Frequency Enhanced Channel Attention Mechanism for Time Series Forecasting 5 | #论文地址:https://arxiv.org/abs/2212.01209 6 | 7 | try: 8 | from torch import irfft 9 | from torch import rfft 10 | except ImportError: 11 | def rfft(x, d): 12 | t = torch.fft.fft(x, dim=(-d)) 13 | r = torch.stack((t.real, t.imag), -1) 14 | return r 15 | 16 | 17 | def irfft(x, d): 18 | t = torch.fft.ifft(torch.complex(x[:, :, 0], x[:, :, 1]), dim=(-d)) 19 | return t.real 20 | 21 | 22 | def dct(x, norm=None): 23 | """ 24 | Discrete Cosine Transform, Type II (a.k.a. the DCT) 25 | 26 | For the meaning of the parameter `norm`, see: 27 | https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html 28 | 29 | :param x: the input signal 30 | :param norm: the normalization, None or 'ortho' 31 | :return: the DCT-II of the signal over the last dimension 32 | """ 33 | x_shape = x.shape 34 | N = x_shape[-1] 35 | x = x.contiguous().view(-1, N) 36 | 37 | v = torch.cat([x[:, ::2], x[:, 1::2].flip([1])], dim=1) 38 | 39 | Vc = rfft(v, 1) 40 | 41 | k = - torch.arange(N, dtype=x.dtype, device=x.device)[None, :] * np.pi / (2 * N) 42 | W_r = torch.cos(k) 43 | W_i = torch.sin(k) 44 | 45 | V = Vc[:, :, 0] * W_r - Vc[:, :, 1] * W_i 46 | 47 | if norm == 'ortho': 48 | V[:, 0] /= np.sqrt(N) * 2 49 | V[:, 1:] /= np.sqrt(N / 2) * 2 50 | 51 | V = 2 * V.view(*x_shape) 52 | 53 | return V 54 | 55 | 56 | class dct_channel_block(nn.Module): 57 | def __init__(self, channel): 58 | super(dct_channel_block, self).__init__() 59 | self.fc = nn.Sequential( 60 | nn.Linear(channel, channel * 2, bias=False), 61 | nn.Dropout(p=0.1), 62 | nn.ReLU(inplace=True), 63 | nn.Linear(channel * 2, channel, bias=False), 64 | nn.Sigmoid() 65 | ) 66 | 67 | self.dct_norm = nn.LayerNorm([96], eps=1e-6) # for lstm on length-wise 68 | 69 | def forward(self, x): 70 | b, c, l = x.size() # (B,C,L) (32,96,512) 71 | list = [] 72 | for i in range(c): 73 | freq = dct(x[:, i, :]) 74 | list.append(freq) 75 | 76 | stack_dct = torch.stack(list, dim=1) 77 | 78 | lr_weight = self.dct_norm(stack_dct) 79 | lr_weight = self.fc(lr_weight) 80 | lr_weight = self.dct_norm(lr_weight) 81 | 82 | return x * lr_weight # result 83 | 84 | 85 | if __name__ == '__main__': 86 | input = torch.rand(8, 7, 96) 87 | block = dct_channel_block(96) 88 | result = block(input) 89 | print("input_tensor.shape:", input.shape) 90 | print("result.shape:", result.shape) -------------------------------------------------------------------------------- /ULSAM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | torch.set_default_tensor_type(torch.cuda.FloatTensor) 5 | #ULSAM: Ultra-Lightweight Subspace Attention Module for Compact Convolutional Neural Networks(WACV20) 6 | 7 | class SubSpace(nn.Module): 8 | 9 | 10 | def __init__(self, nin: int) -> None: 11 | super(SubSpace, self).__init__() 12 | self.conv_dws = nn.Conv2d( 13 | nin, nin, kernel_size=1, stride=1, padding=0, groups=nin 14 | ) 15 | self.bn_dws = 
nn.BatchNorm2d(nin, momentum=0.9) 16 | self.relu_dws = nn.ReLU(inplace=False) 17 | 18 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) 19 | 20 | self.conv_point = nn.Conv2d( 21 | nin, 1, kernel_size=1, stride=1, padding=0, groups=1 22 | ) 23 | self.bn_point = nn.BatchNorm2d(1, momentum=0.9) 24 | self.relu_point = nn.ReLU(inplace=False) 25 | 26 | self.softmax = nn.Softmax(dim=2) 27 | 28 | def forward(self, x: torch.Tensor) -> torch.Tensor: 29 | out = self.conv_dws(x) 30 | out = self.bn_dws(out) 31 | out = self.relu_dws(out) 32 | 33 | out = self.maxpool(out) 34 | 35 | out = self.conv_point(out) 36 | out = self.bn_point(out) 37 | out = self.relu_point(out) 38 | 39 | m, n, p, q = out.shape 40 | out = self.softmax(out.view(m, n, -1)) 41 | out = out.view(m, n, p, q) 42 | 43 | out = out.expand(x.shape[0], x.shape[1], x.shape[2], x.shape[3]) 44 | 45 | out = torch.mul(out, x) 46 | 47 | out = out + x 48 | 49 | return out 50 | 51 | 52 | class ULSAM(nn.Module): 53 | 54 | 55 | def __init__(self, nin: int, nout: int, h: int, w: int, num_splits: int) -> None: 56 | super(ULSAM, self).__init__() 57 | 58 | assert nin % num_splits == 0 59 | 60 | self.nin = nin 61 | self.nout = nout 62 | self.h = h 63 | self.w = w 64 | self.num_splits = num_splits 65 | 66 | self.subspaces = nn.ModuleList( 67 | [SubSpace(int(self.nin / self.num_splits)) for i in range(self.num_splits)] 68 | ) 69 | 70 | def forward(self, x: torch.Tensor) -> torch.Tensor: 71 | group_size = int(self.nin / self.num_splits) 72 | 73 | # split at batch dimension 74 | sub_feat = torch.chunk(x, self.num_splits, dim=1) 75 | 76 | out = [] 77 | for idx, l in enumerate(self.subspaces): 78 | out.append(self.subspaces[idx](sub_feat[idx])) 79 | 80 | out = torch.cat(out, dim=1) 81 | 82 | return out 83 | 84 | 85 | if __name__ == '__main__': 86 | 87 | input = torch.randn(1, 32, 112, 112) 88 | 89 | block = ULSAM(32, 64, 112, 112, 4) 90 | print(input.size()) 91 | output = block(input) 92 | 93 | print(output.size()) -------------------------------------------------------------------------------- /注意力/(WACV 2021)TripletAttention.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------- 2 | # 论文: Rotate to Attend: Convolutional Triplet Attention Module (WACV 2021) 3 | # Github地址: https://github.com/landskape-ai/triplet-attention 4 | # --------------------------------------- 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | class BasicConv(nn.Module): 11 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, 12 | bn=True, bias=False): 13 | super(BasicConv, self).__init__() 14 | self.out_channels = out_planes 15 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, 16 | dilation=dilation, groups=groups, bias=bias) 17 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None 18 | self.relu = nn.ReLU() if relu else None 19 | 20 | def forward(self, x): 21 | x = self.conv(x) 22 | if self.bn is not None: 23 | x = self.bn(x) 24 | if self.relu is not None: 25 | x = self.relu(x) 26 | return x 27 | 28 | 29 | class ZPool(nn.Module): 30 | def forward(self, x): 31 | return torch.cat((torch.max(x, 1)[0].unsqueeze(1), torch.mean(x, 1).unsqueeze(1)), dim=1) 32 | 33 | 34 | class AttentionGate(nn.Module): 35 | def __init__(self): 36 | super(AttentionGate, self).__init__() 37 | kernel_size = 7 38 | self.compress = ZPool() 39 | self.conv = 
BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size - 1) // 2, relu=False) 40 | 41 | def forward(self, x): 42 | x_compress = self.compress(x) 43 | x_out = self.conv(x_compress) 44 | scale = torch.sigmoid_(x_out) 45 | return x * scale 46 | 47 | 48 | class TripletAttention(nn.Module): 49 | def __init__(self, no_spatial=False): 50 | super(TripletAttention, self).__init__() 51 | self.cw = AttentionGate() 52 | self.hc = AttentionGate() 53 | self.no_spatial = no_spatial 54 | if not no_spatial: 55 | self.hw = AttentionGate() 56 | 57 | def forward(self, x): 58 | x_perm1 = x.permute(0, 2, 1, 3).contiguous() 59 | x_out1 = self.cw(x_perm1) 60 | x_out11 = x_out1.permute(0, 2, 1, 3).contiguous() 61 | x_perm2 = x.permute(0, 3, 2, 1).contiguous() 62 | x_out2 = self.hc(x_perm2) 63 | x_out21 = x_out2.permute(0, 3, 2, 1).contiguous() 64 | if not self.no_spatial: 65 | x_out = self.hw(x) 66 | x_out = 1 / 3 * (x_out + x_out11 + x_out21) 67 | else: 68 | x_out = 1 / 2 * (x_out11 + x_out21) 69 | return x_out 70 | 71 | 72 | # 输入 B C H W, 输出 B C H W 73 | if __name__ == '__main__': 74 | input = torch.randn(3, 32, 64, 64) 75 | triplet = TripletAttention() 76 | output = triplet(input) 77 | print(output.shape) 78 | -------------------------------------------------------------------------------- /注意力/(TPAMI 2021)OutlookAttention.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------- 2 | # 论文: VOLO: Vision Outlooker for Visual Recognition (TPAMI 2021) 3 | # Github地址: https://github.com/sail-sg/volo 4 | # --------------------------------------- 5 | import torch 6 | from torch import nn 7 | import math 8 | from torch.nn import functional as F 9 | 10 | 11 | class OutlookAttention(nn.Module): 12 | """ 13 | Implementation of outlook attention 14 | --dim: hidden dim 15 | --num_heads: number of heads 16 | --kernel_size: kernel size in each window for outlook attention 17 | return: token features after outlook attention 18 | """ 19 | 20 | def __init__(self, dim, num_heads, kernel_size=3, padding=1, stride=1, 21 | qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): 22 | super().__init__() 23 | head_dim = dim // num_heads 24 | self.num_heads = num_heads 25 | self.kernel_size = kernel_size 26 | self.padding = padding 27 | self.stride = stride 28 | self.scale = qk_scale or head_dim**-0.5 29 | 30 | self.v = nn.Linear(dim, dim, bias=qkv_bias) 31 | self.attn = nn.Linear(dim, kernel_size**4 * num_heads) 32 | 33 | self.attn_drop = nn.Dropout(attn_drop) 34 | self.proj = nn.Linear(dim, dim) 35 | self.proj_drop = nn.Dropout(proj_drop) 36 | 37 | self.unfold = nn.Unfold(kernel_size=kernel_size, padding=padding, stride=stride) 38 | self.pool = nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True) 39 | 40 | def forward(self, x): 41 | B, H, W, C = x.shape 42 | 43 | v = self.v(x).permute(0, 3, 1, 2) # B, C, H, W 44 | 45 | h, w = math.ceil(H / self.stride), math.ceil(W / self.stride) 46 | v = self.unfold(v).reshape(B, self.num_heads, C // self.num_heads, 47 | self.kernel_size * self.kernel_size, 48 | h * w).permute(0, 1, 4, 3, 2) # B,H,N,kxk,C/H 49 | 50 | attn = self.pool(x.permute(0, 3, 1, 2)).permute(0, 2, 3, 1) 51 | attn = self.attn(attn).reshape( 52 | B, h * w, self.num_heads, self.kernel_size * self.kernel_size, 53 | self.kernel_size * self.kernel_size).permute(0, 2, 1, 3, 4) # B,H,N,kxk,kxk 54 | attn = attn * self.scale 55 | attn = attn.softmax(dim=-1) 56 | attn = self.attn_drop(attn) 57 | 58 | x = (attn @ v).permute(0, 1, 4, 3, 2).reshape( 
59 | B, C * self.kernel_size * self.kernel_size, h * w) 60 | x = F.fold(x, output_size=(H, W), kernel_size=self.kernel_size, 61 | padding=self.padding, stride=self.stride) 62 | 63 | x = self.proj(x.permute(0, 2, 3, 1)) 64 | x = self.proj_drop(x) 65 | 66 | return x 67 | 68 | 69 | # 输入 B, H, W, C, 输出 B, H, W, C 70 | if __name__ == '__main__': 71 | block = OutlookAttention(dim=32,num_heads=8).cuda() 72 | input = torch.rand(3, 64, 64, 32).cuda() 73 | output = block(input) 74 | print(input.size(), output.size()) 75 | -------------------------------------------------------------------------------- /点云/Attention.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | #论文:Point MixSwap: Attentional Point Cloud Mixing via Swapping Matched Structural Divisions 6 | #论文地址:https://vllab.cs.nycu.edu.tw/images/paper/eccv_umam22.pdf 7 | class Attention(nn.Module): 8 | def __init__(self, dim_Q, dim_K, dim_LIN, num_heads, n_pts=1024, ln=False): 9 | super(Attention, self).__init__() 10 | self.n_pts = n_pts 11 | self.dim_LIN = dim_LIN 12 | self.num_heads = num_heads 13 | self.fc_q = nn.Linear(dim_Q, dim_LIN) 14 | self.fc_k = nn.Linear(dim_K, dim_LIN) 15 | self.fc_v = nn.Linear(dim_K, dim_LIN) 16 | if ln: 17 | self.ln0 = nn.LayerNorm(dim_LIN) 18 | self.ln1 = nn.LayerNorm(dim_LIN) 19 | self.fc_o = nn.Linear(dim_LIN, dim_LIN) 20 | 21 | def forward(self, Q, K, return_attn=True): # Q = [BS, 1, emb_dim = dim_Q]; K = [BS, n_pts, emb_dim= dim_K] 22 | Q = self.fc_q(Q) # [BS=6, n_div=3, dim_V=1024] 23 | K, V = self.fc_k(K), self.fc_v(K) # K = [BS=6, n_pts=1024, emb_dim = dim_V = 1024]; V_dim= K_dim 24 | dim_split = self.dim_LIN // self.num_heads 25 | Q_ = torch.cat(Q.split(dim_split, 2), 0) #[BS*n_head=6*n_head,n_div,dim_split=1024/1=1024] --> every n_div here & below can be n_pts 26 | K_ = torch.cat(K.split(dim_split, 2), 0) #[BS*n_head=6*n_head,n_pts,dim_split=1024/1=1024] 27 | V_ = torch.cat(V.split(dim_split, 2), 0) #[BS*n_head=6*n_head,n_pts,dim_split=1024/1=1024] 28 | A = torch.softmax(Q_.bmm(K_.transpose(1,2)) / math.sqrt(self.dim_LIN), 2) #[BS*n_head=6*n_head,n_div,dim_split=1024/1=1024] 29 | temp = (Q_ + A.bmm(V_)).split(Q.size(0), 0) #tupple of n_head, @[BS=6,n_div=3,dim_split=1024] 30 | O = torch.cat(temp, 2) #[BS=6,n_div=3,dim_split*n_head=emb=1024] 31 | O = O if getattr(self, 'ln0', None) is None else self.ln0(O) 32 | O = O + F.relu(self.fc_o(O)) 33 | O = O if getattr(self, 'ln1', None) is None else self.ln1(O) 34 | if self.num_heads >= 2: 35 | A = A.split(Q.size(0),dim=0) #tupple of n_head, @[BS=6,n_div=3,dim_split=1024] 36 | A = torch.stack([tensor_ for tensor_ in A], dim=0) #[n_head,BS,n_div=3,emb=1024] 37 | A = torch.mean(A, dim=0) #[BS,n_div=3,emb=1024] 38 | if return_attn: 39 | if A.size(-1) == self.n_pts: 40 | A = A.permute(0, 2, 1) #[BS, n_pts, n_div] 41 | return O, A 42 | else: 43 | return O 44 | 45 | 46 | if __name__ == '__main__': 47 | # 定义注意力机制 48 | block = Attention(dim_Q=1024, dim_K=1024, dim_LIN=1024, num_heads=8, n_pts=1024, ln=True) 49 | 50 | # 创建模拟输入数据 51 | batch_size = 6 52 | Q = torch.randn(batch_size, 1, 1024) # Query 张量 53 | K = torch.randn(batch_size, 1024, 1024) # Key 张量 54 | 55 | # 执行前向传播 56 | output, attention_scores = block(Q, K, return_attn=True) 57 | 58 | print(output.size()) 59 | print(attention_scores.size()) 60 | -------------------------------------------------------------------------------- /特征融合/(TIP2024)CGA特征融合模块.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # 论文:DEA-Net: Single image dehazing based on detail enhanced convolution and content-guided attention 3 | # GitHub地址:https://github.com/cecret3350/DEA-Net/tree/main 4 | # -------------------------------------------------------- 5 | 6 | import torch 7 | from torch import nn 8 | from einops.layers.torch import Rearrange 9 | 10 | 11 | class SpatialAttention(nn.Module): 12 | def __init__(self): 13 | super(SpatialAttention, self).__init__() 14 | self.sa = nn.Conv2d(2, 1, 7, padding=3, padding_mode='reflect', bias=True) 15 | 16 | def forward(self, x): 17 | x_avg = torch.mean(x, dim=1, keepdim=True) 18 | x_max, _ = torch.max(x, dim=1, keepdim=True) 19 | x2 = torch.cat([x_avg, x_max], dim=1) 20 | sattn = self.sa(x2) 21 | return sattn 22 | 23 | 24 | class ChannelAttention(nn.Module): 25 | def __init__(self, dim, reduction=8): 26 | super(ChannelAttention, self).__init__() 27 | self.gap = nn.AdaptiveAvgPool2d(1) 28 | self.ca = nn.Sequential( 29 | nn.Conv2d(dim, dim // reduction, 1, padding=0, bias=True), 30 | nn.ReLU(inplace=True), 31 | nn.Conv2d(dim // reduction, dim, 1, padding=0, bias=True), 32 | ) 33 | 34 | def forward(self, x): 35 | x_gap = self.gap(x) 36 | cattn = self.ca(x_gap) 37 | return cattn 38 | 39 | 40 | class PixelAttention(nn.Module): 41 | def __init__(self, dim): 42 | super(PixelAttention, self).__init__() 43 | self.pa2 = nn.Conv2d(2 * dim, dim, 7, padding=3, padding_mode='reflect', groups=dim, bias=True) 44 | self.sigmoid = nn.Sigmoid() 45 | 46 | def forward(self, x, pattn1): 47 | B, C, H, W = x.shape 48 | x = x.unsqueeze(dim=2) # B, C, 1, H, W 49 | pattn1 = pattn1.unsqueeze(dim=2) # B, C, 1, H, W 50 | x2 = torch.cat([x, pattn1], dim=2) # B, C, 2, H, W 51 | x2 = Rearrange('b c t h w -> b (c t) h w')(x2) 52 | pattn2 = self.pa2(x2) 53 | pattn2 = self.sigmoid(pattn2) 54 | return pattn2 55 | 56 | 57 | class CGAFusion(nn.Module): 58 | def __init__(self, dim, reduction=8): 59 | super(CGAFusion, self).__init__() 60 | self.sa = SpatialAttention() 61 | self.ca = ChannelAttention(dim, reduction) 62 | self.pa = PixelAttention(dim) 63 | self.conv = nn.Conv2d(dim, dim, 1, bias=True) 64 | self.sigmoid = nn.Sigmoid() 65 | 66 | def forward(self, x, y): 67 | initial = x + y 68 | cattn = self.ca(initial) 69 | sattn = self.sa(initial) 70 | pattn1 = sattn + cattn 71 | pattn2 = self.sigmoid(self.pa(initial, pattn1)) 72 | result = initial + pattn2 * x + (1 - pattn2) * y 73 | result = self.conv(result) 74 | return result 75 | 76 | 77 | # 特征融合 78 | if __name__ == '__main__': 79 | block = CGAFusion(32) 80 | input1 = torch.rand(3, 32, 64, 64) # 输入 N C H W 81 | input2 = torch.rand(3, 32, 64, 64) 82 | output = block(input1, input2) 83 | print(output.size()) 84 | -------------------------------------------------------------------------------- /注意力/(CVPR 2024)SHSA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # 论文:SHViT: Single-Head Vision Transformer with Memory Efficient Macro Design, CVPR 2024 4 | # 论文地址:https://arxiv.org/pdf/2401.16456 5 | # Github地址:https://github.com/ysj9909/SHViT 6 | # 全网最全100➕即插即用模块GitHub地址:https://github.com/ai-dawang/PlugNPlay-Modules 7 | class GroupNorm(torch.nn.GroupNorm): 8 | """ 9 | Group Normalization with 1 group. 
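With a single group, normalization runs over all of (C, H, W) per sample, so this behaves like a LayerNorm applied to the whole feature map.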
10 | Input: tensor in shape [B, C, H, W] 11 | """ 12 | def __init__(self, num_channels, **kwargs): 13 | super().__init__(1, num_channels, **kwargs) 14 | 15 | 16 | class Conv2d_BN(torch.nn.Sequential): 17 | def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, 18 | groups=1, bn_weight_init=1): 19 | super().__init__() 20 | self.add_module('c', torch.nn.Conv2d( 21 | a, b, ks, stride, pad, dilation, groups, bias=False)) 22 | self.add_module('bn', torch.nn.BatchNorm2d(b)) 23 | torch.nn.init.constant_(self.bn.weight, bn_weight_init) 24 | torch.nn.init.constant_(self.bn.bias, 0) 25 | 26 | @torch.no_grad() 27 | def fuse(self): 28 | c, bn = self._modules.values() 29 | w = bn.weight / (bn.running_var + bn.eps)**0.5 30 | w = c.weight * w[:, None, None, None] 31 | b = bn.bias - bn.running_mean * bn.weight / \ 32 | (bn.running_var + bn.eps)**0.5 33 | m = torch.nn.Conv2d(w.size(1) * self.c.groups, w.size( 34 | 0), w.shape[2:], stride=self.c.stride, padding=self.c.padding, dilation=self.c.dilation, groups=self.c.groups, 35 | device=c.weight.device) 36 | m.weight.data.copy_(w) 37 | m.bias.data.copy_(b) 38 | return m 39 | 40 | class SHSA(torch.nn.Module): 41 | """Single-Head Self-Attention""" 42 | 43 | def __init__(self, dim, qk_dim=16, pdim=32): 44 | super().__init__() 45 | self.scale = qk_dim ** -0.5 46 | self.qk_dim = qk_dim 47 | self.dim = dim 48 | self.pdim = pdim 49 | 50 | self.pre_norm = GroupNorm(pdim) 51 | 52 | self.qkv = Conv2d_BN(pdim, qk_dim * 2 + pdim) 53 | self.proj = torch.nn.Sequential(torch.nn.ReLU(), Conv2d_BN( 54 | dim, dim, bn_weight_init=0)) 55 | 56 | def forward(self, x): 57 | B, C, H, W = x.shape 58 | x1, x2 = torch.split(x, [self.pdim, self.dim - self.pdim], dim=1) 59 | x1 = self.pre_norm(x1) 60 | qkv = self.qkv(x1) 61 | q, k, v = qkv.split([self.qk_dim, self.qk_dim, self.pdim], dim=1) 62 | q, k, v = q.flatten(2), k.flatten(2), v.flatten(2) 63 | 64 | attn = (q.transpose(-2, -1) @ k) * self.scale 65 | attn = attn.softmax(dim=-1) 66 | x1 = (v @ attn.transpose(-2, -1)).reshape(B, self.pdim, H, W) 67 | x = self.proj(torch.cat([x1, x2], dim=1)) 68 | 69 | return x 70 | 71 | 72 | if __name__ == '__main__': 73 | 74 | 75 | block = SHSA(64) #输入 C 76 | 77 | input = torch.randn(1, 64, 32, 32) # 输入 B C H W 78 | 79 | # Print input shape 80 | print(input.size()) 81 | 82 | # Forward pass through the SHSA module 83 | output = block(input) 84 | 85 | # Print output shape 86 | print(output.size()) 87 | -------------------------------------------------------------------------------- /MixStructure.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | #论文地址:https://arxiv.org/abs/2305.17654 4 | #论文:Mix Structure Block contains multi-scale parallel large convolution kernel module and enhanced parallel attention module 5 | 6 | class MixStructureBlock(nn.Module): 7 | def __init__(self, dim): 8 | super().__init__() 9 | 10 | self.norm1 = nn.BatchNorm2d(dim) 11 | self.norm2 = nn.BatchNorm2d(dim) 12 | 13 | self.conv1 = nn.Conv2d(dim, dim, kernel_size=1) 14 | self.conv2 = nn.Conv2d(dim, dim, kernel_size=5, padding=2, padding_mode='reflect') 15 | self.conv3_19 = nn.Conv2d(dim, dim, kernel_size=7, padding=9, groups=dim, dilation=3, padding_mode='reflect') 16 | self.conv3_13 = nn.Conv2d(dim, dim, kernel_size=5, padding=6, groups=dim, dilation=3, padding_mode='reflect') 17 | self.conv3_7 = nn.Conv2d(dim, dim, kernel_size=3, padding=3, groups=dim, dilation=3, padding_mode='reflect') 18 | 19 | # Simple Pixel Attention 20 | self.Wv = 
nn.Sequential( 21 | nn.Conv2d(dim, dim, 1), 22 | nn.Conv2d(dim, dim, kernel_size=3, padding=3 // 2, groups=dim, padding_mode='reflect') 23 | ) 24 | self.Wg = nn.Sequential( 25 | nn.AdaptiveAvgPool2d(1), 26 | nn.Conv2d(dim, dim, 1), 27 | nn.Sigmoid() 28 | ) 29 | 30 | # Channel Attention 31 | self.ca = nn.Sequential( 32 | nn.AdaptiveAvgPool2d(1), 33 | nn.Conv2d(dim, dim, 1, padding=0, bias=True), 34 | nn.GELU(), 35 | # nn.ReLU(True), 36 | nn.Conv2d(dim, dim, 1, padding=0, bias=True), 37 | nn.Sigmoid() 38 | ) 39 | 40 | # Pixel Attention 41 | self.pa = nn.Sequential( 42 | nn.Conv2d(dim, dim // 8, 1, padding=0, bias=True), 43 | nn.GELU(), 44 | # nn.ReLU(True), 45 | nn.Conv2d(dim // 8, 1, 1, padding=0, bias=True), 46 | nn.Sigmoid() 47 | ) 48 | 49 | self.mlp = nn.Sequential( 50 | nn.Conv2d(dim * 3, dim * 4, 1), 51 | nn.GELU(), 52 | # nn.ReLU(True), 53 | nn.Conv2d(dim * 4, dim, 1) 54 | ) 55 | self.mlp2 = nn.Sequential( 56 | nn.Conv2d(dim * 3, dim * 4, 1), 57 | nn.GELU(), 58 | # nn.ReLU(True), 59 | nn.Conv2d(dim * 4, dim, 1) 60 | ) 61 | 62 | def forward(self, x): 63 | identity = x 64 | x = self.norm1(x) 65 | x = self.conv1(x) 66 | x = self.conv2(x) 67 | x = torch.cat([self.conv3_19(x), self.conv3_13(x), self.conv3_7(x)], dim=1) 68 | x = self.mlp(x) 69 | x = identity + x 70 | 71 | identity = x 72 | x = self.norm2(x) 73 | x = torch.cat([self.Wv(x) * self.Wg(x), self.ca(x) * x, self.pa(x) * x], dim=1) 74 | x = self.mlp2(x) 75 | x = identity + x 76 | return x 77 | 78 | 79 | if __name__ == '__main__': 80 | 81 | 82 | block = MixStructureBlock(dim=64) 83 | 84 | 85 | input = torch.rand(1, 64, 128, 128) # B C H W 86 | 87 | 88 | output = block(input) 89 | 90 | print(input.size()) 91 | print(output.size()) 92 | -------------------------------------------------------------------------------- /CPAM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | #论文:ASF-YOLO: A Novel YOLO Model with Attentional Scale Sequence Fusion for Cell Instance Segmentation(IMAVIS) 5 | #论文地址:https://arxiv.org/abs/2312.06458 6 | 7 | class channel_att(nn.Module): 8 | def __init__(self, channel, b=1, gamma=2): 9 | super(channel_att, self).__init__() 10 | kernel_size = int(abs((math.log(channel, 2) + b) / gamma)) 11 | kernel_size = kernel_size if kernel_size % 2 else kernel_size + 1 12 | 13 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 14 | self.conv = nn.Conv1d(1, 1, kernel_size=kernel_size, padding=(kernel_size - 1) // 2, bias=False) 15 | self.sigmoid = nn.Sigmoid() 16 | 17 | def forward(self, x): 18 | y = self.avg_pool(x) # 自适应平均池化 19 | y = y.squeeze(-1) 20 | y = y.transpose(-1, -2) 21 | y = self.conv(y).transpose(-1, -2).unsqueeze(-1) # 1D卷积 22 | y = self.sigmoid(y) # Sigmoid激活 23 | return x * y.expand_as(x) # 通道逐元素相乘 24 | 25 | 26 | class local_att(nn.Module): 27 | def __init__(self, channel, reduction=16): 28 | super(local_att, self).__init__() 29 | 30 | self.conv_1x1 = nn.Conv2d(in_channels=channel, out_channels=channel // reduction, kernel_size=1, stride=1, 31 | bias=False) 32 | 33 | self.relu = nn.ReLU() 34 | self.bn = nn.BatchNorm2d(channel // reduction) 35 | 36 | self.F_h = nn.Conv2d(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, 37 | bias=False) 38 | self.F_w = nn.Conv2d(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, 39 | bias=False) 40 | 41 | self.sigmoid_h = nn.Sigmoid() 42 | self.sigmoid_w = nn.Sigmoid() 43 | 44 | def forward(self, x): 45 | _, _, h, w = x.size() 46 | 
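# Coordinate-style attention: pool along W to get a (B, C, H, 1) descriptor (transposed to (B, C, 1, H))
# and along H to get (B, C, 1, W); both pass through a shared 1x1 bottleneck, are split back,
# and become per-direction gates s_h / s_w that rescale the input.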
47 | x_h = torch.mean(x, dim=3, keepdim=True).permute(0, 1, 3, 2) 48 | x_w = torch.mean(x, dim=2, keepdim=True) 49 | 50 | x_cat_conv_relu = self.relu(self.bn(self.conv_1x1(torch.cat((x_h, x_w), 3)))) 51 | 52 | x_cat_conv_split_h, x_cat_conv_split_w = x_cat_conv_relu.split([h, w], 3) 53 | 54 | s_h = self.sigmoid_h(self.F_h(x_cat_conv_split_h.permute(0, 1, 3, 2))) 55 | s_w = self.sigmoid_w(self.F_w(x_cat_conv_split_w)) 56 | 57 | out = x * s_h.expand_as(x) * s_w.expand_as(x) 58 | return out 59 | 60 | #Channel and Position Attention Mechanism (CPAM) 61 | class CPAM(nn.Module): 62 | def __init__(self, ch): 63 | super().__init__() 64 | self.channel_att = channel_att(ch) 65 | self.local_att = local_att(ch) 66 | def forward(self, x): 67 | input1,input2 = x[0],x[1] 68 | input1 = self.channel_att(input1) 69 | x = input1 + input2 70 | x = self.local_att(x) 71 | return x 72 | 73 | 74 | if __name__ == '__main__': 75 | 76 | block = CPAM(128) 77 | 78 | input1 = torch.randn(1, 128, 32, 32) # B C H W 79 | input2 = torch.randn(1, 128, 32, 32) 80 | inputs = [input1, input2] 81 | output = block(inputs) 82 | 83 | 84 | print(input1.size()) 85 | print(input2.size()) 86 | print(output.size()) -------------------------------------------------------------------------------- /(arXiv 2020 ) SSAN.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/2005.10463 7 | # 论文:Simplified Self-Attention for Transformer-Based end-to-end Speech Recognition 8 | 9 | 10 | class SimplifiedScaledDotProductAttention(nn.Module): 11 | ''' 12 | Scaled dot-product attention 13 | ''' 14 | 15 | def __init__(self, d_model, h,dropout=.1): 16 | ''' 17 | :param d_model: Output dimensionality of the model 18 | :param d_k: Dimensionality of queries and keys 19 | :param d_v: Dimensionality of values 20 | :param h: Number of heads 21 | ''' 22 | super(SimplifiedScaledDotProductAttention, self).__init__() 23 | 24 | self.d_model = d_model 25 | self.d_k = d_model//h 26 | self.d_v = d_model//h 27 | self.h = h 28 | 29 | self.fc_o = nn.Linear(h * self.d_v, d_model) 30 | self.dropout=nn.Dropout(dropout) 31 | 32 | 33 | 34 | self.init_weights() 35 | 36 | 37 | def init_weights(self): 38 | for m in self.modules(): 39 | if isinstance(m, nn.Conv2d): 40 | init.kaiming_normal_(m.weight, mode='fan_out') 41 | if m.bias is not None: 42 | init.constant_(m.bias, 0) 43 | elif isinstance(m, nn.BatchNorm2d): 44 | init.constant_(m.weight, 1) 45 | init.constant_(m.bias, 0) 46 | elif isinstance(m, nn.Linear): 47 | init.normal_(m.weight, std=0.001) 48 | if m.bias is not None: 49 | init.constant_(m.bias, 0) 50 | 51 | def forward(self, queries, keys, values, attention_mask=None, attention_weights=None): 52 | ''' 53 | Computes 54 | :param queries: Queries (b_s, nq, d_model) 55 | :param keys: Keys (b_s, nk, d_model) 56 | :param values: Values (b_s, nk, d_model) 57 | :param attention_mask: Mask over attention values (b_s, h, nq, nk). True indicates masking. 58 | :param attention_weights: Multiplicative weights for attention values (b_s, h, nq, nk). 
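Note: this "simplified" variant drops the separate Q/K/V projection layers and splits the raw inputs into heads directly; only the output projection fc_o is learned.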
59 | :return: 60 | ''' 61 | b_s, nq = queries.shape[:2] 62 | nk = keys.shape[1] 63 | 64 | q = queries.view(b_s, nq, self.h, self.d_k).permute(0, 2, 1, 3) # (b_s, h, nq, d_k) 65 | k = keys.view(b_s, nk, self.h, self.d_k).permute(0, 2, 3, 1) # (b_s, h, d_k, nk) 66 | v = values.view(b_s, nk, self.h, self.d_v).permute(0, 2, 1, 3) # (b_s, h, nk, d_v) 67 | 68 | att = torch.matmul(q, k) / np.sqrt(self.d_k) # (b_s, h, nq, nk) 69 | if attention_weights is not None: 70 | att = att * attention_weights 71 | if attention_mask is not None: 72 | att = att.masked_fill(attention_mask, -np.inf) 73 | att = torch.softmax(att, -1) 74 | att=self.dropout(att) 75 | 76 | out = torch.matmul(att, v).permute(0, 2, 1, 3).contiguous().view(b_s, nq, self.h * self.d_v) # (b_s, nq, h*d_v) 77 | out = self.fc_o(out) # (b_s, nq, d_model) 78 | return out 79 | 80 | 81 | if __name__ == '__main__': 82 | input=torch.randn(50,49,512) 83 | block = SimplifiedScaledDotProductAttention(d_model=512, h=8) 84 | output=block(input,input,input) 85 | print(output.shape) 86 | 87 | -------------------------------------------------------------------------------- /3D/(CVPR 2022)DFE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | # 论文:MonoDTR: Monocular 3D Object Detection with Depth-Aware Transformer 6 | # 论文地址:https://arxiv.org/pdf/2203.13310 7 | class DepthAwareFE(nn.Module): 8 | def __init__(self, output_channel_num): 9 | super(DepthAwareFE, self).__init__() 10 | self.output_channel_num = output_channel_num 11 | self.depth_output = nn.Sequential( 12 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True), 13 | nn.Conv2d(self.output_channel_num, int(self.output_channel_num / 2), 3, padding=1), 14 | nn.BatchNorm2d(int(self.output_channel_num / 2)), 15 | nn.ReLU(), 16 | nn.Conv2d(int(self.output_channel_num / 2), 96, 1), 17 | ) 18 | self.depth_down = nn.Conv2d(96, 12, 3, stride=1, padding=1, groups=12) 19 | self.acf = dfe_module(256, 256) 20 | 21 | def forward(self, x): 22 | depth = self.depth_output(x) 23 | N, C, H, W = x.shape 24 | depth_guide = F.interpolate(depth, size=x.size()[2:], mode='bilinear', align_corners=False) 25 | depth_guide = self.depth_down(depth_guide) 26 | x = x + self.acf(x, depth_guide) 27 | 28 | return depth, depth_guide, x 29 | 30 | 31 | class dfe_module(nn.Module): 32 | 33 | def __init__(self, in_channels, out_channels): 34 | super(dfe_module, self).__init__() 35 | self.softmax = nn.Softmax(dim=-1) 36 | self.conv1 = nn.Sequential(nn.Conv2d(in_channels, out_channels, 1, bias=False), 37 | nn.BatchNorm2d(out_channels), 38 | nn.ReLU(True), 39 | nn.Dropout2d(0.2, False)) 40 | self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=1, stride=1, padding=0) 41 | 42 | def forward(self, feat_ffm, coarse_x): 43 | N, D, H, W = coarse_x.size() 44 | 45 | # depth prototype 46 | feat_ffm = self.conv1(feat_ffm) 47 | _, C, _, _ = feat_ffm.size() 48 | 49 | proj_query = coarse_x.view(N, D, -1) 50 | proj_key = feat_ffm.view(N, C, -1).permute(0, 2, 1) 51 | energy = torch.bmm(proj_query, proj_key) 52 | energy_new = torch.max(energy, -1, keepdim=True)[0].expand_as(energy) - energy 53 | attention = self.softmax(energy_new) 54 | 55 | # depth enhancement 56 | attention = attention.permute(0, 2, 1) 57 | proj_value = coarse_x.view(N, D, -1) 58 | out = torch.bmm(attention, proj_value) 59 | out = out.view(N, C, H, W) 60 | out = self.conv2(out) 61 | 62 | return out 63 | 64 | if __name__ == '__main__': 65 | 66 | 
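# Quick sanity check for dfe_module on its own: build a feature map plus a coarse depth distribution,
# attend over the D depth bins, and confirm the output keeps the feature-map shape.
# (DepthAwareFE above wires this same module behind its depth head via self.acf.)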
# 假定输入特征图的尺寸为 [N, C, H, W] = [1, 256, 64, 64] 67 | # 假定粗糙深度图的尺寸为 [N, D, H, W] = [1, 12, 64, 64] 68 | 69 | N, C, H, W = 1, 256, 64, 64 70 | D = 12 71 | 72 | # 初始化输入特征图和粗糙深度图 73 | feat_ffm = torch.rand(N, C, H, W) # 输入特征图 74 | coarse_x = torch.rand(N, D, H, W) # 粗糙深度图 75 | 76 | # 初始化dfe_module 77 | dfe = dfe_module(in_channels=C, out_channels=C) # 使用相同的通道数作为示例 78 | 79 | # 前向传播 80 | output = dfe(feat_ffm, coarse_x) 81 | 82 | # 打印输入和输出尺寸 83 | print("Input feat_ffm size:", feat_ffm.size()) 84 | print(" Output size:", output.size()) 85 | -------------------------------------------------------------------------------- /LMFLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | #论文:LMFLOSS: A HYBRID LOSS FOR IMBALANCED MEDICAL IMAGE CLASSIFICATION 6 | 7 | 8 | class FocalLoss(nn.Module): 9 | 10 | def __init__(self, alpha, gamma=2): 11 | super().__init__() 12 | self.alpha = alpha 13 | self.gamma = gamma 14 | 15 | def forward(self, output, target): 16 | num_classes = output.size(1) 17 | assert len(self.alpha) == num_classes, \ 18 | 'Length of weight tensor must match the number of classes' 19 | logp = F.cross_entropy(output, target, self.alpha) 20 | p = torch.exp(-logp) 21 | focal_loss = (1 - p) ** self.gamma * logp 22 | 23 | return torch.mean(focal_loss) 24 | 25 | 26 | class LDAMLoss(nn.Module): 27 | 28 | def __init__(self, cls_num_list, max_m=0.5, weight=None, s=30): 29 | """ 30 | max_m: The appropriate value for max_m depends on the specific dataset and the severity of the class imbalance. 31 | You can start with a small value and gradually increase it to observe the impact on the model's performance. 32 | If the model struggles with class separation or experiences underfitting, increasing max_m might help. However, 33 | be cautious not to set it too high, as it can cause overfitting or make the model too conservative. 34 | 35 | s: The choice of s depends on the desired scale of the logits and the specific requirements of your problem. 36 | It can be used to adjust the balance between the margin and the original logits. A larger s value amplifies 37 | the impact of the logits and can be useful when dealing with highly imbalanced datasets. 38 | You can experiment with different values of s to find the one that works best for your dataset and model. 
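A minimal usage sketch (hypothetical class counts; a CUDA device is assumed because the margins
and the one-hot index are built with torch.cuda.FloatTensor):

    criterion = LDAMLoss(cls_num_list=[500, 50, 5], max_m=0.5, s=30)
    loss = criterion(logits, targets)   # logits: (B, 3) on GPU, targets: (B,) long

Margins follow the LDAM rule m_j ∝ n_j^(-1/4), rescaled so the rarest class receives margin max_m.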
39 | 40 | """ 41 | super(LDAMLoss, self).__init__() 42 | m_list = 1.0 / np.sqrt(np.sqrt(cls_num_list)) 43 | m_list = m_list * (max_m / np.max(m_list)) 44 | m_list = torch.cuda.FloatTensor(m_list) 45 | self.m_list = m_list 46 | assert s > 0 47 | self.s = s 48 | self.weight = weight 49 | 50 | def forward(self, x, target): 51 | index = torch.zeros_like(x, dtype=torch.uint8) 52 | index.scatter_(1, target.data.view(-1, 1), 1) 53 | 54 | index_float = index.type(torch.cuda.FloatTensor) 55 | batch_m = torch.matmul(self.m_list[None, :], index_float.transpose(0, 1)) 56 | batch_m = batch_m.view((-1, 1)) 57 | x_m = x - batch_m 58 | 59 | output = torch.where(index, x_m, x) 60 | return F.cross_entropy(self.s * output, target, weight=self.weight) 61 | 62 | 63 | class LMFLoss(nn.Module): 64 | def __init__(self, cls_num_list, weight, alpha=1, beta=1, gamma=2, max_m=0.5, s=30): 65 | super().__init__() 66 | self.focal_loss = FocalLoss(weight, gamma) 67 | self.ldam_loss = LDAMLoss(cls_num_list, max_m, weight, s) 68 | self.alpha = alpha 69 | self.beta = beta 70 | 71 | def forward(self, output, target): 72 | focal_loss_output = self.focal_loss(output, target) 73 | ldam_loss_output = self.ldam_loss(output, target) 74 | total_loss = self.alpha * focal_loss_output + self.beta * ldam_loss_output 75 | return total_loss -------------------------------------------------------------------------------- /(Elsevier 2024)CF_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | # 论文:CF-Loss: Clinically-relevant feature optimised loss function for retinal multi-class vessel segmentation and vascular feature measurement 5 | def encode_mask_3d(ground_truth, num_classes=4): 6 | batch_size, _, depth, height, width = ground_truth.size() 7 | one_hot = torch.zeros((batch_size, num_classes, depth, height, width), device=ground_truth.device) 8 | ground_truth = ground_truth.long() 9 | one_hot = one_hot.scatter_(1, ground_truth, 1) 10 | return one_hot 11 | 12 | class CF_Loss_3D(nn.Module): 13 | def __init__(self, img_depth, beta, alpha, gamma): 14 | super(CF_Loss_3D, self).__init__() 15 | self.beta = beta 16 | self.alpha = alpha 17 | self.gamma = gamma 18 | self.img_depth = img_depth 19 | self.CE = nn.CrossEntropyLoss() 20 | self.p = torch.tensor([img_depth], dtype=torch.float, device='cuda') 21 | self.n = torch.log(self.p) / torch.log(torch.tensor([2.0], device='cuda')) 22 | self.n = torch.floor(self.n) 23 | self.sizes = 2 ** torch.arange(self.n.item(), 1, -1, device='cuda').to(dtype=torch.int) 24 | 25 | def get_count_3d(self, sizes, p, masks_pred_softmax): 26 | counts = torch.zeros((masks_pred_softmax.shape[0], len(sizes), 2), device='cuda') 27 | index = 0 28 | 29 | for size in sizes: 30 | # 对3D数据使用3D池化 31 | stride = (1, size, size) # 保持深度方向不变 32 | pool = nn.AvgPool3d(kernel_size=(1, size, size), stride=stride) 33 | 34 | S = pool(masks_pred_softmax) 35 | S = S * ((S > 0) & (S < (size * size))) 36 | counts[..., index, 0] = (S[:, 0, ...] - S[:, 2, ...]).abs().sum() / (S[:, 2, ...] > 0).sum() 37 | counts[..., index, 1] = (S[:, 1, ...] - S[:, 3, ...]).abs().sum() / (S[:, 3, ...] 
> 0).sum() 38 | 39 | index += 1 40 | 41 | return counts 42 | 43 | def forward(self, prediction, ground_truth): 44 | # 假设ground_truth已经是适当格式 45 | ground_truth_encoded = encode_mask_3d(ground_truth) # 需要定义适用于3D数据的encode_mask_3d 46 | prediction_softmax = F.softmax(prediction, dim=1) 47 | 48 | loss_CE = self.CE(prediction_softmax, ground_truth.squeeze(1).long()) 49 | 50 | Loss_vd = (torch.abs(prediction_softmax[:, 1, ...].sum() - ground_truth_encoded[:, 1, ...].sum()) + torch.abs(prediction_softmax[:, 2, ...].sum() - ground_truth_encoded[:, 2, ...].sum())) / (prediction_softmax.shape[0] * prediction_softmax.shape[2] * prediction_softmax.shape[3] * prediction_softmax.shape[4]) 51 | 52 | prediction_softmax = prediction_softmax[:, 1:3, ...] 53 | ground_truth_encoded = ground_truth_encoded[:, 1:3, ...] 54 | combined = torch.cat((prediction_softmax, ground_truth_encoded), 1) 55 | counts = self.get_count_3d(self.sizes, self.p, combined) 56 | 57 | artery_ = torch.sqrt(torch.sum(self.sizes * ((counts[..., 0]) ** 2))) 58 | vein_ = torch.sqrt(torch.sum(self.sizes * ((counts[..., 1]) ** 2))) 59 | size_t = torch.sqrt(torch.sum(self.sizes ** 2)) 60 | loss_FD = (artery_ + vein_) / size_t / prediction_softmax.shape[0] 61 | 62 | loss_value = self.beta * loss_CE + self.alpha * loss_FD + self.gamma * Loss_vd 63 | 64 | return loss_value 65 | 66 | 67 | -------------------------------------------------------------------------------- /(arXiv 2023) ScaledDotProductAttention.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import init 5 | 6 | # 论文地址:https://arxiv.org/pdf/1706.03762 7 | # 论文:Attention Is All You Need 8 | 9 | 10 | class ScaledDotProductAttention(nn.Module): 11 | ''' 12 | Scaled dot-product attention 13 | ''' 14 | 15 | def __init__(self, d_model, d_k, d_v, h,dropout=.1): 16 | ''' 17 | :param d_model: Output dimensionality of the model 18 | :param d_k: Dimensionality of queries and keys 19 | :param d_v: Dimensionality of values 20 | :param h: Number of heads 21 | ''' 22 | super(ScaledDotProductAttention, self).__init__() 23 | self.fc_q = nn.Linear(d_model, h * d_k) 24 | self.fc_k = nn.Linear(d_model, h * d_k) 25 | self.fc_v = nn.Linear(d_model, h * d_v) 26 | self.fc_o = nn.Linear(h * d_v, d_model) 27 | self.dropout=nn.Dropout(dropout) 28 | 29 | self.d_model = d_model 30 | self.d_k = d_k 31 | self.d_v = d_v 32 | self.h = h 33 | 34 | self.init_weights() 35 | 36 | 37 | def init_weights(self): 38 | for m in self.modules(): 39 | if isinstance(m, nn.Conv2d): 40 | init.kaiming_normal_(m.weight, mode='fan_out') 41 | if m.bias is not None: 42 | init.constant_(m.bias, 0) 43 | elif isinstance(m, nn.BatchNorm2d): 44 | init.constant_(m.weight, 1) 45 | init.constant_(m.bias, 0) 46 | elif isinstance(m, nn.Linear): 47 | init.normal_(m.weight, std=0.001) 48 | if m.bias is not None: 49 | init.constant_(m.bias, 0) 50 | 51 | def forward(self, queries, keys, values, attention_mask=None, attention_weights=None): 52 | ''' 53 | Computes 54 | :param queries: Queries (b_s, nq, d_model) 55 | :param keys: Keys (b_s, nk, d_model) 56 | :param values: Values (b_s, nk, d_model) 57 | :param attention_mask: Mask over attention values (b_s, h, nq, nk). True indicates masking. 58 | :param attention_weights: Multiplicative weights for attention values (b_s, h, nq, nk). 
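Internally, q/k/v are first projected to h heads of width d_k (d_v for values), scores are scaled by 1/sqrt(d_k), and the concatenated heads are mapped back to d_model by fc_o.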
59 | :return: 60 | ''' 61 | b_s, nq = queries.shape[:2] 62 | nk = keys.shape[1] 63 | 64 | q = self.fc_q(queries).view(b_s, nq, self.h, self.d_k).permute(0, 2, 1, 3) # (b_s, h, nq, d_k) 65 | k = self.fc_k(keys).view(b_s, nk, self.h, self.d_k).permute(0, 2, 3, 1) # (b_s, h, d_k, nk) 66 | v = self.fc_v(values).view(b_s, nk, self.h, self.d_v).permute(0, 2, 1, 3) # (b_s, h, nk, d_v) 67 | 68 | att = torch.matmul(q, k) / np.sqrt(self.d_k) # (b_s, h, nq, nk) 69 | if attention_weights is not None: 70 | att = att * attention_weights 71 | if attention_mask is not None: 72 | att = att.masked_fill(attention_mask, -np.inf) 73 | att = torch.softmax(att, -1) 74 | att=self.dropout(att) 75 | 76 | out = torch.matmul(att, v).permute(0, 2, 1, 3).contiguous().view(b_s, nq, self.h * self.d_v) # (b_s, nq, h*d_v) 77 | out = self.fc_o(out) # (b_s, nq, d_model) 78 | return out 79 | 80 | 81 | if __name__ == '__main__': 82 | input=torch.randn(50,49,512) 83 | sa = ScaledDotProductAttention(d_model=512, d_k=512, d_v=512, h=8) 84 | output=sa(input,input,input) 85 | print(output.shape) 86 | 87 | -------------------------------------------------------------------------------- /(CVPR 2019) DCNv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | from torchvision.ops import deform_conv2d 5 | 6 | # 论文地址:https://openaccess.thecvf.com/content_CVPR_2019/papers/Zhu_Deformable_ConvNets_V2_More_Deformable_Better_Results_CVPR_2019_paper.pdf 7 | # 论文:Deformable ConvNets v2: More Deformable, Better Results 8 | 9 | 10 | # 自动填充padding的函数 11 | def autopad(kernel_size, padding): 12 | # 默认返回的padding让卷积层输入输出大小相同(保持原大小) 13 | return padding if padding is not None else kernel_size // 2 14 | 15 | class DCNv2(nn.Module): 16 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, 17 | padding=1, groups=1, act=True, dilation=1, deformable_groups=1): 18 | super(DCNv2, self).__init__() 19 | 20 | self.in_channels = in_channels 21 | self.out_channels = out_channels 22 | self.kernel_size = (kernel_size, kernel_size) 23 | self.stride = (stride, stride) 24 | self.padding = (autopad(kernel_size, padding), autopad(kernel_size, padding)) 25 | self.dilation = (dilation, dilation) 26 | self.groups = groups 27 | self.deformable_groups = deformable_groups 28 | 29 | self.weight = nn.Parameter( 30 | torch.empty(out_channels, in_channels, *self.kernel_size) 31 | ) 32 | self.bias = nn.Parameter(torch.empty(out_channels)) 33 | 34 | out_channels_offset_mask = (self.deformable_groups * 3 * 35 | self.kernel_size[0] * self.kernel_size[1]) 36 | self.conv_offset_mask = nn.Conv2d( 37 | self.in_channels, 38 | out_channels_offset_mask, 39 | kernel_size=self.kernel_size, 40 | stride=self.stride, 41 | padding=self.padding, 42 | bias=True, 43 | ) 44 | self.bn = nn.BatchNorm2d(out_channels) 45 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 46 | self.reset_parameters() 47 | 48 | def forward(self, x): 49 | offset_mask = self.conv_offset_mask(x) 50 | o1, o2, mask = torch.chunk(offset_mask, 3, dim=1) 51 | offset = torch.cat((o1, o2), dim=1) 52 | mask = torch.sigmoid(mask) 53 | x = torch.ops.torchvision.deform_conv2d( 54 | x, 55 | self.weight, 56 | offset, 57 | mask, 58 | self.bias, 59 | self.stride[0], self.stride[1], 60 | self.padding[0], self.padding[1], 61 | self.dilation[0], self.dilation[1], 62 | self.groups, 63 | self.deformable_groups, 64 | True 65 | ) 66 | x = self.bn(x) 67 | x = self.act(x) 68 | return x 69 | 70 
| def reset_parameters(self): 71 | n = self.in_channels 72 | for k in self.kernel_size: 73 | n *= k 74 | std = 1. / math.sqrt(n) 75 | self.weight.data.uniform_(-std, std) 76 | self.bias.data.zero_() 77 | self.conv_offset_mask.weight.data.zero_() 78 | self.conv_offset_mask.bias.data.zero_() 79 | 80 | 81 | def main(): 82 | input_tensor = torch.randn(4, 3, 64, 64) 83 | block = DCNv2(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1) 84 | output_tensor = block(input_tensor) 85 | print(output_tensor.size()) 86 | 87 | 88 | if __name__ == "__main__": 89 | main() -------------------------------------------------------------------------------- /FEM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | #论文:FFCA-YOLO for Small Object Detection in Remote Sensing Images[TGRS] 5 | #论文地址:https://ieeexplore.ieee.org/document/10423050 6 | 7 | class FEM(nn.Module): 8 | def __init__(self, in_planes, out_planes, stride=1, scale=0.1, map_reduce=8): 9 | super(FEM, self).__init__() 10 | self.scale = scale 11 | self.out_channels = out_planes 12 | inter_planes = in_planes // map_reduce 13 | self.branch0 = nn.Sequential( 14 | BasicConv(in_planes, 2 * inter_planes, kernel_size=1, stride=stride), 15 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=1, relu=False) 16 | ) 17 | self.branch1 = nn.Sequential( 18 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), 19 | BasicConv(inter_planes, (inter_planes // 2) * 3, kernel_size=(1, 3), stride=stride, padding=(0, 1)), 20 | BasicConv((inter_planes // 2) * 3, 2 * inter_planes, kernel_size=(3, 1), stride=stride, padding=(1, 0)), 21 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=5, dilation=5, relu=False) 22 | ) 23 | self.branch2 = nn.Sequential( 24 | BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), 25 | BasicConv(inter_planes, (inter_planes // 2) * 3, kernel_size=(3, 1), stride=stride, padding=(1, 0)), 26 | BasicConv((inter_planes // 2) * 3, 2 * inter_planes, kernel_size=(1, 3), stride=stride, padding=(0, 1)), 27 | BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=5, dilation=5, relu=False) 28 | ) 29 | 30 | self.ConvLinear = BasicConv(6 * inter_planes, out_planes, kernel_size=1, stride=1, relu=False) 31 | self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False) 32 | self.relu = nn.ReLU(inplace=False) 33 | 34 | def forward(self, x): 35 | x0 = self.branch0(x) 36 | x1 = self.branch1(x) 37 | x2 = self.branch2(x) 38 | 39 | out = torch.cat((x0, x1, x2), 1) 40 | out = self.ConvLinear(out) 41 | short = self.shortcut(x) 42 | out = out * self.scale + short 43 | out = self.relu(out) 44 | 45 | return out 46 | 47 | class BasicConv(nn.Module): 48 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, 49 | bn=True, bias=False): 50 | super(BasicConv, self).__init__() 51 | self.out_channels = out_planes 52 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, 53 | dilation=dilation, groups=groups, bias=bias) 54 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None 55 | self.relu = nn.ReLU(inplace=True) if relu else None 56 | 57 | def forward(self, x): 58 | x = self.conv(x) 59 | if self.bn is not None: 60 | x = self.bn(x) 61 | if self.relu is not None: 62 | x = self.relu(x) 63 | return x 64 | 65 | 66 | if 
__name__ == '__main__': 67 | 68 | input = torch.randn(1, 64, 128, 128) 69 | block = FEM(in_planes=64, out_planes=64) 70 | print(input.size()) 71 | output = block(input) 72 | # 打印输出的形状 73 | print(output.size()) -------------------------------------------------------------------------------- /缝合代码示例/DilateFormer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class DilateAttention(nn.Module): 6 | "Implementation of Dilate-attention" 7 | 8 | def __init__(self, head_dim, qk_scale=None, attn_drop=0, kernel_size=3, dilation=1): 9 | super().__init__() 10 | self.head_dim = head_dim 11 | self.scale = qk_scale or head_dim ** -0.5 12 | self.kernel_size = kernel_size 13 | self.unfold = nn.Unfold(kernel_size, dilation, dilation * (kernel_size - 1) // 2, 1) 14 | self.attn_drop = nn.Dropout(attn_drop) 15 | 16 | def forward(self, q, k, v): 17 | # B, C//3, H, W 18 | B, d, H, W = q.shape 19 | q = q.reshape([B, d // self.head_dim, self.head_dim, 1, H * W]).permute(0, 1, 4, 3, 2) # B,h,N,1,d 20 | k = self.unfold(k).reshape( 21 | [B, d // self.head_dim, self.head_dim, self.kernel_size * self.kernel_size, H * W]).permute(0, 1, 4, 2, 22 | 3) # B,h,N,d,k*k 23 | attn = (q @ k) * self.scale # B,h,N,1,k*k 24 | attn = attn.softmax(dim=-1) 25 | attn = self.attn_drop(attn) 26 | v = self.unfold(v).reshape( 27 | [B, d // self.head_dim, self.head_dim, self.kernel_size * self.kernel_size, H * W]).permute(0, 1, 4, 3, 28 | 2) # B,h,N,k*k,d 29 | x = (attn @ v).transpose(1, 2).reshape(B, H, W, d) 30 | return x 31 | 32 | 33 | class MultiDilatelocalAttention(nn.Module): 34 | "Implementation of Dilate-attention" 35 | 36 | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, 37 | attn_drop=0., proj_drop=0., kernel_size=3, dilation=[2, 3]): 38 | super().__init__() 39 | self.dim = dim 40 | self.num_heads = num_heads 41 | head_dim = dim // num_heads 42 | self.dilation = dilation 43 | self.kernel_size = kernel_size 44 | self.scale = qk_scale or head_dim ** -0.5 45 | self.num_dilation = len(dilation) 46 | assert num_heads % self.num_dilation == 0, f"num_heads{num_heads} must be the times of num_dilation{self.num_dilation}!!" 
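# One 1x1 conv produces Q, K and V for every head; the heads are divided evenly across the dilation
# rates, and each group runs the sliding-window DilateAttention defined above with its own dilation.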
47 | self.qkv = nn.Conv2d(dim, dim * 3, 1, bias=qkv_bias) 48 | self.dilate_attention = nn.ModuleList( 49 | [DilateAttention(head_dim, qk_scale, attn_drop, kernel_size, dilation[i]) 50 | for i in range(self.num_dilation)]) 51 | self.proj = nn.Linear(dim, dim) 52 | self.proj_drop = nn.Dropout(proj_drop) 53 | 54 | def forward(self, x): 55 | B, H, W, C = x.shape 56 | x = x.permute(0, 3, 1, 2) # B, C, H, W 57 | qkv = self.qkv(x).reshape(B, 3, self.num_dilation, C // self.num_dilation, H, W).permute(2, 1, 0, 3, 4, 5) 58 | # num_dilation,3,B,C//num_dilation,H,W 59 | x = x.reshape(B, self.num_dilation, C // self.num_dilation, H, W).permute(1, 0, 3, 4, 2) 60 | # num_dilation, B, H, W, C//num_dilation 61 | for i in range(self.num_dilation): 62 | x[i] = self.dilate_attention[i](qkv[i][0], qkv[i][1], qkv[i][2]) # B, H, W,C//num_dilation 63 | x = x.permute(1, 2, 3, 0, 4).reshape(B, H, W, C) 64 | x = self.proj(x) 65 | x = self.proj_drop(x) 66 | return x 67 | 68 | 69 | # 输入 B H W C, 输出 B H W C 70 | if __name__ == "__main__": 71 | x = torch.rand([3, 64, 64, 32]) 72 | m = MultiDilatelocalAttention(32) 73 | y = m(x) 74 | print(y.shape) 75 | -------------------------------------------------------------------------------- /BFAM.py: -------------------------------------------------------------------------------- 1 | #论文:B2CNet: A Progressive Change Boundary-to-Center Refinement Network for Multitemporal Remote Sensing Images Change Detection 2 | #论文地址:https://ieeexplore.ieee.org/document/10547405 3 | import torch 4 | import torch.nn as nn 5 | 6 | #Simam: A simple, parameter-free attention module for convolutional neural networks (ICML 2021) 7 | class simam_module(torch.nn.Module): 8 | def __init__(self, e_lambda=1e-4): 9 | super(simam_module, self).__init__() 10 | 11 | self.activaton = nn.Sigmoid() 12 | self.e_lambda = e_lambda 13 | 14 | def forward(self, x): 15 | b, c, h, w = x.size() 16 | 17 | n = w * h - 1 18 | 19 | x_minus_mu_square = (x - x.mean(dim=[2, 3], keepdim=True)).pow(2) 20 | y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(dim=[2, 3], keepdim=True) / n + self.e_lambda)) + 0.5 21 | 22 | return x * self.activaton(y) 23 | 24 | #bitemporal feature aggregation module (BFAM) 25 | class BFAM(nn.Module): 26 | def __init__(self,inp,out): 27 | super(BFAM, self).__init__() 28 | 29 | self.pre_siam = simam_module() 30 | self.lat_siam = simam_module() 31 | 32 | 33 | out_1 = int(inp/2) 34 | 35 | self.conv_1 = nn.Conv2d(inp, out_1 , padding=1, kernel_size=3,groups=out_1, 36 | dilation=1) 37 | self.conv_2 = nn.Conv2d(inp, out_1, padding=2, kernel_size=3,groups=out_1, 38 | dilation=2) 39 | self.conv_3 = nn.Conv2d(inp, out_1, padding=3, kernel_size=3,groups=out_1, 40 | dilation=3) 41 | self.conv_4 = nn.Conv2d(inp, out_1, padding=4, kernel_size=3,groups=out_1, 42 | dilation=4) 43 | 44 | self.fuse = nn.Sequential( 45 | nn.Conv2d(out_1 * 4, out_1, kernel_size=1, padding=0), 46 | nn.BatchNorm2d(out_1), 47 | nn.ReLU(inplace=True) 48 | ) 49 | 50 | self.fuse_siam = simam_module() 51 | 52 | self.out = nn.Sequential( 53 | nn.Conv2d(out_1, out, kernel_size=3, padding=1), 54 | nn.BatchNorm2d(out), 55 | nn.ReLU(inplace=True) 56 | ) 57 | 58 | def forward(self,inp1,inp2,last_feature=None): 59 | x = torch.cat([inp1,inp2],dim=1) 60 | c1 = self.conv_1(x) 61 | c2 = self.conv_2(x) 62 | c3 = self.conv_3(x) 63 | c4 = self.conv_4(x) 64 | cat = torch.cat([c1,c2,c3,c4],dim=1) 65 | fuse = self.fuse(cat) 66 | inp1_siam = self.pre_siam(inp1) 67 | inp2_siam = self.lat_siam(inp2) 68 | 69 | 70 | inp1_mul = torch.mul(inp1_siam,fuse) 71 | 
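# Modulate each SimAM-refined input by the fused multi-dilation response; everything is then
# summed residually (optionally with the previous-stage feature) before the 3x3 output conv.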
inp2_mul = torch.mul(inp2_siam,fuse) 72 | fuse = self.fuse_siam(fuse) 73 | if last_feature is None: 74 | out = self.out(fuse + inp1 + inp2 + inp2_mul + inp1_mul) 75 | else: 76 | out = self.out(fuse+inp2_mul+inp1_mul+last_feature+inp1+inp2) 77 | out = self.fuse_siam(out) 78 | 79 | return out 80 | 81 | 82 | if __name__ == '__main__': 83 | 84 | block = BFAM(inp=128, out=256) 85 | 86 | inp1 = torch.rand(1, 128 // 2, 16, 16) # B C H W 87 | inp2 = torch.rand(1, 128 // 2, 16, 16)# B C H W 88 | last_feature = torch.rand(1, 128 // 2, 16, 16)# B C H W 89 | 90 | # 通过BFAM模块,这里没有提供last_feature的话,可以为None 91 | output = block(inp1, inp2, last_feature) 92 | # output = bfam(inp1, inp2) 93 | 94 | # 打印输入和输出的shape 95 | print(inp1.size()) 96 | print(inp2.size()) 97 | print(output.size()) -------------------------------------------------------------------------------- /(ACM MM 2023)Deepfake(深度伪造检测).py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | import torch.nn.functional as F 5 | 6 | # 论文:Locate and Verify: A Two-Stream Network for Improved Deepfake Detection 7 | # 论文地址:https://arxiv.org/pdf/2309.11131 8 | 9 | class CMCE(nn.Module): # Contrastive Multimodal Contrastive Enhancement 增强模型对特征的关注度,提高模型的性能 10 | def __init__(self, in_channel=3): 11 | super(CMCE, self).__init__() 12 | self.relu = nn.ReLU() 13 | self.bn = nn.BatchNorm2d(in_channel) 14 | self.stage1 = nn.Sequential( 15 | nn.Conv2d(in_channel, in_channel, 3, 1, bias=False), 16 | nn.BatchNorm2d(in_channel), 17 | nn.ReLU() 18 | ) 19 | self.stage2 = nn.Sequential( 20 | nn.Conv2d(in_channel, in_channel, 3, 1, bias=False), 21 | nn.BatchNorm2d(in_channel), 22 | nn.ReLU() 23 | ) 24 | 25 | def forward(self, fa, fb): 26 | (b1, c1, h1, w1), (b2, c2, h2, w2) = fa.size(), fb.size() 27 | assert c1 == c2 28 | cos_sim = F.cosine_similarity(fa, fb, dim=1) 29 | cos_sim = cos_sim.unsqueeze(1) 30 | fa = fa + fb * cos_sim 31 | fb = fb + fa * cos_sim 32 | fa = self.relu(fa) 33 | fb = self.relu(fb) 34 | 35 | return fa, fb 36 | 37 | if __name__ == '__main__': 38 | block = CMCE() 39 | fa = torch.rand(16, 3, 32, 32) 40 | fb = torch.rand(16, 3, 32, 32) 41 | 42 | fa1, fb1 = block(fa, fb) 43 | print(fa.size()) 44 | print(fb.size()) 45 | print(fa1.size()) 46 | print(fb1.size()) 47 | 48 | 49 | class LFGA(nn.Module): # Local Feature Guidance Attention 旨在引导特征图的注意力以更好地聚焦在局部特征上 50 | def __init__(self, in_channel=3, out_channel=None, ratio=4): 51 | super(LFGA, self).__init__() 52 | self.chanel_in = in_channel 53 | 54 | if out_channel is None: 55 | out_channel = in_channel // ratio if in_channel // ratio > 0 else 1 56 | 57 | self.query_conv = nn.Conv2d( 58 | in_channels=in_channel, out_channels=out_channel, kernel_size=1) 59 | self.key_conv = nn.Conv2d( 60 | in_channels=in_channel, out_channels=out_channel, kernel_size=1) 61 | self.value_conv = nn.Conv2d( 62 | in_channels=in_channel, out_channels=in_channel, kernel_size=1) 63 | self.gamma = nn.Parameter(torch.zeros(1)) 64 | 65 | self.softmax = nn.Softmax(dim=-1) 66 | self.relu = nn.ReLU() 67 | self.bn = nn.BatchNorm2d(self.chanel_in) 68 | 69 | def forward(self, fa, fb): 70 | B, C, H, W = fa.size() 71 | proj_query = self.query_conv(fb).view( 72 | B, -1, H * W).permute(0, 2, 1) # B , HW, C 73 | proj_key = self.key_conv(fb).view( 74 | B, -1, H * W) # B X C x (*W*H) 75 | energy = torch.bmm(proj_query, proj_key) # B, HW, HW 76 | attention = self.softmax(energy) # BX (N) X (N) 77 | # attention = F.normalize(energy, dim=-1) 78 | 79 | 
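# fb alone decides the attention map (query and key both come from fb); fa only supplies the values,
# so the guiding stream controls where fa is aggregated. The learnable gamma blends the attended
# result back into fa as a residual.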
proj_value = self.value_conv(fa).view( 80 | B, -1, H * W) # B , C , HW 81 | 82 | out = torch.bmm(proj_value, attention.permute(0, 2, 1)) 83 | out = out.view(B, C, H, W) 84 | 85 | out = self.gamma * out + fa 86 | 87 | return self.relu(out) 88 | 89 | 90 | if __name__ == '__main__': 91 | block = LFGA(in_channel=3, ratio=4) 92 | fa = torch.rand(16, 3, 32, 32) 93 | fb = torch.rand(16, 3, 32, 32) 94 | 95 | output = block(fa, fb) 96 | print(fa.size()) 97 | print(fb.size()) 98 | print(output.size()) 99 | -------------------------------------------------------------------------------- /目标检测/FACMA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import math 4 | 5 | # 论文:FCMNet: Frequency-aware cross-modality attention networks for RGB-D salient object detection 6 | # 论文地址:https://www.sciencedirect.com/science/article/abs/pii/S0925231222003848 7 | def get_1d_dct(i, freq, L): 8 | result = math.cos(math.pi * freq * (i+0.5)/L) / math.sqrt(L) 9 | if freq == 0: 10 | return result 11 | else: 12 | return result * math.sqrt(2) 13 | def get_dct_weights(width,height,channel,fidx_u,fidx_v): 14 | dct_weights = torch.zeros(1, channel, width, height) 15 | c_part = channel // len(fidx_u) 16 | for i, (u_x, v_y) in enumerate(zip(fidx_u, fidx_v)): 17 | for t_x in range(width): 18 | for t_y in range(height): 19 | dct_weights[:, i*c_part: (i+1)*c_part, t_x, t_y] = get_1d_dct(t_x, u_x, width) * get_1d_dct(t_y, v_y, height) 20 | return dct_weights 21 | class FCABlock(nn.Module): 22 | 23 | def __init__(self, channel,width,height,fidx_u, fidx_v, reduction=16): 24 | super(FCABlock, self).__init__() 25 | mid_channel = channel // reduction 26 | self.register_buffer('pre_computed_dct_weights', get_dct_weights(width,height,channel,fidx_u,fidx_v)) 27 | self.excitation = nn.Sequential( 28 | nn.Linear(channel, mid_channel, bias=False), 29 | nn.ReLU(inplace=True), 30 | nn.Linear(mid_channel, channel, bias=False), 31 | nn.Sigmoid() 32 | ) 33 | def forward(self, x): 34 | b, c, _, _ = x.size() 35 | y = torch.sum(x * self.pre_computed_dct_weights, dim=[2,3]) 36 | z = self.excitation(y).view(b, c, 1, 1) 37 | return x * z.expand_as(x) 38 | class SFCA(nn.Module): 39 | def __init__(self, in_channel,width,height,fidx_u,fidx_v): 40 | super(SFCA, self).__init__() 41 | 42 | fidx_u = [temp_u * (width // 8) for temp_u in fidx_u] 43 | fidx_v = [temp_v * (width // 8) for temp_v in fidx_v] 44 | self.FCA = FCABlock(in_channel, width, height, fidx_u, fidx_v) 45 | self.conv1 = nn.Conv2d(in_channel, 1, kernel_size=1, bias=False) 46 | self.norm = nn.Sigmoid() 47 | def forward(self, x): 48 | # FCA 49 | F_fca = self.FCA(x) 50 | #context attention 51 | con = self.conv1(x) # c,h,w -> 1,h,w 52 | con = self.norm(con) 53 | F_con = x * con 54 | return F_fca + F_con 55 | class FACMA(nn.Module): 56 | def __init__(self,in_channel,width,height,fidx_u,fidx_v): 57 | super(FACMA, self).__init__() 58 | self.sfca_depth = SFCA(in_channel, width, height, fidx_u, fidx_v) 59 | self.sfca_rgb = SFCA(in_channel, width, height, fidx_u, fidx_v) 60 | def forward(self, rgb, depth): 61 | out_d = self.sfca_depth(depth) 62 | out_d = rgb * out_d 63 | 64 | out_rgb = self.sfca_rgb(rgb) 65 | out_rgb = depth * out_rgb 66 | return out_rgb, out_d 67 | 68 | if __name__ == '__main__': 69 | 70 | # 定义输入参数 71 | in_channel = 64 72 | width = 224 73 | height = 224 74 | fidx_u = [0, 1] 75 | fidx_v = [0, 1] 76 | 77 | block = FACMA(in_channel, width, height, fidx_u, fidx_v) 78 | 79 | # 假设的RGB和深度输入 80 | rgb_input = 
torch.randn(1, in_channel, width, height) # Batch size为1 81 | depth_input = torch.randn(1, in_channel, width, height) # Batch size为1 82 | 83 | # 通过FACMA 84 | out_rgb, out_d = block(rgb_input, depth_input) 85 | 86 | # 打印输入输出形状 87 | print("RGB 输入形状:", rgb_input.shape) 88 | print("深度 输入形状:", depth_input.shape) 89 | print("RGB 输出形状:", out_rgb.shape) 90 | print("深度 输出形状:", out_d.shape) -------------------------------------------------------------------------------- /注意力/(tmm2023)多尺度膨胀注意力机制.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | # Github地址:https://github.com/JIAOJIAYUASD/dilateformer 6 | # 论文地址:https://arxiv.org/abs/2302.01791 7 | class DilateAttention(nn.Module): 8 | "Implementation of Dilate-attention" 9 | 10 | def __init__(self, head_dim, qk_scale=None, attn_drop=0, kernel_size=3, dilation=1): 11 | super().__init__() 12 | self.head_dim = head_dim 13 | self.scale = qk_scale or head_dim ** -0.5 14 | self.kernel_size = kernel_size 15 | self.unfold = nn.Unfold(kernel_size, dilation, dilation * (kernel_size - 1) // 2, 1) 16 | self.attn_drop = nn.Dropout(attn_drop) 17 | 18 | def forward(self, q, k, v): 19 | # B, C//3, H, W 20 | B, d, H, W = q.shape 21 | q = q.reshape([B, d // self.head_dim, self.head_dim, 1, H * W]).permute(0, 1, 4, 3, 2) # B,h,N,1,d 22 | k = self.unfold(k).reshape( 23 | [B, d // self.head_dim, self.head_dim, self.kernel_size * self.kernel_size, H * W]).permute(0, 1, 4, 2, 24 | 3) # B,h,N,d,k*k 25 | attn = (q @ k) * self.scale # B,h,N,1,k*k 26 | attn = attn.softmax(dim=-1) 27 | attn = self.attn_drop(attn) 28 | v = self.unfold(v).reshape( 29 | [B, d // self.head_dim, self.head_dim, self.kernel_size * self.kernel_size, H * W]).permute(0, 1, 4, 3, 30 | 2) # B,h,N,k*k,d 31 | x = (attn @ v).transpose(1, 2).reshape(B, H, W, d) 32 | return x 33 | 34 | 35 | class MultiDilatelocalAttention(nn.Module): 36 | "Implementation of Dilate-attention" 37 | 38 | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, 39 | attn_drop=0., proj_drop=0., kernel_size=3, dilation=[2, 3]): 40 | super().__init__() 41 | self.dim = dim 42 | self.num_heads = num_heads 43 | head_dim = dim // num_heads 44 | self.dilation = dilation 45 | self.kernel_size = kernel_size 46 | self.scale = qk_scale or head_dim ** -0.5 47 | self.num_dilation = len(dilation) 48 | assert num_heads % self.num_dilation == 0, f"num_heads{num_heads} must be the times of num_dilation{self.num_dilation}!!" 
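# num_heads must be divisible by the number of dilation rates so the channel split in forward() is
# exact; every dilation group shares the single qkv projection defined below.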
49 | self.qkv = nn.Conv2d(dim, dim * 3, 1, bias=qkv_bias) 50 | self.dilate_attention = nn.ModuleList( 51 | [DilateAttention(head_dim, qk_scale, attn_drop, kernel_size, dilation[i]) 52 | for i in range(self.num_dilation)]) 53 | self.proj = nn.Linear(dim, dim) 54 | self.proj_drop = nn.Dropout(proj_drop) 55 | 56 | def forward(self, x): 57 | B, H, W, C = x.shape 58 | x = x.permute(0, 3, 1, 2) # B, C, H, W 59 | qkv = self.qkv(x).reshape(B, 3, self.num_dilation, C // self.num_dilation, H, W).permute(2, 1, 0, 3, 4, 5) 60 | # num_dilation,3,B,C//num_dilation,H,W 61 | x = x.reshape(B, self.num_dilation, C // self.num_dilation, H, W).permute(1, 0, 3, 4, 2) 62 | # num_dilation, B, H, W, C//num_dilation 63 | for i in range(self.num_dilation): 64 | x[i] = self.dilate_attention[i](qkv[i][0], qkv[i][1], qkv[i][2]) # B, H, W,C//num_dilation 65 | x = x.permute(1, 2, 3, 0, 4).reshape(B, H, W, C) 66 | x = self.proj(x) 67 | x = self.proj_drop(x) 68 | return x 69 | 70 | 71 | if __name__ == "__main__": 72 | x = torch.rand([3, 64, 64, 64]).cuda() #输入B C H W 73 | m = MultiDilatelocalAttention(64).cuda() 74 | y = m(x) 75 | print(y.shape) 76 | -------------------------------------------------------------------------------- /MHIASA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import einops 4 | # https://ieeexplore.ieee.org/abstract/document/10632582/ 5 | # MHIAIFormer: Multi-Head Interacted and Adaptive Integrated Transformer with Spatial-Spectral Attention for Hyperspectral Image Classification, JSTARS2024 6 | # https://github.com/Delon1364/MHIAIFormer 7 | # Multi-Head Interacted Additive Self-Attention(MHIASA) 8 | 9 | # Efficient Head-Interacted Additive Attention: 10 | class EHIAAttention(nn.Module): 11 | def __init__(self, num_patches, dim, num_heads = 2): 12 | super(EHIAAttention, self).__init__() 13 | self.num_heads = num_heads 14 | self.in_dims = dim // num_heads 15 | 16 | # ==================添加两个linear 17 | self.fc_q = nn.Linear(dim, dim) 18 | self.fc_k = nn.Linear(dim, dim) 19 | 20 | # w_g ->: [N, 1] 21 | self.w_g = nn.Parameter(torch.randn(num_patches, 1)) 22 | self.scale_factor = num_patches ** -0.5 23 | self.Proj = nn.Linear(dim, dim) 24 | self.norm = nn.LayerNorm(dim) 25 | # ===================== 添加Avg分支 26 | self.d_avg = nn.AdaptiveAvgPool2d((None, 1)) 27 | self.fc = nn.Linear(self.in_dims, dim) 28 | self.gelu = nn.GELU() 29 | self.fc2 = nn.Linear(num_heads, num_heads) 30 | self.d_avg2 = nn.AdaptiveAvgPool2d((None, 1)) 31 | self.sigmoid = nn.Sigmoid() 32 | 33 | def forward(self, x): 34 | # x ->: [bs, num_patches, num_heads*in_dims] 35 | bs = x.shape[0] 36 | 37 | # ==================添加两个linear 38 | q = self.fc_q(x) 39 | x = self.fc_k(x) 40 | x_t = q.transpose(1, 2) 41 | 42 | # x_T ->: [bs, D, N] 43 | # x_t = x.transpose(1, 2) 44 | 45 | # query_weight ->: [bs, D, 1] ->: [bs, 1, D] 46 | query_weight = (x_t @ self.w_g).transpose(1, 2) 47 | 48 | A = query_weight * self.scale_factor 49 | A = A.softmax(dim=-1) 50 | 51 | # A * x_T ->: [bs, N, D] 52 | # G ->: [bs, D] 53 | G = torch.sum(A * x, dim=1) 54 | 55 | # ===================== 添加Avg分支 56 | d_avg = self.d_avg(x_t) # [bs, D, 1] 57 | d_avg = torch.squeeze(d_avg, 2) # [bs, D] 58 | d_avg = d_avg.reshape(bs, self.num_heads, self.in_dims) # [bs, h, d] 59 | d_avg = self.gelu(self.fc(d_avg)) # [bs, h, D] 60 | d_avg = d_avg.reshape(bs, -1, self.num_heads) # [bs, D, h] 61 | d_avg = self.fc2(d_avg) # [bs, D, h] 62 | d_avg = self.sigmoid(self.d_avg2(d_avg)) # [bs, D, 1] 63 | d_avg = 
torch.squeeze(d_avg, 2) # [bs, D] 64 | G = G * d_avg 65 | # ===================== 66 | 67 | # G ->: [bs, N, D] 68 | # key.shape[1] = N 69 | G = einops.repeat( 70 | G, "b d -> b repeat d", repeat=x.shape[1] 71 | ) 72 | 73 | # out :-> [bs, N, D] 74 | out = self.Proj(G * x) + self.norm(x) 75 | # out = self.Proj(out) 76 | 77 | return out 78 | 79 | 80 | if __name__ == '__main__': 81 | patch_size = 16 82 | num_patches = patch_size * patch_size 83 | dim = 128 # Typically dim is a multiple of num_heads 84 | 85 | # Instantiate the EHIAAttention 86 | model = EHIAAttention(num_patches=num_patches, dim=dim) 87 | 88 | # Create a random input tensor with shape (batch_size, num_patches, num_heads * in_dims) 89 | batch_size = 1 90 | input_tensor = torch.randn(batch_size, num_patches, dim) 91 | 92 | # Forward pass through the model 93 | output = model(input_tensor) 94 | 95 | # Print the shapes 96 | print("Input shape:", input_tensor.shape) 97 | print("Output shape:", output.shape) -------------------------------------------------------------------------------- /(CVPR2020)strip_pooling.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------- 2 | # 论文: Strip Pooling: Rethinking spatial pooling for scene parsing (CVPR2020) 3 | # Github地址: https://github.com/houqb/SPNet 4 | # --------------------------------------- 5 | 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class StripPooling(nn.Module): 12 | """ 13 | Reference: 14 | """ 15 | def __init__(self, in_channels, pool_size, norm_layer, up_kwargs): 16 | super(StripPooling, self).__init__() 17 | self.pool1 = nn.AdaptiveAvgPool2d(pool_size[0]) 18 | self.pool2 = nn.AdaptiveAvgPool2d(pool_size[1]) 19 | self.pool3 = nn.AdaptiveAvgPool2d((1, None)) 20 | self.pool4 = nn.AdaptiveAvgPool2d((None, 1)) 21 | 22 | inter_channels = int(in_channels/4) 23 | self.conv1_1 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 1, bias=False), 24 | norm_layer(inter_channels), 25 | nn.ReLU(True)) 26 | self.conv1_2 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 1, bias=False), 27 | norm_layer(inter_channels), 28 | nn.ReLU(True)) 29 | self.conv2_0 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), 30 | norm_layer(inter_channels)) 31 | self.conv2_1 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), 32 | norm_layer(inter_channels)) 33 | self.conv2_2 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), 34 | norm_layer(inter_channels)) 35 | self.conv2_3 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, (1, 3), 1, (0, 1), bias=False), 36 | norm_layer(inter_channels)) 37 | self.conv2_4 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, (3, 1), 1, (1, 0), bias=False), 38 | norm_layer(inter_channels)) 39 | self.conv2_5 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), 40 | norm_layer(inter_channels), 41 | nn.ReLU(True)) 42 | self.conv2_6 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), 43 | norm_layer(inter_channels), 44 | nn.ReLU(True)) 45 | self.conv3 = nn.Sequential(nn.Conv2d(inter_channels*2, in_channels, 1, bias=False), 46 | norm_layer(in_channels)) 47 | # bilinear interpolate options 48 | self._up_kwargs = up_kwargs 49 | 50 | def forward(self, x): 51 | _, _, h, w = x.size() 52 | x1 = self.conv1_1(x) 53 | x2 = self.conv1_2(x) 54 | x2_1 = self.conv2_0(x1) 55 | x2_2 = 
F.interpolate(self.conv2_1(self.pool1(x1)), (h, w), **self._up_kwargs) 56 | x2_3 = F.interpolate(self.conv2_2(self.pool2(x1)), (h, w), **self._up_kwargs) 57 | x2_4 = F.interpolate(self.conv2_3(self.pool3(x2)), (h, w), **self._up_kwargs) 58 | x2_5 = F.interpolate(self.conv2_4(self.pool4(x2)), (h, w), **self._up_kwargs) 59 | x1 = self.conv2_5(F.relu_(x2_1 + x2_2 + x2_3)) 60 | x2 = self.conv2_6(F.relu_(x2_5 + x2_4)) 61 | out = self.conv3(torch.cat([x1, x2], dim=1)) 62 | return F.relu_(x + out) 63 | 64 | 65 | # 输入 B C H W, 输出 B C H W 66 | if __name__ == '__main__': 67 | block = StripPooling(64, (20, 12), nn.BatchNorm2d, {'mode': 'bilinear', 'align_corners': True}) 68 | input = torch.rand(3, 64, 32, 32) 69 | output = block(input) 70 | print(input.size(), output.size()) 71 | -------------------------------------------------------------------------------- /CPCA2d.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch 3 | import torch.nn.functional 4 | import torch.nn.functional as F 5 | # 论文:Channel prior convolutional attention for medical image segmentation 6 | # 论文地址:https://arxiv.org/pdf/2306.05196 7 | 8 | 9 | class ChannelAttention(nn.Module): 10 | 11 | def __init__(self, input_channels, internal_neurons): 12 | super(ChannelAttention, self).__init__() 13 | self.fc1 = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1, bias=True) 14 | self.fc2 = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1, bias=True) 15 | self.input_channels = input_channels 16 | 17 | def forward(self, inputs): 18 | x1 = F.adaptive_avg_pool2d(inputs, output_size=(1, 1)) 19 | # print('x:', x.shape) 20 | x1 = self.fc1(x1) 21 | x1 = F.relu(x1, inplace=True) 22 | x1 = self.fc2(x1) 23 | x1 = torch.sigmoid(x1) 24 | x2 = F.adaptive_max_pool2d(inputs, output_size=(1, 1)) 25 | # print('x:', x.shape) 26 | x2 = self.fc1(x2) 27 | x2 = F.relu(x2, inplace=True) 28 | x2 = self.fc2(x2) 29 | x2 = torch.sigmoid(x2) 30 | x = x1 + x2 31 | x = x.view(-1, self.input_channels, 1, 1) 32 | return x 33 | 34 | class CPCABlock(nn.Module): 35 | 36 | def __init__(self, in_channels, out_channels, 37 | channelAttention_reduce=4): 38 | super().__init__() 39 | 40 | self.C = in_channels 41 | self.O = out_channels 42 | 43 | assert in_channels == out_channels 44 | self.ca = ChannelAttention(input_channels=in_channels, internal_neurons=in_channels // channelAttention_reduce) 45 | self.dconv5_5 = nn.Conv2d(in_channels, in_channels, kernel_size=5, padding=2, groups=in_channels) 46 | self.dconv1_7 = nn.Conv2d(in_channels, in_channels, kernel_size=(1, 7), padding=(0, 3), groups=in_channels) 47 | self.dconv7_1 = nn.Conv2d(in_channels, in_channels, kernel_size=(7, 1), padding=(3, 0), groups=in_channels) 48 | self.dconv1_11 = nn.Conv2d(in_channels, in_channels, kernel_size=(1, 11), padding=(0, 5), groups=in_channels) 49 | self.dconv11_1 = nn.Conv2d(in_channels, in_channels, kernel_size=(11, 1), padding=(5, 0), groups=in_channels) 50 | self.dconv1_21 = nn.Conv2d(in_channels, in_channels, kernel_size=(1, 21), padding=(0, 10), groups=in_channels) 51 | self.dconv21_1 = nn.Conv2d(in_channels, in_channels, kernel_size=(21, 1), padding=(10, 0), groups=in_channels) 52 | self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=(1, 1), padding=0) 53 | self.act = nn.GELU() 54 | 55 | def forward(self, inputs): 56 | # Global Perceptron 57 | inputs = self.conv(inputs) 58 | inputs = self.act(inputs) 59 | 60 | 
channel_att_vec = self.ca(inputs) 61 | inputs = channel_att_vec * inputs 62 | 63 | x_init = self.dconv5_5(inputs) 64 | x_1 = self.dconv1_7(x_init) 65 | x_1 = self.dconv7_1(x_1) 66 | x_2 = self.dconv1_11(x_init) 67 | x_2 = self.dconv11_1(x_2) 68 | x_3 = self.dconv1_21(x_init) 69 | x_3 = self.dconv21_1(x_3) 70 | x = x_1 + x_2 + x_3 + x_init 71 | spatial_att = self.conv(x) 72 | out = spatial_att * inputs 73 | out = self.conv(out) 74 | return out 75 | 76 | 77 | if __name__ == '__main__': 78 | 79 | input = torch.randn(4, 16, 64, 64) 80 | 81 | print(input.size()) 82 | 83 | block = CPCABlock(in_channels=16, out_channels=16, channelAttention_reduce=4) 84 | 85 | # 通过CPCABlock模块处理输入 86 | output = block(input) 87 | 88 | # 打印输出张量的形状 89 | print(output.size()) -------------------------------------------------------------------------------- /卷积/(CVPR 2022)dgcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def knn(x, k): 6 | inner = -2 * torch.matmul(x.transpose(2, 1).contiguous(), x) 7 | xx = torch.sum(x ** 2, dim=1, keepdim=True) 8 | pairwise_distance = -xx - inner - xx.transpose(2, 1).contiguous() 9 | 10 | idx = pairwise_distance.topk(k=k, dim=-1)[1] # (batch_size, num_points, k) 11 | return idx 12 | 13 | 14 | def get_graph_feature(x, k=20): 15 | # x = x.squeeze() 16 | idx = knn(x, k=k) # (batch_size, num_points, k) 17 | batch_size, num_points, _ = idx.size() 18 | 19 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") 20 | 21 | idx_base = torch.arange(0, batch_size, device=device).view(-1, 1, 1) * num_points 22 | 23 | idx = idx + idx_base 24 | 25 | idx = idx.view(-1) 26 | 27 | _, num_dims, _ = x.size() 28 | 29 | # (batch_size, num_points, num_dims) -> (batch_size*num_points, num_dims) # batch_size * num_points * k + range(0, batch_size*num_points) 30 | x = x.transpose(2, 1).contiguous() 31 | 32 | feature = x.view(batch_size * num_points, -1)[idx, :] 33 | feature = feature.view(batch_size, num_points, k, num_dims) 34 | x = x.view(batch_size, num_points, 1, num_dims).repeat(1, 1, k, 1) 35 | 36 | feature = torch.cat((feature, x), dim=3).permute(0, 3, 1, 2) 37 | 38 | return feature 39 | 40 | 41 | class DGCNN(torch.nn.Module): 42 | def __init__(self, emb_dims=1024, input_shape="bnc"): 43 | super(DGCNN, self).__init__() 44 | if input_shape not in ["bcn", "bnc"]: 45 | raise ValueError("Allowed shapes are 'bcn' (batch * channels * num_in_points), 'bnc' ") 46 | self.input_shape = input_shape 47 | self.emb_dims = emb_dims 48 | 49 | self.conv1 = torch.nn.Conv2d(6, 64, kernel_size=1, bias=False) 50 | self.conv2 = torch.nn.Conv2d(64, 64, kernel_size=1, bias=False) 51 | self.conv3 = torch.nn.Conv2d(64, 128, kernel_size=1, bias=False) 52 | self.conv4 = torch.nn.Conv2d(128, 256, kernel_size=1, bias=False) 53 | self.conv5 = torch.nn.Conv2d(512, emb_dims, kernel_size=1, bias=False) 54 | self.bn1 = torch.nn.BatchNorm2d(64) 55 | self.bn2 = torch.nn.BatchNorm2d(64) 56 | self.bn3 = torch.nn.BatchNorm2d(128) 57 | self.bn4 = torch.nn.BatchNorm2d(256) 58 | self.bn5 = torch.nn.BatchNorm2d(emb_dims) 59 | 60 | def forward(self, input_data): 61 | if self.input_shape == "bnc": 62 | input_data = input_data.permute(0, 2, 1) 63 | if input_data.shape[1] != 3: 64 | raise RuntimeError("shape of x must be of [Batch x 3 x NumInPoints]") 65 | 66 | batch_size, num_dims, num_points = input_data.size() 67 | output = get_graph_feature(input_data) 68 | 69 | output = F.relu(self.bn1(self.conv1(output))) 70 | 
output1 = output.max(dim=-1, keepdim=True)[0] 71 | 72 | output = F.relu(self.bn2(self.conv2(output))) 73 | output2 = output.max(dim=-1, keepdim=True)[0] 74 | 75 | output = F.relu(self.bn3(self.conv3(output))) 76 | output3 = output.max(dim=-1, keepdim=True)[0] 77 | 78 | output = F.relu(self.bn4(self.conv4(output))) 79 | output4 = output.max(dim=-1, keepdim=True)[0] 80 | 81 | output = torch.cat((output1, output2, output3, output4), dim=1) 82 | 83 | output = F.relu(self.bn5(self.conv5(output))).view(batch_size, -1, num_points) 84 | return output 85 | 86 | 87 | if __name__ == '__main__': 88 | # Test the code. 89 | x = torch.rand((10, 1024, 3)).cuda() 90 | 91 | dgcnn = DGCNN().cuda() 92 | y = dgcnn(x) 93 | print("\nInput Shape of DGCNN: ", x.shape, "\nOutput Shape of DGCNN: ", y.shape) 94 | -------------------------------------------------------------------------------- /PCBAM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | #论文:DAU-Net: Dual attention-aided U-Net for segmenting tumor in breast ultrasound images 4 | #论文:https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0303670 5 | class ChannelAttentionModule(nn.Module): 6 | def __init__(self, in_channels, ratio=8): 7 | super(ChannelAttentionModule, self).__init__() 8 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 9 | self.max_pool = nn.AdaptiveMaxPool2d(1) 10 | 11 | self.fc1 = nn.Conv2d(in_channels, in_channels // ratio, kernel_size=1, bias=False) 12 | self.relu1 = nn.ReLU() 13 | self.fc2 = nn.Conv2d(in_channels // ratio, in_channels, kernel_size=1, bias=False) 14 | 15 | def forward(self, x): 16 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 17 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 18 | out = avg_out + max_out 19 | return x * torch.sigmoid(out) 20 | 21 | 22 | class SpatialAttentionModule(nn.Module): 23 | def __init__(self): 24 | super(SpatialAttentionModule, self).__init__() 25 | self.conv1 = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False) 26 | 27 | def forward(self, x): 28 | avg_out = torch.mean(x, dim=1, keepdim=True) 29 | max_out, _ = torch.max(x, dim=1, keepdim=True) 30 | out = torch.cat([avg_out, max_out], dim=1) 31 | out = self.conv1(out) 32 | return x * torch.sigmoid(out) 33 | 34 | 35 | class CBAM(nn.Module): 36 | def __init__(self, in_channels, ratio=8): 37 | super(CBAM, self).__init__() 38 | self.channel_attention = ChannelAttentionModule(in_channels, ratio) 39 | self.spatial_attention = SpatialAttentionModule() 40 | 41 | def forward(self, x): 42 | x = self.channel_attention(x) 43 | x = self.spatial_attention(x) 44 | return x 45 | 46 | 47 | class PAM(nn.Module): 48 | def __init__(self, in_channels): 49 | super(PAM, self).__init__() 50 | self.query_conv = nn.Conv2d(in_channels, in_channels // 8, kernel_size=1) 51 | self.key_conv = nn.Conv2d(in_channels, in_channels // 8, kernel_size=1) 52 | self.value_conv = nn.Conv2d(in_channels, in_channels, kernel_size=1) 53 | self.gamma = nn.Parameter(torch.zeros(1)) 54 | self.softmax = nn.Softmax(dim=-1) 55 | 56 | def forward(self, x): 57 | batch_size, C, height, width = x.size() 58 | proj_query = self.query_conv(x).view(batch_size, -1, height * width).permute(0, 2, 1) 59 | proj_key = self.key_conv(x).view(batch_size, -1, height * width) 60 | energy = torch.bmm(proj_query, proj_key) 61 | attention = self.softmax(energy) 62 | proj_value = self.value_conv(x).view(batch_size, -1, height * width) 63 | out = torch.bmm(proj_value, attention.permute(0, 2, 1)) 64 | out = 
out.view(batch_size, C, height, width) 65 | out = self.gamma * out + x 66 | return out 67 | 68 | 69 | class PCBAM(nn.Module): 70 | def __init__(self, in_channels, ratio=8): 71 | super(PCBAM, self).__init__() 72 | self.channel_attention = ChannelAttentionModule(in_channels, ratio) 73 | self.spatial_attention = SpatialAttentionModule() 74 | self.position_attention = PAM(in_channels) 75 | 76 | def forward(self, x): 77 | x_c = self.channel_attention(x) 78 | x_s = self.spatial_attention(x_c) 79 | x_p = self.position_attention(x) 80 | out = x_s + x_p 81 | return out 82 | 83 | 84 | if __name__ == '__main__': 85 | 86 | input = torch.randn(1, 64,32, 32) 87 | block = PCBAM(in_channels=64) 88 | print(input.size()) 89 | output = block(input) 90 | print(output.size()) 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /遥感/MSAA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | # 论文:CM-UNet: Hybrid CNN-Mamba UNet for Remote Sensing Image Semantic Segmentation 5 | # 论文地址:https://arxiv.org/pdf/2405.10530 6 | 7 | 8 | class ChannelAttentionModule(nn.Module): 9 | def __init__(self, in_channels, reduction=4): 10 | super(ChannelAttentionModule, self).__init__() 11 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 12 | self.max_pool = nn.AdaptiveMaxPool2d(1) 13 | self.fc = nn.Sequential( 14 | nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False), 15 | nn.ReLU(inplace=True), 16 | nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False) 17 | ) 18 | self.sigmoid = nn.Sigmoid() 19 | 20 | def forward(self, x): 21 | avg_out = self.fc(self.avg_pool(x)) 22 | max_out = self.fc(self.max_pool(x)) 23 | out = avg_out + max_out 24 | return self.sigmoid(out) 25 | 26 | class SpatialAttentionModule(nn.Module): 27 | def __init__(self, kernel_size=7): 28 | super(SpatialAttentionModule, self).__init__() 29 | self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=kernel_size//2, bias=False) 30 | self.sigmoid = nn.Sigmoid() 31 | 32 | def forward(self, x): 33 | avg_out = torch.mean(x, dim=1, keepdim=True) 34 | max_out, _ = torch.max(x, dim=1, keepdim=True) 35 | x = torch.cat([avg_out, max_out], dim=1) 36 | x = self.conv1(x) 37 | return self.sigmoid(x) 38 | 39 | class FusionConv(nn.Module): 40 | def __init__(self, in_channels, out_channels, factor=4.0): 41 | super(FusionConv, self).__init__() 42 | dim = int(out_channels // factor) 43 | self.down = nn.Conv2d(in_channels, dim, kernel_size=1, stride=1) 44 | self.conv_3x3 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1) 45 | self.conv_5x5 = nn.Conv2d(dim, dim, kernel_size=5, stride=1, padding=2) 46 | self.conv_7x7 = nn.Conv2d(dim, dim, kernel_size=7, stride=1, padding=3) 47 | self.spatial_attention = SpatialAttentionModule() 48 | self.channel_attention = ChannelAttentionModule(dim) 49 | self.up = nn.Conv2d(dim, out_channels, kernel_size=1, stride=1) 50 | self.down_2 = nn.Conv2d(in_channels, dim, kernel_size=1, stride=1) 51 | 52 | def forward(self, x1, x2, x4): 53 | x_fused = torch.cat([x1, x2, x4], dim=1) 54 | x_fused = self.down(x_fused) 55 | x_fused_c = x_fused * self.channel_attention(x_fused) 56 | x_3x3 = self.conv_3x3(x_fused) 57 | x_5x5 = self.conv_5x5(x_fused) 58 | x_7x7 = self.conv_7x7(x_fused) 59 | x_fused_s = x_3x3 + x_5x5 + x_7x7 60 | x_fused_s = x_fused_s * self.spatial_attention(x_fused_s) 61 | 62 | x_out = self.up(x_fused_s + x_fused_c) 63 | 64 | return x_out 65 | 66 | class MSAA(nn.Module): 67 | def 
__init__(self, in_channels, out_channels): 68 | super(MSAA, self).__init__() 69 | self.fusion_conv = FusionConv(in_channels * 3, out_channels) 70 | 71 | def forward(self, x1, x2, x4, last=False): 72 | # # x2 comes from the low-to-high path and x4 from the high-to-low path: x2 carries semantic information, while x4 supplements edge and detail features 73 | # x_1_2_fusion = self.fusion_1x2(x1, x2) 74 | # x_1_4_fusion = self.fusion_1x4(x1, x4) 75 | # x_fused = x_1_2_fusion + x_1_4_fusion 76 | x_fused = self.fusion_conv(x1, x2, x4) 77 | return x_fused 78 | 79 | 80 | if __name__ == '__main__': 81 | 82 | block = MSAA(in_channels=64, out_channels=128) 83 | x1 = torch.randn(1, 64, 64, 64) 84 | x2 = torch.randn(1, 64, 64, 64) 85 | x4 = torch.randn(1, 64, 64, 64) 86 | 87 | output = block(x1, x2, x4) 88 | 89 | # Print the shapes of the inputs and the output 90 | print(x1.size()) 91 | print(x2.size()) 92 | print(x4.size()) 93 | print(output.size()) -------------------------------------------------------------------------------- /目标检测/GFM.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | # 论文:MAGNet: Multi-scale Awareness and Global fusion Network for RGB-D salient object detection | KBS 4 | # 论文地址:https://www.sciencedirect.com/science/article/abs/pii/S0950705124007603 5 | class DWPWConv(nn.Module): 6 | def __init__(self, inc, outc): 7 | super().__init__() 8 | self.conv = nn.Sequential( 9 | nn.Conv2d(in_channels=inc, out_channels=inc, kernel_size=3, padding=1, stride=1, groups=inc), 10 | nn.BatchNorm2d(inc), 11 | nn.GELU(), 12 | nn.Conv2d(in_channels=inc, out_channels=outc, kernel_size=1, stride=1), 13 | nn.BatchNorm2d(outc), 14 | nn.GELU() 15 | ) 16 | 17 | def forward(self, x): 18 | return self.conv(x) 19 | 20 | class SAttention(nn.Module): 21 | def __init__(self, dim, sa_num_heads=8, qkv_bias=True, qk_scale=None, 22 | attn_drop=0., proj_drop=0.): 23 | super().__init__() 24 | 25 | self.dim = dim 26 | self.sa_num_heads = sa_num_heads 27 | 28 | assert dim % sa_num_heads == 0, f"dim {dim} should be divided by num_heads {sa_num_heads}."
29 | 30 | self.act = nn.GELU() 31 | self.proj = nn.Linear(dim, dim) 32 | self.proj_drop = nn.Dropout(proj_drop) 33 | 34 | head_dim = dim // sa_num_heads 35 | self.scale = qk_scale or head_dim ** -0.5 36 | self.q = nn.Linear(dim, dim, bias=qkv_bias) 37 | self.attn_drop = nn.Dropout(attn_drop) 38 | self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias) 39 | self.local_conv = nn.Conv2d(dim, dim, kernel_size=3, padding=1, stride=1, groups=dim) 40 | 41 | def forward(self, x, H, W): 42 | B, N, C = x.shape 43 | 44 | q = self.q(x).reshape(B, N, self.sa_num_heads, C // self.sa_num_heads).permute(0, 2, 1, 3) 45 | kv = self.kv(x).reshape(B, -1, 2, self.sa_num_heads, C // self.sa_num_heads).permute(2, 0, 3, 1, 4) 46 | k, v = kv[0], kv[1] 47 | attn = (q @ k.transpose(-2, -1)) * self.scale 48 | attn = attn.softmax(dim=-1) 49 | attn = self.attn_drop(attn) 50 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) + \ 51 | self.local_conv(v.transpose(1, 2).reshape(B, N, C).transpose(1, 2).view(B, C, H, W)).view(B, C, 52 | N).transpose(1, 2) 53 | 54 | x = self.proj(x) 55 | x = self.proj_drop(x) 56 | 57 | return x.permute(0, 2, 1).reshape(B, C, H, W) 58 | 59 | # Global Fusion Module 60 | class GFM(nn.Module): 61 | def __init__(self, inc, expend_ratio=2): 62 | super().__init__() 63 | self.expend_ratio = expend_ratio 64 | assert expend_ratio in [2, 3], f"expend_ratio {expend_ratio} mismatch" 65 | 66 | self.sa = SAttention(dim=inc) 67 | self.dw_pw = DWPWConv(inc * expend_ratio, inc) 68 | self.act = nn.GELU() 69 | 70 | def forward(self, x, d): 71 | B, C, H, W = x.shape 72 | if self.expend_ratio == 2: 73 | cat = torch.cat((x, d), dim=1) 74 | else: 75 | multi = x * d 76 | cat = torch.cat((x, d, multi), dim=1) 77 | x_rc = self.dw_pw(cat).flatten(2).permute(0, 2, 1) 78 | x_ = self.sa(x_rc, H, W) 79 | x_ = x_ + x 80 | return self.act(x_) 81 | 82 | 83 | if __name__ == '__main__': 84 | # Instantiate the GFM module 85 | inc = 64 # number of input channels 86 | block = GFM(inc=inc, expend_ratio=2) 87 | 88 | x = torch.randn(1, inc, 32, 32) # B C H W 89 | d = torch.randn(1, inc, 32, 32) # depth map with the same shape as x 90 | 91 | # Forward pass to compute the output 92 | output = block(x, d) 93 | 94 | # Print the input and output shapes 95 | print(f"Input x shape: {x.size()}") 96 | print(f"Input d shape: {d.size()}") 97 | print(f"Output shape: {output.size()}") -------------------------------------------------------------------------------- /注意力/(CVPR 2024)CAA.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import torch.nn as nn 3 | import torch 4 | # 论文地址:https://arxiv.org/pdf/2403.06258 5 | # 论文:Poly Kernel Inception Network for Remote Sensing Detection(CVPR 2024) 6 | # Github地址:https://github.com/NUST-Machine-Intelligence-Laboratory/PKINet 7 | # Full collection of 100+ plug-and-play modules on GitHub: https://github.com/ai-dawang/PlugNPlay-Modules 8 | # Context Anchor Attention (CAA) module 9 | class ConvModule(nn.Module): 10 | def __init__( 11 | self, 12 | in_channels: int, 13 | out_channels: int, 14 | kernel_size: int, 15 | stride: int = 1, 16 | padding: int = 0, 17 | groups: int = 1, 18 | norm_cfg: Optional[dict] = None, 19 | act_cfg: Optional[dict] = None): 20 | super().__init__() 21 | layers = [] 22 | # Convolution Layer 23 | layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, groups=groups, bias=(norm_cfg is None))) 24 | # Normalization Layer 25 | if norm_cfg: 26 | norm_layer = self._get_norm_layer(out_channels, norm_cfg) 27 | layers.append(norm_layer) 28 | # Activation Layer 29 | if act_cfg: 30 | act_layer = self._get_act_layer(act_cfg) 31 | 
layers.append(act_layer) 32 | # Combine all layers 33 | self.block = nn.Sequential(*layers) 34 | 35 | def forward(self, x): 36 | return self.block(x) 37 | 38 | def _get_norm_layer(self, num_features, norm_cfg): 39 | if norm_cfg['type'] == 'BN': 40 | return nn.BatchNorm2d(num_features, momentum=norm_cfg.get('momentum', 0.1), eps=norm_cfg.get('eps', 1e-5)) 41 | # Add more normalization types if needed 42 | raise NotImplementedError(f"Normalization layer '{norm_cfg['type']}' is not implemented.") 43 | 44 | def _get_act_layer(self, act_cfg): 45 | if act_cfg['type'] == 'ReLU': 46 | return nn.ReLU(inplace=True) 47 | if act_cfg['type'] == 'SiLU': 48 | return nn.SiLU(inplace=True) 49 | # Add more activation types if needed 50 | raise NotImplementedError(f"Activation layer '{act_cfg['type']}' is not implemented.") 51 | 52 | class CAA(nn.Module): 53 | """Context Anchor Attention""" 54 | def __init__( 55 | self, 56 | channels: int, 57 | h_kernel_size: int = 11, 58 | v_kernel_size: int = 11, 59 | norm_cfg: Optional[dict] = dict(type='BN', momentum=0.03, eps=0.001), 60 | act_cfg: Optional[dict] = dict(type='SiLU')): 61 | super().__init__() 62 | self.avg_pool = nn.AvgPool2d(7, 1, 3) 63 | self.conv1 = ConvModule(channels, channels, 1, 1, 0, 64 | norm_cfg=norm_cfg, act_cfg=act_cfg) 65 | self.h_conv = ConvModule(channels, channels, (1, h_kernel_size), 1, 66 | (0, h_kernel_size // 2), groups=channels, 67 | norm_cfg=None, act_cfg=None) 68 | self.v_conv = ConvModule(channels, channels, (v_kernel_size, 1), 1, 69 | (v_kernel_size // 2, 0), groups=channels, 70 | norm_cfg=None, act_cfg=None) 71 | self.conv2 = ConvModule(channels, channels, 1, 1, 0, 72 | norm_cfg=norm_cfg, act_cfg=act_cfg) 73 | self.act = nn.Sigmoid() 74 | 75 | def forward(self, x): 76 | attn_factor = self.act(self.conv2(self.v_conv(self.h_conv(self.conv1(self.avg_pool(x)))))) 77 | return attn_factor 78 | 79 | # Example usage to print input and output shapes 80 | if __name__ == "__main__": 81 | input = torch.randn(1, 64, 128, 128) #输入 B C H W 82 | block = CAA(64) 83 | output = block(input) 84 | print(input.size()) 85 | print(output.size()) 86 | -------------------------------------------------------------------------------- /图像超分/FMM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | #论文:Spatially-Adaptive Feature Modulation for Efficient Image Super-Resolution (ICCV 2023) 5 | #论文地址:https://openaccess.thecvf.com/content/ICCV2023/papers/Sun_Spatially-Adaptive_Feature_Modulation_for_Efficient_Image_Super-Resolution_ICCV_2023_paper.pdf 6 | 7 | # Layer Norm 8 | class LayerNorm(nn.Module): 9 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_first"): 10 | super().__init__() 11 | self.weight = nn.Parameter(torch.ones(normalized_shape)) 12 | self.bias = nn.Parameter(torch.zeros(normalized_shape)) 13 | self.eps = eps 14 | self.data_format = data_format 15 | if self.data_format not in ["channels_last", "channels_first"]: 16 | raise NotImplementedError 17 | self.normalized_shape = (normalized_shape, ) 18 | 19 | def forward(self, x): 20 | if self.data_format == "channels_last": 21 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 22 | elif self.data_format == "channels_first": 23 | u = x.mean(1, keepdim=True) 24 | s = (x - u).pow(2).mean(1, keepdim=True) 25 | x = (x - u) / torch.sqrt(s + self.eps) 26 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 27 | return x 28 | 29 | # 
convolutional channel mixer (CCM) 30 | class CCM(nn.Module): 31 | def __init__(self, dim, growth_rate=2.0): 32 | super().__init__() 33 | hidden_dim = int(dim * growth_rate) 34 | 35 | self.ccm = nn.Sequential( 36 | nn.Conv2d(dim, hidden_dim, 3, 1, 1), 37 | nn.GELU(), 38 | nn.Conv2d(hidden_dim, dim, 1, 1, 0) 39 | ) 40 | 41 | def forward(self, x): 42 | return self.ccm(x) 43 | 44 | 45 | # spatially-adaptive feature modulation (SAFM) 46 | class SAFM(nn.Module): 47 | def __init__(self, dim, n_levels=4): 48 | super().__init__() 49 | self.n_levels = n_levels 50 | chunk_dim = dim // n_levels 51 | 52 | # Spatial Weighting 53 | self.mfr = nn.ModuleList( 54 | [nn.Conv2d(chunk_dim, chunk_dim, 3, 1, 1, groups=chunk_dim) for i in range(self.n_levels)]) 55 | 56 | # # Feature Aggregation 57 | self.aggr = nn.Conv2d(dim, dim, 1, 1, 0) 58 | 59 | # Activation 60 | self.act = nn.GELU() 61 | 62 | def forward(self, x): 63 | h, w = x.size()[-2:] 64 | 65 | xc = x.chunk(self.n_levels, dim=1) 66 | out = [] 67 | for i in range(self.n_levels): 68 | if i > 0: 69 | p_size = (h // 2 ** i, w // 2 ** i) 70 | s = F.adaptive_max_pool2d(xc[i], p_size) 71 | s = self.mfr[i](s) 72 | s = F.interpolate(s, size=(h, w), mode='nearest') 73 | else: 74 | s = self.mfr[i](xc[i]) 75 | out.append(s) 76 | 77 | out = self.aggr(torch.cat(out, dim=1)) 78 | out = self.act(out) * x 79 | return out 80 | 81 | # feature mixing module(FMM) 82 | class FMM(nn.Module): 83 | def __init__(self, dim, ffn_scale=2.0): 84 | super().__init__() 85 | 86 | self.norm1 = LayerNorm(dim) 87 | self.norm2 = LayerNorm(dim) 88 | 89 | # Multiscale Block 90 | self.safm = SAFM(dim) 91 | # Feedforward layer 92 | self.ccm = CCM(dim, ffn_scale) 93 | 94 | def forward(self, x): 95 | x = self.safm(self.norm1(x)) + x 96 | x = self.ccm(self.norm2(x)) + x 97 | return x 98 | 99 | if __name__ == '__main__': 100 | dim = 64 101 | block = FMM(dim) 102 | input = torch.randn(1, dim, 32, 32) 103 | output = block(input) 104 | 105 | # 打印输入和输出的形状 106 | print(input.size()) 107 | print(output.size()) --------------------------------------------------------------------------------
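Stitching note: every 2-D module above takes a (B, C, H, W) tensor and returns one of the same shape, so each block can be dropped between the convolution stages of an existing backbone without other changes. The sketch below is a minimal, illustrative example of that pattern and is not taken from any of the cited papers; the class name ConvStageWithPCBAM and the channel sizes are assumptions made for the demo, and the import assumes the script is run from the repository root where PCBAM.py lives. The same recipe applies to the other blocks, with one wrinkle: CAA returns only the attention factor, so its output has to be multiplied back onto the features (x = x * caa(x)) rather than used directly.

import torch
import torch.nn as nn

from PCBAM import PCBAM  # PCBAM.py sits at the repository root


class ConvStageWithPCBAM(nn.Module):
    """Illustrative sketch: a plain 3x3 conv stage followed by a plug-in PCBAM block."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )
        # PCBAM keeps the (B, C, H, W) shape, so it can be appended to any conv stage
        self.attn = PCBAM(in_channels=out_channels)

    def forward(self, x):
        x = self.conv(x)   # B, out_channels, H, W
        x = self.attn(x)   # position + channel + spatial attention, shape unchanged
        return x


if __name__ == '__main__':
    block = ConvStageWithPCBAM(3, 64)
    input = torch.rand(2, 3, 32, 32)
    output = block(input)
    print(input.size())
    print(output.size())  # torch.Size([2, 64, 32, 32])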