├── src └── __init__.py ├── generation_idea_template ├── probiou │ ├── seed_ideas.json │ ├── prompt.json │ └── experiment.py ├── plane_voronoi │ ├── seed_ideas.json │ ├── code │ │ ├── img │ │ │ ├── voronoi.jpg │ │ │ └── voronoi_adaptive.jpg │ │ └── graph_based_voronoi.py │ └── prompt.json ├── coordattention │ ├── seed_ideas.json │ ├── prompt.json │ ├── experiment.py │ └── code │ │ ├── xxx-cross_dim_coordatt.py │ │ ├── xxx-hierarchical_coordatt.py │ │ ├── xxa-depthwise_coordatt.py │ │ ├── xaa-global_context_coordatt.py │ │ ├── xaa-temporal_coordatt.py │ │ ├── xaa-channel_mix_coordatt.py │ │ ├── xxx-geo_transform_coordatt.py │ │ ├── xxx-dynamic_complexity_coordatt.py │ │ ├── xxx-dual_domain_coordatt.py │ │ ├── xxx-se_coordatt.py │ │ ├── xxx-shared_params_coordatt.py │ │ ├── xxx-probabilistic_coordatt.py │ │ ├── xxa-edge_aware_coordatt.py │ │ ├── aaa-freq_domain_coordatt.py │ │ ├── xaa-adaptive_coordatt.py │ │ ├── xxx-content_adaptive_coordatt.py │ │ ├── xxx-multi_scale_coordatt.py │ │ ├── xxx-sparse_coordatt.py │ │ ├── xxa-nonlocal_coordatt.py │ │ └── xxa-deformable_pooling_coordatt.py ├── coordattention-gemini │ ├── seed_ideas.json │ ├── prompt.json │ ├── experiment.py │ └── code │ │ ├── pre_pool_depthwise_spatial.py │ │ ├── separate_learnable_fusion.py │ │ ├── modulated_post_sigmoid_attention.py │ │ ├── sigmoid_weighted_interaction_group_conv.py │ │ ├── pre_pool_spatial_adaptive_channel_attention.py │ │ ├── pre_pool_conv_spatial_attention.py │ │ ├── refined_modulated_cross_spatial_interaction.py │ │ ├── early_fusion_addition_projected.py │ │ ├── bottleneck_attention_modulation.py │ │ └── sigmoid_dynamic_weighted_fusion.py └── small_object_attention │ ├── seed_ideas.json │ ├── prompt.json │ ├── experiment.py │ └── code │ ├── scale_normalization_attention.py │ ├── hierarchical_attention_scaling.py │ ├── semantic_attention.py │ ├── meta_attention.py │ ├── spatial_channel_attention.py │ ├── simulated_temporal_attention.py │ ├── cross_channel_attention.py │ ├── dynamic_data_transformation.py │ ├── dual_attention_integration.py │ ├── multi_resolution_attention.py │ ├── contrastive_attention_enhancement.py │ ├── quality_adaptive_attention.py │ ├── internal_attention_bootstrapping.py │ ├── sparsity_attention.py │ ├── contextual_attention.py │ ├── task_adaptive_attention.py │ ├── pyramid_attention.py │ ├── adaptive_complexity_attention.py │ ├── adaptive_gating_attention.py │ ├── denoising_attention.py │ ├── temporal_attention_fusion.py │ ├── simplified_graph_attention.py │ ├── geometric_transformation_attention.py │ ├── dynamic_attention_selection.py │ ├── color_channel_fusion_attention.py │ ├── uncertainty_guided_attention.py │ └── contrastive_learning_integration.py ├── paper_review_example ├── attention.pdf ├── gan_diffusion.pdf └── layerwise_lr_grokking.pdf ├── .env.example ├── requirements.txt ├── paper_review_example.py ├── generation_idea_example.py ├── review.txt ├── README.md └── .gitignore /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /generation_idea_template/probiou/seed_ideas.json: -------------------------------------------------------------------------------- 1 | [ 2 | 3 | ] -------------------------------------------------------------------------------- /generation_idea_template/plane_voronoi/seed_ideas.json: -------------------------------------------------------------------------------- 1 | [ 2 | 3 | ] 
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/seed_ideas.json:
--------------------------------------------------------------------------------
1 | [
2 | 
3 | ]
--------------------------------------------------------------------------------
/generation_idea_template/coordattention-gemini/seed_ideas.json:
--------------------------------------------------------------------------------
1 | [
2 | 
3 | ]
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/seed_ideas.json:
--------------------------------------------------------------------------------
1 | [
2 | 
3 | ]
--------------------------------------------------------------------------------
/paper_review_example/attention.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KMnO4-zx/paper-agent/HEAD/paper_review_example/attention.pdf
--------------------------------------------------------------------------------
/paper_review_example/gan_diffusion.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KMnO4-zx/paper-agent/HEAD/paper_review_example/gan_diffusion.pdf
--------------------------------------------------------------------------------
/paper_review_example/layerwise_lr_grokking.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KMnO4-zx/paper-agent/HEAD/paper_review_example/layerwise_lr_grokking.pdf
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=''
2 | # OPENAI_BASE_URL=''
3 | 
4 | DEEPSEEK_API_KEY=''
5 | DEEPSEEK_BASE_URL='https://api.deepseek.com'
6 | 
7 | # API key for Semantic Scholar (S2)
8 | S2_API_KEY=''
--------------------------------------------------------------------------------
/generation_idea_template/plane_voronoi/code/img/voronoi.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KMnO4-zx/paper-agent/HEAD/generation_idea_template/plane_voronoi/code/img/voronoi.jpg
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # LLM APIs
2 | backoff
3 | openai
4 | # PDF parsing
5 | pypdf
6 | pymupdf4llm
7 | # Common Requirements
8 | numpy
9 | tqdm
10 | # env
11 | python-dotenv
12 | 
--------------------------------------------------------------------------------
/generation_idea_template/plane_voronoi/code/img/voronoi_adaptive.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KMnO4-zx/paper-agent/HEAD/generation_idea_template/plane_voronoi/code/img/voronoi_adaptive.jpg
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/prompt.json:
--------------------------------------------------------------------------------
1 | {
2 |     "system": "You are an ambitious AI PhD student who is looking to publish a paper that will contribute significantly to the field.",
3 |     "task_description": "You are given the following file to work with; improve it to better detect small targets in object detection."
4 | }
--------------------------------------------------------------------------------
/generation_idea_template/plane_voronoi/prompt.json:
--------------------------------------------------------------------------------
1 | {
2 |     "system": "You are an ambitious AI PhD student who is looking to publish a paper that will contribute significantly to the field.",
3 |     "task_description": "You are given the following file to work with; improve the algorithm so that it generates Voronoi diagrams more efficiently."
4 | }
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/prompt.json:
--------------------------------------------------------------------------------
1 | {
2 |     "system": "You are an ambitious AI PhD student who is looking to publish a paper that will contribute significantly to the field.",
3 |     "task_description": "You are given the following file to work with; you can improve this attention module by adapting its feature extraction and lightweight attention mechanisms."
4 | }
--------------------------------------------------------------------------------
/generation_idea_template/coordattention-gemini/prompt.json:
--------------------------------------------------------------------------------
1 | {
2 |     "system": "You are an ambitious AI PhD student who is looking to publish a paper that will contribute significantly to the field.",
3 |     "task_description": "You are given the following file to work with; you can improve this attention module by adapting its feature extraction and lightweight attention mechanisms."
4 | }
--------------------------------------------------------------------------------
/generation_idea_template/probiou/prompt.json:
--------------------------------------------------------------------------------
1 | {
2 |     "system": "You are an ambitious AI PhD student who is looking to publish a paper that will contribute significantly to the field.",
3 |     "task_description": "You are provided with the following file to work with. Without modifying the core ProbIoU algorithm, analyze the shortcomings of CIoU and propose a new, optimized IoU algorithm."
4 | }
--------------------------------------------------------------------------------
/paper_review_example.py:
--------------------------------------------------------------------------------
1 | from src.perform_review import load_paper, perform_review
2 | from src.prompt import reviewer_system_prompt_neg, reviewer_system_prompt_base, reviewer_system_prompt_pos
3 | from openai import OpenAI
4 | import json
5 | import os
6 | 
7 | import pprint
8 | 
9 | # gpt-4o
10 | openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), base_url=os.getenv("OPENAI_BASE_URL"))
11 | 
12 | # deepseek-chat
13 | deepseek_client = OpenAI(api_key=os.getenv('DEEPSEEK_API_KEY'), base_url=os.getenv('DEEPSEEK_BASE_URL'))
14 | 
15 | # load paper
16 | text = load_paper("./paper_review_example/attention.pdf")
17 | 
18 | """
19 | reviewer_system_prompt: there are three strictness levels
20 | reviewer_system_prompt_neg : strict mode
21 | reviewer_system_prompt_base : moderate mode
22 | reviewer_system_prompt_pos : lenient mode
23 | """
24 | # Using gpt-4o for the review is recommended; deepseek-chat may turn up some unexpected surprises~
25 | review = perform_review(text, 'gpt-4o-2024-08-06', openai_client, num_reviews_ensemble=1, num_reflections=2, reviewer_system_prompt=reviewer_system_prompt_neg)
26 | 
27 | with open('review.txt', 'a') as f:
28 |     json.dump(review, f, indent=4)
29 |     f.write('\n\n\n')  # add blank lines so that multiple JSON objects stay separated
30 | 
31 | pprint.pp(review)
--------------------------------------------------------------------------------
/generation_idea_example.py:
--------------------------------------------------------------------------------
1 | from src.generate_idea import generate_ideas, check_idea_novelty, generation_idea_code
2 | from openai import OpenAI
3 | import json
4 | import os
5 | import pprint
6 | 
7 | from dotenv import load_dotenv, find_dotenv
8 | _ = load_dotenv(find_dotenv())
9 | 
10 | # gpt-4o
11 | openai_model = "gpt-4o-2024-08-06"
12 | openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), base_url=os.getenv("OPENAI_BASE_URL"))
13 | 
14 | # deepseek-chat
15 | deepseek_model = "deepseek-chat"
16 | deepseek_client = OpenAI(api_key=os.getenv('DEEPSEEK_API_KEY'), base_url=os.getenv('DEEPSEEK_BASE_URL'))
17 | 
18 | base_dir = './generation_idea_template/probiou/'
19 | 
20 | # generate ideas
21 | # The generated ideas are saved to a file automatically; subsequent runs load them from that file directly
22 | # ideas = generate_ideas(
23 | #     base_dir=base_dir,
24 | #     client=openai_client,
25 | #     model=openai_model,
26 | #     skip_generation=False,
27 | #     max_num_generations=20,
28 | #     num_reflections=5,
29 | # )
30 | 
31 | with open(os.path.join(base_dir, 'ideas.json'), 'r') as f:
32 |     ideas = json.load(f)
33 | 
34 | # check novelty
35 | # The novelty results are written back to ideas.json automatically; subsequent runs load them from that file directly
36 | # novelty_ideas = check_idea_novelty(
37 | #     ideas=ideas,
38 | #     base_dir=base_dir,
39 | #     client=openai_client,
40 | #     model=openai_model,
41 | # )
42 | 
43 | # generate code for the ideas; the code is saved to the base_dir/code directory automatically
44 | generation_idea_code(base_dir=base_dir, client=openai_client, model=openai_model, num_reflections=5)
45 | 
46 | pprint.pp(ideas)
47 | 
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/experiment.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import flatten, nn
4 | from torch.nn import init
5 | from torch.nn.modules.activation import ReLU
6 | from torch.nn.modules.batchnorm import BatchNorm2d
7 | from torch.nn import functional as F
8 | 
9 | 
10 | class SEAttention(nn.Module):
11 | 
12 |     def
__init__(self, channel=512,reduction=16): 13 | super().__init__() 14 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 15 | self.fc = nn.Sequential( 16 | nn.Linear(channel, channel // reduction, bias=False), 17 | nn.ReLU(inplace=True), 18 | nn.Linear(channel // reduction, channel, bias=False), 19 | nn.Sigmoid() 20 | ) 21 | 22 | def init_weights(self): 23 | for m in self.modules(): 24 | if isinstance(m, nn.Conv2d): 25 | init.kaiming_normal_(m.weight, mode='fan_out') 26 | if m.bias is not None: 27 | init.constant_(m.bias, 0) 28 | elif isinstance(m, nn.BatchNorm2d): 29 | init.constant_(m.weight, 1) 30 | init.constant_(m.bias, 0) 31 | elif isinstance(m, nn.Linear): 32 | init.normal_(m.weight, std=0.001) 33 | if m.bias is not None: 34 | init.constant_(m.bias, 0) 35 | 36 | def forward(self, x): 37 | b, c, _, _ = x.size() 38 | y = self.avg_pool(x).view(b, c) 39 | y = self.fc(y).view(b, c, 1, 1) 40 | return x * y.expand_as(x) 41 | 42 | if __name__ == '__main__': 43 | model = SEAttention() 44 | model.init_weights() 45 | input = torch.randn(1, 512, 7, 7) 46 | output = model(input) 47 | print(output.shape) -------------------------------------------------------------------------------- /review.txt: -------------------------------------------------------------------------------- 1 | { 2 | "Summary": "The paper presents the Transformer, a novel neural network architecture that utilizes attention mechanisms exclusively, eliminating the need for recurrence or convolution. It demonstrates superior performance on machine translation tasks, with enhanced parallelization and reduced training time compared to existing models.", 3 | "Strengths": [ 4 | "Introduces a novel architecture based entirely on attention mechanisms.", 5 | "Achieves state-of-the-art results on machine translation tasks.", 6 | "Improves computational efficiency and parallelization.", 7 | "Comprehensive experimental evaluation and analysis.", 8 | "Clear and well-organized writing." 9 | ], 10 | "Weaknesses": [ 11 | "Limited exploration of tasks beyond machine translation.", 12 | "Potential limitations in handling very long sequences not extensively discussed.", 13 | "Lacks discussion of potential societal impacts." 14 | ], 15 | "Originality": 4, 16 | "Quality": 4, 17 | "Clarity": 4, 18 | "Significance": 4, 19 | "Questions": [ 20 | "How does the model perform on tasks beyond machine translation?", 21 | "Are there any specific challenges anticipated for applying the Transformer to very long sequences?", 22 | "Could the authors elaborate on any potential ethical concerns or societal impacts?" 23 | ], 24 | "Limitations": [ 25 | "The model may have limitations with very long sequences due to fixed positional encodings.", 26 | "Lack of exploration in diverse application domains could imply limited generalization evidence.", 27 | "Does not address the potential negative societal impacts or ethical considerations of deploying such models." 
28 | ], 29 | "Ethical Concerns": false, 30 | "Soundness": 4, 31 | "Presentation": 4, 32 | "Contribution": 4, 33 | "Overall": 9, 34 | "Confidence": 5, 35 | "Decision": "Accept" 36 | } 37 | 38 | 39 | -------------------------------------------------------------------------------- /generation_idea_template/coordattention-gemini/experiment.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class h_sigmoid(nn.Module): 7 | def __init__(self, inplace=True): 8 | super(h_sigmoid, self).__init__() 9 | self.relu = nn.ReLU6(inplace=inplace) 10 | 11 | def forward(self, x): 12 | return self.relu(x + 3) / 6 13 | 14 | 15 | class h_swish(nn.Module): 16 | def __init__(self, inplace=True): 17 | super(h_swish, self).__init__() 18 | self.sigmoid = h_sigmoid(inplace=inplace) 19 | 20 | def forward(self, x): 21 | return x * self.sigmoid(x) 22 | 23 | 24 | class CoordAtt(nn.Module): 25 | def __init__(self, inp, reduction=32): 26 | super(CoordAtt, self).__init__() 27 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 28 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 29 | 30 | mip = max(8, inp // reduction) 31 | 32 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 33 | self.bn1 = nn.BatchNorm2d(mip) 34 | self.act = h_swish() 35 | 36 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 37 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 38 | 39 | def forward(self, x): 40 | identity = x 41 | 42 | n, c, h, w = x.size() 43 | x_h = self.pool_h(x) 44 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 45 | 46 | y = torch.cat([x_h, x_w], dim=2) 47 | y = self.conv1(y) 48 | y = self.bn1(y) 49 | y = self.act(y) 50 | 51 | x_h, x_w = torch.split(y, [h, w], dim=2) 52 | x_w = x_w.permute(0, 1, 3, 2) 53 | 54 | a_h = self.conv_h(x_h).sigmoid() 55 | a_w = self.conv_w(x_w).sigmoid() 56 | 57 | out = identity * a_w * a_h 58 | 59 | return out 60 | 61 | if __name__ == '__main__': 62 | x = torch.randn(2, 64, 32, 32) 63 | att = CoordAtt(inp=64, reduction=32) 64 | out = att(x) 65 | print("输入尺寸:", x.shape) 66 | print("输出尺寸:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention/experiment.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class h_sigmoid(nn.Module): 7 | def __init__(self, inplace=True): 8 | super(h_sigmoid, self).__init__() 9 | self.relu = nn.ReLU6(inplace=inplace) 10 | 11 | def forward(self, x): 12 | return self.relu(x + 3) / 6 13 | 14 | 15 | class h_swish(nn.Module): 16 | def __init__(self, inplace=True): 17 | super(h_swish, self).__init__() 18 | self.sigmoid = h_sigmoid(inplace=inplace) 19 | 20 | def forward(self, x): 21 | return x * self.sigmoid(x) 22 | 23 | 24 | class CoordAtt(nn.Module): 25 | def __init__(self, inp, reduction=32): 26 | super(CoordAtt, self).__init__() 27 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 28 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 29 | 30 | mip = max(8, inp // reduction) 31 | 32 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 33 | self.bn1 = nn.BatchNorm2d(mip) 34 | self.act = h_swish() 35 | 36 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 37 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 38 | 39 | def forward(self, x): # x (2, 64, 32, 
32)
40 |         identity = x  # residual connection
41 | 
42 |         n, c, h, w = x.size()  # get the h and w dimensions
43 |         x_h = self.pool_h(x)  # (2, 64, 32, 1)
44 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)  # (2, 64, 1, 32) --> (2, 64, 32, 1)
45 | 
46 |         y = torch.cat([x_h, x_w], dim=2)  # (2, 64, 64, 1)
47 |         y = self.conv1(y)  # (2, 8, 64, 1)
48 |         y = self.bn1(y)  # (2, 8, 64, 1)
49 |         y = self.act(y)  # (2, 8, 64, 1)
50 | 
51 |         x_h, x_w = torch.split(y, [h, w], dim=2)  # (2, 8, 32, 1), (2, 8, 32, 1)
52 |         x_w = x_w.permute(0, 1, 3, 2)  # (2, 8, 1, 32)
53 | 
54 |         a_h = self.conv_h(x_h).sigmoid()  # (2, 64, 32, 1)
55 |         a_w = self.conv_w(x_w).sigmoid()  # (2, 64, 1, 32)
56 |         out = identity * a_w * a_h  # (2, 64, 32, 32)
57 |         return out
58 | 
59 | if __name__ == '__main__':
60 |     x = torch.randn(2, 64, 32, 32)
61 |     att = CoordAtt(inp=64, reduction=32)
62 |     out = att(x)
63 |     print("输入尺寸:", x.shape)
64 |     print("输出尺寸:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/code/scale_normalization_attention.py:
--------------------------------------------------------------------------------
1 | """
2 | Enhance SEAttention by introducing a scale normalization layer that preprocesses feature maps to emphasize smaller targets
3 | Implement this using a learned scaling factor that dynamically adjusts feature intensities based on size relevance before applying SEAttention
4 | Modify the forward function to include this normalization step, ensuring minimal computational overhead
5 | Evaluate the model's effectiveness by comparing precision and recall metrics, alongside visualizations of attention focus on small targets, using synthetic datasets
6 | 
7 | """
8 | 
9 | # Modified code
10 | 
11 | import numpy as np
12 | import torch
13 | from torch import flatten, nn
14 | from torch.nn import init
15 | from torch.nn.modules.activation import ReLU
16 | from torch.nn.modules.batchnorm import BatchNorm2d
17 | from torch.nn import functional as F
18 | 
19 | 
20 | class SEAttention(nn.Module):
21 | 
22 |     def __init__(self, channel=512, reduction=16):
23 |         super().__init__()
24 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
25 |         self.fc = nn.Sequential(
26 |             nn.Linear(channel, channel // reduction, bias=False),
27 |             nn.ReLU(inplace=True),
28 |             nn.Linear(channel // reduction, channel, bias=False),
29 |             nn.Sigmoid()
30 |         )
31 |         self.scale_norm = nn.Parameter(torch.ones(1, channel, 1, 1))
32 | 
33 |     def init_weights(self):
34 |         for m in self.modules():
35 |             if isinstance(m, nn.Conv2d):
36 |                 init.kaiming_normal_(m.weight, mode='fan_out')
37 |                 if m.bias is not None:
38 |                     init.constant_(m.bias, 0)
39 |             elif isinstance(m, nn.BatchNorm2d):
40 |                 init.constant_(m.weight, 1)
41 |                 init.constant_(m.bias, 0)
42 |             elif isinstance(m, nn.Linear):
43 |                 init.normal_(m.weight, std=0.001)
44 |                 if m.bias is not None:
45 |                     init.constant_(m.bias, 0)
46 | 
47 |     def forward(self, x):
48 |         # Scale normalization step
49 |         x = x * self.scale_norm
50 | 
51 |         # SEAttention mechanism
52 |         b, c, _, _ = x.size()
53 |         y = self.avg_pool(x).view(b, c)
54 |         y = self.fc(y).view(b, c, 1, 1)
55 |         return x * y.expand_as(x)
56 | 
57 | if __name__ == '__main__':
58 |     model = SEAttention()
59 |     model.init_weights()
60 |     input = torch.randn(1, 512, 7, 7)
61 |     output = model(input)
62 |     print(output.shape)
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/code/hierarchical_attention_scaling.py:
--------------------------------------------------------------------------------
1 | """
2 | Modify the SEAttention class to incorporate a dynamic scaling factor for attention weights
3 | Implement a new function that computes scaling factors based on the spatial dimensions of feature maps
4 | Integrate this function into the forward pass of SEAttention to adjust attention weights dynamically
5 | Evaluate performance using precision, recall, and F1-score on small target detection tasks, comparing against the baseline SEAttention model and other enhanced models to demonstrate improvements in detecting small targets
6 | 
7 | """
8 | 
9 | import numpy as np
10 | import torch
11 | from torch import flatten, nn
12 | from torch.nn import init
13 | from torch.nn.modules.activation import ReLU
14 | from torch.nn.modules.batchnorm import BatchNorm2d
15 | from torch.nn import functional as F
16 | 
17 | class SEAttention(nn.Module):
18 | 
19 |     def __init__(self, channel=512, reduction=16):
20 |         super().__init__()
21 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
22 |         self.fc = nn.Sequential(
23 |             nn.Linear(channel, channel // reduction, bias=False),
24 |             nn.ReLU(inplace=True),
25 |             nn.Linear(channel // reduction, channel, bias=False),
26 |             nn.Sigmoid()
27 |         )
28 | 
29 |     def init_weights(self):
30 |         for m in self.modules():
31 |             if isinstance(m, nn.Conv2d):
32 |                 init.kaiming_normal_(m.weight, mode='fan_out')
33 |                 if m.bias is not None:
34 |                     init.constant_(m.bias, 0)
35 |             elif isinstance(m, nn.BatchNorm2d):
36 |                 init.constant_(m.weight, 1)
37 |                 init.constant_(m.bias, 0)
38 |             elif isinstance(m, nn.Linear):
39 |                 init.normal_(m.weight, std=0.001)
40 |                 if m.bias is not None:
41 |                     init.constant_(m.bias, 0)
42 | 
43 |     def compute_scaling_factor(self, x):
44 |         _, _, h, w = x.size()
45 |         # Example scaling factor: inverse of the sum of spatial dimensions
46 |         return 1.0 / (h + w)
47 | 
48 |     def forward(self, x):
49 |         b, c, _, _ = x.size()
50 |         scaling_factor = self.compute_scaling_factor(x)
51 |         y = self.avg_pool(x).view(b, c)
52 |         y = self.fc(y).view(b, c, 1, 1)
53 |         y = y * scaling_factor
54 |         return x * y.expand_as(x)
55 | 
56 | if __name__ == '__main__':
57 |     model = SEAttention()
58 |     model.init_weights()
59 |     input = torch.randn(1, 512, 7, 7)
60 |     output = model(input)
61 |     print(output.shape)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Paper-Agent
2 | 
3 | > *This project is based on the [AI-Scientist](https://github.com/SakanaAI/AI-Scientist) project. Paper: [The AI Scientist: Towards Fully Automated Open-Ended Scientific Discovery](https://arxiv.org/abs/2408.06292)*
4 | 
5 | AI-Scientist is an excellent project with a cleverly designed experimental pipeline that is well worth studying. Current models, however, still fall short when it comes to writing papers and modifying code to run experiments. So, after carefully reading the AI-Scientist paper and codebase, the `generation ideas` and `paper review` parts of the code were extracted, lightly modified, and annotated with detailed Chinese comments to make them easier to learn from.
6 | 
7 | Every paper is born out of a long process of thinking and experimentation, and today's LLMs are probably not yet up to the task of running experiments. They can, however, offer us some bolder ideas, as well as reviews of papers. A review can surface unexpected strengths and weaknesses in a paper (strengths: tell a better story; weaknesses: play down what hurts, play up what helps, and leave out what you must! Haha). Hopefully, LLM idea generation and paper review can turn you into a ***paper printing machine!***
8 | 
9 | > *All prompts used by the project can be found in `src/prompt.py`.*
10 | 
11 | ## Usage
12 | 
13 | ### .env
14 | 
15 | First, configure a few environment variables; see the `.env.example` file for reference.
16 | 
17 | ```
18 | OPENAI_API_KEY=''
19 | # OPENAI_BASE_URL=''
20 | 
21 | DEEPSEEK_API_KEY=''
22 | DEEPSEEK_BASE_URL='https://api.deepseek.com'
23 | 
24 | # API key for Semantic Scholar (S2)
25 | S2_API_KEY=''
26 | ```
27 | 
28 | > *An S2 API key can be requested from [Semantic Scholar](https://www.semanticscholar.org/).*
29 | 
30 | ### Generation Ideas
31 | 
32 | First, prepare a few files: `experiment.py`, `prompt.json`, and `seed_ideas.json`.
33 | 
34 | - `experiment.py` is your experiment/module code; required.
35 | - `prompt.json` describes the task; required (a minimal example follows this list).
36 | - `seed_ideas.json` holds seed ideas for your experiment code; it may be empty, but the file must exist.
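37 | 
38 | For example, the `prompt.json` below is taken from the `probiou` template; adapt the `task_description` to your own module. `seed_ideas.json` can start out as just an empty list `[]`.
39 | 
40 | ```json
41 | {
42 |     "system": "You are an ambitious AI PhD student who is looking to publish a paper that will contribute significantly to the field.",
43 |     "task_description": "You are provided with the following file to work with. Without modifying the core ProbIoU algorithm, analyze the shortcomings of CIoU and propose a new, optimized IoU algorithm."
44 | }
45 | ```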
46 | 
47 | > See the examples in the `generation_idea_template` folder.
48 | 
49 | To run the code, see the `generation_idea_example.py` file; you can also run that file directly.
50 | 
51 | ```bash
52 | python generation_idea_example.py
53 | ```
54 | 
55 | > note: the results are also saved to the `ideas.json` file under `base_dir`.
56 | 
57 | #### Code walkthrough
58 | 
59 | The code in `generation_idea_example.py` is roughly as follows:
60 | 
61 | - `generate_ideas`: generates ideas.
62 | - `check_idea_novelty`: checks the novelty of the generated ideas; this requires a Semantic Scholar API key.
63 | - `generation_idea_code`: generates code from the ideas produced by the two functions above. If novelty has already been checked, it only generates code for ideas marked `novel=True`; otherwise it generates code for all ideas.
64 | 
65 | ```python
66 | # gpt-4o
67 | # generate ideas
68 | # The generated ideas are saved to a file automatically; subsequent runs load them from that file directly
69 | ideas = generate_ideas(
70 |     base_dir=base_dir,
71 |     client=openai_client,
72 |     model=openai_model,
73 |     skip_generation=False,
74 |     max_num_generations=20,
75 |     num_reflections=5,
76 | )
77 | 
78 | # check novelty
79 | # The novelty results are written back to ideas.json automatically; subsequent runs load them from that file directly
80 | novelty_ideas = check_idea_novelty(
81 |     ideas=ideas,
82 |     base_dir=base_dir,
83 |     client=openai_client,
84 |     model=openai_model,
85 | )
86 | 
87 | # generate code for the ideas; the code is saved to the base_dir/code directory automatically
88 | generation_idea_code(base_dir=base_dir, client=openai_client, model=openai_model, num_reflections=5)
89 | 
90 | pprint.pp(novelty_ideas)
91 | ```
92 | 
93 | ### Paper Review
94 | 
95 | To run the code, see the `paper_review_example.py` file; you can also run that file directly.
96 | 
97 | ```bash
98 | python paper_review_example.py
99 | ```
100 | 
101 | > note: the results are also saved to the `review.txt` file in the repository root.
102 | 
103 | ## Blog
104 | 
105 | *A blog post about AI-Scientist may follow later (time permitting).*
106 | 
--------------------------------------------------------------------------------
/generation_idea_template/coordattention-gemini/code/pre_pool_depthwise_spatial.py:
--------------------------------------------------------------------------------
1 | """
2 | Modify the `CoordAtt` module
3 | Before pooling, apply a depthwise convolution to mix the spatial information
4 | Then, perform the standard height and width pooling
5 | Modify the `__init__` to include the depthwise convolution
6 | Modify the `forward` to implement the depthwise convolution, and then the standard pooling
7 | The rest of the forward pass remains unchanged
8 | Compare the output with the baseline using the same test input and observe changes
9 | This involves adding a depthwise conv, and modifying the forward pass
10 | 
11 | """
12 | 
13 | import torch
14 | import torch.nn as nn
15 | import torch.nn.functional as F
16 | 
17 | 
18 | class h_sigmoid(nn.Module):
19 |     def __init__(self, inplace=True):
20 |         super(h_sigmoid, self).__init__()
21 |         self.relu = nn.ReLU6(inplace=inplace)
22 | 
23 |     def forward(self, x):
24 |         return self.relu(x + 3) / 6
25 | 
26 | 
27 | class h_swish(nn.Module):
28 |     def __init__(self, inplace=True):
29 |         super(h_swish, self).__init__()
30 |         self.sigmoid = h_sigmoid(inplace=inplace)
31 | 
32 |     def forward(self, x):
33 |         return x * self.sigmoid(x)
34 | 
35 | 
36 | class CoordAtt(nn.Module):
37 |     def __init__(self, inp, reduction=32):
38 |         super(CoordAtt, self).__init__()
39 |         # Depthwise convolution before pooling
40 |         self.depthwise = nn.Conv2d(inp, inp, kernel_size=3, stride=1, padding=1, groups=inp)
41 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
42 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
43 | 
44 |         mip = max(8, inp // reduction)
45 | 
46 |         self.conv1 = 
nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 47 | self.bn1 = nn.BatchNorm2d(mip) 48 | self.act = h_swish() 49 | 50 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 51 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 52 | 53 | def forward(self, x): 54 | identity = x 55 | 56 | # Apply depthwise conv 57 | x = self.depthwise(x) 58 | 59 | n, c, h, w = x.size() 60 | x_h = self.pool_h(x) 61 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 62 | 63 | y = torch.cat([x_h, x_w], dim=2) 64 | y = self.conv1(y) 65 | y = self.bn1(y) 66 | y = self.act(y) 67 | 68 | x_h, x_w = torch.split(y, [h, w], dim=2) 69 | x_w = x_w.permute(0, 1, 3, 2) 70 | 71 | a_h = self.conv_h(x_h).sigmoid() 72 | a_w = self.conv_w(x_w).sigmoid() 73 | 74 | out = identity * a_w * a_h 75 | 76 | return out 77 | 78 | if __name__ == '__main__': 79 | x = torch.randn(2, 64, 32, 32) 80 | att = CoordAtt(inp=64, reduction=32) 81 | out = att(x) 82 | print("输入尺寸:", x.shape) 83 | print("输出尺寸:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/semantic_attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extend SEAttention by integrating a semantic attention module that utilizes a simple pooling strategy to identify salient semantic features 3 | Implement this by adding a global max pooling layer to extract prominent features, followed by a learnable attention layer that assigns weights based on semantic relevance 4 | Modify the forward function to incorporate this semantic attention after the channel attention 5 | Evaluate the model's effectiveness by comparing detection performance on synthetic datasets, focusing on improvements in semantic understanding and detection accuracy 6 | 7 | """ 8 | 9 | # Modified code 10 | 11 | import numpy as np 12 | import torch 13 | from torch import flatten, nn 14 | from torch.nn import init 15 | from torch.nn.modules.activation import ReLU 16 | from torch.nn.modules.batchnorm import BatchNorm2d 17 | from torch.nn import functional as F 18 | 19 | class SEAttention(nn.Module): 20 | 21 | def __init__(self, channel=512, reduction=16): 22 | super().__init__() 23 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 24 | self.fc = nn.Sequential( 25 | nn.Linear(channel, channel // reduction, bias=False), 26 | nn.ReLU(inplace=True), 27 | nn.Linear(channel // reduction, channel, bias=False), 28 | nn.Sigmoid() 29 | ) 30 | # Semantic attention components 31 | self.global_max_pool = nn.AdaptiveMaxPool2d(1) 32 | self.semantic_fc = nn.Sequential( 33 | nn.Linear(channel, channel, bias=False), 34 | nn.Sigmoid() 35 | ) 36 | 37 | def init_weights(self): 38 | for m in self.modules(): 39 | if isinstance(m, nn.Conv2d): 40 | init.kaiming_normal_(m.weight, mode='fan_out') 41 | if m.bias is not None: 42 | init.constant_(m.bias, 0) 43 | elif isinstance(m, nn.BatchNorm2d): 44 | init.constant_(m.weight, 1) 45 | init.constant_(m.bias, 0) 46 | elif isinstance(m, nn.Linear): 47 | init.normal_(m.weight, std=0.001) 48 | if m.bias is not None: 49 | init.constant_(m.bias, 0) 50 | 51 | def forward(self, x): 52 | b, c, _, _ = x.size() 53 | 54 | # Channel attention 55 | y = self.avg_pool(x).view(b, c) 56 | y = self.fc(y).view(b, c, 1, 1) 57 | x = x * y.expand_as(x) 58 | 59 | # Semantic attention 60 | z = self.global_max_pool(x).view(b, c) 61 | z = self.semantic_fc(z).view(b, c, 1, 1) 62 | x = x * z.expand_as(x) 63 | 64 | return x 65 | 66 | if __name__ == 
'__main__': 67 | model = SEAttention() 68 | model.init_weights() 69 | input = torch.randn(1, 512, 7, 7) 70 | output = model(input) 71 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/meta_attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extend SEAttention by incorporating a meta-attention mechanism 3 | Implement this by introducing a secondary attention module, such as an additional SE block or a simple linear transformation, to refine the channel weights produced by the original SEAttention 4 | Modify the forward function to apply this meta-attention after the original attention recalibration while maintaining computational efficiency 5 | Evaluate the model's performance by comparing feature maps and conducting quantitative assessments on synthetic datasets, focusing on improvements in attention focus and detection performance on small targets 6 | 7 | """ 8 | 9 | # Modified code 10 | import numpy as np 11 | import torch 12 | from torch import flatten, nn 13 | from torch.nn import init 14 | from torch.nn.modules.activation import ReLU 15 | from torch.nn.modules.batchnorm import BatchNorm2d 16 | from torch.nn import functional as F 17 | 18 | 19 | class SEAttention(nn.Module): 20 | 21 | def __init__(self, channel=512, reduction=16): 22 | super().__init__() 23 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 24 | self.fc = nn.Sequential( 25 | nn.Linear(channel, channel // reduction, bias=False), 26 | nn.ReLU(inplace=True), 27 | nn.Linear(channel // reduction, channel, bias=False), 28 | nn.Sigmoid() 29 | ) 30 | # Meta-attention module: an additional SE block to refine channel weights 31 | self.meta_fc = nn.Sequential( 32 | nn.Linear(channel, channel // reduction, bias=False), 33 | nn.ReLU(inplace=True), 34 | nn.Linear(channel // reduction, channel, bias=False), 35 | nn.Sigmoid() 36 | ) 37 | 38 | def init_weights(self): 39 | for m in self.modules(): 40 | if isinstance(m, nn.Conv2d): 41 | init.kaiming_normal_(m.weight, mode='fan_out') 42 | if m.bias is not None: 43 | init.constant_(m.bias, 0) 44 | elif isinstance(m, nn.BatchNorm2d): 45 | init.constant_(m.weight, 1) 46 | init.constant_(m.bias, 0) 47 | elif isinstance(m, nn.Linear): 48 | init.normal_(m.weight, std=0.001) 49 | if m.bias is not None: 50 | init.constant_(m.bias, 0) 51 | 52 | def forward(self, x): 53 | b, c, _, _ = x.size() 54 | y = self.avg_pool(x).view(b, c) 55 | y = self.fc(y) # Original SE block 56 | # Apply meta-attention to refine channel weights 57 | y_meta = self.meta_fc(y).view(b, c, 1, 1) 58 | return x * y_meta.expand_as(x) 59 | 60 | if __name__ == '__main__': 61 | model = SEAttention() 62 | model.init_weights() 63 | input = torch.randn(1, 512, 7, 7) 64 | output = model(input) 65 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/spatial_channel_attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extend SEAttention by adding a spatial attention layer 3 | Implement this by introducing a convolutional layer that outputs a spatial attention map with the same height and width as the input feature map 4 | In the forward function, apply spatial attention by element-wise multiplying the spatial attention map with the input feature map, followed by the existing channel attention 5 | Evaluate the model's effectiveness 
by comparing the output feature maps against those from the original SEAttention, using input tensors of varying scales and complexities 6 | Performance can be assessed by visual inspection of feature maps and quantitative analysis using synthetic datasets if available 7 | 8 | """ 9 | 10 | # Modified code 11 | import numpy as np 12 | import torch 13 | from torch import flatten, nn 14 | from torch.nn import init 15 | from torch.nn.modules.activation import ReLU 16 | from torch.nn.modules.batchnorm import BatchNorm2d 17 | from torch.nn import functional as F 18 | 19 | class SEAttention(nn.Module): 20 | 21 | def __init__(self, channel=512, reduction=16): 22 | super().__init__() 23 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 24 | self.fc = nn.Sequential( 25 | nn.Linear(channel, channel // reduction, bias=False), 26 | nn.ReLU(inplace=True), 27 | nn.Linear(channel // reduction, channel, bias=False), 28 | nn.Sigmoid() 29 | ) 30 | 31 | # Spatial attention layer 32 | self.spatial_conv = nn.Conv2d(channel, 1, kernel_size=7, padding=3, bias=False) 33 | self.spatial_sigmoid = nn.Sigmoid() 34 | 35 | def init_weights(self): 36 | for m in self.modules(): 37 | if isinstance(m, nn.Conv2d): 38 | init.kaiming_normal_(m.weight, mode='fan_out') 39 | if m.bias is not None: 40 | init.constant_(m.bias, 0) 41 | elif isinstance(m, nn.BatchNorm2d): 42 | init.constant_(m.weight, 1) 43 | init.constant_(m.bias, 0) 44 | elif isinstance(m, nn.Linear): 45 | init.normal_(m.weight, std=0.001) 46 | if m.bias is not None: 47 | init.constant_(m.bias, 0) 48 | 49 | def forward(self, x): 50 | # Spatial attention 51 | spatial_att = self.spatial_conv(x) 52 | spatial_att = self.spatial_sigmoid(spatial_att) 53 | x = x * spatial_att 54 | 55 | # Channel attention 56 | b, c, _, _ = x.size() 57 | y = self.avg_pool(x).view(b, c) 58 | y = self.fc(y).view(b, c, 1, 1) 59 | return x * y.expand_as(x) 60 | 61 | if __name__ == '__main__': 62 | model = SEAttention() 63 | model.init_weights() 64 | input = torch.randn(1, 512, 7, 7) 65 | output = model(input) 66 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/simulated_temporal_attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extend SEAttention by simulating temporal attention using a sliding window approach on spatial feature maps 3 | Implement this by adding a mechanism that divides feature maps into non-overlapping subregions, treating each as a pseudo-temporal step, and applies attention across these regions using a shared attention mechanism 4 | This should be integrated into the forward function following the channel attention 5 | Evaluate by testing the model on datasets where small targets are embedded in varying spatial contexts within a single image, with performance assessed through quantitative metrics and visualization of feature map focus areas to compare against the original model 6 | 7 | """ 8 | 9 | # Modified code 10 | 11 | import numpy as np 12 | import torch 13 | from torch import flatten, nn 14 | from torch.nn import init 15 | from torch.nn.modules.activation import ReLU 16 | from torch.nn.modules.batchnorm import BatchNorm2d 17 | from torch.nn import functional as F 18 | 19 | 20 | class SEAttention(nn.Module): 21 | 22 | def __init__(self, channel=512, reduction=16, window_size=2): 23 | super().__init__() 24 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 25 | self.fc = nn.Sequential( 26 | nn.Linear(channel, channel // 
reduction, bias=False), 27 | nn.ReLU(inplace=True), 28 | nn.Linear(channel // reduction, channel, bias=False), 29 | nn.Sigmoid() 30 | ) 31 | self.window_size = window_size 32 | 33 | def init_weights(self): 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | init.kaiming_normal_(m.weight, mode='fan_out') 37 | if m.bias is not None: 38 | init.constant_(m.bias, 0) 39 | elif isinstance(m, nn.BatchNorm2d): 40 | init.constant_(m.weight, 1) 41 | init.constant_(m.bias, 0) 42 | elif isinstance(m, nn.Linear): 43 | init.normal_(m.weight, std=0.001) 44 | if m.bias is not None: 45 | init.constant_(m.bias, 0) 46 | 47 | def forward(self, x): 48 | b, c, h, w = x.size() 49 | y = self.avg_pool(x).view(b, c) 50 | y = self.fc(y).view(b, c, 1, 1) 51 | x = x * y.expand_as(x) 52 | 53 | # Simulating temporal attention via sliding window 54 | sw = self.window_size 55 | for i in range(0, h, sw): 56 | for j in range(0, w, sw): 57 | subregion = x[:, :, i:i+sw, j:j+sw] 58 | pooled = subregion.mean(dim=(2, 3), keepdim=True) 59 | x[:, :, i:i+sw, j:j+sw] = subregion * pooled 60 | 61 | return x 62 | 63 | if __name__ == '__main__': 64 | model = SEAttention() 65 | model.init_weights() 66 | input = torch.randn(1, 512, 7, 7) 67 | output = model(input) 68 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention-gemini/code/separate_learnable_fusion.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modify the `CoordAtt` module 3 | After pooling height and width features, apply a separate 1x1 convolution to each of them 4 | Then, perform an element-wise addition of the transformed height and width features 5 | Feed the result to the shared `conv1` 6 | In the `__init__` function, add two 1x1 convolution layers, one for height and one for width 7 | In the `forward` function, implement the separate convolutions and element-wise addition before the shared `conv1` 8 | The rest of the forward pass remains the same 9 | Compare output with the baseline using same test input, observe changes 10 | 11 | """ 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | 17 | 18 | class h_sigmoid(nn.Module): 19 | def __init__(self, inplace=True): 20 | super(h_sigmoid, self).__init__() 21 | self.relu = nn.ReLU6(inplace=inplace) 22 | 23 | def forward(self, x): 24 | return self.relu(x + 3) / 6 25 | 26 | 27 | class h_swish(nn.Module): 28 | def __init__(self, inplace=True): 29 | super(h_swish, self).__init__() 30 | self.sigmoid = h_sigmoid(inplace=inplace) 31 | 32 | def forward(self, x): 33 | return x * self.sigmoid(x) 34 | 35 | 36 | class CoordAtt(nn.Module): 37 | def __init__(self, inp, reduction=32): 38 | super(CoordAtt, self).__init__() 39 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 40 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 41 | 42 | mip = max(8, inp // reduction) 43 | 44 | self.conv_h_proj = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 45 | self.conv_w_proj = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 46 | 47 | 48 | self.conv1 = nn.Conv2d(mip, mip, kernel_size=1, stride=1, padding=0) 49 | self.bn1 = nn.BatchNorm2d(mip) 50 | self.act = h_swish() 51 | 52 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 53 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 54 | 55 | def forward(self, x): 56 | identity = x 57 | 58 | n, c, h, w = x.size() 59 | x_h = self.pool_h(x) 60 | x_w = 
self.pool_w(x).permute(0, 1, 3, 2)
61 | 
62 |         x_h_proj = self.conv_h_proj(x_h)
63 |         x_w_proj = self.conv_w_proj(x_w)
64 | 
65 |         y = x_h_proj + x_w_proj.permute(0, 1, 3, 2)  # broadcast addition yields a fused (n, mip, h, w) map
66 | 
67 |         y = self.conv1(y)
68 |         y = self.bn1(y)
69 |         y = self.act(y)
70 | 
71 |         x_h = y.mean(dim=3, keepdim=True)  # aggregate the fused map into a per-height descriptor (n, mip, h, 1)
72 |         x_w = y.mean(dim=2, keepdim=True)  # aggregate the fused map into a per-width descriptor (n, mip, 1, w)
73 | 
74 |         a_h = self.conv_h(x_h).sigmoid()
75 |         a_w = self.conv_w(x_w).sigmoid()
76 | 
77 |         out = identity * a_w * a_h
78 | 
79 |         return out
80 | 
81 | if __name__ == '__main__':
82 |     x = torch.randn(2, 64, 32, 32)
83 |     att = CoordAtt(inp=64, reduction=32)
84 |     out = att(x)
85 |     print("输入尺寸:", x.shape)
86 |     print("输出尺寸:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/code/xxx-cross_dim_coordatt.py:
--------------------------------------------------------------------------------
1 | """
2 | Modify the CoordAtt class to include a cross-dimensional attention mechanism
3 | Implement an attention mechanism that captures interactions across both height and width dimensions simultaneously using a form of matrix multiplication
4 | Adjust the forward method to compute these attention weights and integrate them with the existing coordinate attention features before applying them to the input
5 | Evaluate the impact on feature representation by testing on a small benchmark dataset, comparing performance and computational efficiency against the original CoordAtt and other variants
6 | 
7 | """
8 | 
9 | # Modified code
10 | 
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 | 
15 | class h_sigmoid(nn.Module):
16 |     def __init__(self, inplace=True):
17 |         super(h_sigmoid, self).__init__()
18 |         self.relu = nn.ReLU6(inplace=inplace)
19 | 
20 |     def forward(self, x):
21 |         return self.relu(x + 3) / 6
22 | 
23 | class h_swish(nn.Module):
24 |     def __init__(self, inplace=True):
25 |         super(h_swish, self).__init__()
26 |         self.sigmoid = h_sigmoid(inplace=inplace)
27 | 
28 |     def forward(self, x):
29 |         return x * self.sigmoid(x)
30 | 
31 | class CoordAtt(nn.Module):
32 |     def __init__(self, inp, reduction=32):
33 |         super(CoordAtt, self).__init__()
34 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
35 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
36 | 
37 |         mip = max(8, inp // reduction)
38 | 
39 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
40 |         self.bn1 = nn.BatchNorm2d(mip)
41 |         self.act = h_swish()
42 | 
43 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
44 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
45 | 
46 |         # Improved Cross-Dimensional Attention
47 |         self.attention_conv = nn.Conv2d(inp, inp, kernel_size=1, stride=1, padding=0)
48 | 
49 |     def forward(self, x):
50 |         identity = x
51 | 
52 |         n, c, h, w = x.size()
53 |         x_h = self.pool_h(x)
54 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)
55 | 
56 |         y = torch.cat([x_h, x_w], dim=2)
57 |         y = self.conv1(y)
58 |         y = self.bn1(y)
59 |         y = self.act(y)
60 | 
61 |         x_h, x_w = torch.split(y, [h, w], dim=2)
62 |         x_w = x_w.permute(0, 1, 3, 2)
63 | 
64 |         a_h = self.conv_h(x_h).sigmoid()
65 |         a_w = self.conv_w(x_w).sigmoid()
66 | 
67 |         # Compute cross-dimensional attention via convolution
68 |         attention_weights = self.attention_conv(identity).sigmoid()
69 | 
70 |         out = identity * a_w * a_h * attention_weights
71 | 
72 |         return out
73 | 
74 | if __name__ == '__main__':
75 |     x = torch.randn(2, 64, 32, 32)
76 |     att = CoordAtt(inp=64, reduction=32)
77 |     out = att(x)
78 |     print("输入尺寸:", x.shape)
79 | print("输出尺寸:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/cross_channel_attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extend SEAttention by integrating a cross-channel attention mechanism using a multi-head attention layer 3 | This layer computes interactions between channels to create a comprehensive attention map that enhances feature recalibration 4 | Modify the forward function to apply this cross-channel attention before the existing channel attention 5 | Evaluate the model by comparing outputs with those from SEAttention and other modifications, using attention map visualizations and performance on synthetic datasets designed to test inter-channel dependencies 6 | 7 | """ 8 | 9 | # Modified code 10 | import numpy as np 11 | import torch 12 | from torch import flatten, nn 13 | from torch.nn import init 14 | from torch.nn.modules.activation import ReLU 15 | from torch.nn.modules.batchnorm import BatchNorm2d 16 | from torch.nn import functional as F 17 | 18 | class SEAttention(nn.Module): 19 | def __init__(self, channel=512, reduction=16, num_heads=8): 20 | super().__init__() 21 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 22 | self.fc = nn.Sequential( 23 | nn.Linear(channel, channel // reduction, bias=False), 24 | nn.ReLU(inplace=True), 25 | nn.Linear(channel // reduction, channel, bias=False), 26 | nn.Sigmoid() 27 | ) 28 | # Cross-channel multi-head attention 29 | self.multihead_attn = nn.MultiheadAttention(embed_dim=channel, num_heads=num_heads, batch_first=True) 30 | 31 | def init_weights(self): 32 | for m in self.modules(): 33 | if isinstance(m, nn.Conv2d): 34 | init.kaiming_normal_(m.weight, mode='fan_out') 35 | if m.bias is not None: 36 | init.constant_(m.bias, 0) 37 | elif isinstance(m, nn.BatchNorm2d): 38 | init.constant_(m.weight, 1) 39 | init.constant_(m.bias, 0) 40 | elif isinstance(m, nn.Linear): 41 | init.normal_(m.weight, std=0.001) 42 | if m.bias is not None: 43 | init.constant_(m.bias, 0) 44 | 45 | def forward(self, x): 46 | b, c, h, w = x.size() 47 | 48 | # Reshape and transpose for multi-head attention 49 | x_flat = x.view(b, c, h * w).transpose(1, 2) # shape: (b, hw, c) 50 | 51 | # Apply multi-head attention 52 | attn_output, _ = self.multihead_attn(x_flat, x_flat, x_flat) 53 | attn_output = attn_output.transpose(1, 2).view(b, c, h, w) # reshape back to original input shape 54 | 55 | # Existing SEAttention mechanism 56 | y = self.avg_pool(attn_output).view(b, c) 57 | y = self.fc(y).view(b, c, 1, 1) 58 | 59 | return attn_output * y.expand_as(attn_output) 60 | 61 | if __name__ == '__main__': 62 | model = SEAttention() 63 | model.init_weights() 64 | input = torch.randn(1, 512, 7, 7) 65 | output = model(input) 66 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/dynamic_data_transformation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Develop a dynamic data transformation module that learns to apply optimal transformations to input data to improve small target visibility 3 | Integrate this module into the SEAttention model by preprocessing input data before the attention mechanism 4 | Evaluate the model's performance by comparing precision, recall, and F1-score on small target detection tasks with and without the transformation module 5 | Analyze the 
effectiveness of different transformations in enhancing small target detection 6 | 7 | """ 8 | 9 | # Modified code 10 | import numpy as np 11 | import torch 12 | from torch import flatten, nn 13 | from torch.nn import init 14 | from torch.nn.modules.activation import ReLU 15 | from torch.nn.modules.batchnorm import BatchNorm2d 16 | from torch.nn import functional as F 17 | 18 | 19 | class DynamicDataTransformation(nn.Module): 20 | def __init__(self, channel): 21 | super().__init__() 22 | self.transform = nn.Sequential( 23 | nn.Conv2d(channel, channel, kernel_size=3, stride=1, padding=1, bias=False), 24 | nn.BatchNorm2d(channel), 25 | nn.ReLU(inplace=True), 26 | nn.Conv2d(channel, channel, kernel_size=3, stride=1, padding=1, bias=False), 27 | nn.BatchNorm2d(channel), 28 | nn.Sigmoid() 29 | ) 30 | 31 | def forward(self, x): 32 | return x * self.transform(x) 33 | 34 | 35 | class SEAttention(nn.Module): 36 | 37 | def __init__(self, channel=512, reduction=16): 38 | super().__init__() 39 | self.data_transform = DynamicDataTransformation(channel) 40 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 41 | self.fc = nn.Sequential( 42 | nn.Linear(channel, channel // reduction, bias=False), 43 | nn.ReLU(inplace=True), 44 | nn.Linear(channel // reduction, channel, bias=False), 45 | nn.Sigmoid() 46 | ) 47 | 48 | def init_weights(self): 49 | for m in self.modules(): 50 | if isinstance(m, nn.Conv2d): 51 | init.kaiming_normal_(m.weight, mode='fan_out') 52 | if m.bias is not None: 53 | init.constant_(m.bias, 0) 54 | elif isinstance(m, nn.BatchNorm2d): 55 | init.constant_(m.weight, 1) 56 | init.constant_(m.bias, 0) 57 | elif isinstance(m, nn.Linear): 58 | init.normal_(m.weight, std=0.001) 59 | if m.bias is not None: 60 | init.constant_(m.bias, 0) 61 | 62 | def forward(self, x): 63 | x = self.data_transform(x) 64 | b, c, _, _ = x.size() 65 | y = self.avg_pool(x).view(b, c) 66 | y = self.fc(y).view(b, c, 1, 1) 67 | return x * y.expand_as(x) 68 | 69 | if __name__ == '__main__': 70 | model = SEAttention() 71 | model.init_weights() 72 | input = torch.randn(1, 512, 7, 7) 73 | output = model(input) 74 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/dual_attention_integration.py: -------------------------------------------------------------------------------- 1 | """ 2 | Expand SEAttention to include a spatial attention mechanism 3 | Implement spatial attention by adding two convolutional layers, followed by a softmax activation to produce a spatial attention map 4 | Combine the spatial attention map with the SE channel attention output through element-wise multiplication 5 | Evaluate performance improvements using metrics such as precision, recall, and F1-score on small target detection tasks 6 | Compare these results to the baseline SEAttention model's performance 7 | 8 | """ 9 | 10 | import numpy as np 11 | import torch 12 | from torch import flatten, nn 13 | from torch.nn import init 14 | from torch.nn.modules.activation import ReLU 15 | from torch.nn.modules.batchnorm import BatchNorm2d 16 | from torch.nn import functional as F 17 | 18 | 19 | class SEAttentionWithSpatial(nn.Module): 20 | 21 | def __init__(self, channel=512, reduction=16): 22 | super().__init__() 23 | # Channel Attention 24 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 25 | self.fc = nn.Sequential( 26 | nn.Linear(channel, channel // reduction, bias=False), 27 | nn.ReLU(inplace=True), 28 | nn.Linear(channel // reduction, channel, bias=False), 29 | 
nn.Sigmoid() 30 | ) 31 | # Spatial Attention 32 | self.spatial_conv1 = nn.Conv2d(channel, channel // 8, kernel_size=7, padding=3, bias=False) 33 | self.spatial_conv2 = nn.Conv2d(channel // 8, 1, kernel_size=7, padding=3, bias=False) 34 | self.softmax = nn.Softmax(dim=2) 35 | 36 | def init_weights(self): 37 | for m in self.modules(): 38 | if isinstance(m, nn.Conv2d): 39 | init.kaiming_normal_(m.weight, mode='fan_out') 40 | if m.bias is not None: 41 | init.constant_(m.bias, 0) 42 | elif isinstance(m, nn.BatchNorm2d): 43 | init.constant_(m.weight, 1) 44 | init.constant_(m.bias, 0) 45 | elif isinstance(m, nn.Linear): 46 | init.normal_(m.weight, std=0.001) 47 | if m.bias is not None: 48 | init.constant_(m.bias, 0) 49 | 50 | def forward(self, x): 51 | b, c, h, w = x.size() 52 | 53 | # Channel Attention 54 | y_c = self.avg_pool(x).view(b, c) 55 | y_c = self.fc(y_c).view(b, c, 1, 1) 56 | channel_attention = x * y_c.expand_as(x) 57 | 58 | # Spatial Attention 59 | y_s = self.spatial_conv1(channel_attention) 60 | y_s = F.relu(y_s) 61 | y_s = self.spatial_conv2(y_s) 62 | y_s = self.softmax(y_s.view(b, 1, h * w)).view(b, 1, h, w) 63 | 64 | # Combined Attention 65 | combined_attention = channel_attention * y_s 66 | 67 | return combined_attention 68 | 69 | if __name__ == '__main__': 70 | model = SEAttentionWithSpatial() 71 | model.init_weights() 72 | input = torch.randn(1, 512, 7, 7) 73 | output = model(input) 74 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention/code/xxx-hierarchical_coordatt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implement a hierarchical structure within the CoordAtt module, using two sequential layers 3 | The first layer applies channel-wise attention with 1x1 convolutions, focusing on channel dependencies 4 | The second layer implements spatial attention with adaptive pooling and convolutions to capture spatial dependencies 5 | Adjust the forward method to process these layers in sequence 6 | Evaluate the enhanced attention mechanism by testing on a small benchmark dataset, comparing improvements in feature representation and performance to the original CoordAtt while monitoring computational efficiency 7 | 8 | """ 9 | 10 | # Modified code 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | class h_sigmoid(nn.Module): 16 | def __init__(self, inplace=True): 17 | super(h_sigmoid, self).__init__() 18 | self.relu = nn.ReLU6(inplace=inplace) 19 | 20 | def forward(self, x): 21 | return self.relu(x + 3) / 6 22 | 23 | class h_swish(nn.Module): 24 | def __init__(self, inplace=True): 25 | super(h_swish, self).__init__() 26 | self.sigmoid = h_sigmoid(inplace=inplace) 27 | 28 | def forward(self, x): 29 | return x * self.sigmoid(x) 30 | 31 | class CoordAtt(nn.Module): 32 | def __init__(self, inp, reduction=32): 33 | super(CoordAtt, self).__init__() 34 | # First layer: channel-wise attention 35 | mip = max(8, inp // reduction) 36 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 37 | self.bn1 = nn.BatchNorm2d(mip) 38 | self.act1 = h_swish() 39 | 40 | # Second layer: spatial attention 41 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 42 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 43 | self.conv_h = nn.Conv2d(mip, mip, kernel_size=1, stride=1, padding=0) 44 | self.conv_w = nn.Conv2d(mip, mip, kernel_size=1, stride=1, padding=0) 45 | self.bn2 = nn.BatchNorm2d(mip) 46 | self.act2 = 
h_swish()
47 | 
48 |         self.conv_out = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
49 | 
50 |     def forward(self, x):
51 |         identity = x
52 | 
53 |         # Channel-wise attention
54 |         y = self.conv1(x)
55 |         y = self.bn1(y)
56 |         y = self.act1(y)
57 | 
58 |         # Spatial attention
59 |         n, c, h, w = y.size()
60 |         x_h = self.pool_h(y)
61 |         x_w = self.pool_w(y).permute(0, 1, 3, 2)
62 |         y = torch.cat([x_h, x_w], dim=2)
63 |         y = self.bn2(y)
64 |         y = self.act2(y)
65 | 
66 |         x_h, x_w = torch.split(y, [h, w], dim=2)
67 |         x_w = x_w.permute(0, 1, 3, 2)
68 | 
69 |         a_h = self.conv_h(x_h)
70 |         a_w = self.conv_w(x_w)
71 | 
72 |         # Fuse the axis-wise maps in the reduced (mip-channel) space, project
73 |         # back to the input channel count with conv_out, then gate the identity
74 |         att = self.conv_out(a_h * a_w).sigmoid()
75 |         out = identity * att
76 | 
77 |         return out
78 | 
79 | if __name__ == '__main__':
80 |     x = torch.randn(2, 64, 32, 32)
81 |     att = CoordAtt(inp=64, reduction=32)
82 |     out = att(x)
83 |     print("输入尺寸:", x.shape)
84 |     print("输出尺寸:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/code/multi_resolution_attention.py:
--------------------------------------------------------------------------------
1 | """
2 | Extend SEAttention by implementing a multi-resolution attention mechanism
3 | Create two versions of the input feature map: the original and a single downsampled version
4 | Apply the SEAttention block to each version, and then upsample the downsampled attention-weighted feature map back to the original resolution
5 | Combine these maps to form a final attention map
6 | Modify the forward function to include these steps while optimizing for computational efficiency
7 | Evaluate the effectiveness by comparing detection performance on small targets using synthetic datasets, assessing both qualitative and quantitative improvements over the baseline SEAttention
8 | 
9 | """
10 | 
11 | # Modified code
12 | 
13 | import numpy as np
14 | import torch
15 | from torch import nn
16 | from torch.nn import init
17 | from torch.nn import functional as F
18 | 
19 | class SEAttention(nn.Module):
20 | 
21 |     def __init__(self, channel=512, reduction=16):
22 |         super().__init__()
23 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
24 |         self.fc = nn.Sequential(
25 |             nn.Linear(channel, channel // reduction, bias=False),
26 |             nn.ReLU(inplace=True),
27 |             nn.Linear(channel // reduction, channel, bias=False),
28 |             nn.Sigmoid()
29 |         )
30 | 
31 |     def init_weights(self):
32 |         for m in self.modules():
33 |             if isinstance(m, nn.Conv2d):
34 |                 init.kaiming_normal_(m.weight, mode='fan_out')
35 |                 if m.bias is not None:
36 |                     init.constant_(m.bias, 0)
37 |             elif isinstance(m, nn.BatchNorm2d):
38 |                 init.constant_(m.weight, 1)
39 |                 init.constant_(m.bias, 0)
40 |             elif isinstance(m, nn.Linear):
41 |                 init.normal_(m.weight, std=0.001)
42 |                 if m.bias is not None:
43 |                     init.constant_(m.bias, 0)
44 | 
45 |     def forward(self, x):
46 |         b, c, h, w = x.size()
47 | 
48 |         # Original SEAttention on the original feature map
49 |         y1 = self.avg_pool(x).view(b, c)
50 |         y1 = self.fc(y1).view(b, c, 1, 1)
51 |         out1 = x * y1.expand_as(x)
52 | 
53 |         # Downsample the feature map using interpolation
54 |         x_down = F.interpolate(x, scale_factor=0.5, mode='bilinear', align_corners=False)
55 | 
56 |         # SEAttention on the downsampled feature map
57 |         y2 = self.avg_pool(x_down).view(b, c)
58 |         y2 = self.fc(y2).view(b, c, 1, 1)
59 |         out2 = x_down * y2.expand_as(x_down)
60 | 
61 |         # Upsample back to the original resolution
62 |         out2_upsampled = F.interpolate(out2, size=(h, w), mode='bilinear', align_corners=False)
63 | 
64 |         # Combine attention
maps 65 | out_combined = out1 + out2_upsampled 66 | 67 | return out_combined 68 | 69 | if __name__ == '__main__': 70 | model = SEAttention() 71 | model.init_weights() 72 | input = torch.randn(1, 512, 7, 7) 73 | output = model(input) 74 | print(output.shape) 75 | 76 | # I am done -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/contrastive_attention_enhancement.py: -------------------------------------------------------------------------------- 1 | """ 2 | Enhance SEAttention by integrating contrastive learning to improve spatial awareness 3 | Implement this by creating pairs of feature maps: one with SEAttention applied and one without 4 | Use a contrastive loss function to train the model to differentiate between these maps, emphasizing small target detection 5 | Modify the forward function to support this training regime 6 | Evaluate the model by comparing the contrastive loss and visualizing the attention focus on small targets, demonstrating improved spatial discrimination over the baseline model 7 | 8 | """ 9 | 10 | # Modified code 11 | import numpy as np 12 | import torch 13 | from torch import flatten, nn 14 | from torch.nn import init 15 | from torch.nn.modules.activation import ReLU 16 | from torch.nn.modules.batchnorm import BatchNorm2d 17 | from torch.nn import functional as F 18 | from torch.nn import CosineSimilarity 19 | 20 | class SEAttention(nn.Module): 21 | 22 | def __init__(self, channel=512, reduction=16): 23 | super().__init__() 24 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 25 | self.fc = nn.Sequential( 26 | nn.Linear(channel, channel // reduction, bias=False), 27 | nn.ReLU(inplace=True), 28 | nn.Linear(channel // reduction, channel, bias=False), 29 | nn.Sigmoid() 30 | ) 31 | # Initialize cosine similarity for contrastive learning 32 | self.cos_sim = CosineSimilarity(dim=1) 33 | 34 | def init_weights(self): 35 | for m in self.modules(): 36 | if isinstance(m, nn.Conv2d): 37 | init.kaiming_normal_(m.weight, mode='fan_out') 38 | if m.bias is not None: 39 | init.constant_(m.bias, 0) 40 | elif isinstance(m, nn.BatchNorm2d): 41 | init.constant_(m.weight, 1) 42 | init.constant_(m.bias, 0) 43 | elif isinstance(m, nn.Linear): 44 | init.normal_(m.weight, std=0.001) 45 | if m.bias is not None: 46 | init.constant_(m.bias, 0) 47 | 48 | def forward(self, x): 49 | b, c, _, _ = x.size() 50 | 51 | # SEAttention applied map 52 | y = self.avg_pool(x).view(b, c) 53 | y = self.fc(y).view(b, c, 1, 1) 54 | se_attention_map = x * y.expand_as(x) 55 | 56 | # Original map without SEAttention 57 | original_map = x 58 | 59 | # Calculate cosine similarity between the two maps 60 | similarity = self.cos_sim(se_attention_map, original_map) 61 | 62 | # Contrastive loss: encourage high similarity 63 | contrastive_loss = 1 - similarity.mean() # Using 1 - cosine similarity as a simple contrastive loss 64 | 65 | return se_attention_map, contrastive_loss 66 | 67 | if __name__ == '__main__': 68 | model = SEAttention() 69 | model.init_weights() 70 | input = torch.randn(1, 512, 7, 7) 71 | output, contrastive_loss = model(input) 72 | print("Output shape:", output.shape) 73 | print("Contrastive Loss:", contrastive_loss.item()) 74 | 75 | # I am done -------------------------------------------------------------------------------- /generation_idea_template/coordattention/code/xxa-depthwise_coordatt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modify the `CoordAtt` class to replace 
standard convolutions with depthwise separable convolutions 3 | Implement a new class `DepthwiseSeparableConv` and use it to replace `conv1`, `conv_h`, and `conv_w` 4 | Each depthwise separable convolution consists of a depthwise convolution followed by a pointwise convolution 5 | Evaluate the efficiency improvements by measuring parameter count and computational time, and compare the accuracy on a small benchmark dataset against the original implementation 6 | 7 | """ 8 | 9 | # 创新不足,但可以用来降低计算量 10 | 11 | # Modified code 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | 17 | class h_sigmoid(nn.Module): 18 | def __init__(self, inplace=True): 19 | super(h_sigmoid, self).__init__() 20 | self.relu = nn.ReLU6(inplace=inplace) 21 | 22 | def forward(self, x): 23 | return self.relu(x + 3) / 6 24 | 25 | 26 | class h_swish(nn.Module): 27 | def __init__(self, inplace=True): 28 | super(h_swish, self).__init__() 29 | self.sigmoid = h_sigmoid(inplace=inplace) 30 | 31 | def forward(self, x): 32 | return x * self.sigmoid(x) 33 | 34 | 35 | class DepthwiseSeparableConv(nn.Module): 36 | def __init__(self, inp, oup, kernel_size=1, stride=1, padding=0): 37 | super(DepthwiseSeparableConv, self).__init__() 38 | self.depthwise = nn.Conv2d(inp, inp, kernel_size=kernel_size, stride=stride, padding=padding, groups=inp) 39 | self.pointwise = nn.Conv2d(inp, oup, kernel_size=1, stride=1, padding=0) 40 | 41 | def forward(self, x): 42 | x = self.depthwise(x) 43 | x = self.pointwise(x) 44 | return x 45 | 46 | 47 | class CoordAtt(nn.Module): 48 | def __init__(self, inp, reduction=32): 49 | super(CoordAtt, self).__init__() 50 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 51 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 52 | 53 | mip = max(8, inp // reduction) 54 | 55 | self.conv1 = DepthwiseSeparableConv(inp, mip, kernel_size=1, stride=1, padding=0) 56 | self.bn1 = nn.BatchNorm2d(mip) 57 | self.act = h_swish() 58 | 59 | self.conv_h = DepthwiseSeparableConv(mip, inp, kernel_size=1, stride=1, padding=0) 60 | self.conv_w = DepthwiseSeparableConv(mip, inp, kernel_size=1, stride=1, padding=0) 61 | 62 | def forward(self, x): 63 | identity = x 64 | 65 | n, c, h, w = x.size() 66 | x_h = self.pool_h(x) 67 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 68 | 69 | y = torch.cat([x_h, x_w], dim=2) 70 | y = self.conv1(y) 71 | y = self.bn1(y) 72 | y = self.act(y) 73 | 74 | x_h, x_w = torch.split(y, [h, w], dim=2) 75 | x_w = x_w.permute(0, 1, 3, 2) 76 | 77 | a_h = self.conv_h(x_h).sigmoid() 78 | a_w = self.conv_w(x_w).sigmoid() 79 | 80 | out = identity * a_w * a_h 81 | 82 | return out 83 | 84 | if __name__ == '__main__': 85 | x = torch.randn(2, 64, 32, 32) 86 | att = CoordAtt(inp=64, reduction=32) 87 | out = att(x) 88 | print("输入尺寸:", x.shape) 89 | print("输出尺寸:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/quality_adaptive_attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Develop a quality assessment module that computes a quality score for each input feature map using metrics like noise level or sharpness 3 | Integrate this module into the SEAttention class, modifying the attention weights based on quality scores 4 | Implement this by adding a quality assessment function and updating the forward pass of SEAttention to apply adaptive modulation of attention weights 5 | Evaluate performance improvements using precision, recall, and F1-score on 
small target detection tasks, and compare results with the baseline SEAttention model and other enhanced models
6 | 
7 | """
8 | 
9 | # Refined code
10 | import numpy as np
11 | import torch
12 | from torch import nn
13 | from torch.nn import init
14 | from torch.nn import functional as F
15 | 
16 | 
17 | class SEAttention(nn.Module):
18 | 
19 |     def __init__(self, channel=512, reduction=16):
20 |         super().__init__()
21 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
22 |         self.fc = nn.Sequential(
23 |             nn.Linear(channel, channel // reduction, bias=False),
24 |             nn.ReLU(inplace=True),
25 |             nn.Linear(channel // reduction, channel, bias=False),
26 |             nn.Sigmoid()
27 |         )
28 |         # Laplacian kernel for sharpness, registered as a buffer so it follows the module's device
29 |         self.register_buffer('laplacian_kernel', torch.tensor([[[[-1, -1, -1],
30 |                                                                  [-1, 8, -1],
31 |                                                                  [-1, -1, -1]]]], dtype=torch.float32))
32 | 
33 |     def compute_quality_score(self, x):
34 |         # Apply the kernel depthwise (one copy per channel) so conv2d accepts any channel count
35 |         kernel = self.laplacian_kernel.repeat(x.size(1), 1, 1, 1)
36 |         laplacian = F.conv2d(x, kernel, padding=1, groups=x.size(1))
37 |         quality_score = torch.sigmoid(laplacian.var(dim=[2, 3], keepdim=True))  # Normalize to [0, 1]
38 |         return quality_score
39 | 
40 |     def init_weights(self):
41 |         for m in self.modules():
42 |             if isinstance(m, nn.Conv2d):
43 |                 init.kaiming_normal_(m.weight, mode='fan_out')
44 |                 if m.bias is not None:
45 |                     init.constant_(m.bias, 0)
46 |             elif isinstance(m, nn.BatchNorm2d):
47 |                 init.constant_(m.weight, 1)
48 |                 init.constant_(m.bias, 0)
49 |             elif isinstance(m, nn.Linear):
50 |                 init.normal_(m.weight, std=0.001)
51 |                 if m.bias is not None:
52 |                     init.constant_(m.bias, 0)
53 | 
54 |     def forward(self, x):
55 |         b, c, _, _ = x.size()
56 |         y = self.avg_pool(x).view(b, c)
57 |         y = self.fc(y).view(b, c, 1, 1)
58 | 
59 |         # Compute quality score and adjust attention weights
60 |         quality_score = self.compute_quality_score(x)
61 |         adjusted_y = y * quality_score
62 | 
63 |         return x * adjusted_y.expand_as(x)
64 | 
65 | 
66 | if __name__ == '__main__':
67 |     # Initialize the model and weights
68 |     model = SEAttention()
69 |     model.init_weights()
70 | 
71 |     # Test the model with a random input
72 |     input = torch.randn(1, 512, 7, 7)
73 |     output = model(input)
74 |     print(output.shape)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/code/xaa-global_context_coordatt.py:
--------------------------------------------------------------------------------
1 | """
2 | Introduce a Global Context Block within the CoordAtt module
3 | Implement global average pooling on the input feature map to extract global context vectors
4 | Use these vectors to modulate the attention weights by integrating them with the existing coordinate attention features
5 | Modify the forward method to include this global context before applying the spatial attention mechanism
6 | Evaluate the impact on feature representation and performance using a small benchmark dataset, comparing the results with the original CoordAtt and other variants
7 | 
8 | """
9 | 
10 | # Modified code
11 | 
12 | import torch
13 | import torch.nn as nn
14 | import torch.nn.functional as F
15 | 
16 | class h_sigmoid(nn.Module):
17 |     def __init__(self, inplace=True):
18 |         super(h_sigmoid, self).__init__()
19 |         self.relu = nn.ReLU6(inplace=inplace)
20 | 
21 |     def forward(self, x):
22 |         return self.relu(x + 3) / 6
23 | 
24 | class h_swish(nn.Module):
25 |     def __init__(self, inplace=True):
26 |         super(h_swish, self).__init__()
27 |         self.sigmoid = h_sigmoid(inplace=inplace)
28 | 
29 |     def forward(self, x):
30 | 
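        # h-swish (as in MobileNetV3): x * h_sigmoid(x), a cheap swish approximation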
return x * self.sigmoid(x) 31 | 32 | class CoordAtt(nn.Module): 33 | def __init__(self, inp, reduction=32): 34 | super(CoordAtt, self).__init__() 35 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 36 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 37 | 38 | mip = max(8, inp // reduction) 39 | 40 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 41 | self.bn1 = nn.BatchNorm2d(mip) 42 | self.act = h_swish() 43 | 44 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 45 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 46 | 47 | # Global context block 48 | self.global_pool = nn.AdaptiveAvgPool2d(1) 49 | self.global_fc = nn.Sequential( 50 | nn.Conv2d(inp, mip, kernel_size=1, padding=0), 51 | nn.BatchNorm2d(mip), 52 | nn.ReLU(inplace=True), 53 | nn.Conv2d(mip, inp, kernel_size=1, padding=0), 54 | nn.Sigmoid() 55 | ) 56 | 57 | def forward(self, x): 58 | identity = x 59 | 60 | n, c, h, w = x.size() 61 | x_h = self.pool_h(x) 62 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 63 | 64 | y = torch.cat([x_h, x_w], dim=2) 65 | y = self.conv1(y) 66 | y = self.bn1(y) 67 | y = self.act(y) 68 | 69 | x_h, x_w = torch.split(y, [h, w], dim=2) 70 | x_w = x_w.permute(0, 1, 3, 2) 71 | 72 | a_h = self.conv_h(x_h).sigmoid() 73 | a_w = self.conv_w(x_w).sigmoid() 74 | 75 | # Apply global context 76 | global_context = self.global_pool(identity) 77 | global_context = self.global_fc(global_context) 78 | 79 | out = identity * a_w * a_h * global_context 80 | 81 | return out 82 | 83 | if __name__ == '__main__': 84 | x = torch.randn(2, 64, 32, 32) 85 | att = CoordAtt(inp=64, reduction=32) 86 | out = att(x) 87 | print("输入尺寸:", x.shape) 88 | print("输出尺寸:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/internal_attention_bootstrapping.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implement an internal attention bootstrapping mechanism where SEAttention periodically saves and analyzes its attention distribution at various training stages 3 | Modify the training routine to adjust current attention maps to better align with or improve upon these previously saved distributions, focusing on enhancing small target detection capabilities 4 | Evaluate attention map alignment and detection performance improvements over baseline SEAttention using synthetic datasets 5 | 6 | """ 7 | 8 | # Modified code 9 | import numpy as np 10 | import torch 11 | from torch import flatten, nn 12 | from torch.nn import init 13 | from torch.nn.modules.activation import ReLU 14 | from torch.nn.modules.batchnorm import BatchNorm2d 15 | from torch.nn import functional as F 16 | 17 | 18 | class SEAttention(nn.Module): 19 | 20 | def __init__(self, channel=512, reduction=16, save_interval=10): 21 | super().__init__() 22 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 23 | self.fc = nn.Sequential( 24 | nn.Linear(channel, channel // reduction, bias=False), 25 | nn.ReLU(inplace=True), 26 | nn.Linear(channel // reduction, channel, bias=False), 27 | nn.Sigmoid() 28 | ) 29 | self.attention_history = [] 30 | self.save_interval = save_interval 31 | self.training_step = 0 32 | 33 | def init_weights(self): 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | init.kaiming_normal_(m.weight, mode='fan_out') 37 | if m.bias is not None: 38 | init.constant_(m.bias, 0) 39 | elif isinstance(m, nn.BatchNorm2d): 40 | init.constant_(m.weight, 1) 41 | 
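                # BatchNorm starts as the identity mapping (scale 1, shift 0)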
init.constant_(m.bias, 0) 42 | elif isinstance(m, nn.Linear): 43 | init.normal_(m.weight, std=0.001) 44 | if m.bias is not None: 45 | init.constant_(m.bias, 0) 46 | 47 | def forward(self, x): 48 | b, c, _, _ = x.size() 49 | y = self.avg_pool(x).view(b, c) 50 | y = self.fc(y).view(b, c, 1, 1) 51 | 52 | # Save attention distribution periodically 53 | if self.training and self.training_step % self.save_interval == 0: 54 | self.attention_history.append(y.detach().clone()) 55 | 56 | # Adjust attention maps based on saved distributions 57 | if self.attention_history: 58 | historical_attention = self.attention_history[-1] 59 | y = self._adjust_attention(y, historical_attention) 60 | 61 | self.training_step += 1 62 | return x * y.expand_as(x) 63 | 64 | def _adjust_attention(self, current_attention, historical_attention): 65 | # Simple example of adjustment: interpolate between current and historical attention 66 | adjusted_attention = (current_attention + historical_attention) / 2 67 | return adjusted_attention 68 | 69 | if __name__ == '__main__': 70 | model = SEAttention() 71 | model.init_weights() 72 | input = torch.randn(1, 512, 7, 7) 73 | output = model(input) 74 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/sparsity_attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extend SEAttention by incorporating a sparsity-promoting transformation within the attention mechanism 3 | Implement a sparse encoding step using a learned thresholding layer, applied to the input feature maps before the existing attention recalibration 4 | This thresholding layer will dynamically adjust based on the input characteristics to promote sparsity efficiently 5 | Modify the forward function to include this sparsity transformation and evaluate its impact by comparing detection performance on synthetic datasets with baseline SEAttention 6 | Use visualization of attention maps to assess enhanced focus on critical features and improved noise suppression 7 | 8 | """ 9 | 10 | # Modified code 11 | import numpy as np 12 | import torch 13 | from torch import flatten, nn 14 | from torch.nn import init 15 | from torch.nn.modules.activation import ReLU 16 | from torch.nn.modules.batchnorm import BatchNorm2d 17 | from torch.nn import functional as F 18 | 19 | 20 | class SEAttention(nn.Module): 21 | 22 | def __init__(self, channel=512, reduction=16): 23 | super().__init__() 24 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 25 | self.sparsity_layer = nn.Sequential( 26 | nn.Linear(channel, channel // reduction, bias=False), 27 | nn.ReLU(inplace=True), 28 | nn.Linear(channel // reduction, channel, bias=False), 29 | nn.Sigmoid() 30 | ) 31 | self.attention_layer = nn.Sequential( 32 | nn.Linear(channel, channel // reduction, bias=False), 33 | nn.ReLU(inplace=True), 34 | nn.Linear(channel // reduction, channel, bias=False), 35 | nn.Sigmoid() 36 | ) 37 | 38 | def init_weights(self): 39 | for m in self.modules(): 40 | if isinstance(m, nn.Conv2d): 41 | init.kaiming_normal_(m.weight, mode='fan_out') 42 | if m.bias is not None: 43 | init.constant_(m.bias, 0) 44 | elif isinstance(m, nn.BatchNorm2d): 45 | init.constant_(m.weight, 1) 46 | init.constant_(m.bias, 0) 47 | elif isinstance(m, nn.Linear): 48 | init.normal_(m.weight, std=0.001) 49 | if m.bias is not None: 50 | init.constant_(m.bias, 0) 51 | 52 | def apply_sparsity(self, x, threshold): 53 | """Apply sparsity mask based on the 
dynamic threshold.""" 54 | return x * (x > threshold).float() 55 | 56 | def forward(self, x): 57 | b, c, _, _ = x.size() 58 | 59 | # Sparsity-promoting transformation 60 | sparsity_threshold = self.avg_pool(x).view(b, c) 61 | sparsity_threshold = self.sparsity_layer(sparsity_threshold).view(b, c, 1, 1) 62 | x = self.apply_sparsity(x, sparsity_threshold) 63 | 64 | # SEAttention mechanism 65 | y = self.avg_pool(x).view(b, c) 66 | y = self.attention_layer(y).view(b, c, 1, 1) 67 | return x * y.expand_as(x) 68 | 69 | if __name__ == '__main__': 70 | model = SEAttention() 71 | model.init_weights() 72 | input = torch.randn(1, 512, 7, 7) 73 | output = model(input) 74 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention/code/xaa-temporal_coordatt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Add a lightweight temporal attention mechanism to the CoordAtt module 3 | Introduce 1D convolutions that operate on temporal sequences derived from input feature maps 4 | Modify the forward method to compute temporal attention weights and integrate them with spatial attention 5 | Evaluate the effectiveness on synthetic sequential data to assess improvements in temporal feature representations, while monitoring any additional computational cost incurred 6 | 7 | """ 8 | 9 | # 可以一试 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | class h_sigmoid(nn.Module): 16 | def __init__(self, inplace=True): 17 | super(h_sigmoid, self).__init__() 18 | self.relu = nn.ReLU6(inplace=inplace) 19 | 20 | def forward(self, x): 21 | return self.relu(x + 3) / 6 22 | 23 | class h_swish(nn.Module): 24 | def __init__(self, inplace=True): 25 | super(h_swish, self).__init__() 26 | self.sigmoid = h_sigmoid(inplace=inplace) 27 | 28 | def forward(self, x): 29 | return x * self.sigmoid(x) 30 | 31 | class CoordAtt(nn.Module): 32 | def __init__(self, inp, reduction=32, temporal_reduction=4): 33 | super(CoordAtt, self).__init__() 34 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 35 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 36 | 37 | mip = max(8, inp // reduction) 38 | 39 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 40 | self.bn1 = nn.BatchNorm2d(mip) 41 | self.act = h_swish() 42 | 43 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 44 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 45 | 46 | # Temporal Attention Module 47 | self.temporal_conv1 = nn.Conv1d(inp, inp // temporal_reduction, kernel_size=3, padding=1) 48 | self.temporal_bn1 = nn.BatchNorm1d(inp // temporal_reduction) 49 | self.temporal_conv2 = nn.Conv1d(inp // temporal_reduction, inp, kernel_size=3, padding=1) 50 | 51 | def forward(self, x): 52 | identity = x 53 | 54 | n, c, h, w = x.size() 55 | x_h = self.pool_h(x) 56 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 57 | 58 | y = torch.cat([x_h, x_w], dim=2) 59 | y = self.conv1(y) 60 | y = self.bn1(y) 61 | y = self.act(y) 62 | 63 | x_h, x_w = torch.split(y, [h, w], dim=2) 64 | x_w = x_w.permute(0, 1, 3, 2) 65 | 66 | a_h = self.conv_h(x_h).sigmoid() 67 | a_w = self.conv_w(x_w).sigmoid() 68 | 69 | # Temporal attention computation 70 | temporal_x = x.view(n, c, -1) # Reshape to (batch_size, channels, temporal_dim) 71 | t = self.temporal_conv1(temporal_x) 72 | t = self.temporal_bn1(t) 73 | t = F.relu(t) 74 | t = self.temporal_conv2(t).sigmoid() 75 | t = t.view(n, c, h, w) # Reshape 
back to original dimensions 76 | 77 | out = identity * a_w * a_h * t 78 | 79 | return out 80 | 81 | if __name__ == '__main__': 82 | x = torch.randn(2, 64, 32, 32) 83 | att = CoordAtt(inp=64, reduction=32) 84 | out = att(x) 85 | print("输入尺寸:", x.shape) 86 | print("输出尺寸:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/contextual_attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extend SEAttention by adding a global context block that pools the entire feature map into a context vector 3 | Use this vector to inform a spatial attention recalibration mechanism, which is applied after channel attention 4 | Implement this by adding a global context pooling layer and a recalibration module in the forward function 5 | Evaluate the model's effectiveness by comparing feature maps and performance metrics on synthetic datasets designed to mimic scenarios with small target detection challenges 6 | 7 | """ 8 | 9 | # Modified code 10 | import numpy as np 11 | import torch 12 | from torch import flatten, nn 13 | from torch.nn import init 14 | from torch.nn.modules.activation import ReLU 15 | from torch.nn.modules.batchnorm import BatchNorm2d 16 | from torch.nn import functional as F 17 | 18 | class GlobalContextBlock(nn.Module): 19 | def __init__(self, channel): 20 | super(GlobalContextBlock, self).__init__() 21 | self.pool = nn.AdaptiveAvgPool2d(1) 22 | self.fc = nn.Sequential( 23 | nn.Linear(channel, channel // 16, bias=False), 24 | nn.ReLU(inplace=True), 25 | nn.Linear(channel // 16, channel, bias=False), 26 | nn.Sigmoid() 27 | ) 28 | 29 | def forward(self, x): 30 | b, c, _, _ = x.size() 31 | y = self.pool(x).view(b, c) 32 | y = self.fc(y).view(b, c, 1, 1) 33 | return y.expand_as(x) 34 | 35 | class SEAttention(nn.Module): 36 | 37 | def __init__(self, channel=512, reduction=16): 38 | super().__init__() 39 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 40 | self.channel_fc = nn.Sequential( 41 | nn.Linear(channel, channel // reduction, bias=False), 42 | nn.ReLU(inplace=True), 43 | nn.Linear(channel // reduction, channel, bias=False), 44 | nn.Sigmoid() 45 | ) 46 | self.global_context = GlobalContextBlock(channel) 47 | 48 | def init_weights(self): 49 | for m in self.modules(): 50 | if isinstance(m, nn.Conv2d): 51 | init.kaiming_normal_(m.weight, mode='fan_out') 52 | if m.bias is not None: 53 | init.constant_(m.bias, 0) 54 | elif isinstance(m, nn.BatchNorm2d): 55 | init.constant_(m.weight, 1) 56 | init.constant_(m.bias, 0) 57 | elif isinstance(m, nn.Linear): 58 | init.normal_(m.weight, std=0.001) 59 | if m.bias is not None: 60 | init.constant_(m.bias, 0) 61 | 62 | def forward(self, x): 63 | b, c, _, _ = x.size() 64 | # Channel attention 65 | channel_attention = self.avg_pool(x).view(b, c) 66 | channel_attention = self.channel_fc(channel_attention).view(b, c, 1, 1) 67 | x = x * channel_attention.expand_as(x) 68 | 69 | # Spatial attention using global context 70 | spatial_attention = self.global_context(x) 71 | x = x * spatial_attention 72 | 73 | return x 74 | 75 | if __name__ == '__main__': 76 | model = SEAttention() 77 | model.init_weights() 78 | input = torch.randn(1, 512, 7, 7) 79 | output = model(input) 80 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention/code/xaa-channel_mix_coordatt.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Introduce a Channel Mixing Block (CMB) within the CoordAtt module 3 | Implement grouped convolutions in the CMB to capture channel-wise dependencies efficiently 4 | Modify the CoordAtt class to include a new CMB after the initial convolutional layers 5 | Evaluate the impact of channel mixing on feature representation by testing on a small benchmark dataset, comparing the performance and computational efficiency against the original CoordAtt and other variants 6 | 7 | """ 8 | 9 | # Modified code 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | class h_sigmoid(nn.Module): 15 | def __init__(self, inplace=True): 16 | super(h_sigmoid, self).__init__() 17 | self.relu = nn.ReLU6(inplace=inplace) 18 | 19 | def forward(self, x): 20 | return self.relu(x + 3) / 6 21 | 22 | class h_swish(nn.Module): 23 | def __init__(self, inplace=True): 24 | super(h_swish, self).__init__() 25 | self.sigmoid = h_sigmoid(inplace=inplace) 26 | 27 | def forward(self, x): 28 | return x * self.sigmoid(x) 29 | 30 | class ChannelMixingBlock(nn.Module): 31 | def __init__(self, channels, groups=4): 32 | super(ChannelMixingBlock, self).__init__() 33 | self.groups = groups 34 | self.grouped_conv = nn.Conv2d(channels, channels, kernel_size=1, groups=groups, bias=False) 35 | self.bn = nn.BatchNorm2d(channels) 36 | self.act = h_swish() 37 | 38 | def forward(self, x): 39 | x = self.grouped_conv(x) 40 | x = self.bn(x) 41 | x = self.act(x) 42 | return x 43 | 44 | class CoordAtt(nn.Module): 45 | def __init__(self, inp, reduction=32, groups=4): 46 | super(CoordAtt, self).__init__() 47 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 48 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 49 | 50 | mip = max(8, inp // reduction) 51 | 52 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 53 | self.bn1 = nn.BatchNorm2d(mip) 54 | self.act = h_swish() 55 | 56 | # Introduce Channel Mixing Block 57 | self.cmb = ChannelMixingBlock(mip, groups=groups) 58 | 59 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 60 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 61 | 62 | def forward(self, x): 63 | identity = x 64 | 65 | n, c, h, w = x.size() 66 | x_h = self.pool_h(x) 67 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 68 | 69 | y = torch.cat([x_h, x_w], dim=2) 70 | y = self.conv1(y) 71 | y = self.bn1(y) 72 | y = self.act(y) 73 | 74 | # Pass through Channel Mixing Block 75 | y = self.cmb(y) 76 | 77 | x_h, x_w = torch.split(y, [h, w], dim=2) 78 | x_w = x_w.permute(0, 1, 3, 2) 79 | 80 | a_h = self.conv_h(x_h).sigmoid() 81 | a_w = self.conv_w(x_w).sigmoid() 82 | 83 | out = identity * a_w * a_h 84 | 85 | return out 86 | 87 | if __name__ == '__main__': 88 | x = torch.randn(2, 64, 32, 32) 89 | att = CoordAtt(inp=64, reduction=32) 90 | out = att(x) 91 | print("输入尺寸:", x.shape) 92 | print("输出尺寸:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/plane_voronoi/code/graph_based_voronoi.py: -------------------------------------------------------------------------------- 1 | """ 2 | Use a priority queue to dynamically expand the influence of seed points across the grid, analogous to breadth-first search without explicitly constructing a graph 3 | Modify the 'attribution' function to utilize this queue-based propagation, ensuring efficient handling of neighboring grid cells 4 | Evaluate the execution time 
and scalability improvements compared to the baseline approach
5 | 
6 | """
7 | 
8 | # Modified code
9 | #!/usr/bin/env python
10 | # -*- coding: utf-8 -*-
11 | '''
12 | @File : pane_voronoi.py
13 | @Time : 2023/09/21 17:03:58
14 | @Author : 不要葱姜蒜
15 | @Version : 1.0
16 | @Desc : None
17 | '''
18 | 
19 | import pprint
20 | import copy
21 | import random
22 | from PIL import Image
23 | from tqdm import tqdm  # progress bar
24 | from collections import defaultdict, deque
25 | 
26 | class PaneVoronoi:
27 |     def __init__(self, seed, seed_list, n):
28 |         self.n = n  # side length; the grid is assumed to be square
29 |         self.seed = seed  # number of seed points
30 |         self.hash_map = [i * i for i in range(self.n)]
31 |         self.seed_list = seed_list  # the seed points themselves
32 |         self.table = [[0] * self.n for _ in range(self.n)]
33 |         self.visited = [[False] * self.n for _ in range(self.n)]
34 |         self.colors = self.colors()  # randomized colors; index 0 (the seed color) is black
35 |         self.count = n * 4 - 4
36 | 
37 |     def creat_seed(self):
38 |         res = []
39 |         for _ in range(self.seed):
40 |             res.append([random.randrange(self.n), random.randrange(self.n)])
41 |         return res
42 | 
43 |     def colors(self):
44 |         res = [[0, 0, 0]]
45 |         for _ in range(len(self.seed_list)):  # one color per seed region, so lookups stay in bounds
46 |             res.append([random.randrange(99, 206) for _ in range(3)])
47 |         return res
48 | 
49 |     def deal(self):
50 |         # Initialize the queue with seed points
51 |         queue = deque()
52 |         for idx, (x, y) in enumerate(self.seed_list):
53 |             queue.append((x, y, idx + 1))  # (x, y, seed_index)
54 |             self.table[x][y] = idx + 1
55 |             self.visited[x][y] = True
56 | 
57 |         directions = [(-1, 0), (1, 0), (0, -1), (0, 1)]
58 |         while queue:
59 |             x, y, seed_index = queue.popleft()
60 |             for dx, dy in directions:
61 |                 nx, ny = x + dx, y + dy
62 |                 if 0 <= nx < self.n and 0 <= ny < self.n and not self.visited[nx][ny]:
63 |                     self.table[nx][ny] = seed_index
64 |                     self.visited[nx][ny] = True
65 |                     queue.append((nx, ny, seed_index))
66 | 
67 |     def positive_reverse(self):
68 |         return self.table
69 | 
70 |     @classmethod
71 |     def paint(cls, data, name, colors):
72 |         image = Image.new('RGB', (len(data), len(data[0])))
73 |         put_pixel = image.putpixel
74 |         for i in tqdm(range(len(data))):
75 |             for j in range(len(data[0])):
76 |                 color = colors[data[i][j]]
77 |                 put_pixel((i, j), (color[0], color[1], color[2]))
78 |         image.save(f'img/{name}.jpg')
79 | 
80 | if __name__ == '__main__':
81 |     seed_list = [[random.randrange(32), random.randrange(32)] for _ in range(500)]
82 |     v = PaneVoronoi(500, seed_list, 32)
83 |     v.deal()
84 |     da = v.positive_reverse()
85 |     v.paint(da, 'voronoi', v.colors)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/code/xxx-geo_transform_coordatt.py:
--------------------------------------------------------------------------------
1 | """
2 | Introduce a lightweight transformation network within the CoordAtt module, using a small convolutional layer followed by a fully connected layer to predict affine transformation parameters
3 | Use these parameters to warp the input feature maps via spatial transformations before applying coordinate attention
4 | Modify the forward method accordingly, and evaluate effectiveness on datasets with geometric variations, comparing feature representation, performance, and computational efficiency against the original CoordAtt and other variants
5 | Ensure the transformation network is efficient to maintain minimal overhead
6 | 
7 | """
8 | 
9 | # Modified code
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | 
14 | class h_sigmoid(nn.Module):
15 |     def __init__(self, inplace=True):
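        # hard sigmoid: ReLU6(x + 3) / 6, a piecewise-linear stand-in for sigmoid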
16 |         super(h_sigmoid, self).__init__()
17 |         self.relu = nn.ReLU6(inplace=inplace)
18 | 
19 |     def forward(self, x):
20 |         return self.relu(x + 3) / 6
21 | 
22 | class h_swish(nn.Module):
23 |     def __init__(self, inplace=True):
24 |         super(h_swish, self).__init__()
25 |         self.sigmoid = h_sigmoid(inplace=inplace)
26 | 
27 |     def forward(self, x):
28 |         return x * self.sigmoid(x)
29 | 
30 | class CoordAtt(nn.Module):
31 |     def __init__(self, inp, reduction=32):
32 |         super(CoordAtt, self).__init__()
33 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
34 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
35 | 
36 |         mip = max(8, inp // reduction)
37 | 
38 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
39 |         self.bn1 = nn.BatchNorm2d(mip)
40 |         self.act = h_swish()
41 | 
42 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
43 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
44 | 
45 |         # Transformation network
46 |         self.trans_conv = nn.Conv2d(inp, 6, kernel_size=1, stride=1, padding=0)
47 |         self.trans_fc = nn.Linear(6, 6)  # Predict affine parameters from the pooled 6-dim descriptor
48 | 
49 |     def forward(self, x):
50 |         identity = x
51 | 
52 |         # Predict affine parameters
53 |         n, c, h, w = x.size()
54 |         theta = self.trans_conv(x)
55 |         theta = F.adaptive_avg_pool2d(theta, 1).view(n, 6)  # pool to one 6-vector per sample
56 |         theta = self.trans_fc(theta)
57 |         theta = theta.view(-1, 2, 3)
58 | 
59 |         # Create affine grid and apply transformation
60 |         grid = F.affine_grid(theta, x.size(), align_corners=False)
61 |         x = F.grid_sample(x, grid, align_corners=False)
62 | 
63 |         x_h = self.pool_h(x)
64 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)
65 | 
66 |         y = torch.cat([x_h, x_w], dim=2)
67 |         y = self.conv1(y)
68 |         y = self.bn1(y)
69 |         y = self.act(y)
70 | 
71 |         x_h, x_w = torch.split(y, [h, w], dim=2)
72 |         x_w = x_w.permute(0, 1, 3, 2)
73 | 
74 |         a_h = self.conv_h(x_h).sigmoid()
75 |         a_w = self.conv_w(x_w).sigmoid()
76 | 
77 |         out = identity * a_w * a_h
78 | 
79 |         return out
80 | 
81 | if __name__ == '__main__':
82 |     x = torch.randn(2, 64, 32, 32)
83 |     att = CoordAtt(inp=64, reduction=32)
84 |     out = att(x)
85 |     print("输入尺寸:", x.shape)
86 |     print("输出尺寸:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/code/task_adaptive_attention.py:
--------------------------------------------------------------------------------
1 | """
2 | Develop a meta-attention module that infers a task descriptor from input data characteristics
3 | Integrate this module within the SEAttention framework to modulate its parameters dynamically
4 | Implement functions to extract task descriptors and modify SEAttention weights based on these descriptors
5 | Evaluate the model's adaptability and performance across diverse small target detection tasks using precision, recall, and F1-score, comparing its performance against the baseline SEAttention model and other enhanced models
6 | 
7 | """
8 | 
9 | # Improved Code
10 | import numpy as np
11 | import torch
12 | from torch import flatten, nn
13 | from torch.nn import init
14 | from torch.nn.modules.activation import ReLU
15 | from torch.nn.modules.batchnorm import BatchNorm2d
16 | from torch.nn import functional as F
17 | 
18 | class TaskAdaptiveAttention(nn.Module):
19 |     """A module to infer task descriptors from input data characteristics."""
20 | 
21 |     def __init__(self, channel=512):
22 |         super(TaskAdaptiveAttention, self).__init__()
23 |         self.task_descriptor = nn.Sequential(
24 |             nn.AdaptiveAvgPool2d(1),
25 |             nn.Conv2d(channel, channel // 4,
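                      # a 4x channel bottleneck keeps the task descriptor cheap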
kernel_size=1), 26 | nn.ReLU(inplace=True), 27 | nn.Conv2d(channel // 4, channel, kernel_size=1), 28 | nn.Sigmoid() 29 | ) 30 | 31 | def forward(self, x): 32 | """Extract task descriptor from input data.""" 33 | return self.task_descriptor(x) 34 | 35 | 36 | class SEAttention(nn.Module): 37 | 38 | def __init__(self, channel=512, reduction=16): 39 | super(SEAttention, self).__init__() 40 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 41 | self.fc = nn.Sequential( 42 | nn.Linear(channel, channel // reduction, bias=False), 43 | nn.ReLU(inplace=True), 44 | nn.Linear(channel // reduction, channel, bias=False), 45 | nn.Sigmoid() 46 | ) 47 | self.task_adaptive_attention = TaskAdaptiveAttention(channel) 48 | 49 | def init_weights(self): 50 | for m in self.modules(): 51 | if isinstance(m, nn.Conv2d): 52 | init.kaiming_normal_(m.weight, mode='fan_out') 53 | if m.bias is not None: 54 | init.constant_(m.bias, 0) 55 | elif isinstance(m, nn.BatchNorm2d): 56 | init.constant_(m.weight, 1) 57 | init.constant_(m.bias, 0) 58 | elif isinstance(m, nn.Linear): 59 | init.normal_(m.weight, std=0.001) 60 | if m.bias is not None: 61 | init.constant_(m.bias, 0) 62 | 63 | def forward(self, x): 64 | b, c, _, _ = x.size() 65 | # Task descriptor influences SEAttention weights 66 | task_descriptor = self.task_adaptive_attention(x) 67 | pooled = self.avg_pool(x).view(b, c) 68 | se_weights = self.fc(pooled).view(b, c, 1, 1) 69 | adaptive_weights = task_descriptor.view(b, c, 1, 1) 70 | return x * se_weights.expand_as(x) * adaptive_weights.expand_as(x) 71 | 72 | if __name__ == '__main__': 73 | model = SEAttention() 74 | model.init_weights() 75 | input = torch.randn(1, 512, 7, 7) 76 | output = model(input) 77 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/pyramid_attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extend SEAttention by incorporating a pyramid pooling layer to generate multi-scale context features 3 | Implement this by adding a pyramid pooling module that extracts pooled features at different scales 4 | Apply a unified attention mechanism across these pooled features to recalibrate the feature map 5 | Modify the forward function to include pyramid pooling and attention application 6 | Evaluate the model's effectiveness by comparing detection performance on small and distributed targets, using visualization techniques and quantitative analysis on synthetic datasets 7 | 8 | """ 9 | 10 | # Modified code 11 | import numpy as np 12 | import torch 13 | from torch import flatten, nn 14 | from torch.nn import init 15 | from torch.nn.modules.activation import ReLU 16 | from torch.nn.modules.batchnorm import BatchNorm2d 17 | from torch.nn import functional as F 18 | 19 | class PyramidPooling(nn.Module): 20 | def __init__(self, in_channels, pool_sizes): 21 | super(PyramidPooling, self).__init__() 22 | self.stages = nn.ModuleList([self._make_stage(in_channels, size) for size in pool_sizes]) 23 | 24 | def _make_stage(self, in_channels, size): 25 | prior = nn.AdaptiveAvgPool2d(output_size=size) 26 | conv = nn.Conv2d(in_channels, in_channels, kernel_size=1, bias=False) 27 | return nn.Sequential(prior, conv) 28 | 29 | def forward(self, x): 30 | h, w = x.shape[2], x.shape[3] 31 | pyramids = [F.interpolate(stage(x), size=(h, w), mode='bilinear', align_corners=True) for stage in self.stages] 32 | return torch.cat(pyramids, dim=1) 33 | 34 | class SEAttention(nn.Module): 35 
| 36 | def __init__(self, channel=512, reduction=16, pool_sizes=[1, 2, 3, 6]): 37 | super().__init__() 38 | self.pyramid_pooling = PyramidPooling(channel, pool_sizes) 39 | self.attention_conv = nn.Conv2d(channel * len(pool_sizes), channel, kernel_size=1, bias=False) 40 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 41 | self.fc = nn.Sequential( 42 | nn.Linear(channel, channel // reduction, bias=False), 43 | nn.ReLU(inplace=True), 44 | nn.Linear(channel // reduction, channel, bias=False), 45 | nn.Sigmoid() 46 | ) 47 | 48 | def init_weights(self): 49 | for m in self.modules(): 50 | if isinstance(m, nn.Conv2d): 51 | init.kaiming_normal_(m.weight, mode='fan_out') 52 | if m.bias is not None: 53 | init.constant_(m.bias, 0) 54 | elif isinstance(m, nn.BatchNorm2d): 55 | init.constant_(m.weight, 1) 56 | init.constant_(m.bias, 0) 57 | elif isinstance(m, nn.Linear): 58 | init.normal_(m.weight, std=0.001) 59 | if m.bias is not None: 60 | init.constant_(m.bias, 0) 61 | 62 | def forward(self, x): 63 | b, c, _, _ = x.size() 64 | x = self.pyramid_pooling(x) 65 | x = self.attention_conv(x) 66 | y = self.avg_pool(x).view(b, c) 67 | y = self.fc(y).view(b, c, 1, 1) 68 | return x * y.expand_as(x) 69 | 70 | if __name__ == '__main__': 71 | model = SEAttention() 72 | model.init_weights() 73 | input = torch.randn(1, 512, 7, 7) 74 | output = model(input) 75 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention/code/xxx-dynamic_complexity_coordatt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Enhance the `CoordAtt` module by integrating a dynamic complexity adjustment mechanism 3 | Use a simple heuristic based on feature map variance to determine complexity 4 | Route the input through either a lightweight or complex processing path: a basic path for low variance features and an enhanced path for high variance features 5 | Modify the forward method to include this decision mechanism and dynamically adjust processing 6 | Evaluate the module's adaptability on a small benchmark dataset, assessing improvements in feature discrimination and computational efficiency over the original and other variants 7 | 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | class h_sigmoid(nn.Module): 15 | def __init__(self, inplace=True): 16 | super(h_sigmoid, self).__init__() 17 | self.relu = nn.ReLU6(inplace=inplace) 18 | 19 | def forward(self, x): 20 | return self.relu(x + 3) / 6 21 | 22 | class h_swish(nn.Module): 23 | def __init__(self, inplace=True): 24 | super(h_swish, self).__init__() 25 | self.sigmoid = h_sigmoid(inplace=inplace) 26 | 27 | def forward(self, x): 28 | return x * self.sigmoid(x) 29 | 30 | class CoordAtt(nn.Module): 31 | def __init__(self, inp, reduction=32): 32 | super(CoordAtt, self).__init__() 33 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 34 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 35 | 36 | mip = max(8, inp // reduction) 37 | 38 | # Lightweight path 39 | self.light_conv = nn.Sequential( 40 | nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0), 41 | nn.BatchNorm2d(mip), 42 | h_swish() 43 | ) 44 | 45 | # Complex path 46 | self.complex_conv = nn.Sequential( 47 | nn.Conv2d(inp, mip, kernel_size=3, stride=1, padding=1), 48 | nn.BatchNorm2d(mip), 49 | h_swish(), 50 | nn.Conv2d(mip, mip, kernel_size=3, stride=1, padding=1), 51 | nn.BatchNorm2d(mip), 52 | h_swish() 53 | ) 54 | 55 | self.conv_h = nn.Conv2d(mip, inp, 
kernel_size=1, stride=1, padding=0)
56 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
57 | 
58 |     def forward(self, x):
59 |         identity = x
60 | 
61 |         # Calculate feature map variance as a cheap input-complexity heuristic
62 |         variance = x.var(dim=(2, 3), keepdim=True).mean()
63 | 
64 |         n, c, h, w = x.size()
65 |         x_h = self.pool_h(x)
66 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)
67 |         y = torch.cat([x_h, x_w], dim=2)
68 | 
69 |         # Route the pooled descriptor through the path chosen by variance;
70 |         # both paths map inp -> mip channels, keeping shapes consistent
71 |         if variance < 0.5:  # Threshold can be tuned
72 |             y = self.light_conv(y)
73 |         else:
74 |             y = self.complex_conv(y)
75 | 
76 |         x_h, x_w = torch.split(y, [h, w], dim=2)
77 |         x_w = x_w.permute(0, 1, 3, 2)
78 | 
79 |         a_h = self.conv_h(x_h).sigmoid()
80 |         a_w = self.conv_w(x_w).sigmoid()
81 | 
82 |         out = identity * a_w * a_h
83 | 
84 |         return out
85 | 
86 | if __name__ == '__main__':
87 |     x = torch.randn(2, 64, 32, 32)
88 |     att = CoordAtt(inp=64, reduction=32)
89 |     out = att(x)
90 |     print("输入尺寸:", x.shape)
91 |     print("输出尺寸:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/code/adaptive_complexity_attention.py:
--------------------------------------------------------------------------------
1 | """
2 | Incorporate a complexity assessment module within the SEAttention framework
3 | Implement a function that calculates a complexity score using simple features like pixel intensity variance or entropy from input data
4 | Modify the forward function of SEAttention to adjust the attention weights using this complexity score
5 | Evaluate the model's performance across small target detection tasks with varying image complexities, using metrics such as precision, recall, and F1-score
6 | Compare the results against the baseline SEAttention model to demonstrate improvements in robustness and adaptability
7 | 
8 | """
9 | 
10 | # Modified code
11 | import numpy as np
12 | import torch
13 | from torch import flatten, nn
14 | from torch.nn import init
15 | from torch.nn.modules.activation import ReLU
16 | from torch.nn.modules.batchnorm import BatchNorm2d
17 | from torch.nn import functional as F
18 | import torchvision.transforms as transforms
19 | 
20 | 
21 | class SEAttention(nn.Module):
22 | 
23 |     def __init__(self, channel=512, reduction=16):
24 |         super().__init__()
25 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
26 |         self.fc = nn.Sequential(
27 |             nn.Linear(channel, channel // reduction, bias=False),
28 |             nn.ReLU(inplace=True),
29 |             nn.Linear(channel // reduction, channel, bias=False),
30 |             nn.Sigmoid()
31 |         )
32 | 
33 |     def complexity_score(self, x):
34 |         # Average across channels for a single intensity map; torchvision's
35 |         # Grayscale only accepts 1- or 3-channel images, not deep feature maps
36 |         x_gray = x.mean(dim=1, keepdim=True)
37 | 
38 |         # Compute pixel intensity variance as complexity score
39 |         variance = torch.var(x_gray, dim=(2, 3), keepdim=True)
40 | 
41 |         # Normalize the variance to be between 0 and 1
42 |         max_variance = torch.max(variance)
43 |         min_variance = torch.min(variance)
44 |         complexity_score = (variance - min_variance) / (max_variance - min_variance + 1e-5)
45 | 
46 |         return complexity_score
47 | 
48 |     def init_weights(self):
49 |         for m in self.modules():
50 |             if isinstance(m, nn.Conv2d):
51 |                 init.kaiming_normal_(m.weight, mode='fan_out')
52 |                 if m.bias is not None:
53 |                     init.constant_(m.bias, 0)
54 |             elif isinstance(m, nn.BatchNorm2d):
55 |                 init.constant_(m.weight, 1)
56 |                 init.constant_(m.bias, 0)
57 |             elif isinstance(m, nn.Linear):
58 |                 init.normal_(m.weight, std=0.001)
59 |                 if m.bias is not None:
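                    # zero bias keeps freshly initialized layers from adding a channel offset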
60 |                     init.constant_(m.bias, 0)
61 | 
62 |     def forward(self, x):
63 |         b, c, _, _ = x.size()
64 |         # Calculate complexity score
65 |         complexity = self.complexity_score(x)
66 | 
67 |         # Original SEAttention operations
68 |         y = self.avg_pool(x).view(b, c)
69 |         y = self.fc(y).view(b, c, 1, 1)
70 | 
71 |         # Adjust attention weights using complexity score
72 |         adjusted_y = y * complexity
73 | 
74 |         return x * adjusted_y.expand_as(x)
75 | 
76 | if __name__ == '__main__':
77 |     model = SEAttention()
78 |     model.init_weights()
79 |     input = torch.randn(1, 512, 7, 7)
80 |     output = model(input)
81 |     print(output.shape)
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/code/adaptive_gating_attention.py:
--------------------------------------------------------------------------------
1 | """
2 | Extend SEAttention by adding a learnable gating mechanism that dynamically adjusts attention weights based on input complexity
3 | Implement this by introducing a gating layer that takes input feature statistics (e.g., variance, mean) to modulate the balance between the original feature and the recalibrated attention feature
4 | Modify the forward function to integrate this gating mechanism after the channel attention
5 | Evaluate the model's performance by comparing it with the baseline SEAttention and other modifications, using synthetic datasets for small target detection and analyzing adaptive behavior through feature map visualizations
6 | 
7 | """
8 | 
9 | # Modified code
10 | import numpy as np
11 | import torch
12 | from torch import flatten, nn
13 | from torch.nn import init
14 | from torch.nn.modules.activation import ReLU
15 | from torch.nn.modules.batchnorm import BatchNorm2d
16 | from torch.nn import functional as F
17 | 
18 | 
19 | class SEAttention(nn.Module):
20 | 
21 |     def __init__(self, channel=512, reduction=16):
22 |         super().__init__()
23 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
24 |         self.fc = nn.Sequential(
25 |             nn.Linear(channel, channel // reduction, bias=False),
26 |             nn.ReLU(inplace=True),
27 |             nn.Linear(channel // reduction, channel, bias=False),
28 |             nn.Sigmoid()
29 |         )
30 |         # Gating mechanism
31 |         self.gate_fc = nn.Sequential(
32 |             nn.Linear(2, channel // reduction, bias=False),  # two scalar input statistics
33 |             nn.ReLU(inplace=True),
34 |             nn.Linear(channel // reduction, 1, bias=False),
35 |             nn.Sigmoid()
36 |         )
37 | 
38 |     def init_weights(self):
39 |         for m in self.modules():
40 |             if isinstance(m, nn.Conv2d):
41 |                 init.kaiming_normal_(m.weight, mode='fan_out')
42 |                 if m.bias is not None:
43 |                     init.constant_(m.bias, 0)
44 |             elif isinstance(m, nn.BatchNorm2d):
45 |                 init.constant_(m.weight, 1)
46 |                 init.constant_(m.bias, 0)
47 |             elif isinstance(m, nn.Linear):
48 |                 init.normal_(m.weight, std=0.001)
49 |                 if m.bias is not None:
50 |                     init.constant_(m.bias, 0)
51 | 
52 |     def forward(self, x):
53 |         b, c, _, _ = x.size()
54 |         y = self.avg_pool(x).view(b, c)
55 |         attention_weights = self.fc(y).view(b, c, 1, 1)
56 | 
57 |         # Compute global input statistics: one scalar mean and variance per sample
58 |         mean = x.mean(dim=[1, 2, 3])
59 |         variance = x.var(dim=[1, 2, 3])
60 |         feature_stats = torch.stack((mean, variance), dim=1)  # shape (b, 2)
61 | 
62 |         # Gating mechanism
63 |         gate_value = self.gate_fc(feature_stats).view(b, 1, 1, 1)
64 | 
65 |         # Modulate attention feature with gating mechanism
66 |         modulated_feature = gate_value * x + (1 - gate_value) * (x * attention_weights.expand_as(x))
67 | 
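        # The gate forms a convex combination: gate_value near 1 keeps the raw
        # features, gate_value near 0 trusts the SE-recalibrated features
        # (e.g. gate_value = 0.8 gives 0.8 * x + 0.2 * attended(x))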
68 |         return modulated_feature
69 | 
70 | if __name__ == '__main__':
71 |     model = SEAttention()
72 |     model.init_weights()
73 |     input = torch.randn(1, 512, 7, 7)
74 |     output = model(input)
75 |     print(output.shape)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/code/xxx-dual_domain_coordatt.py:
--------------------------------------------------------------------------------
1 | """
2 | Modify the CoordAtt module to incorporate both spatial and frequency domain attention mechanisms
3 | Perform a Fast Fourier Transform (FFT) on the input feature maps to capture frequency domain information
4 | Compute attention weights separately for spatial and frequency domains, then merge them using a simple weighted sum or concatenation followed by a linear transformation
5 | Adjust the forward method to include these steps, and evaluate the module's performance on a small benchmark dataset, comparing improvements in feature representation, accuracy, and computational efficiency against the original CoordAtt and other variants
6 | 
7 | """
8 | 
9 | # Modified code
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | 
14 | class h_sigmoid(nn.Module):
15 |     def __init__(self, inplace=True):
16 |         super(h_sigmoid, self).__init__()
17 |         self.relu = nn.ReLU6(inplace=inplace)
18 | 
19 |     def forward(self, x):
20 |         return self.relu(x + 3) / 6
21 | 
22 | class h_swish(nn.Module):
23 |     def __init__(self, inplace=True):
24 |         super(h_swish, self).__init__()
25 |         self.sigmoid = h_sigmoid(inplace=inplace)
26 | 
27 |     def forward(self, x):
28 |         return x * self.sigmoid(x)
29 | 
30 | class CoordAtt(nn.Module):
31 |     def __init__(self, inp, reduction=32, freq_weight=0.5):
32 |         super(CoordAtt, self).__init__()
33 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
34 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
35 | 
36 |         mip = max(8, inp // reduction)
37 | 
38 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
39 |         self.bn1 = nn.BatchNorm2d(mip)
40 |         self.act = h_swish()
41 | 
42 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
43 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
44 | 
45 |         # Frequency domain attention: keep inp channels so this branch can be
46 |         # summed with the spatially attended branch
47 |         self.freq_weight = freq_weight
48 |         self.conv_freq = nn.Conv2d(inp, inp, kernel_size=1, stride=1, padding=0)
49 |         self.bn_freq = nn.BatchNorm2d(inp)
50 | 
51 |     def forward(self, x):
52 |         identity = x
53 | 
54 |         # Spatial attention
55 |         n, c, h, w = x.size()
56 |         x_h = self.pool_h(x)
57 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)
58 | 
59 |         y_spatial = torch.cat([x_h, x_w], dim=2)
60 |         y_spatial = self.conv1(y_spatial)
61 |         y_spatial = self.bn1(y_spatial)
62 |         y_spatial = self.act(y_spatial)
63 | 
64 |         x_h, x_w = torch.split(y_spatial, [h, w], dim=2)
65 |         x_w = x_w.permute(0, 1, 3, 2)
66 | 
67 |         a_h = self.conv_h(x_h).sigmoid()
68 |         a_w = self.conv_w(x_w).sigmoid()
69 | 
70 |         # Frequency domain attention
71 |         x_freq = torch.fft.fft2(x)
72 |         x_freq = torch.abs(x_freq)  # Use magnitude spectrum
73 |         x_freq = self.conv_freq(x_freq)
74 |         x_freq = self.bn_freq(x_freq)
75 |         x_freq = self.act(x_freq)
76 | 
77 |         # Weighted sum of spatial and frequency domain attention
78 |         out = (1 - self.freq_weight) * identity * a_w * a_h + self.freq_weight * x_freq
79 | 
80 |         return out
81 | 
82 | if __name__ == '__main__':
83 |     x = torch.randn(2, 64, 32, 32)
84 |     att = CoordAtt(inp=64, reduction=32)
85 |     out = att(x)
86 |     print("输入尺寸:", x.shape)
87 |     print("输出尺寸:", out.shape)
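Several of the idea files above end with the same evaluation request: compare parameter count and runtime against the baseline CoordAtt. A minimal benchmarking sketch is shown below; `benchmark` is a hypothetical helper that is not part of this repository, and it assumes `CoordAtt` has been imported from whichever variant file is being measured.

import time
import torch

def benchmark(module, x, warmup=5, iters=50):
    # Parameter count plus rough CPU wall-clock time per forward pass
    params = sum(p.numel() for p in module.parameters())
    module.eval()
    with torch.no_grad():
        for _ in range(warmup):
            module(x)                      # warm-up runs are not timed
        start = time.perf_counter()
        for _ in range(iters):
            module(x)
        avg_sec = (time.perf_counter() - start) / iters
    return params, avg_sec

if __name__ == '__main__':
    x = torch.randn(2, 64, 32, 32)
    att = CoordAtt(inp=64, reduction=32)   # any variant defined above
    params, sec = benchmark(att, x)
    print(f"params: {params}, avg forward: {sec * 1e3:.2f} ms")

The same harness can be pointed at two variants in turn to get a like-for-like comparison on identical inputs.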
-------------------------------------------------------------------------------- /generation_idea_template/coordattention/code/xxx-se_coordatt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Integrate a Squeeze-and-Excitation (SE) block within the CoordAtt module 3 | Before applying the coordinate attention, add an SE block that squeezes the spatial dimensions and excites channels based on global average pooling 4 | Modify the forward method to include this SE block before the existing coordinate attention operations 5 | Evaluate the effectiveness by testing on a small benchmark dataset, comparing feature representation and attention quality to the original CoordAtt, and analyzing the computational overhead and parameter count 6 | 7 | """ 8 | 9 | # 创新不足 10 | 11 | # Modified code 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | 17 | class h_sigmoid(nn.Module): 18 | def __init__(self, inplace=True): 19 | super(h_sigmoid, self).__init__() 20 | self.relu = nn.ReLU6(inplace=inplace) 21 | 22 | def forward(self, x): 23 | return self.relu(x + 3) / 6 24 | 25 | 26 | class h_swish(nn.Module): 27 | def __init__(self, inplace=True): 28 | super(h_swish, self).__init__() 29 | self.sigmoid = h_sigmoid(inplace=inplace) 30 | 31 | def forward(self, x): 32 | return x * self.sigmoid(x) 33 | 34 | 35 | class SEBlock(nn.Module): 36 | def __init__(self, inp, reduction=16): 37 | super(SEBlock, self).__init__() 38 | self.global_avgpool = nn.AdaptiveAvgPool2d(1) 39 | self.fc1 = nn.Conv2d(inp, inp // reduction, kernel_size=1, padding=0) 40 | self.relu = nn.ReLU(inplace=True) 41 | self.fc2 = nn.Conv2d(inp // reduction, inp, kernel_size=1, padding=0) 42 | self.sigmoid = nn.Sigmoid() 43 | 44 | def forward(self, x): 45 | se = self.global_avgpool(x) 46 | se = self.fc1(se) 47 | se = self.relu(se) 48 | se = self.fc2(se) 49 | se = self.sigmoid(se) 50 | return x * se 51 | 52 | 53 | class CoordAtt(nn.Module): 54 | def __init__(self, inp, reduction=32): 55 | super(CoordAtt, self).__init__() 56 | self.se_block = SEBlock(inp, reduction=reduction) 57 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 58 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 59 | 60 | mip = max(8, inp // reduction) 61 | 62 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 63 | self.bn1 = nn.BatchNorm2d(mip) 64 | self.act = h_swish() 65 | 66 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 67 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 68 | 69 | def forward(self, x): 70 | # Apply SE block 71 | x = self.se_block(x) 72 | identity = x 73 | 74 | n, c, h, w = x.size() 75 | x_h = self.pool_h(x) 76 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 77 | 78 | y = torch.cat([x_h, x_w], dim=2) 79 | y = self.conv1(y) 80 | y = self.bn1(y) 81 | y = self.act(y) 82 | 83 | x_h, x_w = torch.split(y, [h, w], dim=2) 84 | x_w = x_w.permute(0, 1, 3, 2) 85 | 86 | a_h = self.conv_h(x_h).sigmoid() 87 | a_w = self.conv_w(x_w).sigmoid() 88 | 89 | out = identity * a_w * a_h 90 | 91 | return out 92 | 93 | if __name__ == '__main__': 94 | x = torch.randn(2, 64, 32, 32) 95 | att = CoordAtt(inp=64, reduction=32) 96 | out = att(x) 97 | print("输入尺寸:", x.shape) 98 | print("输出尺寸:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention/code/xxx-shared_params_coordatt.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
2 | Modify the CoordAtt module to introduce a shared parameter block accessible to multiple network components, such as convolutional layers and activation functions
3 | Implement this shared parameter block as a separate module with learnable parameters
4 | Integrate it into the CoordAtt module by adjusting the conv1, conv_h, and conv_w layers to utilize these shared parameters
5 | Evaluate the parameter count reduction and computational efficiency by testing on a small benchmark dataset, comparing performance and representation quality with the original CoordAtt and other variants
6 | 
7 | """
8 | 
9 | # Modified code
10 | 
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 | 
15 | class h_sigmoid(nn.Module):
16 |     def __init__(self, inplace=True):
17 |         super(h_sigmoid, self).__init__()
18 |         self.relu = nn.ReLU6(inplace=inplace)
19 | 
20 |     def forward(self, x):
21 |         return self.relu(x + 3) / 6
22 | 
23 | class h_swish(nn.Module):
24 |     def __init__(self, inplace=True):
25 |         super(h_swish, self).__init__()
26 |         self.sigmoid = h_sigmoid(inplace=inplace)
27 | 
28 |     def forward(self, x):
29 |         return x * self.sigmoid(x)
30 | 
31 | # Shared parameter block to be used in multiple layers
32 | class SharedParameterBlock(nn.Module):
33 |     def __init__(self, out_channels):
34 |         super(SharedParameterBlock, self).__init__()
35 |         self.weight = nn.Parameter(torch.randn(1, out_channels, 1, 1))  # shaped to broadcast over batch and spatial dims
36 |         self.bias = nn.Parameter(torch.zeros(1, out_channels, 1, 1))
37 | 
38 |     def forward(self, x):
39 |         return x * self.weight + self.bias
40 | 
41 | class CoordAtt(nn.Module):
42 |     def __init__(self, inp, reduction=32):
43 |         super(CoordAtt, self).__init__()
44 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
45 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
46 | 
47 |         mip = max(8, inp // reduction)
48 | 
49 |         # Shared parameter blocks: one at the bottleneck width, one shared by conv_h and conv_w
50 |         self.shared_params = SharedParameterBlock(mip)
51 |         self.shared_params_out = SharedParameterBlock(inp)
52 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
53 |         self.bn1 = nn.BatchNorm2d(mip)
54 |         self.act = h_swish()
55 | 
56 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
57 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
58 | 
59 |     def forward(self, x):
60 |         identity = x
61 | 
62 |         n, c, h, w = x.size()
63 |         x_h = self.pool_h(x)
64 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)
65 | 
66 |         y = torch.cat([x_h, x_w], dim=2)
67 |         y = self.conv1(y)
68 |         y = self.bn1(y)
69 |         y = self.act(y)
70 | 
71 |         # Apply shared parameters
72 |         y = self.shared_params(y)
73 | 
74 |         x_h, x_w = torch.split(y, [h, w], dim=2)
75 |         x_w = x_w.permute(0, 1, 3, 2)
76 | 
77 |         # The same block modulates both the conv_h and conv_w outputs (inp channels)
78 |         a_h = self.shared_params_out(self.conv_h(x_h)).sigmoid()
79 |         a_w = self.shared_params_out(self.conv_w(x_w)).sigmoid()
80 | 
81 |         out = identity * a_w * a_h
82 | 
83 |         return out
84 | 
85 | if __name__ == '__main__':
86 |     x = torch.randn(2, 64, 32, 32)
87 |     att = CoordAtt(inp=64, reduction=32)
88 |     out = att(x)
89 |     print("Input size:", x.shape)
90 |     print("Output size:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/code/xxx-probabilistic_coordatt.py:
--------------------------------------------------------------------------------
1 | """
2 | Modify the CoordAtt module to introduce a probabilistic attention mechanism
3 | Implement a stochastic gating mechanism where attention weights are sampled from a Gaussian distribution with learnable parameters (mean and variance)
4 | Adjust the forward method to compute these probabilistic attention weights and integrate them into the feature modulation process
5 | Evaluate the impact on feature representation and robustness by testing on a small benchmark dataset, comparing the performance to the original CoordAtt and other variants, and analyzing the uncertainty estimates
6 | 
7 | """
8 | 
9 | import torch
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | 
13 | class h_sigmoid(nn.Module):
14 |     def __init__(self, inplace=True):
15 |         super(h_sigmoid, self).__init__()
16 |         self.relu = nn.ReLU6(inplace=inplace)
17 | 
18 |     def forward(self, x):
19 |         return self.relu(x + 3) / 6
20 | 
21 | class h_swish(nn.Module):
22 |     def __init__(self, inplace=True):
23 |         super(h_swish, self).__init__()
24 |         self.sigmoid = h_sigmoid(inplace=inplace)
25 | 
26 |     def forward(self, x):
27 |         return x * self.sigmoid(x)
28 | 
29 | class ProbabilisticAttention(nn.Module):
30 |     def __init__(self, channels):
31 |         super(ProbabilisticAttention, self).__init__()
32 |         self.mean = nn.Parameter(torch.zeros(1, channels, 1, 1))
33 |         self.log_var = nn.Parameter(torch.zeros(1, channels, 1, 1))
34 | 
35 |     def forward(self, x):
36 |         std = torch.exp(0.5 * self.log_var)
37 |         epsilon = torch.randn_like(std)
38 |         attention_weights = self.mean + std * epsilon  # reparameterized Gaussian sample
39 |         return torch.sigmoid(attention_weights)
40 | 
41 | class CoordAtt(nn.Module):
42 |     def __init__(self, inp, reduction=32):
43 |         super(CoordAtt, self).__init__()
44 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
45 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
46 | 
47 |         mip = max(8, inp // reduction)
48 | 
49 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
50 |         self.bn1 = nn.BatchNorm2d(mip)
51 |         self.act = h_swish()
52 | 
53 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
54 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
55 | 
56 |         # Introduce probabilistic attention
57 |         self.prob_att_h = ProbabilisticAttention(inp)
58 |         self.prob_att_w = ProbabilisticAttention(inp)
59 | 
60 |     def forward(self, x):
61 |         identity = x
62 | 
63 |         n, c, h, w = x.size()
64 |         x_h = self.pool_h(x)
65 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)
66 | 
67 |         y = torch.cat([x_h, x_w], dim=2)
68 |         y = self.conv1(y)
69 |         y = self.bn1(y)
70 |         y = self.act(y)
71 | 
72 |         x_h, x_w = torch.split(y, [h, w], dim=2)
73 |         x_w = x_w.permute(0, 1, 3, 2)
74 | 
75 |         a_h = self.conv_h(x_h)
76 |         a_w = self.conv_w(x_w)
77 | 
78 |         # Apply stochastic gates on top of the deterministic attention maps
79 |         pa_h = a_h.sigmoid() * self.prob_att_h(a_h)
80 |         pa_w = a_w.sigmoid() * self.prob_att_w(a_w)
81 | 
82 |         out = identity * pa_w * pa_h
83 | 
84 |         return out
85 | 
86 | if __name__ == '__main__':
87 |     x = torch.randn(2, 64, 32, 32)
88 |     att = CoordAtt(inp=64, reduction=32)
89 |     out = att(x)
90 |     print("Input size:", x.shape)
91 |     print("Output size:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/code/denoising_attention.py:
--------------------------------------------------------------------------------
1 | """
2 | Integrate a lightweight denoising autoencoder within the SEAttention framework
3 | Implement an encoder-decoder structure focused on feature compression and noise reduction
4 | Modify the forward function to pass input through the autoencoder before applying channel attention
5 | Optimize the autoencoder's parameters using a transfer learning approach, ensuring it is tailored for small target detection
6 | Evaluate performance by comparing detection accuracy and attention map clarity on small targets with and without the denoising mechanism, using synthetic datasets
7 | 
8 | """
9 | 
10 | # Modified code
11 | 
12 | import numpy as np
13 | import torch
14 | from torch import flatten, nn
15 | from torch.nn import init
16 | from torch.nn.modules.activation import ReLU
17 | from torch.nn.modules.batchnorm import BatchNorm2d
18 | from torch.nn import functional as F
19 | 
20 | class DenoisingAutoencoder(nn.Module):
21 |     def __init__(self, channel=512, latent_dim=128):
22 |         super().__init__()
23 |         # Encoder
24 |         self.encoder = nn.Sequential(
25 |             nn.Conv2d(channel, channel // 2, kernel_size=3, padding=1),
26 |             nn.ReLU(inplace=True),
27 |             nn.Conv2d(channel // 2, latent_dim, kernel_size=3, padding=1),
28 |             nn.ReLU(inplace=True)
29 |         )
30 |         # Decoder
31 |         self.decoder = nn.Sequential(
32 |             nn.Conv2d(latent_dim, channel // 2, kernel_size=3, padding=1),
33 |             nn.ReLU(inplace=True),
34 |             nn.Conv2d(channel // 2, channel, kernel_size=3, padding=1),
35 |             nn.Sigmoid()
36 |         )
37 | 
38 |     def forward(self, x):
39 |         encoded = self.encoder(x)
40 |         decoded = self.decoder(encoded)
41 |         return decoded
42 | 
43 | class SEAttention(nn.Module):
44 |     def __init__(self, channel=512, reduction=16):
45 |         super().__init__()
46 |         self.denoising_autoencoder = DenoisingAutoencoder(channel)
47 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
48 |         self.fc = nn.Sequential(
49 |             nn.Linear(channel, channel // reduction, bias=False),
50 |             nn.ReLU(inplace=True),
51 |             nn.Linear(channel // reduction, channel, bias=False),
52 |             nn.Sigmoid()
53 |         )
54 | 
55 |     def init_weights(self):
56 |         for m in self.modules():
57 |             if isinstance(m, nn.Conv2d):
58 |                 init.kaiming_normal_(m.weight, mode='fan_out')
59 |                 if m.bias is not None:
60 |                     init.constant_(m.bias, 0)
61 |             elif isinstance(m, nn.BatchNorm2d):
62 |                 init.constant_(m.weight, 1)
63 |                 init.constant_(m.bias, 0)
64 |             elif isinstance(m, nn.Linear):
65 |                 init.normal_(m.weight, std=0.001)
66 |                 if m.bias is not None:
67 |                     init.constant_(m.bias, 0)
68 | 
69 |     def forward(self, x):
70 |         # Pass input through the denoising autoencoder
71 |         denoised = self.denoising_autoencoder(x)
72 | 
73 |         # SEAttention mechanism
74 |         b, c, _, _ = denoised.size()
75 |         y = self.avg_pool(denoised).view(b, c)
76 |         y = self.fc(y).view(b, c, 1, 1)
77 |         return denoised * y.expand_as(denoised)
78 | 
79 | if __name__ == '__main__':
80 |     model = SEAttention()
81 |     model.init_weights()
82 |     input = torch.randn(1, 512, 7, 7)
83 |     output = model(input)
84 |     print(output.shape)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention-gemini/code/modulated_post_sigmoid_attention.py:
--------------------------------------------------------------------------------
1 | """
2 | Modify the `CoordAtt` module
3 | After pooling the height and width features, apply a *single* 1x1 convolution to each of them
4 | This 1x1 conv will project the pooled features to a lower dimension and also transform them for fusion
5 | Apply the sigmoid activation to the transformed features
6 | Introduce a learnable parameter for each of the sigmoid-activated features
7 | Multiply the sigmoid-activated feature with the learnable parameter
8 | Concatenate the modulated height and width attention maps along the channel dimension
9 | Apply a ReLU activation to the concatenated feature map
10 | Then, apply a final 1x1 convolution to the ReLU activated feature map to create the combined attention map
11 | Use this combined attention map to modulate the input feature map
12 | Modify the `__init__` to include the initial 1x1 convolutions, the final 1x1 convolution for fusion, and the learnable parameters
13 | Modify the `forward` to implement the initial 1x1 convolutions, sigmoid activation, modulation with learnable parameters, concatenation, ReLU activation, the final 1x1 convolution for fusion and modulate the input feature map
14 | The output can be compared to the baseline using the same test input and observing the changes in output
15 | 
16 | """
17 | 
18 | # Modified code
19 | import torch
20 | import torch.nn as nn
21 | import torch.nn.functional as F
22 | 
23 | 
24 | class h_sigmoid(nn.Module):
25 |     def __init__(self, inplace=True):
26 |         super(h_sigmoid, self).__init__()
27 |         self.relu = nn.ReLU6(inplace=inplace)
28 | 
29 |     def forward(self, x):
30 |         return self.relu(x + 3) / 6
31 | 
32 | 
33 | class h_swish(nn.Module):
34 |     def __init__(self, inplace=True):
35 |         super(h_swish, self).__init__()
36 |         self.sigmoid = h_sigmoid(inplace=inplace)
37 | 
38 |     def forward(self, x):
39 |         return x * self.sigmoid(x)
40 | 
41 | 
42 | class CoordAtt(nn.Module):
43 |     def __init__(self, inp, reduction=32):
44 |         super(CoordAtt, self).__init__()
45 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
46 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
47 | 
48 |         mip = max(8, inp // reduction)
49 | 
50 |         self.conv1_h = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
51 |         self.conv1_w = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
52 | 
53 |         self.param_h = nn.Parameter(torch.randn(1, mip, 1, 1) * 0.02)
54 |         self.param_w = nn.Parameter(torch.randn(1, mip, 1, 1) * 0.02)
55 | 
56 |         self.conv_fusion = nn.Conv2d(mip*2, inp, kernel_size=1, stride=1, padding=0)
57 | 
58 |         self.relu = nn.ReLU()
59 | 
60 | 
61 |     def forward(self, x):
62 |         identity = x
63 | 
64 |         n, c, h, w = x.size()
65 |         x_h = self.pool_h(x)
66 |         x_w = self.pool_w(x)
67 | 
68 |         x_w = x_w.permute(0, 1, 3, 2)
69 | 
70 |         x_h = self.conv1_h(x_h)
71 |         x_w = self.conv1_w(x_w)
72 | 
73 |         a_h = torch.sigmoid(x_h) * self.param_h  # (n, mip, h, 1)
74 |         a_w = torch.sigmoid(x_w) * self.param_w  # (n, mip, w, 1)
75 | 
76 |         y = torch.cat([a_h.expand(-1, -1, h, w), a_w.permute(0, 1, 3, 2).expand(-1, -1, h, w)], dim=1)  # broadcast both maps to (h, w) so the concat works for non-square inputs
77 | 
78 |         y = self.relu(y)
79 |         y = self.conv_fusion(y)
80 | 
81 | 
82 |         out = identity * y
83 | 
84 |         return out
85 | 
86 | if __name__ == '__main__':
87 |     x = torch.randn(2, 64, 32, 32)
88 |     att = CoordAtt(inp=64, reduction=32)
89 |     out = att(x)
90 |     print("Input size:", x.shape)
91 |     print("Output size:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/code/xxa-edge_aware_coordatt.py:
--------------------------------------------------------------------------------
1 | """
2 | Integrate a lightweight edge detection layer, such as a Sobel filter or small trainable convolutional layer, within the CoordAtt module
3 | Modify the forward method to compute edge maps and integrate them by modulating the attention weights
4 | Evaluate the impact on feature representation by testing on a small benchmark dataset, comparing performance to the original CoordAtt, and monitoring computational efficiency
5 | 
6 | """
7 | 
8 | # xxa
9 | 
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | import torchvision.transforms as transforms
14 | 
15 | class h_sigmoid(nn.Module):
16 |     def __init__(self, inplace=True):
17 |         super(h_sigmoid, self).__init__()
18 |         self.relu = nn.ReLU6(inplace=inplace)
19 | 
20 |     def forward(self, x):
21 |         return self.relu(x + 3) / 6
22 | 
23 | 
24 | class h_swish(nn.Module):
25 |     def __init__(self, inplace=True):
26 |         super(h_swish, self).__init__()
27 |         self.sigmoid = h_sigmoid(inplace=inplace)
28 | 
29 |     def forward(self, x):
30 |         return x * self.sigmoid(x)
31 | 
32 | 
33 | class CoordAtt(nn.Module):
34 |     def __init__(self, inp, reduction=32):
35 |         super(CoordAtt, self).__init__()
36 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
37 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
38 | 
39 |         mip = max(8, inp // reduction)
40 | 
41 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
42 |         self.bn1 = nn.BatchNorm2d(mip)
43 |         self.act = h_swish()
44 | 
45 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
46 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
47 | 
48 |         # Edge detection using Sobel filters (fixed, non-trainable weights)
49 |         self.sobel_x = nn.Conv2d(inp, 1, kernel_size=3, stride=1, padding=1, bias=False)
50 |         self.sobel_y = nn.Conv2d(inp, 1, kernel_size=3, stride=1, padding=1, bias=False)
51 |         sobel_kernel_x = torch.tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=torch.float32).expand(1, inp, 3, 3)
52 |         sobel_kernel_y = torch.tensor([[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype=torch.float32).expand(1, inp, 3, 3)
53 |         self.sobel_x.weight = nn.Parameter(sobel_kernel_x, requires_grad=False)
54 |         self.sobel_y.weight = nn.Parameter(sobel_kernel_y, requires_grad=False)
55 | 
56 |     def forward(self, x):
57 |         identity = x
58 | 
59 |         # Compute edge maps
60 |         edge_x = self.sobel_x(x)
61 |         edge_y = self.sobel_y(x)
62 |         edge_map = torch.sqrt(edge_x ** 2 + edge_y ** 2)
63 |         edge_map = edge_map.sigmoid()  # Normalize edge map
64 | 
65 |         n, c, h, w = x.size()
66 |         x_h = self.pool_h(x)
67 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)
68 | 
69 |         y = torch.cat([x_h, x_w], dim=2)
70 |         y = self.conv1(y)
71 |         y = self.bn1(y)
72 |         y = self.act(y)
73 | 
74 |         x_h, x_w = torch.split(y, [h, w], dim=2)
75 |         x_w = x_w.permute(0, 1, 3, 2)
76 | 
77 |         a_h = self.conv_h(x_h).sigmoid()
78 |         a_w = self.conv_w(x_w).sigmoid()
79 | 
80 |         # Integrate edge map by modulating attention
81 |         out = identity * a_w * a_h * edge_map
82 | 
83 |         return out
84 | 
85 | if __name__ == '__main__':
86 |     x = torch.randn(2, 64, 32, 32)
87 |     att = CoordAtt(inp=64, reduction=32)
88 |     out = att(x)
89 |     print("Input size:", x.shape)
90 |     print("Output size:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/probiou/experiment.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | 
4 | 
5 | def _get_covariance_matrix(boxes):
6 |     """
7 |     Generate covariance matrices from OBBs.
8 | 
9 |     Args:
10 |         boxes (torch.Tensor): A tensor of shape (N, 5) representing rotated bounding boxes, with xywhr format.
11 | 
12 |     Returns:
13 |         (torch.Tensor): Covariance matrices corresponding to original rotated bounding boxes.
14 |     """
15 |     # Gaussian bounding boxes, ignore the center points (the first two columns) because they are not needed here.
16 |     gbbs = torch.cat((boxes[:, 2:4].pow(2) / 12, boxes[:, 4:]), dim=-1)
17 |     a, b, c = gbbs.split(1, dim=-1)
18 |     cos = c.cos()
19 |     sin = c.sin()
20 |     cos2 = cos.pow(2)
21 |     sin2 = sin.pow(2)
22 |     return a * cos2 + b * sin2, a * sin2 + b * cos2, (a - b) * cos * sin
23 | 
24 | 
25 | def probiou(obb1, obb2, CIoU=False, eps=1e-7):
26 |     """
27 |     Calculate probabilistic IoU between oriented bounding boxes.
28 | 
29 |     Implements the algorithm from https://arxiv.org/pdf/2106.06072v1.pdf.
30 | 
31 |     Args:
32 |         obb1 (torch.Tensor): Ground truth OBBs, shape (N, 5), format xywhr.
33 |         obb2 (torch.Tensor): Predicted OBBs, shape (N, 5), format xywhr.
34 |         CIoU (bool, optional): If True, calculate CIoU. Defaults to False.
35 |         eps (float, optional): Small value to avoid division by zero. Defaults to 1e-7.
36 | 
37 |     Returns:
38 |         (torch.Tensor): OBB similarities, shape (N,).
39 | 
40 |     Note:
41 |         OBB format: [center_x, center_y, width, height, rotation_angle].
42 |         If CIoU is True, returns CIoU instead of IoU.
43 |     """
44 |     x1, y1 = obb1[..., :2].split(1, dim=-1)
45 |     x2, y2 = obb2[..., :2].split(1, dim=-1)
46 |     a1, b1, c1 = _get_covariance_matrix(obb1)
47 |     a2, b2, c2 = _get_covariance_matrix(obb2)
48 | 
49 |     t1 = (
50 |         ((a1 + a2) * (y1 - y2).pow(2) + (b1 + b2) * (x1 - x2).pow(2)) / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps)
51 |     ) * 0.25
52 |     t2 = (((c1 + c2) * (x2 - x1) * (y1 - y2)) / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps)) * 0.5
53 |     t3 = (
54 |         ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2))
55 |         / (4 * ((a1 * b1 - c1.pow(2)).clamp_(0) * (a2 * b2 - c2.pow(2)).clamp_(0)).sqrt() + eps)
56 |         + eps
57 |     ).log() * 0.5
58 |     bd = (t1 + t2 + t3).clamp(eps, 100.0)
59 |     hd = (1.0 - (-bd).exp() + eps).sqrt()
60 |     iou = 1 - hd
61 | 
62 |     if CIoU:  # only include the wh aspect ratio part
63 |         w1, h1 = obb1[..., 2:4].split(1, dim=-1)
64 |         w2, h2 = obb2[..., 2:4].split(1, dim=-1)
65 |         v = (4 / math.pi**2) * ((w2 / h2).atan() - (w1 / h1).atan()).pow(2)
66 |         with torch.no_grad():
67 |             alpha = v / (v - iou + (1 + eps))
68 |         return iou - v * alpha  # CIoU
69 |     return iou
70 | 
71 | if __name__ == "__main__":
72 |     # Define two OBBs
73 |     obb1 = torch.tensor([[0.0, 0.0, 2.0, 4.0, 0.0]])  # [x, y, w, h, r=0°]
74 |     obb2 = torch.tensor([[5.0, 5.0, 6.0, 2.0, math.radians(45)]])  # [x, y, w, h, r=45°]
75 |     print("OBB1:", obb1)
76 |     print("OBB2:", obb2)
77 | 
78 |     # Call _get_covariance_matrix
79 |     a1, b1, c1 = _get_covariance_matrix(obb1)
80 |     a2, b2, c2 = _get_covariance_matrix(obb2)
81 |     print("Covariance components of OBB1:", a1, b1, c1)
82 |     print("Covariance components of OBB2:", a2, b2, c2)
83 | 
84 |     # Compute probabilistic IoU
85 |     iou = probiou(obb1, obb2, CIoU=False)
86 |     print("ProbIoU:", iou)
87 | 
88 |     # Compute CIoU
89 |     ciou = probiou(obb1, obb2, CIoU=True)
90 |     print("CIoU:", ciou)
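91 | 
92 |     # Hedged usage sketch (an illustrative addition): probiou is differentiable
93 |     # end-to-end, so 1 - probiou can serve directly as a regression loss for
94 |     # oriented boxes.
95 |     pred = torch.tensor([[0.5, 0.5, 2.0, 4.0, 0.1]], requires_grad=True)
96 |     loss = (1.0 - probiou(pred, obb1)).mean()
97 |     loss.backward()
98 |     print("Gradient w.r.t. the predicted box:", pred.grad)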
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/code/aaa-freq_domain_coordatt.py:
--------------------------------------------------------------------------------
1 | """
2 | Integrate a frequency domain analysis step within the CoordAtt module
3 | Implement a function to perform Fast Fourier Transform (FFT) on the input feature maps, focusing on extracting significant frequency components
4 | Use these components to modulate the attention weights in the CoordAtt module
5 | Modify the forward method to incorporate this frequency domain information before applying the spatial attention mechanism
6 | Evaluate the impact on feature representation using metrics such as accuracy, feature representation quality, and computational efficiency by testing on a small benchmark dataset
7 | Compare the performance to the original CoordAtt and other variants
8 | 
9 | """
10 | ### aaa
11 | # Modified code
12 | import torch
13 | import torch.nn as nn
14 | import torch.nn.functional as F
15 | import torch.fft
16 | 
17 | class h_sigmoid(nn.Module):
18 |     def __init__(self, inplace=True):
19 |         super(h_sigmoid, self).__init__()
20 |         self.relu = nn.ReLU6(inplace=inplace)
21 | 
22 |     def forward(self, x):
23 |         return self.relu(x + 3) / 6
24 | 
25 | class h_swish(nn.Module):
26 |     def __init__(self, inplace=True):
27 |         super(h_swish, self).__init__()
28 |         self.sigmoid = h_sigmoid(inplace=inplace)
29 | 
30 |     def forward(self, x):
31 |         return x * self.sigmoid(x)
32 | 
33 | class CoordAtt(nn.Module):
34 |     def __init__(self, inp, reduction=32):
35 |         super(CoordAtt, self).__init__()
36 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
37 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
38 | 
39 |         mip = max(8, inp // reduction)
40 | 
41 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
42 |         self.bn1 = nn.BatchNorm2d(mip)
43 |         self.act = h_swish()
44 | 
45 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
46 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
47 | 
48 |     def forward(self, x):
49 |         identity = x
50 | 
51 |         # Compute FFT on a downsampled version of input feature maps
52 |         pool_x = F.adaptive_avg_pool2d(x, (x.size(2) // 2, x.size(3) // 2))
53 |         fft_x = torch.fft.fft2(pool_x)
54 |         fft_x = torch.fft.fftshift(fft_x)
55 | 
56 |         # Extract significant frequency components
57 |         freq_magnitude = torch.abs(fft_x)
58 | 
59 |         # Normalize frequency components to modulate attention
60 |         freq_magnitude = (freq_magnitude - freq_magnitude.min()) / (freq_magnitude.max() - freq_magnitude.min())
61 | 
62 |         n, c, h, w = x.size()
63 |         x_h = self.pool_h(x)
64 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)
65 | 
66 |         y = torch.cat([x_h, x_w], dim=2)
67 |         y = self.conv1(y)
68 |         y = self.bn1(y)
69 |         y = self.act(y)
70 | 
71 |         x_h, x_w = torch.split(y, [h, w], dim=2)
72 |         x_w = x_w.permute(0, 1, 3, 2)
73 | 
74 |         a_h = self.conv_h(x_h).sigmoid()
75 |         a_w = self.conv_w(x_w).sigmoid()
76 | 
77 |         # Modulate attention weights with frequency magnitude
78 |         a_h = a_h * F.interpolate(freq_magnitude[:, :, :h, :], size=(h, 1))
79 |         a_w = a_w * F.interpolate(freq_magnitude[:, :, :, :w], size=(1, w))
80 | 
81 |         out = identity * a_w * a_h
82 | 
83 |         return out
84 | 
85 | if __name__ == '__main__':
86 |     x = torch.randn(2, 64, 32, 32)
87 |     att = CoordAtt(inp=64, reduction=32)
88 |     out = att(x)
89 |     print("Input size:", x.shape)
90 |     print("Output size:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/code/xaa-adaptive_coordatt.py:
--------------------------------------------------------------------------------
1 | """
2 | Implement a simple heuristic-based mechanism within the CoordAtt module to dynamically adjust attention parameters based on input feature map statistics such as variance or entropy
3 | Modify the forward method to incorporate this mechanism, allowing it to adaptively configure the attention strategy
4 | Evaluate the adaptability and performance on a small benchmark dataset, comparing it with the original CoordAtt and other versions, focusing on accuracy, feature representation quality, and computational efficiency
5 | The heuristic could be a rule-based system or a lightweight decision tree
6 | 
7 | """
8 | 
9 | # xaa: worth a try; xxx: tested and found not to work -- it learns nothing
10 | 
11 | # Modified code
12 | 
13 | import torch
14 | import torch.nn as nn
15 | import torch.nn.functional as F
16 | import numpy as np
17 | 
18 | class h_sigmoid(nn.Module):
19 |     def __init__(self, inplace=True):
20 |         super(h_sigmoid, self).__init__()
21 |         self.relu = nn.ReLU6(inplace=inplace)
22 | 
23 |     def forward(self, x):
24 |         return self.relu(x + 3) / 6
25 | 
26 | class h_swish(nn.Module):
27 |     def __init__(self, inplace=True):
28 |         super(h_swish, self).__init__()
29 |         self.sigmoid = h_sigmoid(inplace=inplace)
30 | 
31 |     def forward(self, x):
32 |         return x * self.sigmoid(x)
33 | 
34 | class ADACoordAtt(nn.Module):
35 |     def __init__(self, inp, reduction=32):
36 |         super(ADACoordAtt, self).__init__()
37 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
38 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
39 | 
40 |         mip = max(8, inp // reduction)
41 | 
42 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
43 |         self.bn1 = nn.BatchNorm2d(mip)
44 |         self.act = h_swish()
45 | 
46 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
47 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
48 | 
49 |     def compute_variance(self, x):
50 |         # Calculate the variance of the feature map
51 |         return torch.var(x, dim=(2, 3), keepdim=True)
52 | 
53 |     def forward(self, x):
54 |         identity = x
55 | 
56 |         n, c, h, w = x.size()
57 | 
58 |         # Compute variance and use it to adjust the attention
59 |         variance = self.compute_variance(x)
60 | 
61 |         # Heuristic rule: if variance is high, reduce the impact of attention
62 |         # Scale factor ranges from 0.5 to 1.0 based on variance
63 |         scale_factor = torch.clamp(1.0 - 0.5 * (variance / variance.max()), min=0.5, max=1.0)
64 | 
65 |         x_h = self.pool_h(x)
66 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)
67 | 
68 |         y = torch.cat([x_h, x_w], dim=2)
69 |         y = self.conv1(y)
70 |         y = self.bn1(y)
71 |         y = self.act(y)
72 | 
73 |         x_h, x_w = torch.split(y, [h, w], dim=2)
74 |         x_w = x_w.permute(0, 1, 3, 2)
75 | 
76 |         a_h = self.conv_h(x_h).sigmoid()
77 |         a_w = self.conv_w(x_w).sigmoid()
78 | 
79 |         # Apply the scale factor to the attention maps
80 |         a_h = a_h * scale_factor
81 |         a_w = a_w * scale_factor
82 | 
83 |         out = identity * a_w * a_h
84 | 
85 |         return out
86 | 
87 | if __name__ == '__main__':
88 |     x = torch.randn(2, 64, 32, 32)
89 |     att = ADACoordAtt(inp=64, reduction=32)
90 |     out = att(x)
91 |     print("Input size:", x.shape)
92 |     print("Output size:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/code/xxx-content_adaptive_coordatt.py:
--------------------------------------------------------------------------------
1 | """
2 | Modify the `CoordAtt` class to incorporate a content-adaptive attention mechanism
3 | Implement a self-attention-like operation where attention weights are computed based on the cosine similarity between features
4 | Integrate this mechanism before the existing coordinate attention operations, allowing attention weights to be modulated based on input content
5 | Evaluate the benefits of this approach by assessing feature representation quality and comparing performance on a small benchmark dataset against the original CoordAtt
6 | Monitor computational efficiency and parameter count to ensure the approach remains lightweight
7 | 
8 | """
9 | 
10 | # Limited novelty
11 | # Modified code
12 | 
13 | import torch
14 | import torch.nn as nn
15 | import torch.nn.functional as F
16 | 
17 | 
18 | class h_sigmoid(nn.Module):
19 |     def __init__(self, inplace=True):
20 |         super(h_sigmoid, self).__init__()
21 |         self.relu = nn.ReLU6(inplace=inplace)
22 | 
23 |     def forward(self, x):
24 |         return self.relu(x + 3) / 6
25 | 
26 | 
27 | class h_swish(nn.Module):
28 |     def __init__(self, inplace=True):
29 |         super(h_swish, self).__init__()
30 |         self.sigmoid = h_sigmoid(inplace=inplace)
31 | 
32 |     def forward(self, x):
33 |         return x * self.sigmoid(x)
34 | 
35 | 
36 | class CoordAtt(nn.Module):
37 |     def __init__(self, inp, reduction=32):
38 |         super(CoordAtt, self).__init__()
39 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
40 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
41 | 
42 |         mip = max(8, inp // reduction)
43 | 
44 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
45 |         self.bn1 = nn.BatchNorm2d(mip)
46 |         self.act = h_swish()
47 | 
48 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
49 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
50 | 
51 |         # Self-attention-like mechanism for content-adaptive weighting
52 |         self.query_conv = nn.Conv2d(inp, inp // reduction, kernel_size=1)
53 |         self.key_conv = nn.Conv2d(inp, inp // reduction, kernel_size=1)
54 |         self.value_conv = nn.Conv2d(inp, inp, kernel_size=1)
55 |         self.softmax = nn.Softmax(dim=-1)
56 | 
57 |     def forward(self, x):
58 |         identity = x
59 | 
60 |         # Compute scaled dot-product similarity attention
61 |         n, c, h, w = x.size()
62 |         query = self.query_conv(x)
63 |         key = self.key_conv(x)
64 |         value = self.value_conv(x)
65 | 
66 |         query = query.view(n, -1, h * w)
67 |         key = key.view(n, -1, h * w)
68 |         value = value.view(n, -1, h * w)
69 | 
70 |         attention = torch.bmm(query.permute(0, 2, 1), key)
71 |         attention = self.softmax(attention / (c ** 0.5))
72 |         out_attention = torch.bmm(value, attention).view(n, c, h, w)
73 | 
74 |         # Existing coordinate attention operations
75 |         x_h = self.pool_h(out_attention)
76 |         x_w = self.pool_w(out_attention).permute(0, 1, 3, 2)
77 | 
78 |         y = torch.cat([x_h, x_w], dim=2)
79 |         y = self.conv1(y)
80 |         y = self.bn1(y)
81 |         y = self.act(y)
82 | 
83 |         x_h, x_w = torch.split(y, [h, w], dim=2)
84 |         x_w = x_w.permute(0, 1, 3, 2)
85 | 
86 |         a_h = self.conv_h(x_h).sigmoid()
87 |         a_w = self.conv_w(x_w).sigmoid()
88 | 
89 |         out = identity * a_w * a_h
90 | 
91 |         return out
92 | 
93 | if __name__ == '__main__':
94 |     x = torch.randn(2, 64, 32, 32)
95 |     att = CoordAtt(inp=64, reduction=32)
96 |     out = att(x)
97 |     print("Input size:", x.shape)
98 |     print("Output size:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/code/temporal_attention_fusion.py:
--------------------------------------------------------------------------------
1 | """
2 | Integrate a temporal attention mechanism using LSTM or GRU layers into the SEAttention framework
3 | This involves processing sequences of input frames to generate a temporal attention map
4 | Combine the temporal map with the SEAttention channel output through element-wise multiplication
5 | Evaluate improvements in detection metrics such as precision, recall, and F1-score on small target detection tasks, comparing against the baseline SEAttention model
6 | 
7 | """
8 | 
9 | # Modified code
10 | import numpy as np
11 | import torch
12 | from torch import flatten, nn
13 | from torch.nn import init
14 | from torch.nn.modules.activation import ReLU
15 | from torch.nn.modules.batchnorm import BatchNorm2d
16 | from torch.nn import functional as F
17 | 
18 | class SEAttention(nn.Module):
19 | 
20 |     def __init__(self, channel=512, reduction=16):
21 |         super().__init__()
22 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
23 |         self.fc = nn.Sequential(
24 |             nn.Linear(channel, channel // reduction, bias=False),
25 |             nn.ReLU(inplace=True),
26 |             nn.Linear(channel // reduction, channel, bias=False),
27 |             nn.Sigmoid()
28 |         )
29 | 
30 |     def init_weights(self):
31 |         for m in self.modules():
32 |             if isinstance(m, nn.Conv2d):
33 |                 init.kaiming_normal_(m.weight, mode='fan_out')
34 |                 if m.bias is not None:
35 |                     init.constant_(m.bias, 0)
36 |             elif isinstance(m, nn.BatchNorm2d):
37 |                 init.constant_(m.weight, 1)
38 |                 init.constant_(m.bias, 0)
39 |             elif isinstance(m, nn.Linear):
40 |                 init.normal_(m.weight, std=0.001)
41 |                 if m.bias is not None:
42 |                     init.constant_(m.bias, 0)
43 | 
44 |     def forward(self, x):
45 |         b, c, _, _ = x.size()
46 |         y = self.avg_pool(x).view(b, c)
47 |         y = self.fc(y).view(b, c, 1, 1)
48 |         return x * y.expand_as(x)
49 | 
50 | class TemporalAttention(nn.Module):
51 | 
52 |     def __init__(self, channel=512, hidden_size=256, num_layers=1):
53 |         super().__init__()
54 |         self.gru = nn.GRU(input_size=channel, hidden_size=hidden_size,
55 |                           num_layers=num_layers, batch_first=True)
56 |         self.fc = nn.Linear(hidden_size, channel)
57 |         self.sigmoid = nn.Sigmoid()
58 | 
59 |     def forward(self, x):
60 |         b, t, c, h, w = x.size()  # input shape is (batch, time, channel, height, width)
61 |         x = x.mean(dim=(3, 4))  # pool spatial dims so each step matches the GRU input_size
62 |         _, h_n = self.gru(x)  # h_n is the last hidden state
63 |         y = self.fc(h_n[-1])  # take the last layer's hidden state
64 |         y = self.sigmoid(y).view(b, c, 1, 1)
65 |         return y
66 | 
67 | class SEAttentionWithTemporal(nn.Module):
68 | 
69 |     def __init__(self, channel=512, reduction=16, hidden_size=256, num_layers=1):
70 |         super().__init__()
71 |         self.se_attention = SEAttention(channel, reduction)
72 |         self.temporal_attention = TemporalAttention(channel, hidden_size, num_layers)
73 | 
74 |     def forward(self, x):
75 |         se_output = self.se_attention(x[:, -1])  # apply SEAttention on the last frame
76 |         temporal_map = self.temporal_attention(x)
77 |         return se_output * temporal_map.expand_as(se_output)
78 | 
79 | if __name__ == '__main__':
80 |     model = SEAttentionWithTemporal()
81 |     model.se_attention.init_weights()
82 |     input = torch.randn(1, 5, 512, 7, 7)  # example with 5-frame sequence
83 |     output = model(input)
84 |     print(output.shape)
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/code/simplified_graph_attention.py:
--------------------------------------------------------------------------------
1 | """
2 | Extend SEAttention by incorporating a simplified Graph Neural Network (GNN) layer
3 | Treat feature maps as graphs with nodes representing spatial locations and edges encoding basic spatial relationships or proximity
4 | Implement a lightweight graph convolution technique to process these graphs, focusing on essential spatial dependencies
5 | Integrate this GNN layer after the channel attention stage
6 | Modify the forward function to include basic graph construction and processing
7 | Evaluate the model's performance on synthetic datasets by comparing detection accuracy and attention focus against baseline SEAttention, with emphasis on capturing spatial dependencies efficiently
8 | 
9 | """
10 | 
11 | # Modified code
12 | import numpy as np
13 | import torch
14 | from torch import flatten, nn
15 | from torch.nn import init
16 | from torch.nn.modules.activation import ReLU
17 | from torch.nn.modules.batchnorm import BatchNorm2d
18 | from torch.nn import functional as F
19 | from torch_geometric.nn import GCNConv  # Importing graph convolutional layer
20 | 
21 | class SEAttentionGNN(nn.Module):
22 | 
23 |     def __init__(self, channel=512, reduction=16):
24 |         super().__init__()
25 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
26 |         self.fc = nn.Sequential(
27 |             nn.Linear(channel, channel // reduction, bias=False),
28 |             nn.ReLU(inplace=True),
29 |             nn.Linear(channel // reduction, channel, bias=False),
30 |             nn.Sigmoid()
31 |         )
32 |         # Define a GCN layer for processing the graph
33 |         self.gcn = GCNConv(channel, channel)
34 | 
35 |     def init_weights(self):
36 |         for m in self.modules():
37 |             if isinstance(m, nn.Conv2d):
38 |                 init.kaiming_normal_(m.weight, mode='fan_out')
39 |                 if m.bias is not None:
40 |                     init.constant_(m.bias, 0)
41 |             elif isinstance(m, nn.BatchNorm2d):
42 |                 init.constant_(m.weight, 1)
43 |                 init.constant_(m.bias, 0)
44 |             elif isinstance(m, nn.Linear):
45 |                 init.normal_(m.weight, std=0.001)
46 |                 if m.bias is not None:
47 |                     init.constant_(m.bias, 0)
48 | 
49 |     def forward(self, x):
50 |         b, c, h, w = x.size()
51 | 
52 |         # Channel attention
53 |         y = self.avg_pool(x).view(b, c)
54 |         y = self.fc(y).view(b, c, 1, 1)
55 |         x = x * y.expand_as(x)
56 | 
57 |         # Convert feature maps to graph
58 |         x_flat = x.view(b, c, -1).permute(0, 2, 1)  # Reshape to (b, h*w, c)
59 | 
60 |         # Create adjacency structure using spatial proximity (4-connectivity)
61 |         edge_index = []
62 |         for i in range(h):
63 |             for j in range(w):
64 |                 index = i * w + j
65 |                 if i + 1 < h:  # Down
66 |                     edge_index.append([index, (i + 1) * w + j])
67 |                 if j + 1 < w:  # Right
68 |                     edge_index.append([index, i * w + (j + 1)])
69 | 
70 |         edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
71 | 
72 |         # Apply GCN layer
73 |         x_graph = []
74 |         for i in range(b):
75 |             x_graph.append(self.gcn(x_flat[i], edge_index))
76 | 
77 |         x_graph = torch.stack(x_graph).permute(0, 2, 1).reshape(b, c, h, w)  # reshape, since permute makes the tensor non-contiguous
78 | 
79 |         return x_graph
80 | 
81 | if __name__ == '__main__':
82 |     model = SEAttentionGNN()
83 |     model.init_weights()
84 |     input = torch.randn(1, 512, 7, 7)
85 |     output = model(input)
86 |     print(output.shape)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention-gemini/code/sigmoid_weighted_interaction_group_conv.py:
--------------------------------------------------------------------------------
1 | """
2 | Modify the `CoordAtt` module
3 | After pooling height and width features, perform an element-wise multiplication of the pooled height and width features
4 | Concatenate the pooled height and width features
5 | Apply a learnable parameter followed by a sigmoid activation to the element-wise multiplied feature
6 | Perform a weighted sum of the sigmoid-activated element-wise multiplied feature and the concatenated features
7 | Apply a group convolution instead of 1x1 convolution in the `conv1` layer
8 | Use a small number of groups (e.g. 4 or 8)
9 | Modify the `__init__` function to include the group convolution layer and a learnable parameter
10 | Modify the `forward` function to implement the element-wise multiplication, concatenation, sigmoid activation of the learnable parameter, weighted sum, and the group convolution before the shared `conv1` layer
11 | The rest of the forward pass remains the same
12 | Compare the output with the baseline using the same test input and observe changes
13 | This involves element-wise multiplication, concatenation, learnable parameter with sigmoid, weighted sum, group conv, and modifying the forward pass
14 | 
15 | """
16 | 
17 | import torch
18 | import torch.nn as nn
19 | import torch.nn.functional as F
20 | 
21 | 
22 | class h_sigmoid(nn.Module):
23 |     def __init__(self, inplace=True):
24 |         super(h_sigmoid, self).__init__()
25 |         self.relu = nn.ReLU6(inplace=inplace)
26 | 
27 |     def forward(self, x):
28 |         return self.relu(x + 3) / 6
29 | 
30 | 
31 | class h_swish(nn.Module):
32 |     def __init__(self, inplace=True):
33 |         super(h_swish, self).__init__()
34 |         self.sigmoid = h_sigmoid(inplace=inplace)
35 | 
36 |     def forward(self, x):
37 |         return x * self.sigmoid(x)
38 | 
39 | 
40 | class CoordAtt(nn.Module):
41 |     def __init__(self, inp, reduction=32, groups=4):
42 |         super(CoordAtt, self).__init__()
43 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
44 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
45 | 
46 |         mip = max(8, inp // reduction)
47 | 
48 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0, groups=groups, bias=False)
49 |         self.bn1 = nn.BatchNorm2d(mip)
50 |         self.act = h_swish()
51 | 
52 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
53 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
54 | 
55 |         self.weight = nn.Parameter(torch.zeros(1, inp, 1, 1))
56 |         self.sigmoid = nn.Sigmoid()
57 | 
58 | 
59 |     def forward(self, x):
60 |         identity = x
61 | 
62 |         n, c, h, w = x.size()
63 |         x_h = self.pool_h(x)
64 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)
65 | 
66 |         # Element-wise multiplication realized as a broadcast interaction: (n, c, h, 1) x (n, c, 1, w)
67 |         inter = x_h * x_w.permute(0, 1, 3, 2)
68 |         x_hw = torch.cat([inter.mean(dim=3, keepdim=True), inter.mean(dim=2, keepdim=True).permute(0, 1, 3, 2)], dim=2)  # pooled back to the (h + w) stripe layout
69 | 
70 |         # Concatenation
71 |         y = torch.cat([x_h, x_w], dim=2)
72 | 
73 |         # Sigmoid-weighted interaction
74 |         weight = self.sigmoid(self.weight)
75 |         x_hw = x_hw * weight
76 |         y = y + x_hw
77 | 
78 |         y = self.conv1(y)
79 |         y = self.bn1(y)
80 |         y = self.act(y)
81 | 
82 |         x_h, x_w = torch.split(y, [h, w], dim=2)
83 |         x_w = x_w.permute(0, 1, 3, 2)
84 | 
85 |         a_h = self.conv_h(x_h).sigmoid()
86 |         a_w = self.conv_w(x_w).sigmoid()
87 | 
88 |         out = identity * a_w * a_h
89 | 
90 |         return out
91 | 
92 | if __name__ == '__main__':
93 |     x = torch.randn(2, 64, 32, 32)
94 |     att = CoordAtt(inp=64, reduction=32)
95 |     out = att(x)
96 |     print("Input size:", x.shape)
97 |     print("Output size:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/code/geometric_transformation_attention.py:
--------------------------------------------------------------------------------
1 | """
2 | Extend SEAttention by adding a lightweight geometric transformation layer that applies controlled transformations (e.g., small rotations, translations) to the input feature map
3 | Integrate a transformation-aware attention mechanism that recalibrates feature maps based on invariant patterns across these transformations
4 | Modify the forward function to include these geometric transformations and subsequent attention recalibration
5 | Evaluate the model's effectiveness by comparing detection accuracy and visual focus of attention maps on synthetic datasets, particularly observing improvements in small target detection
6 | 
7 | """
8 | 
9 | # Modified code
10 | import numpy as np
11 | import torch
12 | from torch import flatten, nn
13 | from torch.nn import init
14 | from torch.nn.modules.activation import ReLU
15 | from torch.nn.modules.batchnorm import BatchNorm2d
16 | from torch.nn import functional as F
17 | import torchvision.transforms as T
18 | 
19 | class GeometricTransformLayer(nn.Module):
20 |     def __init__(self):
21 |         super().__init__()
22 |         self.transforms = T.Compose([
23 |             T.RandomAffine(degrees=5, translate=(0.05, 0.05))
24 |         ])
25 | 
26 |     def forward(self, x):
27 |         # Apply geometric transformation
28 |         return self.transforms(x)
29 | 
30 | class TransformationAwareAttention(nn.Module):
31 |     def __init__(self, channel):
32 |         super().__init__()
33 |         self.channel = channel
34 |         self.weight = nn.Parameter(torch.ones(channel, 1, 1))
35 | 
36 |     def forward(self, x, transformed_x):
37 |         # Recalibrate feature maps based on invariant patterns
38 |         attention_map = torch.sigmoid(self.weight)
39 |         return x * attention_map + transformed_x * (1 - attention_map)
40 | 
41 | class SEAttention(nn.Module):
42 |     def __init__(self, channel=512, reduction=16):
43 |         super().__init__()
44 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
45 |         self.fc = nn.Sequential(
46 |             nn.Linear(channel, channel // reduction, bias=False),
47 |             nn.ReLU(inplace=True),
48 |             nn.Linear(channel // reduction, channel, bias=False),
49 |             nn.Sigmoid()
50 |         )
51 |         self.geo_transform = GeometricTransformLayer()
52 |         self.trans_attention = TransformationAwareAttention(channel)
53 | 
54 |     def init_weights(self):
55 |         for m in self.modules():
56 |             if isinstance(m, nn.Conv2d):
57 |                 init.kaiming_normal_(m.weight, mode='fan_out')
58 |                 if m.bias is not None:
59 |                     init.constant_(m.bias, 0)
60 |             elif isinstance(m, nn.BatchNorm2d):
61 |                 init.constant_(m.weight, 1)
62 |                 init.constant_(m.bias, 0)
63 |             elif isinstance(m, nn.Linear):
64 |                 init.normal_(m.weight, std=0.001)
65 |                 if m.bias is not None:
66 |                     init.constant_(m.bias, 0)
67 | 
68 |     def forward(self, x):
69 |         b, c, _, _ = x.size()
70 |         # Apply geometric transformation
71 |         transformed_x = self.geo_transform(x)
72 |         # SE attention
73 |         y = self.avg_pool(x).view(b, c)
74 |         y = self.fc(y).view(b, c, 1, 1)
75 |         se_attention = x * y.expand_as(x)
76 |         # Transformation-aware attention
77 |         attention_output = self.trans_attention(se_attention, transformed_x)
78 |         return attention_output
79 | 
80 | if __name__ == '__main__':
81 |     model = SEAttention()
82 |     model.init_weights()
83 |     input = torch.randn(1, 512, 7, 7)
84 |     output = model(input)
85 |     print(output.shape)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/code/xxx-multi_scale_coordatt.py:
--------------------------------------------------------------------------------
1 | """
2 | Add parallel convolutional branches with kernel sizes 1x1, 3x3, and 5x5 to `CoordAtt` to capture multi-scale features
3 | Each branch should have its own convolutional layer followed by batch normalization and activation
4 | Concatenate the outputs of these branches before combining with the original coordinate attention features
5 | Modify the `forward` method to integrate these multi-scale features before applying attention weights
6 | Evaluate the enhancement in feature representation by testing on a small benchmark dataset and comparing the modified module's performance to the original, while also monitoring computational overhead
7 | 
8 | """
9 | 
10 | # Modified code
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 | 
15 | 
16 | class h_sigmoid(nn.Module):
17 |     def __init__(self, inplace=True):
18 |         super(h_sigmoid, self).__init__()
19 |         self.relu = nn.ReLU6(inplace=inplace)
20 | 
21 |     def forward(self, x):
22 |         return self.relu(x + 3) / 6
23 | 
24 | 
25 | class h_swish(nn.Module):
26 |     def __init__(self, inplace=True):
27 |         super(h_swish, self).__init__()
28 |         self.sigmoid = h_sigmoid(inplace=inplace)
29 | 
30 |     def forward(self, x):
31 |         return x * self.sigmoid(x)
32 | 
33 | 
34 | class CoordAtt(nn.Module):
35 |     def __init__(self, inp, reduction=32):
36 |         super(CoordAtt, self).__init__()
37 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
38 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
39 | 
40 |         mip = max(8, inp // reduction)
41 | 
42 |         # Original coordinate attention components
43 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
44 |         self.bn1 = nn.BatchNorm2d(mip)
45 |         self.act = h_swish()
46 | 
47 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
48 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
49 | 
50 |         # Multi-scale feature branches
51 |         self.conv_1x1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
52 |         self.bn_1x1 = nn.BatchNorm2d(mip)
53 | 
54 |         self.conv_3x3 = nn.Conv2d(inp, mip, kernel_size=3, stride=1, padding=1)
55 |         self.bn_3x3 = nn.BatchNorm2d(mip)
56 | 
57 |         self.conv_5x5 = nn.Conv2d(inp, mip, kernel_size=5, stride=1, padding=2)
58 |         self.bn_5x5 = nn.BatchNorm2d(mip)
59 |         self.conv_fuse = nn.Conv2d(3 * mip, inp, kernel_size=1, stride=1, padding=0)  # fuse the branches back to inp channels
60 |     def forward(self, x):
61 |         identity = x
62 | 
63 |         n, c, h, w = x.size()
64 |         x_h = self.pool_h(x)
65 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)
66 | 
67 |         # Coordinate attention path
68 |         y = torch.cat([x_h, x_w], dim=2)
69 |         y = self.conv1(y)
70 |         y = self.bn1(y)
71 |         y = self.act(y)
72 | 
73 |         x_h, x_w = torch.split(y, [h, w], dim=2)
74 |         x_w = x_w.permute(0, 1, 3, 2)
75 | 
76 |         a_h = self.conv_h(x_h).sigmoid()
77 |         a_w = self.conv_w(x_w).sigmoid()
78 | 
79 |         # Multi-scale feature branches
80 |         y_1x1 = self.bn_1x1(self.conv_1x1(x))
81 |         y_3x3 = self.bn_3x3(self.conv_3x3(x))
82 |         y_5x5 = self.bn_5x5(self.conv_5x5(x))
83 | 
84 |         multi_scale_features = torch.cat([y_1x1, y_3x3, y_5x5], dim=1)
85 |         multi_scale_features = self.act(multi_scale_features)
86 |         multi_scale_features = self.conv_fuse(multi_scale_features)  # project to inp channels so the residual sum is valid
87 |         # Combine multi-scale features with coordinate attention
88 |         out = identity * a_w * a_h + multi_scale_features
89 | 
90 |         return out
91 | 
92 | if __name__ == '__main__':
93 |     x = torch.randn(2, 64, 32, 32)
94 |     att = CoordAtt(inp=64, reduction=32)
95 |     out = att(x)
96 |     print("Input size:", x.shape)
97 |     print("Output size:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/small_object_attention/code/dynamic_attention_selection.py:
--------------------------------------------------------------------------------
1 | """
2 | Implement a dynamic attention mechanism that selects between spatial and channel attentions based on input characteristics
3 | Develop a decision layer that analyzes input features and outputs a preference score for each attention type
4 | Modify the SEAttention class to incorporate spatial attention
5 | Use the decision layer to dynamically apply spatial or channel attention
6 | Evaluate the model's performance on small target detection tasks by analyzing precision, recall, and F1-score, comparing against the baseline SEAttention model and other enhanced models
7 | 
8 | """
9 | 
10 | # Modified code
11 | import numpy as np
12 | import torch
13 | from torch import flatten, nn
14 | from torch.nn import init
15 | from torch.nn.modules.activation import ReLU
16 | from torch.nn.modules.batchnorm import BatchNorm2d
17 | from torch.nn import functional as F
18 | 
19 | class SEAttention(nn.Module):
20 | 
21 |     def __init__(self, channel=512, reduction=16):
22 |         super().__init__()
23 |         self.channel = channel
24 |         self.reduction = reduction
25 | 
26 |         # Channel Attention Components
27 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
28 |         self.fc = nn.Sequential(
29 |             nn.Linear(channel, channel // reduction, bias=False),
30 |             nn.ReLU(inplace=True),
31 |             nn.Linear(channel // reduction, channel, bias=False),
32 |             nn.Sigmoid()
33 |         )
34 | 
35 |         # Spatial Attention Components
36 |         self.conv1 = nn.Conv2d(channel, channel // reduction, kernel_size=1)
37 |         self.conv2 = nn.Conv2d(channel // reduction, 1, kernel_size=1)
38 |         self.sigmoid = nn.Sigmoid()
39 | 
40 |         # Decision Layer
41 |         self.decision_layer = nn.Sequential(
42 |             nn.Linear(channel, 2),
43 |             nn.Softmax(dim=1)
44 |         )
45 | 
46 |     def init_weights(self):
47 |         for m in self.modules():
48 |             if isinstance(m, nn.Conv2d):
49 |                 init.kaiming_normal_(m.weight, mode='fan_out')
50 |                 if m.bias is not None:
51 |                     init.constant_(m.bias, 0)
52 |             elif isinstance(m, nn.BatchNorm2d):
53 |                 init.constant_(m.weight, 1)
54 |                 init.constant_(m.bias, 0)
55 |             elif isinstance(m, nn.Linear):
56 |                 init.normal_(m.weight, std=0.001)
57 |                 if m.bias is not None:
58 |                     init.constant_(m.bias, 0)
59 | 
60 |     def channel_attention(self, x, b, c):
61 |         y = self.avg_pool(x).view(b, c)
62 |         y = self.fc(y).view(b, c, 1, 1)
63 |         return x * y.expand_as(x)
64 | 
65 |     def spatial_attention(self, x, b, c, h, w):
66 |         y = self.conv1(x)
67 |         y = self.conv2(y)
68 |         y = self.sigmoid(y)
69 |         return x * y.expand_as(x)
70 | 
71 |     def forward(self, x):
72 |         b, c, h, w = x.size()
73 | 
74 |         # Decision layer based on input features
75 |         avg_features = self.avg_pool(x).view(b, c)
76 |         decision = self.decision_layer(avg_features)
77 | 
78 |         # Split decision into channel and spatial attention weights
79 |         channel_weight, spatial_weight = decision[:, 0], decision[:, 1]
80 | 
81 |         # Apply attention based on decision weights
82 |         channel_attended = self.channel_attention(x, b, c) * channel_weight.view(b, 1, 1, 1)
83 |         spatial_attended = self.spatial_attention(x, b, c, h, w) * spatial_weight.view(b, 1, 1, 1)
84 | 
85 |         return channel_attended + spatial_attended
86 | 
87 | if __name__ == '__main__':
88 |     model = SEAttention()
89 |     model.init_weights()
90 |     input = torch.randn(1, 512, 7, 7)
91 |     output = model(input)
92 |     print(output.shape)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention-gemini/code/pre_pool_spatial_adaptive_channel_attention.py:
--------------------------------------------------------------------------------
1 | """
2 | Modify the `CoordAtt` module
3 | Introduce a spatial adaptive channel attention module before the pooling operations
4 | This module will consist of global average pooling and global max pooling
5 | A 1x1 convolution will be used to generate a spatial weight map for the max pooling output
6 | The weighted max pooling output is then added element-wise to the average pooling output
7 | This result is then passed through a 1x1 convolution, a ReLU activation, and a sigmoid activation
8 | The output of the channel attention will be used to modulate the input feature map before the height and width pooling
9 | Modify the `__init__` function to include the channel attention module and the 1x1 convolution for weight map generation
10 | Modify the `forward` function to implement the channel attention, the weighted sum of the pooling outputs using the spatial weight map, modulation of the input feature map, and then the rest of the operations
11 | Compare the output with the baseline using the same test input and observe the changes
12 | This involves adding global average pooling, global max pooling, a 1x1 conv for spatial weight map, a 1x1 conv, ReLU and sigmoid, and modifying the forward pass to apply the attention before pooling
13 | 
14 | """
15 | 
16 | import torch
17 | import torch.nn as nn
18 | import torch.nn.functional as F
19 | 
20 | 
21 | class h_sigmoid(nn.Module):
22 |     def __init__(self, inplace=True):
23 |         super(h_sigmoid, self).__init__()
24 |         self.relu = nn.ReLU6(inplace=inplace)
25 | 
26 |     def forward(self, x):
27 |         return self.relu(x + 3) / 6
28 | 
29 | 
30 | class h_swish(nn.Module):
31 |     def __init__(self, inplace=True):
32 |         super(h_swish, self).__init__()
33 |         self.sigmoid = h_sigmoid(inplace=inplace)
34 | 
35 |     def forward(self, x):
36 |         return x * self.sigmoid(x)
37 | 
38 | 
39 | class CoordAtt(nn.Module):
40 |     def __init__(self, inp, reduction=32):
41 |         super(CoordAtt, self).__init__()
42 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
43 |         self.max_pool = nn.AdaptiveMaxPool2d(1)
44 | 
45 |         mip = max(8, inp // reduction)
46 |         self.conv_reduce = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
47 | 
48 |         self.spatial_weight = nn.Conv2d(mip, 1, kernel_size=1, stride=1, padding=0)
49 | 
50 |         self.bn1 = nn.BatchNorm2d(mip)
51 |         self.act = h_swish()
52 | 
53 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
54 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
55 |         self.conv_expand = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)  # lift the channel attention back to inp channels
56 |     def forward(self, x):
57 |         identity = x
58 |         n, c, h, w = x.size()
59 | 
60 |         x_reduced = self.conv_reduce(x)
61 |         avg_out = self.avg_pool(x_reduced)
62 |         max_out = self.max_pool(x_reduced)
63 | 
64 |         spatial_weight = self.spatial_weight(max_out).sigmoid()
65 | 
66 |         channel_att = avg_out + max_out * spatial_weight
67 | 
68 |         channel_att = self.bn1(channel_att)
69 |         channel_att = self.act(channel_att)
70 | 
71 |         x = x * self.conv_expand(channel_att).sigmoid()  # channel attention now matches the input's channel count
72 | 
73 |         x_h = nn.AdaptiveAvgPool2d((None, 1))(x)
74 |         x_w = nn.AdaptiveAvgPool2d((1, None))(x).permute(0, 1, 3, 2)
75 | 
76 | 
77 |         y = torch.cat([x_h, x_w], dim=2)
78 | 
79 |         y = self.bn1(self.conv_reduce(y))
80 |         y = self.act(y)
81 | 
82 | 
83 |         x_h, x_w = torch.split(y, [h, w], dim=2)
84 |         x_w = x_w.permute(0, 1, 3, 2)
85 | 
86 |         a_h = self.conv_h(x_h).sigmoid()
87 |         a_w = self.conv_w(x_w).sigmoid()
88 | 
89 |         out = identity * a_w * a_h
90 | 
91 |         return out
92 | 
93 | if __name__ == '__main__':
94 |     x = torch.randn(2, 64, 32, 32)
95 |     att = CoordAtt(inp=64, reduction=32)
96 |     out = att(x)
97 |     print("Input size:", x.shape)
98 |     print("Output size:", out.shape)
--------------------------------------------------------------------------------
/generation_idea_template/coordattention/code/xxx-sparse_coordatt.py:
--------------------------------------------------------------------------------
1 | """
2 | Integrate an L1 regularization term into the training process of the CoordAtt module to induce sparsity in the feature maps
3 | Modify the loss function to include this L1 penalty, encouraging sparsity in the output of the initial convolutional layers
4 | Evaluate the impact on feature representation quality and computational efficiency by testing on a small benchmark dataset
5 | Compare the results in terms of accuracy, feature discrimination, and computational overhead with the original CoordAtt and other variants
6 | 
7 | """
8 | 
9 | # Modified code
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | 
14 | class h_sigmoid(nn.Module):
15 |     def __init__(self, inplace=True):
16 |         super(h_sigmoid, self).__init__()
17 |         self.relu = nn.ReLU6(inplace=inplace)
18 | 
19 |     def forward(self, x):
20 |         return self.relu(x + 3) / 6
21 | 
22 | class h_swish(nn.Module):
23 |     def __init__(self, inplace=True):
24 |         super(h_swish, self).__init__()
25 |         self.sigmoid = h_sigmoid(inplace=inplace)
26 | 
27 |     def forward(self, x):
28 |         return x * self.sigmoid(x)
29 | 
30 | class CoordAtt(nn.Module):
31 |     def __init__(self, inp, reduction=32):
32 |         super(CoordAtt, self).__init__()
33 |         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
34 |         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
35 | 
36 |         mip = max(8, inp // reduction)
37 | 
38 |         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
39 |         self.bn1 = nn.BatchNorm2d(mip)
40 |         self.act = h_swish()
41 | 
42 |         self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
43 |         self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
44 | 
45 |     def forward(self, x):
46 |         identity = x
47 | 
48 |         n, c, h, w = x.size()
49 |         x_h = self.pool_h(x)
50 |         x_w = self.pool_w(x).permute(0, 1, 3, 2)
51 | 
52 |         y = torch.cat([x_h, x_w], dim=2)
53 |         y = self.conv1(y)
54 |         y = self.bn1(y)
55 |         y = self.act(y)
56 | 
57 |         x_h, x_w = torch.split(y, [h, w], dim=2)
58 |         x_w = x_w.permute(0, 1, 3, 2)
59 | 
60 |         a_h = self.conv_h(x_h).sigmoid()
61 |         a_w = self.conv_w(x_w).sigmoid()
62 | 
63 |         out = identity * a_w * a_h
64 | 
65 |         return out
66 | 
67 | def l1_regularization(model, lambda_l1):
68 |     l1_norm = sum(p.abs().sum() for p in model.parameters())
69 |     return lambda_l1 * l1_norm
70 | 
71 | # Example training loop
72 | def train(model, dataloader, criterion, optimizer, lambda_l1):
73 |     model.train()
74 |     total_loss = 0.0
75 |     for data, target in dataloader:
76 |         optimizer.zero_grad()
77 |         output = model(data)
78 |         loss = criterion(output, target)
79 | 
80 |         # Add L1 regularization penalty
81 |         l1_penalty = l1_regularization(model, lambda_l1)
82 |         loss += l1_penalty
83 | 
84 |         loss.backward()
85 |         optimizer.step()
86 | 
87 |         total_loss += loss.item()
88 |     return total_loss / len(dataloader)
89 | 
90 | if __name__ == '__main__':
91 |     x = torch.randn(2, 64, 32, 32)
92 |     att = CoordAtt(inp=64, reduction=32)
93 |     out = att(x)
94 |     print("Input size:", x.shape)
95 |     print("Output size:", out.shape)
96 | 
97 |     # Example of lambda_l1 for L1 regularization
98 |     lambda_l1 = 0.01
99 | 
100 |     # Mock dataloader, criterion, and optimizer for testing
101 |     dataloader = [(x, torch.randn(2, 64, 32, 32)) for _ in range(10)]
102 |     criterion = nn.MSELoss()
103 |     optimizer = torch.optim.SGD(att.parameters(), lr=0.01)
104 | 
105 |     # Run a single training epoch
106 |     avg_loss = train(att, dataloader, criterion, optimizer, lambda_l1)
107 |     print("Average training loss with L1 regularization:", avg_loss)
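108 | 
109 |     # Hedged variant sketch (an illustrative addition): the idea text targets sparsity of the
110 |     # feature maps themselves, so an activation-level L1 penalty on conv1's output is a
111 |     # closer match than the parameter-level penalty above.
112 |     stripes = torch.cat([att.pool_h(x), att.pool_w(x).permute(0, 1, 3, 2)], dim=2)
113 |     activation_l1 = lambda_l1 * att.act(att.bn1(att.conv1(stripes))).abs().mean()
114 |     print("Activation-level L1 penalty:", activation_l1.item())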
self.fc = nn.Sequential( 23 | nn.Linear(channel, channel // reduction, bias=False), 24 | nn.ReLU(inplace=True), 25 | nn.Linear(channel // reduction, channel, bias=False), 26 | nn.Sigmoid() 27 | ) 28 | 29 | def forward(self, x): 30 | b, c, _, _ = x.size() 31 | y = self.avg_pool(x).view(b, c) 32 | y = self.fc(y).view(b, c, 1, 1) 33 | return x * y.expand_as(x) 34 | 35 | class ColorChannelFusion(nn.Module): 36 | def __init__(self, channel=512, reduction=16): 37 | super().__init__() 38 | # NOTE: each split color plane has a single channel, so the per-color attention must be built with channel=1 (fixes a shape bug; channel=512 made the Linear layers crash on 1-channel inputs) 39 | self.red_attention = ChannelAttention(channel=1, reduction=1) 40 | self.green_attention = ChannelAttention(channel=1, reduction=1) 41 | self.blue_attention = ChannelAttention(channel=1, reduction=1) 42 | self.weighted_fusion = nn.Conv2d(3, channel, kernel_size=1, stride=1, padding=0, bias=False) # the fused map has 3 color channels 43 | 44 | def forward(self, x): 45 | # Assuming input x shape is (batch_size, 3, height, width) 46 | red, green, blue = torch.split(x, 1, dim=1) 47 | red = self.red_attention(red) 48 | green = self.green_attention(green) 49 | blue = self.blue_attention(blue) 50 | 51 | # Concatenate along channel dimension 52 | fused = torch.cat([red, green, blue], dim=1) 53 | 54 | # Apply weighted fusion 55 | fused_feature_map = self.weighted_fusion(fused) 56 | return fused_feature_map 57 | 58 | class EnhancedSEAttention(nn.Module): 59 | def __init__(self, channel=512, reduction=16): 60 | super().__init__() 61 | self.color_fusion = ColorChannelFusion(channel=channel, reduction=reduction) 62 | self.se_attention = ChannelAttention(channel=channel, reduction=reduction) 63 | 64 | def init_weights(self): 65 | for m in self.modules(): 66 | if isinstance(m, nn.Conv2d): 67 | init.kaiming_normal_(m.weight, mode='fan_out') 68 | if m.bias is not None: 69 | init.constant_(m.bias, 0) 70 | elif isinstance(m, nn.BatchNorm2d): 71 | init.constant_(m.weight, 1) 72 | init.constant_(m.bias, 0) 73 | elif isinstance(m, nn.Linear): 74 | init.normal_(m.weight, std=0.001) 75 | if m.bias is not None: 76 | init.constant_(m.bias, 0) 77 | 78 | def forward(self, x): 79 | fused_features = self.color_fusion(x) 80 | attention_output = self.se_attention(fused_features) 81 | return attention_output 82 | 83 | if __name__ == '__main__': 84 | model = EnhancedSEAttention() 85 | model.init_weights() 86 | input = torch.randn(1, 3, 7, 7) # Updated to expect 3 channels (R, G, B) 87 | output = model(input) 88 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention-gemini/code/pre_pool_conv_spatial_attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modify the `CoordAtt` module 3 | Apply a 1x1 convolution to the input feature map before applying max and average pooling 4 | Concatenate the results of the max and average pooling along the channel dimension 5 | Apply a lightweight spatial attention module to the concatenated map which will have a 3x3 depthwise convolution, followed by a 1x1 convolution and finally a sigmoid activation 6 | This spatial attention module modulates the combined attention map before it is split into a_h and a_w 7 | Modify the `__init__` to include the 1x1 convolution before the pooling layers, and the lightweight spatial attention module 8 | Modify the `forward` to implement the new pooling and modulation scheme 9 | The output can be compared to the baseline using the same test input and observing the changes in output 10 | This involves modifying `__init__` to
incorporate pre-pooling conv, depthwise conv, 1x1 conv and sigmoid, and `forward` to implement the pooling and spatial attention 11 | 12 | """ 13 | 14 | import torch 15 | import torch.nn as nn 16 | import torch.nn.functional as F 17 | 18 | 19 | class h_sigmoid(nn.Module): 20 | def __init__(self, inplace=True): 21 | super(h_sigmoid, self).__init__() 22 | self.relu = nn.ReLU6(inplace=inplace) 23 | 24 | def forward(self, x): 25 | return self.relu(x + 3) / 6 26 | 27 | 28 | class h_swish(nn.Module): 29 | def __init__(self, inplace=True): 30 | super(h_swish, self).__init__() 31 | self.sigmoid = h_sigmoid(inplace=inplace) 32 | 33 | def forward(self, x): 34 | return x * self.sigmoid(x) 35 | 36 | 37 | class CoordAtt(nn.Module): 38 | def __init__(self, inp, reduction=32): 39 | super(CoordAtt, self).__init__() 40 | self.pre_conv = nn.Conv2d(inp, inp, kernel_size=1, stride=1, padding=0) # 1x1 conv before pooling 41 | self.pool_h = nn.AdaptiveMaxPool2d((None, 1)) 42 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 43 | 44 | mip = max(8, inp // reduction) 45 | 46 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 47 | self.bn1 = nn.BatchNorm2d(mip) 48 | self.act = h_swish() 49 | 50 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 51 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 52 | 53 | # Spatial Attention Module 54 | self.spatial_conv1 = nn.Conv2d(2, 1, kernel_size=3, stride=1, padding=1, groups=1) # 3x3 conv (the idea calls for depthwise, but with only 2 input channels a standard conv is used) 55 | self.spatial_sigmoid = nn.Sigmoid() 56 | 57 | 58 | def forward(self, x): 59 | identity = x 60 | 61 | n, c, h, w = x.size() 62 | x = self.pre_conv(x) # Apply 1x1 convolution 63 | 64 | x_h = self.pool_h(x) 65 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 66 | 67 | # Spatial attention: a 2-channel descriptor (channel-wise max and mean of x) matches spatial_conv1's two input channels (fixes a shape bug: the pooled h/w strips could not be concatenated along dim=1) 68 | attention_map = torch.cat([torch.max(x, dim=1, keepdim=True)[0], torch.mean(x, dim=1, keepdim=True)], dim=1) 69 | attention_map = self.spatial_conv1(attention_map) 70 | attention_map = self.spatial_sigmoid(attention_map) 71 | attention_map = F.interpolate(attention_map, size=(h, w), mode='bilinear', align_corners=False) 72 | 73 | y = torch.cat([x_h, x_w], dim=2) 74 | 75 | y = self.conv1(y) 76 | y = self.bn1(y) 77 | y = self.act(y) 78 | 79 | x_h, x_w = torch.split(y, [h, w], dim=2) 80 | x_w = x_w.permute(0, 1, 3, 2) 81 | 82 | a_h = self.conv_h(x_h) 83 | a_w = self.conv_w(x_w) 84 | 85 | a_h = F.interpolate(a_h, size=(h, w), mode='bilinear', align_corners=False).sigmoid() 86 | a_w = F.interpolate(a_w, size=(h, w), mode='bilinear', align_corners=False).sigmoid() 87 | 88 | 89 | out = identity * a_w * a_h * attention_map 90 | 91 | return out 92 | 93 | if __name__ == '__main__': 94 | x = torch.randn(2, 64, 32, 32) 95 | att = CoordAtt(inp=64, reduction=32) 96 | out = att(x) 97 | print("Input shape:", x.shape) 98 | print("Output shape:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/uncertainty_guided_attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Integrate an uncertainty estimation module within the SEAttention framework 3 | Develop functions to compute uncertainty scores for different regions in the input feature map, using methods such as Monte Carlo Dropout or entropy-based measures 4 | Modify the SEAttention class to incorporate these uncertainty scores into the attention mechanism, adjusting attention weights based on uncertainty 5 | Evaluate the model's performance on small target detection tasks using metrics such as precision, recall, and
F1-score, while also analyzing the uncertainty estimation's impact on detection accuracy 6 | Compare results with the baseline SEAttention model and other enhanced models 7 | 8 | """ 9 | 10 | # Modified code 11 | 12 | import numpy as np 13 | import torch 14 | from torch import flatten, nn 15 | from torch.nn import init 16 | from torch.nn.modules.activation import ReLU 17 | from torch.nn.modules.batchnorm import BatchNorm2d 18 | from torch.nn import functional as F 19 | from torch.distributions import Categorical 20 | 21 | class MonteCarloDropout(nn.Module): 22 | def __init__(self, p=0.5): 23 | super(MonteCarloDropout, self).__init__() 24 | self.p = p 25 | 26 | def forward(self, x): 27 | return F.dropout(x, p=self.p, training=True) 28 | 29 | class UncertaintyEstimator(nn.Module): 30 | def __init__(self, channel, num_samples=10): 31 | super(UncertaintyEstimator, self).__init__() 32 | self.num_samples = num_samples 33 | self.dropout = MonteCarloDropout(p=0.5) 34 | self.conv = nn.Conv2d(channel, channel, kernel_size=1) 35 | 36 | def forward(self, x): 37 | # Use Monte Carlo sampling to estimate uncertainty 38 | predictions = torch.stack([self.conv(self.dropout(x)) for _ in range(self.num_samples)], dim=0) 39 | mean_prediction = torch.mean(predictions, dim=0) 40 | uncertainty = torch.var(predictions, dim=0).mean(dim=(2, 3), keepdim=True) # Calculate uncertainty as variance 41 | return mean_prediction, uncertainty 42 | 43 | class SEAttention(nn.Module): 44 | 45 | def __init__(self, channel=512, reduction=16): 46 | super().__init__() 47 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 48 | self.fc = nn.Sequential( 49 | nn.Linear(channel, channel // reduction, bias=False), 50 | nn.ReLU(inplace=True), 51 | nn.Linear(channel // reduction, channel, bias=False), 52 | nn.Sigmoid() 53 | ) 54 | self.uncertainty_estimator = UncertaintyEstimator(channel) 55 | 56 | def init_weights(self): 57 | for m in self.modules(): 58 | if isinstance(m, nn.Conv2d): 59 | init.kaiming_normal_(m.weight, mode='fan_out') 60 | if m.bias is not None: 61 | init.constant_(m.bias, 0) 62 | elif isinstance(m, nn.BatchNorm2d): 63 | init.constant_(m.weight, 1) 64 | init.constant_(m.bias, 0) 65 | elif isinstance(m, nn.Linear): 66 | init.normal_(m.weight, std=0.001) 67 | if m.bias is not None: 68 | init.constant_(m.bias, 0) 69 | 70 | def forward(self, x): 71 | b, c, _, _ = x.size() 72 | y = self.avg_pool(x).view(b, c) 73 | y = self.fc(y).view(b, c, 1, 1) 74 | 75 | # Estimate uncertainty 76 | mean_prediction, uncertainty = self.uncertainty_estimator(x) 77 | 78 | # Integrate uncertainty into attention weights 79 | # Here, uncertainty is used to scale the attention weights, 80 | # with higher uncertainty leading to lower attention weights. 
81 | attention = x * y.expand_as(x) 82 | adjusted_attention = attention * (1 - uncertainty) 83 | 84 | return adjusted_attention 85 | 86 | if __name__ == '__main__': 87 | model = SEAttention() 88 | model.init_weights() 89 | input = torch.randn(1, 512, 7, 7) 90 | output = model(input) 91 | print(output.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention/code/xxa-nonlocal_coordatt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Introduce a lightweight non-local attention mechanism within the CoordAtt module 3 | Implement a simplified version of the non-local block that captures global context effectively 4 | Modify the forward method to first compute these non-local attention features and integrate them with the existing coordinate attention features before applying the final attention weights 5 | Evaluate performance on a small benchmark dataset, focusing on improvements in feature representation and capturing long-range dependencies 6 | Compare against the original CoordAtt and other variants to assess computational efficiency and accuracy improvements 7 | 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | 15 | class h_sigmoid(nn.Module): 16 | def __init__(self, inplace=True): 17 | super(h_sigmoid, self).__init__() 18 | self.relu = nn.ReLU6(inplace=inplace) 19 | 20 | def forward(self, x): 21 | return self.relu(x + 3) / 6 22 | 23 | 24 | class h_swish(nn.Module): 25 | def __init__(self, inplace=True): 26 | super(h_swish, self).__init__() 27 | self.sigmoid = h_sigmoid(inplace=inplace) 28 | 29 | def forward(self, x): 30 | return x * self.sigmoid(x) 31 | 32 | 33 | class SimplifiedNonLocalBlock(nn.Module): 34 | def __init__(self, in_channels): 35 | super(SimplifiedNonLocalBlock, self).__init__() 36 | self.theta = nn.Conv2d(in_channels, in_channels // 2, kernel_size=1) 37 | self.phi = nn.Conv2d(in_channels, in_channels // 2, kernel_size=1) 38 | self.g = nn.Conv2d(in_channels, in_channels // 2, kernel_size=1) 39 | self.out_conv = nn.Conv2d(in_channels // 2, in_channels, kernel_size=1) 40 | 41 | def forward(self, x): 42 | n, c, h, w = x.size() 43 | 44 | theta = self.theta(x).view(n, c // 2, -1) 45 | phi = self.phi(x).view(n, c // 2, -1) 46 | g = self.g(x).view(n, c // 2, -1) 47 | 48 | attention = torch.bmm(theta.permute(0, 2, 1), phi) 49 | attention = F.softmax(attention, dim=-1) 50 | 51 | out = torch.bmm(g, attention.permute(0, 2, 1)) 52 | out = out.view(n, c // 2, h, w) 53 | out = self.out_conv(out) 54 | 55 | return x + out 56 | 57 | 58 | class CoordAtt(nn.Module): 59 | def __init__(self, inp, reduction=32): 60 | super(CoordAtt, self).__init__() 61 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 62 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 63 | 64 | mip = max(8, inp // reduction) 65 | 66 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 67 | self.bn1 = nn.BatchNorm2d(mip) 68 | self.act = h_swish() 69 | 70 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 71 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 72 | 73 | # Add non-local block 74 | self.non_local_block = SimplifiedNonLocalBlock(inp) 75 | 76 | def forward(self, x): 77 | identity = x 78 | 79 | # Compute non-local features 80 | non_local_features = self.non_local_block(x) 81 | 82 | n, c, h, w = x.size() 83 | x_h = self.pool_h(non_local_features) 84 | x_w = 
self.pool_w(non_local_features).permute(0, 1, 3, 2) 85 | 86 | y = torch.cat([x_h, x_w], dim=2) 87 | y = self.conv1(y) 88 | y = self.bn1(y) 89 | y = self.act(y) 90 | 91 | x_h, x_w = torch.split(y, [h, w], dim=2) 92 | x_w = x_w.permute(0, 1, 3, 2) 93 | 94 | a_h = self.conv_h(x_h).sigmoid() 95 | a_w = self.conv_w(x_w).sigmoid() 96 | 97 | out = identity * a_w * a_h 98 | 99 | return out 100 | 101 | if __name__ == '__main__': 102 | x = torch.randn(2, 64, 32, 32) 103 | att = CoordAtt(inp=64, reduction=32) 104 | out = att(x) 105 | print("Input shape:", x.shape) 106 | print("Output shape:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention-gemini/code/refined_modulated_cross_spatial_interaction.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modify the `CoordAtt` module 3 | After pooling height and width features, add two 1x1 projection convolutions 4 | One for height and one for width 5 | Introduce a learnable parameter for each of the projected features, and multiply them by the corresponding projected features 6 | Modulate the pooled width feature using the modulated projected height feature via element-wise multiplication, and vice-versa 7 | Concatenate the modulated features along the channel dimension 8 | Feed the concatenated features to the shared `conv1` 9 | In the `__init__` function, add two 1x1 convolution layers for projection, and two learnable parameters 10 | In the `forward` function, implement the projection, learnable parameter multiplication, element-wise modulation, and concatenation before shared `conv1` 11 | The rest of the forward pass remains the same 12 | Compare output with the baseline using same test input, observe changes 13 | This involves adding two 1x1 convs, two learnable parameters, and modifying the forward pass 14 | 15 | """ 16 | 17 | import torch 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | 21 | 22 | class h_sigmoid(nn.Module): 23 | def __init__(self, inplace=True): 24 | super(h_sigmoid, self).__init__() 25 | self.relu = nn.ReLU6(inplace=inplace) 26 | 27 | def forward(self, x): 28 | return self.relu(x + 3) / 6 29 | 30 | 31 | class h_swish(nn.Module): 32 | def __init__(self, inplace=True): 33 | super(h_swish, self).__init__() 34 | self.sigmoid = h_sigmoid(inplace=inplace) 35 | 36 | def forward(self, x): 37 | return x * self.sigmoid(x) 38 | 39 | 40 | class CoordAtt(nn.Module): 41 | def __init__(self, inp, reduction=32): 42 | super(CoordAtt, self).__init__() 43 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 44 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 45 | 46 | mip = max(8, inp // reduction) 47 | 48 | # Projection Convolutions 49 | self.proj_h = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 50 | self.proj_w = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 51 | 52 | # Learnable Parameters 53 | self.alpha_h = nn.Parameter(torch.ones(1)) 54 | self.alpha_w = nn.Parameter(torch.ones(1)) 55 | 56 | self.conv1 = nn.Conv2d(mip * 2, mip, kernel_size=1, stride=1, padding=0) # Modified to accept concatenated feature 57 | self.bn1 = nn.BatchNorm2d(mip) 58 | self.act = h_swish() 59 | 60 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 61 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 62 | 63 | def forward(self, x): 64 | identity = x 65 | 66 | n, c, h, w = x.size() 67 | x_h = self.pool_h(x) 68 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 69 | 70 | # Projection 71 |
proj_h = self.proj_h(x_h) 72 | proj_w = self.proj_w(x_w) 73 | 74 | # Learnable Parameter Modulation 75 | mod_h = proj_h * self.alpha_h 76 | mod_w = proj_w * self.alpha_w 77 | 78 | 79 | # Cross-modulate: each projection is scaled by the spatial mean of the other branch's modulated projection, one shape-consistent reading of the idea (the original element-wise products mixed incompatible shapes) 80 | mod_x_h = proj_h * mod_w.mean(dim=2, keepdim=True) 81 | mod_x_w = proj_w * mod_h.mean(dim=2, keepdim=True) 82 | 83 | 84 | # Concatenate modulated and raw projections along channels (conv1 expects mip * 2), then join the two branches along the spatial dimension 85 | y = torch.cat([torch.cat([mod_x_h, proj_h], dim=1), torch.cat([mod_x_w, proj_w], dim=1)], dim=2) 86 | 87 | # Shared conv1 88 | y = self.conv1(y) 89 | y = self.bn1(y) 90 | y = self.act(y) 91 | 92 | # Split along the height dimension 93 | x_h, x_w = torch.split(y, [h, w], dim=2) 94 | x_w = x_w.permute(0, 1, 3, 2) 95 | 96 | a_h = self.conv_h(x_h).sigmoid() 97 | a_w = self.conv_w(x_w).sigmoid() 98 | 99 | out = identity * a_w * a_h 100 | 101 | return out 102 | 103 | if __name__ == '__main__': 104 | x = torch.randn(2, 64, 32, 32) 105 | att = CoordAtt(inp=64, reduction=32) 106 | out = att(x) 107 | print("Input shape:", x.shape) 108 | print("Output shape:", out.shape) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | .venv_jax 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 163 | .idea/ 164 | .aider* 165 | *.DS_Store 166 | 167 | # Misc folders 168 | data/ 169 | *ckpt.pt 170 | *.zip 171 | ICLR2022-OpenReviewData/ 172 | templates/*/run_0/ 173 | templates/*/*.png 174 | -------------------------------------------------------------------------------- /generation_idea_template/coordattention-gemini/code/early_fusion_addition_projected.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modify the `CoordAtt` module 3 | After pooling the height and width features, perform an element-wise addition of the pooled height and width features 4 | Apply a 1x1 convolution to the summed feature map, followed by a non-linearity (e 5 | g 6 | , ReLU) 7 | Then, apply *two separate* 1x1 convolutions to the fused feature map to generate projected height and width features, respectively 8 | Feed these projected height and width features to the respective `conv_h` and `conv_w` layers 9 | Modify the `__init__` to add the 1x1 convolution and activation for fusion, and *two additional 1x1 convolutions* for projections 10 | Modify the `forward` to implement the element-wise addition, 1x1 convolution, activation, two projection convolutions, and feeding to the subsequent convolution layers 11 | The rest of the forward pass remains unchanged 12 | Compare the output with the baseline using the same test input to observe changes 13 | This involves adding the 1x1 conv, non-linearity, two projection 1x1 convs, and modifying the `forward` pass to implement the fusion using addition and projected representations 14 | 15 | """ 16 | 17 | import torch 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | 21 | 22 | class h_sigmoid(nn.Module): 23 | def __init__(self, inplace=True): 24 | super(h_sigmoid, self).__init__() 25 | self.relu = nn.ReLU6(inplace=inplace) 26 | 27 | def forward(self, x): 28 | return self.relu(x + 3) / 6 29 | 30 | 31 | class h_swish(nn.Module): 32 | def __init__(self, inplace=True): 33 | super(h_swish, self).__init__() 34 | self.sigmoid = h_sigmoid(inplace=inplace) 35 | 36 | def 
forward(self, x): 37 | return x * self.sigmoid(x) 38 | 39 | 40 | class CoordAtt(nn.Module): 41 | def __init__(self, inp, reduction=32): 42 | super(CoordAtt, self).__init__() 43 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 44 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 45 | 46 | mip = max(8, inp // reduction) 47 | 48 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 49 | self.bn1 = nn.BatchNorm2d(mip) 50 | self.act = h_swish() 51 | 52 | # Fusion convolution and activation 53 | self.fusion_conv = nn.Conv2d(mip, mip, kernel_size=1, stride=1, padding=0) 54 | self.fusion_act = nn.ReLU() # Changed to ReLU 55 | 56 | # Projection convolutions 57 | self.proj_h = nn.Conv2d(mip, mip, kernel_size=1, stride=1, padding=0) 58 | self.proj_w = nn.Conv2d(mip, mip, kernel_size=1, stride=1, padding=0) 59 | 60 | 61 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 62 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 63 | 64 | def forward(self, x): 65 | identity = x 66 | 67 | n, c, h, w = x.size() 68 | x_h = self.pool_h(x) 69 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 70 | 71 | y = torch.cat([x_h, x_w], dim=2) 72 | y = self.conv1(y) 73 | y = self.bn1(y) 74 | y = self.act(y) 75 | 76 | x_h, x_w = torch.split(y, [h, w], dim=2) 77 | x_w = x_w.permute(0, 1, 3, 2) 78 | 79 | # Element-wise addition of pooled features: (n, mip, h, 1) + (n, mip, 1, w) broadcasts to an h x w map 80 | fused_feature = x_h + x_w 81 | 82 | # 1x1 convolution and activation 83 | fused_feature = self.fusion_conv(fused_feature) 84 | fused_feature = self.fusion_act(fused_feature) 85 | 86 | # Projection convolutions 87 | proj_h_feature = self.proj_h(fused_feature) 88 | proj_w_feature = self.proj_w(fused_feature) 89 | 90 | a_h = self.conv_h(proj_h_feature).sigmoid() 91 | a_w = self.conv_w(proj_w_feature).sigmoid() 92 | 93 | out = identity * a_w * a_h 94 | 95 | return out 96 | 97 | if __name__ == '__main__': 98 | x = torch.randn(2, 64, 32, 32) 99 | att = CoordAtt(inp=64, reduction=32) 100 | out = att(x) 101 | print("Input shape:", x.shape) 102 | print("Output shape:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention-gemini/code/bottleneck_attention_modulation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modify the `CoordAtt` module 3 | After pooling height and width features, concatenate them along the channel dimension 4 | Apply a single 1x1 convolution to the concatenated feature map 5 | Then, apply a bottleneck layer consisting of a 1x1 convolution, a non-linearity (e 6 | g 7 | , ReLU), and another 1x1 convolution, followed by a sigmoid activation to produce an attention map 8 | Use this attention map to modulate the *original* pooled height and width features *separately* before concatenating them 9 | Finally, feed the modulated concatenated map to the shared `conv1` 10 | In the `__init__` function, add three 1x1 convolution layers, one for the initial concatenated feature transformation and two for the bottleneck attention map generation, and a non-linearity 11 | In the `forward` function, implement the concatenation, the initial transformation, the bottleneck attention map generation, the separate modulation of the pooled height and width features, and finally the concatenation before feeding to the shared `conv1` 12 | The rest of the forward pass remains the same 13 | Compare output with the baseline using same test input, observe changes 14 | 15 | """ 16 | 17 | import torch 18 | import torch.nn as nn 19 |
import torch.nn.functional as F 20 | 21 | 22 | class h_sigmoid(nn.Module): 23 | def __init__(self, inplace=True): 24 | super(h_sigmoid, self).__init__() 25 | self.relu = nn.ReLU6(inplace=inplace) 26 | 27 | def forward(self, x): 28 | return self.relu(x + 3) / 6 29 | 30 | 31 | class h_swish(nn.Module): 32 | def __init__(self, inplace=True): 33 | super(h_swish, self).__init__() 34 | self.sigmoid = h_sigmoid(inplace=inplace) 35 | 36 | def forward(self, x): 37 | return x * self.sigmoid(x) 38 | 39 | 40 | class CoordAtt(nn.Module): 41 | def __init__(self, inp, reduction=32): 42 | super(CoordAtt, self).__init__() 43 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 44 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 45 | 46 | mip = max(8, inp // reduction) 47 | 48 | self.conv1 = nn.Conv2d(mip, mip, kernel_size=1, stride=1, padding=0) # operates on the mip-channel attended map (fixes a shape bug: the original inp in-channels never matched) 49 | self.bn1 = nn.BatchNorm2d(mip) 50 | self.act = h_swish() 51 | 52 | # Bottleneck attention layers 53 | self.conv_concat = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) # For initial transformation; the pooled maps are joined along the spatial axis, so the input still has inp channels (fixes a shape bug) 54 | self.bottleneck_conv1 = nn.Conv2d(mip, mip // 2, kernel_size=1, stride=1, padding=0) 55 | self.relu = nn.ReLU() 56 | self.bottleneck_conv2 = nn.Conv2d(mip // 2, mip, kernel_size=1, stride=1, padding=0) 57 | 58 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 59 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 60 | 61 | def forward(self, x): 62 | identity = x 63 | 64 | n, c, h, w = x.size() 65 | x_h = self.pool_h(x) 66 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 67 | 68 | # Concatenate pooled features 69 | y = torch.cat([x_h, x_w], dim=2) 70 | 71 | # Initial transformation 72 | y_concat = self.conv_concat(y) 73 | 74 | # Bottleneck attention map 75 | attn = self.bottleneck_conv1(y_concat) 76 | attn = self.relu(attn) 77 | attn = self.bottleneck_conv2(attn).sigmoid() 78 | 79 | # Apply bottleneck attention 80 | y_attn = y_concat * attn 81 | 82 | # Shared conv layer 83 | y = self.conv1(y_attn) 84 | y = self.bn1(y) 85 | y = self.act(y) 86 | 87 | 88 | x_h, x_w = torch.split(y, [h, w], dim=2) 89 | x_w = x_w.permute(0, 1, 3, 2) 90 | 91 | a_h = self.conv_h(x_h).sigmoid() 92 | a_w = self.conv_w(x_w).sigmoid() 93 | 94 | out = identity * a_w * a_h 95 | 96 | return out 97 | 98 | if __name__ == '__main__': 99 | x = torch.randn(2, 64, 32, 32) 100 | att = CoordAtt(inp=64, reduction=32) 101 | out = att(x) 102 | print("Input shape:", x.shape) 103 | print("Output shape:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention/code/xxa-deformable_pooling_coordatt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Replace the `nn 3 | AdaptiveAvgPool2d` operations in the CoordAtt module with a custom deformable pooling layer 4 | Implement learnable offsets for pooling regions that adjust based on input features, allowing the pooling operation to capture more complex spatial hierarchies 5 | Ensure the deformable pooling layer is lightweight to maintain computational efficiency 6 | Evaluate the modified CoordAtt's performance on a small benchmark dataset, comparing improvements in feature representation quality and accuracy against the original implementation 7 | 8 | """ 9 | 10 | # Modified code 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | class h_sigmoid(nn.Module): 16 | def __init__(self, inplace=True): 17 | super(h_sigmoid, self).__init__() 18 | self.relu =
nn.ReLU6(inplace=inplace) 19 | 20 | def forward(self, x): 21 | return self.relu(x + 3) / 6 22 | 23 | 24 | class h_swish(nn.Module): 25 | def __init__(self, inplace=True): 26 | super(h_swish, self).__init__() 27 | self.sigmoid = h_sigmoid(inplace=inplace) 28 | 29 | def forward(self, x): 30 | return x * self.sigmoid(x) 31 | 32 | 33 | class DeformablePooling(nn.Module): 34 | def __init__(self, channels, kernel_size=1): 35 | super(DeformablePooling, self).__init__() 36 | self.offset_conv = nn.Conv2d(channels, 2, kernel_size=3, padding=1) 37 | self.kernel_size = kernel_size 38 | 39 | def forward(self, x): 40 | n, c, h, w = x.size() 41 | # Compute offsets 42 | offsets = self.offset_conv(x) 43 | # Create a normalized grid 44 | grid = self.create_grid(h, w, device=x.device) 45 | # Add offsets to the grid 46 | grid = grid + offsets.permute(0, 2, 3, 1) 47 | # Clamp grid values to ensure they are within valid range 48 | grid = torch.clamp(grid, -1, 1) 49 | # Sample with the offset grid (note: grid_sample keeps the full H x W resolution, so this layer deforms rather than truly pools) 50 | sampled = F.grid_sample(x, grid, mode='bilinear', padding_mode='zeros', align_corners=True) 51 | return sampled 52 | 53 | def create_grid(self, height, width, device): 54 | # Create a grid for sampling 55 | theta = torch.tensor([[[1, 0, 0], [0, 1, 0]]], dtype=torch.float, device=device) 56 | grid = F.affine_grid(theta, (1, 1, height, width), align_corners=True) 57 | return grid.repeat(1, 1, 1, 1) 58 | 59 | 60 | class CoordAtt(nn.Module): 61 | def __init__(self, inp, reduction=32): 62 | super(CoordAtt, self).__init__() 63 | # Initialize deformable pooling layers for height and width 64 | self.pool_h = DeformablePooling(inp) 65 | self.pool_w = DeformablePooling(inp) 66 | 67 | mip = max(8, inp // reduction) 68 | 69 | # Convolutional layers for processing pooled features 70 | self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0) 71 | self.bn1 = nn.BatchNorm2d(mip) 72 | self.act = h_swish() 73 | 74 | self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 75 | self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0) 76 | 77 | def forward(self, x): 78 | identity = x 79 | 80 | n, c, h, w = x.size() 81 | # Apply deformable pooling to both height and width 82 | x_h = self.pool_h(x) 83 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 84 | 85 | # Concatenate pooled features and process through convolutions (note: with the full-resolution sampling above, the shapes only line up for square inputs such as the 32x32 test) 86 | y = torch.cat([x_h, x_w], dim=2) 87 | y = self.conv1(y) 88 | y = self.bn1(y) 89 | y = self.act(y) 90 | 91 | x_h, x_w = torch.split(y, [h, w], dim=2) 92 | x_w = x_w.permute(0, 1, 3, 2) 93 | 94 | # Generate attention weights 95 | a_h = self.conv_h(x_h).sigmoid() 96 | a_w = self.conv_w(x_w).sigmoid() 97 | 98 | # Apply attention to the identity 99 | out = identity * a_w * a_h 100 | 101 | return out 102 | 103 | if __name__ == '__main__': 104 | x = torch.randn(2, 64, 32, 32) 105 | att = CoordAtt(inp=64, reduction=32) 106 | out = att(x) 107 | print("Input shape:", x.shape) 108 | print("Output shape:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/coordattention-gemini/code/sigmoid_dynamic_weighted_fusion.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modify the `CoordAtt` module 3 | After pooling height and width features, apply a *single 1x1 convolution* to each of the pooled feature maps *separately* 4 | This 1x1 conv will output *two channels* 5 | The first channel will represent the transformed feature, and the second channel will represent the *dynamic
weight* 6 | Apply a *sigmoid activation* to the dynamic weight channel 7 | Then, perform a weighted addition of the transformed height and width features using their respective sigmoid-activated dynamic weights 8 | Feed the result into the shared `conv1` 9 | In the `__init__` function, add *two 1x1 convolution layers*, one for each of height and width, each of which output *two channels* 10 | In the `forward` function, implement the separate convolutions, the separation of the two output channels into transformed feature and dynamic weight, the application of the sigmoid activation to the dynamic weight channel, the weighted addition using these sigmoid-activated dynamic weights, before passing the result to the shared `conv1` 11 | The rest of the forward pass remains the same 12 | Compare output with the baseline using the same test input and observe the changes 13 | This involves modifying `__init__` to include the 1x1 conv layers with two output channels, and `forward` to implement the channel separation, sigmoid activation and dynamic fusion 14 | 15 | """ 16 | 17 | import torch 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | 21 | 22 | class h_sigmoid(nn.Module): 23 | def __init__(self, inplace=True): 24 | super(h_sigmoid, self).__init__() 25 | self.relu = nn.ReLU6(inplace=inplace) 26 | 27 | def forward(self, x): 28 | return self.relu(x + 3) / 6 29 | 30 | 31 | class h_swish(nn.Module): 32 | def __init__(self, inplace=True): 33 | super(h_swish, self).__init__() 34 | self.sigmoid = h_sigmoid(inplace=inplace) 35 | 36 | def forward(self, x): 37 | return x * self.sigmoid(x) 38 | 39 | 40 | class CoordAtt(nn.Module): 41 | def __init__(self, inp, reduction=32): 42 | super(CoordAtt, self).__init__() 43 | self.pool_h = nn.AdaptiveAvgPool2d((None, 1)) 44 | self.pool_w = nn.AdaptiveAvgPool2d((1, None)) 45 | 46 | mip = max(8, inp // reduction) 47 | 48 | self.conv1 = nn.Conv2d(inp, inp, kernel_size=1, stride=1, padding=0) # keeps inp channels so the fused map can gate the identity (fixes a channel-mismatch bug: a mip-channel output could not multiply the inp-channel input) 49 | self.bn1 = nn.BatchNorm2d(inp) 50 | self.act = h_swish() 51 | 52 | # Add two 1x1 conv layers, one for h and one for w, each outputting 2 channels 53 | self.conv_h_sep = nn.Conv2d(inp, 2, kernel_size=1, stride=1, padding=0) 54 | self.conv_w_sep = nn.Conv2d(inp, 2, kernel_size=1, stride=1, padding=0) 55 | self.conv_h_expand = nn.Conv2d(1, inp, kernel_size=1, stride=1, padding=0) 56 | self.conv_w_expand = nn.Conv2d(1, inp, kernel_size=1, stride=1, padding=0) 57 | 58 | 59 | def forward(self, x): 60 | identity = x 61 | 62 | n, c, h, w = x.size() 63 | x_h = self.pool_h(x) 64 | x_w = self.pool_w(x).permute(0, 1, 3, 2) 65 | 66 | # Apply separate 1x1 convs to pooled features 67 | x_h_sep = self.conv_h_sep(x_h) 68 | x_w_sep = self.conv_w_sep(x_w) 69 | 70 | # Separate transformed feature and dynamic weight 71 | x_h_trans, x_h_weight = torch.split(x_h_sep, [1, 1], dim=1) 72 | x_w_trans, x_w_weight = torch.split(x_w_sep, [1, 1], dim=1) 73 | 74 | 75 | # Apply sigmoid to dynamic weights 76 | a_h = x_h_weight.sigmoid() 77 | a_w = x_w_weight.sigmoid() 78 | 79 | # Expand channels before weighted addition 80 | x_h_trans = self.conv_h_expand(x_h_trans) 81 | x_w_trans = self.conv_w_expand(x_w_trans) 82 | 83 | # Weighted addition of transformed features; the width branch is weighted before the permute so the shapes broadcast to (n, inp, h, w) (fixes a shape bug) 84 | y = x_h_trans * a_h + (x_w_trans * a_w).permute(0, 1, 3, 2) 85 | 86 | 87 | y = self.conv1(y) 88 | y = self.bn1(y) 89 | y = self.act(y) 90 | 91 | 92 | out = identity * y 93 | 94 | return out 95 | 96 | if __name__ == '__main__': 97 | x = torch.randn(2, 64, 32, 32) 98 | att = CoordAtt(inp=64, reduction=32) 99 | out = att(x) 100 |
print("输入尺寸:", x.shape) 101 | print("输出尺寸:", out.shape) -------------------------------------------------------------------------------- /generation_idea_template/small_object_attention/code/contrastive_learning_integration.py: -------------------------------------------------------------------------------- 1 | """ 2 | Integrate a contrastive learning module into the SEAttention framework 3 | Develop functions to generate contrastive pairs from input data, either through augmentations or synthetic data creation 4 | Ensure these pairs highlight small target presence or absence 5 | Modify the training loop to include a contrastive loss alongside the standard detection loss 6 | Evaluate the performance improvements using metrics such as precision, recall, and F1-score on small target detection tasks, comparing results with the baseline SEAttention model and other enhanced models 7 | Emphasize robustness in varied detection scenarios 8 | 9 | """ 10 | 11 | # Modified code 12 | import numpy as np 13 | import torch 14 | from torch import flatten, nn 15 | from torch.nn import init 16 | from torch.nn.modules.activation import ReLU 17 | from torch.nn.modules.batchnorm import BatchNorm2d 18 | from torch.nn import functional as F 19 | from torchvision import transforms 20 | 21 | class SEAttention(nn.Module): 22 | 23 | def __init__(self, channel=512, reduction=16): 24 | super().__init__() 25 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 26 | self.fc = nn.Sequential( 27 | nn.Linear(channel, channel // reduction, bias=False), 28 | nn.ReLU(inplace=True), 29 | nn.Linear(channel // reduction, channel, bias=False), 30 | nn.Sigmoid() 31 | ) 32 | 33 | def init_weights(self): 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | init.kaiming_normal_(m.weight, mode='fan_out') 37 | if m.bias is not None: 38 | init.constant_(m.bias, 0) 39 | elif isinstance(m, nn.BatchNorm2d): 40 | init.constant_(m.weight, 1) 41 | init.constant_(m.bias, 0) 42 | elif isinstance(m, nn.Linear): 43 | init.normal_(m.weight, std=0.001) 44 | if m.bias is not None: 45 | init.constant_(m.bias, 0) 46 | 47 | def forward(self, x): 48 | b, c, _, _ = x.size() 49 | y = self.avg_pool(x).view(b, c) 50 | y = self.fc(y).view(b, c, 1, 1) 51 | return x * y.expand_as(x) 52 | 53 | class ContrastiveLearningModule(nn.Module): 54 | 55 | def __init__(self, feature_dim): 56 | super().__init__() 57 | self.projector = nn.Sequential( 58 | nn.Linear(feature_dim, feature_dim, bias=False), 59 | nn.ReLU(inplace=True), 60 | nn.Linear(feature_dim, feature_dim, bias=False) 61 | ) 62 | 63 | def forward(self, x1, x2): 64 | z1 = self.projector(x1) 65 | z2 = self.projector(x2) 66 | return z1, z2 67 | 68 | def contrastive_loss(z1, z2, temperature=0.5, device='cpu'): 69 | z1 = F.normalize(z1, dim=1) 70 | z2 = F.normalize(z2, dim=1) 71 | batch_size = z1.size(0) 72 | labels = torch.arange(batch_size).to(device) 73 | similarity_matrix = torch.matmul(z1, z2.T) / temperature 74 | loss = F.cross_entropy(similarity_matrix, labels) 75 | return loss 76 | 77 | def generate_contrastive_pairs(input_data): 78 | # Applying random augmentations to generate pairs 79 | transform = transforms.Compose([ 80 | transforms.RandomResizedCrop(7), 81 | transforms.RandomHorizontalFlip(), 82 | transforms.RandomVerticalFlip() 83 | ]) 84 | augmented_data_1 = transform(input_data) 85 | augmented_data_2 = transform(input_data) 86 | return augmented_data_1, augmented_data_2 87 | 88 | if __name__ == '__main__': 89 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 90 | model = 
SEAttention().to(device) 91 | model.init_weights() 92 | contrastive_model = ContrastiveLearningModule(feature_dim=512).to(device) 93 | 94 | input_data = torch.randn(10, 512, 7, 7).to(device) # Example for a batch size of 10 95 | output = model(input_data) 96 | 97 | augmented_data_1, augmented_data_2 = generate_contrastive_pairs(input_data) 98 | # Embed each augmented view and pool to (batch, channel) so the projector's Linear(512, 512) receives 512-dim inputs (fixes a shape bug: the flattened 512*7*7 vector did not fit, and the two views were previously unused) 99 | feat1 = F.adaptive_avg_pool2d(model(augmented_data_1), 1).flatten(1) 100 | feat2 = F.adaptive_avg_pool2d(model(augmented_data_2), 1).flatten(1) 101 | z1, z2 = contrastive_model(feat1, feat2) 102 | cl_loss = contrastive_loss(z1, z2, device=device) 103 | 104 | print(f'Output shape: {output.shape}, Contrastive Loss: {cl_loss.item()}') --------------------------------------------------------------------------------
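The docstring of contrastive_learning_integration.py calls for a training loop that combines the standard detection loss with the contrastive loss, but the file stops at a single forward pass. Below is a minimal sketch of such a combined step, reusing SEAttention, ContrastiveLearningModule, contrastive_loss, and generate_contrastive_pairs from that file; the detection criterion and the lambda_contrastive weight are illustrative assumptions, not part of the repository.

import torch
import torch.nn.functional as F

def combined_training_step(model, contrastive_model, batch, targets, optimizer,
                           detection_criterion, lambda_contrastive=0.1, device='cpu'):
    # One optimization step: detection loss plus weighted contrastive loss
    model.train()
    contrastive_model.train()
    optimizer.zero_grad()

    # Detection branch
    output = model(batch)
    detection_loss = detection_criterion(output, targets)

    # Contrastive branch: two augmented views, pooled to (batch, channel)
    view1, view2 = generate_contrastive_pairs(batch)
    feat1 = F.adaptive_avg_pool2d(model(view1), 1).flatten(1)
    feat2 = F.adaptive_avg_pool2d(model(view2), 1).flatten(1)
    z1, z2 = contrastive_model(feat1, feat2)
    cl_loss = contrastive_loss(z1, z2, device=device)

    loss = detection_loss + lambda_contrastive * cl_loss
    loss.backward()
    optimizer.step()
    return loss.item()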