├── .idea
│   ├── .gitignore
│   ├── Hand-torn_code.iml
│   ├── Hand-torn_code.time
│   ├── deployment.xml
│   ├── inspectionProfiles
│   │   ├── Project_Default.xml
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── README.md
├── about_attention
│   ├── CBAM.py
│   ├── MSCAAttention.py
│   └── __init__.py
├── about_interview
│   ├── Attention.py
│   ├── Embedding.py
│   ├── FFN.py
│   ├── LayerNorm.py
│   └── __init__.py
├── about_light_net
│   ├── __init__.py
│   └── mobile_net
│       └── __init__.py
├── about_transformer
│   ├── ViT
│   │   ├── ViT_model.py
│   │   └── __init__.py
│   ├── __init__.py
│   ├── attention_is_all_you_need
│   │   ├── __init__.py
│   │   ├── attention_module.py
│   │   ├── transformer_decoder.py
│   │   ├── transformer_encoder.py
│   │   ├── transformer_model.py
│   │   └── utils_module.py
│   ├── efficient_vit
│   │   └── __init__.py
│   └── mobile_vit
│       ├── __init__.py
│       ├── model.py
│       ├── model_config.py
│       └── transformer_encoder.py
├── classic_conv
│   ├── AlexNet.py
│   ├── SENet.py
│   └── __init__.py
└── image_segmentation
    ├── __init__.py
    └── about_unet
        ├── UNet.py
        ├── UNet_pp.py
        └── __init__.py
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/.idea/Hand-torn_code.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/Hand-torn_code.time:
--------------------------------------------------------------------------------
1 | "2024-03-09 03:53:05:859",5000
2 | "2024-03-09 03:55:02:941",113000
3 | "2024-03-09 03:57:00:551",101000
4 | "2024-03-09 03:57:33:103",29000
5 | "2024-03-09 03:59:16:425",26000
6 | "2024-03-09 04:00:38:638",78000
7 | "2024-03-09 04:01:52:395",60000
8 | "2024-03-09 04:02:03:162",1000
9 | "2024-03-09 04:02:15:737",6000
10 | "2024-03-09 04:02:58:952",14000
11 | "2024-03-09 04:10:02:728",345000
12 | "2024-03-09 09:15:03:764",0
13 | "2024-03-09 09:41:26:962",5000
14 | "2024-03-09 09:41:32:627",3000
15 | "2024-03-09 09:41:43:947",10000
16 | "2024-03-09 09:43:42:785",116000
17 | "2024-03-09 09:43:56:374",8000
18 | "2024-03-09 09:44:04:365",6000
19 | "2024-03-09 09:44:46:498",41000
20 | "2024-03-09 09:45:06:840",16000
21 | "2024-03-09 09:45:31:951",23000
22 | "2024-03-09 09:51:12:195",314000
23 | "2024-03-09 09:57:37:053",148000
24 | "2024-03-09 10:16:43:362",5000
25 | "2024-03-09 10:42:19:809",300000
26 | "2024-03-09 10:53:51:578",0
27 | "2024-03-09 22:19:44:923",6000
28 | "2024-03-09 22:46:28:385",0
29 | "2024-03-09 23:23:46:074",36000
30 | "2024-03-09 23:23:52:845",1000
31 | "2024-03-09 23:24:14:510",3000
32 | "2024-03-09 23:24:16:569",0
33 | "2024-03-09 23:26:40:834",141000
34 | "2024-03-09 23:27:16:033",3000
35 | "2024-03-09 23:27:46:404",22000
36 | "2024-03-09 23:28:09:755",19000
37 | "2024-03-09 23:28:39:037",27000
38 | "2024-03-09 23:28:46:022",2000
39 | "2024-03-09 23:28:48:156",0
40 | "2024-03-09 23:29:03:303",13000
41 | "2024-03-09 23:29:49:517",8000
42 | "2024-03-09 23:30:12:900",3000
43 | "2024-03-09 23:30:57:849",19000
44 | "2024-03-09 23:31:58:347",49000
45 | "2024-03-09 23:32:03:466",3000
46 | "2024-03-09 23:32:14:726",1000
47 | "2024-03-09 23:32:55:988",2000
48 | "2024-03-09 23:33:03:092",3000
49 | "2024-03-09 23:35:38:720",4000
50 | "2024-03-09 23:36:13:215",10000
51 | "2024-03-09 23:36:39:099",16000
52 | "2024-03-09 23:36:45:571",0
53 | "2024-03-09 23:37:11:020",21000
54 | "2024-03-09 23:44:47:237",69000
55 | "2024-03-09 23:54:07:048",458000
56 | "2024-03-09 23:56:00:398",42000
57 | "2024-03-09 23:56:30:569",0
58 | "2024-03-09 23:57:30:807",56000
59 | "2024-03-09 23:57:41:535",6000
60 | "2024-03-09 23:58:15:676",31000
61 | "2024-03-09 23:59:12:975",24000
62 | "2024-03-09 23:59:36:816",3000
63 | "2024-03-10 00:00:02:445",21000
64 | "2024-03-10 00:00:13:928",3000
65 | "2024-03-10 00:00:23:129",5000
66 | "2024-03-10 00:00:34:165",8000
67 | "2024-03-10 00:04:06:857",12000
68 | "2024-03-10 00:04:12:339",3000
69 | "2024-03-10 00:04:17:164",3000
70 | "2024-03-10 00:19:44:900",8000
71 | "2024-03-10 00:38:46:725",11000
72 | "2024-03-10 00:39:30:834",42000
73 | "2024-03-10 00:46:13:785",7000
74 | "2024-03-10 00:51:29:728",285000
75 | "2024-03-10 00:57:04:525",300000
76 | "2024-03-10 01:07:45:007",0
77 | "2024-03-10 02:22:33:044",2000
78 | "2024-03-10 02:22:36:588",0
79 | "2024-03-10 02:22:37:812",0
80 | "2024-03-10 18:30:34:702",182000
81 | "2024-03-10 18:37:44:385",300000
82 | "2024-03-10 19:03:27:299",0
83 | "2024-03-10 19:03:47:940",12000
84 | "2024-03-10 19:04:00:629",9000
85 | "2024-03-10 19:04:18:785",14000
86 | "2024-03-10 19:04:23:954",4000
87 | "2024-03-10 19:04:37:099",12000
88 | "2024-03-10 19:04:56:027",17000
89 | "2024-03-10 19:05:30:325",31000
90 | "2024-03-10 19:05:38:096",6000
91 | "2024-03-10 19:06:03:694",23000
92 | "2024-03-10 19:06:07:829",2000
93 | "2024-03-10 19:06:10:003",0
94 | "2024-03-10 19:06:31:419",20000
95 | "2024-03-10 19:07:46:839",72000
96 | "2024-03-10 19:08:10:116",20000
97 | "2024-03-10 19:08:39:578",28000
98 | "2024-03-10 19:08:57:380",6000
99 | "2024-03-10 19:09:35:283",10000
100 | "2024-03-10 19:11:38:062",14000
101 | "2024-03-10 19:11:52:494",3000
102 | "2024-03-10 19:14:03:552",36000
103 | "2024-03-10 19:15:21:821",8000
104 | "2024-03-10 19:15:37:653",8000
105 | "2024-03-10 19:16:08:753",26000
106 | "2024-03-10 19:16:54:659",19000
107 | "2024-03-10 19:17:11:201",10000
108 | "2024-03-10 19:19:06:883",7000
109 | "2024-03-10 19:20:06:457",52000
110 | "2024-03-10 19:20:39:975",27000
111 | "2024-03-10 19:21:10:843",18000
112 | "2024-03-10 19:23:59:212",162000
113 | "2024-03-10 19:24:49:081",44000
114 | "2024-03-10 19:25:51:113",58000
115 | "2024-03-10 19:26:22:162",28000
116 | "2024-03-10 19:27:24:245",28000
117 | "2024-03-10 19:28:54:057",79000
118 | "2024-03-10 19:29:09:801",14000
119 | "2024-03-10 19:29:51:900",36000
120 | "2024-03-10 19:30:04:062",11000
121 | "2024-03-10 19:30:28:475",14000
122 | "2024-03-10 19:31:05:054",35000
123 | "2024-03-10 19:31:38:256",31000
124 | "2024-03-10 19:31:51:786",12000
125 | "2024-03-10 19:32:05:575",9000
126 | "2024-03-10 19:32:15:097",7000
127 | "2024-03-10 19:32:36:242",17000
128 | "2024-03-10 19:33:31:456",44000
129 | "2024-03-10 19:34:03:603",30000
130 | "2024-03-10 19:34:30:426",23000
131 | "2024-03-10 19:34:35:534",2000
132 | "2024-03-10 19:34:45:348",8000
133 | "2024-03-10 19:35:06:853",15000
134 | "2024-03-10 19:35:23:158",15000
135 | "2024-03-10 19:35:43:527",17000
136 | "2024-03-10 19:35:51:459",5000
137 | "2024-03-10 19:36:18:817",7000
138 | "2024-03-10 19:37:10:786",31000
139 | "2024-03-10 19:37:32:919",11000
140 | "2024-03-10 19:37:49:591",8000
141 | "2024-03-10 19:38:35:077",36000
142 | "2024-03-10 19:38:40:737",4000
143 | "2024-03-10 19:39:05:987",24000
144 | "2024-03-10 19:39:14:796",7000
145 | "2024-03-10 19:39:19:590",3000
146 | "2024-03-10 19:39:46:000",15000
147 | "2024-03-10 19:40:28:150",40000
148 | "2024-03-10 19:41:09:397",38000
149 | "2024-03-10 19:42:38:967",86000
150 | "2024-03-10 19:43:04:788",23000
151 | "2024-03-10 19:43:48:509",17000
152 | "2024-03-10 19:45:00:516",69000
153 | "2024-03-10 19:46:26:137",81000
154 | "2024-03-10 19:47:54:268",83000
155 | "2024-03-10 19:48:14:119",5000
156 | "2024-03-10 19:50:22:354",19000
157 | "2024-03-10 19:51:46:056",36000
158 | "2024-03-10 19:51:55:949",0
159 | "2024-03-10 19:51:59:555",0
160 | "2024-03-10 19:52:05:431",0
161 | "2024-03-10 19:52:17:630",4000
162 | "2024-03-10 19:52:27:433",5000
163 | "2024-03-10 19:52:31:786",2000
164 | "2024-03-10 19:52:36:417",2000
165 | "2024-03-10 19:53:34:133",13000
166 | "2024-03-10 19:55:18:907",2000
167 | "2024-03-10 19:55:37:713",15000
168 | "2024-03-10 19:55:43:179",0
169 | "2024-03-10 19:56:36:904",51000
170 | "2024-03-10 19:57:09:477",30000
171 | "2024-03-10 19:57:14:506",0
172 | "2024-03-10 19:57:39:789",23000
173 | "2024-03-10 19:57:54:958",3000
174 | "2024-03-10 19:58:28:592",18000
175 | "2024-03-10 19:58:51:918",17000
176 | "2024-03-10 19:59:03:065",6000
177 | "2024-03-10 20:02:53:853",2000
178 | "2024-03-10 21:49:27:681",71000
179 | "2024-03-10 21:49:39:761",0
180 | "2024-03-10 21:49:50:302",6000
181 | "2024-03-10 21:50:31:795",14000
182 | "2024-03-10 23:58:06:595",15000
183 | "2024-03-11 01:32:52:033",0
184 | "2024-03-15 08:22:14:592",4000
185 | "2024-03-15 08:23:45:643",26000
186 | "2024-03-15 08:25:39:518",111000
187 | "2024-03-15 08:31:27:783",98000
188 | "2024-03-15 08:31:48:971",8000
189 | "2024-03-15 08:32:26:129",26000
190 | "2024-03-15 08:36:20:237",227000
191 | "2024-03-15 08:36:34:374",0
192 | "2024-03-15 08:36:35:774",0
193 | "2024-03-15 08:37:07:695",30000
194 | "2024-03-15 08:39:04:526",7000
195 | "2024-03-15 08:39:11:457",5000
196 | "2024-03-15 23:23:07:318",0
197 | "2024-03-16 05:35:00:928",17000
198 | "2024-03-16 05:35:05:573",2000
199 | "2024-03-16 05:35:42:361",30000
200 | "2024-03-16 05:36:38:597",54000
201 | "2024-03-16 05:36:41:002",0
202 | "2024-03-16 05:36:57:333",15000
203 | "2024-03-16 05:37:47:268",48000
204 | "2024-03-16 05:38:15:974",15000
205 | "2024-03-16 05:39:37:849",74000
206 | "2024-03-16 05:40:09:779",6000
207 | "2024-03-16 05:40:21:110",7000
208 | "2024-03-16 05:42:08:069",102000
209 | "2024-03-16 05:42:52:371",15000
210 | "2024-03-16 05:43:47:429",29000
211 | "2024-03-16 05:44:27:926",24000
212 | "2024-03-16 05:45:06:173",36000
213 | "2024-03-16 05:46:50:781",31000
214 | "2024-03-16 05:48:17:151",10000
215 | "2024-03-16 05:49:18:603",13000
216 | "2024-03-16 05:54:29:726",298000
217 | "2024-03-16 05:57:34:523",3000
218 | "2024-03-16 05:57:42:514",3000
219 | "2024-03-16 05:58:21:967",26000
220 | "2024-03-16 06:02:45:037",71000
221 | "2024-03-16 06:02:50:739",4000
222 | "2024-03-16 06:03:07:197",14000
223 | "2024-03-16 06:04:55:954",106000
224 | "2024-03-16 06:06:24:347",30000
225 | "2024-03-16 06:07:50:117",80000
226 | "2024-03-16 06:08:07:376",15000
227 | "2024-03-16 06:08:12:489",3000
228 | "2024-03-16 06:08:51:723",33000
229 | "2024-03-16 06:09:16:772",19000
230 | "2024-03-16 06:10:23:972",37000
231 | "2024-03-16 06:11:21:116",39000
232 | "2024-03-16 06:12:21:875",46000
233 | "2024-03-16 06:12:47:806",17000
234 | "2024-03-16 06:13:33:034",43000
235 | "2024-03-16 06:14:19:896",44000
236 | "2024-03-16 06:15:24:768",22000
237 | "2024-03-16 06:15:47:827",12000
238 | "2024-03-16 06:15:56:268",7000
239 | "2024-03-16 06:16:44:668",39000
240 | "2024-03-16 06:17:15:990",29000
241 | "2024-03-16 06:17:41:260",14000
242 | "2024-03-16 06:19:35:140",79000
243 | "2024-03-16 06:19:54:445",17000
244 | "2024-03-16 06:21:29:244",3000
245 | "2024-03-16 07:17:06:691",5000
246 | "2024-03-16 07:17:08:602",0
247 | "2024-03-16 07:18:31:830",81000
248 | "2024-03-16 07:21:19:094",153000
249 | "2024-03-16 07:22:51:156",82000
250 | "2024-03-16 07:24:15:508",72000
251 | "2024-03-16 07:25:24:382",48000
252 | "2024-03-16 07:25:31:358",0
253 | "2024-03-16 07:28:36:820",169000
254 | "2024-03-16 07:28:38:867",1000
255 | "2024-03-16 07:30:42:061",21000
256 | "2024-03-16 07:31:25:353",39000
257 | "2024-03-16 07:31:45:181",12000
258 | "2024-03-16 07:31:47:652",2000
259 | "2024-03-16 07:32:11:945",2000
260 | "2024-03-16 07:33:45:988",16000
261 | "2024-03-16 07:40:21:405",24000
262 | "2024-03-16 07:41:23:985",23000
263 | "2024-03-16 07:42:27:475",32000
264 | "2024-03-16 07:43:24:928",55000
265 | "2024-03-16 07:43:48:120",0
266 | "2024-03-16 07:45:22:589",91000
267 | "2024-03-16 07:48:21:006",137000
268 | "2024-03-16 07:48:28:694",3000
269 | "2024-03-16 07:54:08:241",11000
270 | "2024-03-16 07:54:22:241",5000
271 | "2024-03-16 07:55:17:262",3000
272 | "2024-03-16 07:55:49:740",3000
273 | "2024-03-16 07:56:43:005",3000
274 | "2024-03-16 07:56:55:914",1000
275 | "2024-03-16 07:57:16:819",3000
276 | "2024-03-16 07:57:43:806",9000
277 | "2024-03-16 07:58:04:486",2000
278 | "2024-03-16 07:59:22:296",34000
279 | "2024-03-16 08:00:13:959",6000
280 | "2024-03-16 08:00:50:497",14000
281 | "2024-03-16 08:04:51:791",1000
282 | "2024-03-16 08:05:20:420",22000
283 | "2024-03-16 08:05:30:851",6000
284 | "2024-03-16 08:05:41:697",5000
285 | "2024-03-16 08:05:50:974",6000
286 | "2024-03-16 08:07:34:897",12000
287 | "2024-03-16 08:07:50:491",0
288 | "2024-03-16 08:07:57:176",5000
289 | "2024-03-16 21:56:51:786",26000
290 | "2024-03-16 21:57:33:097",38000
291 | "2024-03-16 21:58:02:673",13000
292 | "2024-03-16 21:58:11:220",6000
293 | "2024-03-16 21:58:14:134",1000
294 | "2024-03-16 21:58:26:069",8000
295 | "2024-03-16 21:58:40:929",10000
296 | "2024-03-16 21:59:13:498",1000
297 | "2024-03-16 21:59:40:721",26000
298 | "2024-03-16 22:00:05:615",22000
299 | "2024-03-16 22:05:42:506",332000
300 | "2024-03-16 22:49:25:416",0
301 | "2024-03-16 22:49:37:603",0
302 | "2024-03-16 23:55:48:814",111000
303 | "2024-03-16 23:56:05:131",2000
304 | "2024-03-16 23:56:11:567",0
305 | "2024-03-17 00:05:24:535",5000
306 | "2024-03-17 00:09:32:743",228000
307 | "2024-03-17 00:12:40:856",152000
308 | "2024-03-17 00:13:11:407",21000
309 | "2024-03-17 00:14:54:236",68000
310 | "2024-03-23 23:10:49:244",20000
311 | "2024-03-23 23:11:05:096",13000
312 | "2024-03-23 23:11:21:092",13000
313 | "2024-03-23 23:11:30:619",7000
314 | "2024-03-23 23:11:36:629",2000
315 | "2024-03-23 23:11:59:468",10000
316 | "2024-03-23 23:12:01:683",0
317 | "2024-03-23 23:12:05:694",1000
318 | "2024-03-23 23:12:17:478",11000
319 | "2024-03-23 23:12:26:569",6000
320 | "2024-03-23 23:12:30:799",2000
321 | "2024-03-23 23:12:45:955",0
322 | "2024-03-23 23:13:09:345",22000
323 | "2024-03-23 23:13:11:633",0
324 | "2024-03-23 23:13:27:276",8000
325 | "2024-03-23 23:14:02:321",25000
326 | "2024-03-23 23:14:04:642",0
327 | "2024-03-23 23:15:29:992",73000
328 | "2024-03-23 23:15:56:834",22000
329 | "2024-03-23 23:17:09:364",33000
330 | "2024-03-23 23:17:59:260",2000
331 | "2024-03-23 23:20:35:560",152000
332 | "2024-03-23 23:20:50:728",0
333 | "2024-03-23 23:20:54:537",2000
334 | "2024-03-23 23:21:00:463",0
335 | "2024-03-23 23:21:17:148",8000
336 | "2024-03-23 23:21:49:946",26000
337 | "2024-03-23 23:22:04:048",4000
338 | "2024-03-23 23:22:21:259",13000
339 | "2024-03-23 23:23:41:993",7000
340 | "2024-03-23 23:24:06:027",14000
341 | "2024-03-23 23:24:49:821",34000
342 | "2024-03-23 23:25:28:575",28000
343 | "2024-03-23 23:25:38:254",5000
344 | "2024-03-23 23:26:45:514",21000
345 | "2024-03-23 23:27:47:927",2000
346 | "2024-03-23 23:28:32:506",5000
347 | "2024-03-23 23:30:15:911",24000
348 | "2024-03-23 23:33:28:829",18000
349 | "2024-03-23 23:34:06:636",15000
350 | "2024-03-23 23:34:11:073",2000
351 | "2024-03-23 23:34:50:320",8000
352 | "2024-03-23 23:35:34:823",0
353 | "2024-03-23 23:35:41:044",3000
354 | "2024-03-23 23:36:19:799",34000
355 | "2024-03-23 23:37:10:619",46000
356 | "2024-03-23 23:37:18:151",2000
357 | "2024-03-23 23:37:57:300",35000
358 | "2024-03-23 23:37:58:715",0
359 | "2024-03-23 23:38:29:266",29000
360 | "2024-03-23 23:38:30:470",0
361 | "2024-03-23 23:39:14:277",31000
362 | "2024-03-23 23:40:07:984",47000
363 | "2024-03-23 23:40:20:406",10000
364 | "2024-03-23 23:40:54:560",12000
365 | "2024-03-23 23:41:06:695",10000
366 | "2024-03-23 23:41:09:233",1000
367 | "2024-03-23 23:41:27:772",13000
368 | "2024-03-23 23:41:53:605",5000
369 | "2024-03-23 23:44:49:647",50000
370 | "2024-03-24 00:01:19:806",32000
371 | "2024-03-24 00:01:27:557",1000
372 | "2024-03-24 00:08:41:973",17000
373 | "2024-03-24 00:08:45:532",2000
374 | "2024-03-24 00:11:46:098",12000
375 | "2024-03-24 00:12:55:046",26000
376 | "2024-03-24 00:13:04:087",7000
377 | "2024-03-24 00:13:10:533",5000
378 | "2024-03-24 00:13:22:836",1000
379 | "2024-03-24 00:13:25:362",1000
380 | "2024-03-24 00:13:57:754",2000
381 | "2024-03-24 00:14:02:375",1000
382 | "2024-03-24 00:14:09:275",2000
383 | "2024-03-24 00:14:28:082",18000
384 | "2024-03-24 00:14:35:980",2000
385 | "2024-03-24 00:14:37:581",0
386 | "2024-03-24 00:14:38:287",0
387 | "2024-03-24 00:14:46:682",1000
388 | "2024-03-24 00:15:07:161",2000
389 | "2024-03-24 00:15:12:873",0
390 | "2024-03-24 00:18:44:478",203000
391 | "2024-03-24 00:19:26:858",39000
392 | "2024-03-24 00:20:01:693",25000
393 | "2024-03-24 00:20:29:170",14000
394 | "2024-03-24 00:20:38:890",3000
395 | "2024-03-24 00:21:32:825",11000
396 | "2024-03-24 00:21:52:142",2000
397 | "2024-03-24 00:21:56:222",1000
398 | "2024-03-24 00:22:18:167",1000
399 | "2024-03-24 00:22:20:452",1000
400 | "2024-03-24 00:22:26:058",1000
401 | "2024-03-24 00:23:26:082",43000
402 | "2024-03-24 00:25:02:722",21000
403 | "2024-03-24 00:25:32:127",23000
404 | "2024-03-24 00:25:35:657",0
405 | "2024-03-24 00:26:00:750",13000
406 | "2024-03-24 00:26:08:563",3000
407 | "2024-03-24 00:26:23:762",12000
408 | "2024-03-24 00:27:15:287",46000
409 | "2024-03-24 00:27:39:345",10000
410 | "2024-03-24 00:29:29:852",15000
411 | "2024-03-24 00:29:50:968",0
412 | "2024-03-24 00:33:25:077",9000
413 | "2024-03-24 00:33:36:061",2000
414 | "2024-03-24 00:33:49:567",13000
415 | "2024-03-24 00:34:42:861",2000
416 | "2024-03-24 00:36:21:539",68000
417 | "2024-03-24 00:36:29:649",4000
418 | "2024-03-24 00:38:22:403",32000
419 | "2024-03-24 00:38:54:873",0
420 | "2024-03-24 00:38:58:720",3000
421 | "2024-03-24 00:39:33:379",27000
422 | "2024-03-24 00:39:52:457",0
423 | "2024-03-24 00:43:24:427",1000
424 | "2024-03-24 00:55:26:651",2000
425 | "2024-03-24 00:55:36:480",4000
426 | "2024-03-24 01:03:06:732",439000
427 | "2024-03-24 01:03:08:173",0
428 | "2024-03-24 01:03:12:054",2000
429 | "2024-03-24 01:03:40:675",2000
430 | "2024-03-24 01:05:01:248",53000
431 | "2024-03-24 01:10:53:244",122000
432 | "2024-03-24 01:11:25:637",6000
433 | "2024-03-24 01:11:35:409",5000
434 | "2024-03-24 01:11:46:672",7000
435 | "2024-03-24 01:13:11:260",80000
436 | "2024-03-24 09:56:42:710",300000
437 | "2024-03-24 10:10:41:347",0
438 | "2024-03-25 22:38:24:807",32000
439 | "2024-03-25 22:38:28:791",2000
440 | "2024-03-25 22:38:32:847",4000
441 | "2024-03-25 22:41:34:037",178000
442 | "2024-03-25 23:01:46:232",53000
443 | "2024-03-25 23:26:06:823",147000
444 | "2024-03-25 23:27:32:519",84000
445 | "2024-03-25 23:38:26:508",420000
446 | "2024-03-25 23:40:58:164",141000
447 | "2024-03-25 23:41:37:488",36000
448 | "2024-03-25 23:42:17:290",35000
449 | "2024-03-25 23:45:24:789",181000
450 | "2024-03-25 23:46:47:056",80000
451 | "2024-03-25 23:50:59:364",239000
452 | "2024-03-25 23:52:47:504",42000
453 | "2024-03-25 23:53:36:396",49000
454 | "2024-03-25 23:54:23:661",9000
455 | "2024-03-25 23:55:22:993",53000
456 | "2024-03-25 23:55:48:866",20000
457 | "2024-03-25 23:57:07:561",23000
458 | "2024-03-26 00:00:19:909",16000
459 | "2024-03-26 00:00:23:576",0
460 | "2024-03-26 00:00:25:090",0
461 | "2024-03-26 00:01:04:859",19000
462 | "2024-03-26 00:01:17:700",1000
463 | "2024-03-26 00:01:47:502",9000
464 | "2024-03-26 00:01:51:643",1000
465 | "2024-03-26 00:08:03:208",340000
466 | "2024-03-26 00:08:15:123",5000
467 | "2024-03-26 00:09:09:956",7000
468 | "2024-03-26 00:12:04:110",7000
469 | "2024-03-26 00:16:43:449",17000
470 | "2024-03-26 00:17:55:636",42000
471 | "2024-03-26 00:18:04:944",7000
472 | "2024-03-26 00:18:39:599",24000
473 | "2024-03-26 00:18:59:721",14000
474 | "2024-03-26 00:19:13:150",8000
475 | "2024-03-26 00:19:31:416",9000
476 | "2024-03-26 00:21:53:829",139000
477 | "2024-03-26 00:22:48:934",45000
478 | "2024-03-26 00:23:03:786",12000
479 | "2024-03-26 00:23:18:224",13000
480 | "2024-03-26 00:23:52:087",3000
481 | "2024-03-26 00:24:14:257",11000
482 | "2024-03-26 00:24:32:645",1000
483 | "2024-03-26 00:24:34:293",0
484 | "2024-03-26 00:25:01:932",12000
485 | "2024-03-26 00:28:01:059",146000
486 | "2024-03-26 00:28:46:533",35000
487 | "2024-03-26 00:30:09:177",11000
488 | "2024-03-26 00:30:59:606",5000
489 | "2024-03-26 00:31:21:848",9000
490 | "2024-03-26 00:31:55:206",5000
491 | "2024-03-26 00:33:13:834",14000
492 | "2024-03-26 00:33:16:405",0
493 | "2024-03-26 00:33:24:903",6000
494 | "2024-03-26 00:34:24:846",11000
495 | "2024-03-26 00:35:08:757",10000
496 | "2024-03-26 00:40:02:886",4000
497 | "2024-03-26 00:40:12:728",5000
498 | "2024-03-26 00:40:18:645",1000
499 | "2024-03-26 00:40:43:553",9000
500 | "2024-03-26 00:41:06:024",6000
501 | "2024-03-26 00:41:13:597",2000
502 | "2024-03-26 00:41:17:694",0
503 | "2024-03-26 00:41:23:086",4000
504 | "2024-03-26 00:41:47:508",12000
505 | "2024-03-26 00:41:53:403",3000
506 | "2024-03-26 00:41:57:200",2000
507 | "2024-03-26 00:42:58:086",14000
508 | "2024-03-26 00:43:22:539",21000
509 | "2024-03-26 00:44:34:583",0
510 | "2024-03-26 00:44:45:686",10000
511 | "2024-03-26 00:48:09:536",193000
512 | "2024-03-26 00:49:23:971",50000
513 | "2024-03-26 00:49:29:433",1000
514 | "2024-03-26 00:50:03:880",30000
515 | "2024-03-26 00:50:35:257",29000
516 | "2024-03-26 00:51:21:246",28000
517 | "2024-03-26 00:51:46:897",3000
518 | "2024-03-26 00:52:47:488",18000
519 | "2024-03-26 00:52:50:585",0
520 | "2024-03-26 00:52:59:858",4000
521 | "2024-03-26 00:53:39:446",27000
522 | "2024-03-26 00:53:45:854",4000
523 | "2024-03-26 00:55:24:565",22000
524 | "2024-03-26 00:55:52:243",3000
525 | "2024-03-26 00:57:06:102",0
526 | "2024-03-26 00:59:17:463",4000
527 | "2024-03-26 00:59:32:253",13000
528 | "2024-03-26 00:59:43:184",5000
529 | "2024-03-26 01:00:10:926",15000
530 | "2024-03-26 01:03:11:598",173000
531 | "2024-03-26 01:03:42:307",2000
532 | "2024-03-26 01:04:19:368",35000
533 | "2024-03-26 01:04:45:445",6000
534 | "2024-03-26 01:08:59:941",9000
535 | "2024-03-26 01:11:54:200",16000
536 | "2024-03-26 01:13:27:235",33000
537 | "2024-03-26 01:14:15:231",5000
538 | "2024-03-26 01:15:56:777",65000
539 | "2024-03-26 01:16:45:035",38000
540 | "2024-03-26 01:20:12:805",1000
541 | "2024-03-26 01:21:37:724",77000
542 | "2024-03-26 01:21:50:475",10000
543 | "2024-03-26 01:22:10:650",17000
544 | "2024-03-26 01:22:13:328",0
545 | "2024-03-26 01:22:16:618",0
546 | "2024-03-26 01:24:37:465",138000
547 | "2024-03-26 01:26:07:955",75000
548 | "2024-03-26 01:26:13:432",0
549 | "2024-03-26 01:27:05:237",44000
550 | "2024-03-26 01:27:34:382",28000
551 | "2024-03-26 01:27:39:927",2000
552 | "2024-03-26 01:28:19:813",9000
553 | "2024-03-26 01:29:14:707",29000
554 | "2024-03-26 01:30:08:459",15000
555 | "2024-03-26 01:32:58:950",134000
556 | "2024-03-26 01:33:39:548",3000
557 | "2024-03-26 01:34:47:351",9000
558 | "2024-03-26 01:35:37:423",44000
559 | "2024-03-26 01:36:52:232",53000
560 | "2024-03-26 01:37:23:094",21000
561 | "2024-03-26 01:38:05:220",8000
562 | "2024-03-26 01:38:33:939",9000
563 | "2024-03-26 01:39:05:087",13000
564 | "2024-03-26 01:39:45:587",8000
565 | "2024-03-26 01:41:16:758",63000
566 | "2024-03-26 01:44:22:606",0
567 | "2024-03-26 01:44:35:027",7000
568 | "2024-03-26 01:44:58:592",8000
569 | "2024-03-26 01:46:42:630",21000
570 | "2024-03-26 01:47:28:788",43000
571 | "2024-03-26 01:49:29:491",103000
572 | "2024-03-26 01:51:35:115",101000
573 | "2024-03-26 01:52:21:898",37000
574 | "2024-03-26 01:52:38:285",3000
575 | "2024-03-26 01:52:58:611",18000
576 | "2024-03-26 01:54:46:335",2000
577 | "2024-03-26 01:55:56:740",45000
578 | "2024-03-26 01:56:06:408",2000
579 | "2024-03-26 01:56:16:378",7000
580 | "2024-03-26 01:57:26:021",18000
581 | "2024-03-26 01:58:17:105",17000
582 | "2024-03-26 01:58:41:604",8000
583 | "2024-03-26 01:59:26:196",24000
584 | "2024-03-26 02:00:54:683",18000
585 | "2024-03-26 02:02:21:480",68000
586 | "2024-03-26 02:04:25:207",97000
587 | "2024-03-26 02:04:27:900",0
588 | "2024-03-26 02:05:20:084",35000
589 | "2024-03-26 02:08:16:154",75000
590 | "2024-03-26 02:25:05:159",148000
591 | "2024-03-26 02:25:40:426",8000
592 | "2024-03-26 02:26:04:404",10000
593 | "2024-03-26 02:26:17:357",8000
594 | "2024-03-26 02:29:18:216",175000
595 | "2024-03-26 02:32:39:138",192000
596 | "2024-03-26 02:36:34:037",170000
597 | "2024-03-26 02:37:07:717",11000
598 | "2024-03-26 02:38:12:531",0
599 | "2024-03-26 02:39:27:160",11000
600 | "2024-03-26 02:43:30:449",241000
601 | "2024-03-26 02:58:06:467",772000
602 | "2024-03-26 02:58:35:292",1000
603 | "2024-03-26 06:55:27:482",2000
604 | "2024-03-26 06:55:29:583",0
605 | "2024-03-26 06:55:36:831",4000
606 | "2024-03-26 06:55:52:775",13000
607 | "2024-03-26 06:56:46:106",0
608 | "2024-03-26 06:56:47:196",0
609 | "2024-03-26 06:58:07:712",4000
610 | "2024-03-26 07:00:40:328",6000
611 | "2024-03-26 07:00:45:056",4000
612 | "2024-03-26 07:01:13:256",5000
613 | "2024-03-26 07:02:09:852",38000
614 | "2024-03-26 07:02:18:843",3000
615 | "2024-03-26 07:41:16:294",1000
616 | "2024-03-26 07:45:32:104",3000
617 | "2024-03-26 08:07:38:369",1000
618 | "2024-03-26 08:07:45:799",0
619 | "2024-03-26 08:28:52:384",1000
620 | "2024-03-26 08:39:14:769",4000
621 | "2024-03-26 08:47:59:998",4000
622 | "2024-03-26 08:48:03:881",0
623 | "2024-03-26 09:24:07:763",2000
624 | "2024-03-26 09:31:06:957",30000
625 | "2024-03-26 09:31:40:292",26000
626 | "2024-03-26 09:38:17:276",13000
627 | "2024-03-26 09:38:31:706",0
628 | "2024-03-26 09:39:12:866",39000
629 | "2024-03-26 09:39:31:012",9000
630 | "2024-03-26 09:39:41:460",3000
631 | "2024-03-26 09:40:43:051",4000
632 | "2024-03-26 09:40:48:197",2000
633 | "2024-03-26 09:42:09:644",13000
634 | "2024-03-26 09:42:23:531",1000
635 | "2024-03-26 09:42:27:193",1000
636 | "2024-03-26 09:44:10:646",11000
637 | "2024-03-26 09:44:45:162",30000
638 | "2024-03-26 09:47:39:913",164000
639 | "2024-03-26 09:51:52:580",4000
640 | "2024-03-26 22:27:07:026",0
641 | "2024-03-26 22:46:21:596",3000
642 | "2024-03-26 22:58:15:519",315000
643 | "2024-03-26 23:03:41:149",0
644 | "2024-03-27 08:44:21:211",1000
645 | "2024-03-27 08:49:29:069",300000
646 | "2024-03-27 08:51:45:502",24000
647 | "2024-03-27 09:00:57:346",2000
648 | "2024-03-27 09:09:22:731",2000
649 | "2024-03-27 09:27:28:892",13000
650 | "2024-03-31 03:08:12:488",24000
651 | "2024-03-31 03:08:14:581",0
652 | "2024-03-31 03:09:02:055",22000
653 | "2024-03-31 03:09:29:893",18000
654 | "2024-03-31 03:09:38:535",4000
655 | "2024-03-31 03:09:47:497",4000
656 | "2024-03-31 03:09:57:468",5000
657 | "2024-03-31 03:10:13:855",11000
658 | "2024-03-31 03:11:01:227",23000
659 | "2024-03-31 03:12:34:665",71000
660 | "2024-03-31 03:13:59:460",66000
661 | "2024-03-31 03:16:57:755",136000
662 | "2024-03-31 03:17:48:363",38000
663 | "2024-03-31 03:18:15:422",0
664 | "2024-03-31 03:18:29:586",10000
665 | "2024-03-31 03:20:38:967",14000
666 | "2024-03-31 03:20:55:856",0
667 | "2024-03-31 03:22:07:970",68000
668 | "2024-03-31 03:22:11:667",1000
669 | "2024-03-31 03:22:20:058",6000
670 | "2024-03-31 03:22:28:288",6000
671 | "2024-03-31 03:22:42:773",13000
672 | "2024-03-31 03:25:58:535",192000
673 | "2024-03-31 03:26:30:212",29000
674 | "2024-03-31 03:26:48:526",3000
675 | "2024-03-31 03:27:32:567",42000
676 | "2024-03-31 03:29:12:195",26000
677 | "2024-03-31 03:29:23:232",0
678 | "2024-03-31 03:29:53:039",27000
679 | "2024-03-31 03:30:00:396",3000
680 | "2024-03-31 03:30:16:495",14000
681 | "2024-03-31 03:32:37:215",2000
682 | "2024-03-31 05:03:09:091",50000
683 | "2024-03-31 05:03:22:640",12000
684 | "2024-03-31 05:04:06:367",38000
685 | "2024-03-31 05:04:29:194",9000
686 | "2024-03-31 05:05:15:947",7000
687 | "2024-03-31 05:05:18:786",1000
688 | "2024-03-31 05:06:34:839",56000
689 | "2024-03-31 05:07:01:988",22000
690 | "2024-03-31 05:07:14:089",0
691 | "2024-03-31 05:07:21:053",5000
692 | "2024-03-31 05:25:26:519",1000
693 | "2024-03-31 05:25:34:319",0
694 | "2024-03-31 08:14:18:333",24000
695 | "2024-03-31 08:14:52:593",9000
696 | "2024-03-31 09:34:05:823",10000
697 | "2024-03-31 09:34:09:928",2000
698 | "2024-03-31 09:34:25:105",8000
699 | "2024-03-31 09:35:15:193",8000
700 | "2024-03-31 09:35:18:653",2000
701 | "2024-03-31 09:35:22:743",3000
702 | "2024-03-31 09:35:32:767",2000
703 | "2024-03-31 09:36:16:195",17000
704 | "2024-03-31 09:36:30:208",7000
705 | "2024-03-31 09:36:40:228",7000
706 | "2024-03-31 09:37:13:490",25000
707 | "2024-03-31 09:38:02:115",37000
708 | "2024-03-31 09:38:21:674",16000
709 | "2024-03-31 09:38:23:190",0
710 | "2024-03-31 09:38:31:850",3000
711 | "2024-03-31 09:38:44:369",8000
712 | "2024-03-31 09:40:13:133",86000
713 | "2024-03-31 09:41:48:501",87000
714 | "2024-03-31 09:42:13:830",17000
715 | "2024-03-31 09:42:25:410",10000
716 | "2024-03-31 09:42:49:775",18000
717 | "2024-03-31 09:44:38:852",98000
718 | "2024-03-31 09:48:40:651",37000
719 | "2024-03-31 09:48:47:392",4000
720 | "2024-03-31 09:49:10:869",19000
721 | "2024-03-31 09:49:28:181",1000
722 | "2024-03-31 09:49:33:447",3000
723 | "2024-03-31 09:52:18:522",0
724 | "2024-03-31 09:55:15:583",141000
725 | "2024-03-31 09:55:32:388",3000
726 | "2024-03-31 09:55:39:231",4000
727 | "2024-03-31 09:56:06:725",20000
728 | "2024-03-31 09:56:17:671",8000
729 | "2024-03-31 09:56:39:053",19000
730 | "2024-03-31 09:59:59:251",184000
731 | "2024-03-31 10:00:24:860",2000
732 | "2024-03-31 10:01:16:273",44000
733 | "2024-03-31 10:03:58:049",135000
734 | "2024-03-31 10:05:05:730",12000
735 | "2024-03-31 10:05:09:150",0
736 | "2024-03-31 10:07:00:848",15000
737 | "2024-03-31 10:08:04:582",28000
738 | "2024-03-31 10:08:36:858",28000
739 | "2024-03-31 10:09:09:598",24000
740 | "2024-03-31 10:09:21:420",1000
741 | "2024-03-31 10:10:06:701",40000
742 | "2024-03-31 10:13:03:605",174000
743 | "2024-03-31 10:14:00:481",30000
744 | "2024-03-31 10:14:20:878",0
745 | "2024-03-31 10:14:31:870",8000
746 | "2024-03-31 10:15:23:643",4000
747 | "2024-03-31 10:15:36:210",9000
748 |
--------------------------------------------------------------------------------
/.idea/deployment.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This repository records my favorite and most useful deep learning models encountered while learning, including but not limited to classic convolutional neural networks (e.g. AlexNet, ResNet), attention-mechanism modules, and Transformer-related models (e.g. ViT, Efficient ViT).
2 |
3 | I will keep adding models as I learn. All models are implemented in PyTorch and were developed against CUDA 11.8. Note that many of the models implemented here are also available in the PyTorch framework and the transformers library; reading that source code is a good way to improve your coding skills.
4 |
5 | Model code available so far:
6 |
7 | + [image_segmentation:](https://github.com/anshilaoliu/Hand-torn_code/tree/master/image_segmentation)
8 | + [about_unet:](https://github.com/anshilaoliu/Hand-torn_code/tree/master/image_segmentation/about_unet)
9 | + [UNet](https://github.com/anshilaoliu/Hand-torn_code/blob/master/image_segmentation/about_unet/UNet.py)
10 | + [UNet_pp](https://github.com/anshilaoliu/Hand-torn_code/blob/master/image_segmentation/about_unet/UNet_pp.py)
11 | + [about_transformer:](https://github.com/anshilaoliu/Hand-torn_code/tree/master/about_transformer)
12 | + [Transformer](https://github.com/anshilaoliu/Hand-torn_code/tree/master/about_transformer/attention_is_all_you_need)
13 | + [ViT_model](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_transformer/ViT/ViT_model.py)
14 | + [mobile_vit](https://github.com/anshilaoliu/Hand-torn_code/tree/master/about_transformer/mobile_vit)
15 | + [about_attention:](https://github.com/anshilaoliu/Hand-torn_code/tree/master/about_attention)
16 | + [MSCAAttention](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_attention/MSCAAttention.py)
17 | + [CBAM](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_attention/CBAM.py)
18 | + [classic_conv:](https://github.com/anshilaoliu/Hand-torn_code/tree/master/classic_conv)
19 | + [AlexNet](https://github.com/anshilaoliu/Hand-torn_code/blob/master/classic_conv/AlexNet.py)
20 | + [SENet](https://github.com/anshilaoliu/Hand-torn_code/blob/master/classic_conv/SENet.py)
21 | + [about_interview](https://github.com/anshilaoliu/Hand-torn_code/tree/master/about_interview)
22 | + [Attention](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_interview/Attention.py)
23 | + [Embedding](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_interview/Embedding.py)
24 | + [FFN](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_interview/FFN.py)
25 | + [LayerNorm](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_interview/LayerNorm.py)
26 |
27 |
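28 | Most modules ship a small `test()` entry point (e.g. `MSCAAttention.py`, `ViT_model.py`, `attention_module.py`), so running a file directly is a quick smoke test. A minimal sketch of using one model from outside the repo (assuming the repository root is on `PYTHONPATH`):
29 | 
30 |     import torch
31 |     from about_transformer.ViT.ViT_model import ViT
32 | 
33 |     model = ViT(3, 16, 16*16*3, 14*14, 8, "gelu", 6, 10)  # 224x224 RGB input, 10 classes
34 |     logits = model(torch.randn(1, 3, 224, 224))
35 |     print(logits.shape)  # torch.Size([1, 10])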
--------------------------------------------------------------------------------
/about_attention/CBAM.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : CBAM.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/31 3:08
7 | # @Classes :
8 | import torch
9 | import torch.nn as nn
10 |
11 |
12 | class ChannelAttention(nn.Module):
13 | """通道注意力"""
14 | def __init__(self, in_planes, ratio=8):
15 | super(ChannelAttention, self).__init__()
16 | self.avg_pool = nn.AdaptiveAvgPool2d(1)
17 | self.max_pool = nn.AdaptiveMaxPool2d(1)
18 |
19 | self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
20 | self.relu1 = nn.ReLU()
21 | self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
22 |
23 | self.sigmoid = nn.Sigmoid()
24 |
25 | def forward(self, x):
26 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
27 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
28 | out = avg_out + max_out
29 | return self.sigmoid(out)
30 |
31 |
32 | class SpatialAttention(nn.Module):
33 | """空间注意力"""
34 | def __init__(self, kernel_size=7):
35 | super(SpatialAttention, self).__init__()
36 |
37 | assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
38 | padding = 3 if kernel_size == 7 else 1
39 | self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
40 | self.sigmoid = nn.Sigmoid()
41 |
42 | def forward(self, x):
43 | avg_out = torch.mean(x, dim=1, keepdim=True)
44 | max_out, _ = torch.max(x, dim=1, keepdim=True)
45 | x = torch.cat([avg_out, max_out], dim=1)
46 | x = self.conv1(x)
47 | return self.sigmoid(x)
48 |
49 |
50 | class CBAMBlock(nn.Module):
51 | def __init__(self, channel, ratio=8, kernel_size=7):
52 | super(CBAMBlock, self).__init__()
53 | self.channel_attention = ChannelAttention(channel, ratio=ratio)
54 | self.spatial_attention = SpatialAttention(kernel_size=kernel_size)
55 | # Whether to keep the residual concatenation and the 1x1 channel-reduction conv below depends on your actual needs
56 | self.channel_down = nn.Conv2d(channel * 2, channel, kernel_size=1)
57 |
58 | def forward(self, x):
59 | residual = x
60 | x = x * self.channel_attention(x)
61 | x = x * self.spatial_attention(x)
62 |
63 | x = torch.cat((x, residual), dim=1)
64 | x = self.channel_down(x)
65 | return x
66 |
67 |
68 | if __name__ == '__main__':
69 | pass
70 |
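71 | # A minimal shape check for CBAMBlock, added here as a sketch (the original file leaves the
72 | # __main__ block empty); it mirrors the test() helpers used elsewhere in this repo.
73 | def test():
74 |     x = torch.rand(2, 64, 32, 32)   # (batch, channels, H, W)
75 |     block = CBAMBlock(channel=64)
76 |     print(block(x).shape)           # torch.Size([2, 64, 32, 32]) - shape is preserved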
--------------------------------------------------------------------------------
/about_attention/MSCAAttention.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : MSCAAttention.py
5 | # @author : LJH
6 | # @Start Date : 2024/3/4 23:17
7 | # @Classes : MSCAAttention (Multi-Scale Convolutional Attention) implementation. This attention mechanism strengthens the network's perception along specific channel and spatial dimensions (it automatically learns the importance of particular channels and spatial positions), which helps extract richer and more useful features
8 | import torch
9 | import torch.nn as nn
10 | from mmengine.model import BaseModule
11 | from mmcv.cnn import build_activation_layer
12 |
13 |
14 | # Source from https://github.com/haoshao-nku/medical_seg
15 | # Implementation of the attention module proposed in that work
16 | class MSCAAttention(BaseModule):
17 | """Multi-Scale Convolutional Attention(MSCA)模块.
18 | 多尺度特征提取:通过多个卷积核大小和填充的卷积操作,以提取不同尺度的特征信息。
19 | 这些卷积操作包括一个具有较大卷积核的初始卷积 (self.conv0) 和多个后续的卷积操作(self.conv0_1,self.conv0_2,self.conv1_1,self.conv1_2,self.conv2_1,self.conv2_2),每个都针对不同的核大小和填充。
20 | 通道混合:在提取多尺度特征之后,通过对这些特征进行通道混合来整合不同尺度的信息。通道混合操作由最后一个卷积层 self.conv3 完成。
21 | 卷积注意力:最后,通过将通道混合后的特征与输入特征进行逐元素乘法,实现了一种卷积注意力机制。这意味着模块通过对不同通道的特征赋予不同的权重来选择性地强调或抑制输入特征。
22 | """
23 | def __init__(self,
24 | channels,
25 | kernel_sizes=[5, [1, 7], [1, 11], [1, 21]],
26 | paddings=[2, [0, 3], [0, 5], [0, 10]]):
27 | """
28 |
29 | :param channels: number of channels.
30 | :param kernel_sizes: attention kernel sizes. Default: [5, [1, 7], [1, 11], [1, 21]].
31 | :param paddings: corresponding padding values for the attention kernels.
32 | Default: [2, [0, 3], [0, 5], [0, 10]].
33 | """
34 | super().__init__()
35 | self.conv0 = nn.Conv2d(
36 | channels,
37 | channels,
38 | kernel_size=kernel_sizes[0],
39 | padding=paddings[0],
40 | groups=channels)
41 | for i, (kernel_size,
42 | padding) in enumerate(zip(kernel_sizes[1:], paddings[1:])):
43 | kernel_size_ = [kernel_size, kernel_size[::-1]]
44 | padding_ = [padding, padding[::-1]]
45 | conv_name = [f'conv{i}_1', f'conv{i}_2']
46 | for i_kernel, i_pad, i_conv in zip(kernel_size_, padding_,
47 | conv_name):
48 | self.add_module(
49 | i_conv,
50 | nn.Conv2d(
51 | channels,
52 | channels,
53 | tuple(i_kernel),
54 | padding=i_pad,
55 | groups=channels))
56 | self.conv3 = nn.Conv2d(channels, channels, 1)
57 |
58 | def forward(self, x):
59 | u = x.clone()
60 |
61 | attn = self.conv0(x)
62 |
63 | # multi-scale feature extraction
64 | attn_0 = self.conv0_1(attn)
65 | attn_0 = self.conv0_2(attn_0)
66 |
67 | attn_1 = self.conv1_1(attn)
68 | attn_1 = self.conv1_2(attn_1)
69 |
70 | attn_2 = self.conv2_1(attn)
71 | attn_2 = self.conv2_2(attn_2)
72 |
73 | attn = attn + attn_0 + attn_1 + attn_2
74 | # channel mixing (again via a 1x1 convolution)
75 | attn = self.conv3(attn)
76 |
77 | # Convolutional Attention
78 | x = attn * u
79 |
80 | return x
81 |
82 |
83 | # The model in the original paper wraps MSCAAttention as below; useful as a reference for how the authors use this attention module
84 | class MSCASpatialAttention(BaseModule):
85 | """
86 | Spatial Attention Module in the Multi-Scale Convolutional Attention block.
87 | A 1x1 convolution, GELU activation, the MSCA attention, another 1x1 convolution, and finally a residual (skip) connection.
88 | """
89 |
90 | def __init__(self,
91 | in_channels,
92 | attention_kernel_sizes=[5, [1, 7], [1, 11], [1, 21]],
93 | attention_kernel_paddings=[2, [0, 3], [0, 5], [0, 10]],
94 | act_cfg=dict(type='GELU')):
95 | """
96 |
97 | :param in_channels: number of channels.
98 | :param attention_kernel_sizes (list): attention kernel sizes. Default: [5, [1, 7], [1, 11], [1, 21]].
99 | :param attention_kernel_paddings (list): corresponding padding values for the attention kernels.
100 | :param act_cfg (dict): activation config passed to build_activation_layer. Default: dict(type='GELU').
101 | """
102 | super().__init__()
103 | self.proj_1 = nn.Conv2d(in_channels, in_channels, 1)
104 | self.activation = build_activation_layer(act_cfg)
105 | self.spatial_gating_unit = MSCAAttention(in_channels,
106 | attention_kernel_sizes,
107 | attention_kernel_paddings)
108 | self.proj_2 = nn.Conv2d(in_channels, in_channels, 1)
109 |
110 | def forward(self, x):
111 | # skip connection
112 | shortcut = x.clone()
113 | # first 1x1 projection
114 | x = self.proj_1(x)
115 | # activation
116 | x = self.activation(x)
117 | # MSCAAttention
118 | x = self.spatial_gating_unit(x)
119 | # second 1x1 projection
120 | x = self.proj_2(x)
121 | # residual fusion
122 | x = x + shortcut
123 | return x
124 |
125 |
126 | def test():
127 | x = torch.rand(3, 64, 32, 32)
128 | model = MSCASpatialAttention(in_channels=64)
129 | pred = model(x)
130 | print(x.shape)
131 | print(pred.shape)
132 |
133 |
134 | if __name__ == '__main__':
135 | test()
136 |
--------------------------------------------------------------------------------
/about_attention/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @Package Name:
5 | # @File Name : __init__.py
6 | # @author : ahua
7 | # @Version : 1.0
8 | # @Start Date : 2024/3/9 3:54
9 | # @Classes :
10 |
11 |
12 | if __name__ == '__main__':
13 | pass
14 |
--------------------------------------------------------------------------------
/about_interview/Attention.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : Attention.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/31 3:22
7 | # @Classes :
8 | import torch
9 | from torch import nn
10 | import math
11 |
12 |
13 | class ScaledDotProductAttention(nn.Module):
14 | """根据公式计算QkV"""
15 | def __init__(self, n_d):
16 | """
17 |
18 | :param n_d: dimension per head, used for scaling
19 | """
20 | super(ScaledDotProductAttention, self).__init__()
21 |
22 | self.n_d = n_d
23 | # softmax over the last dimension
24 | self.softmax = nn.Softmax(dim=-1)
25 |
26 | def forward(self, Q, K, V, mask):
27 | # multiply Q by K transposed over dims (2, 3), then scale
28 | attn_score = Q @ K.transpose(2, 3) / math.sqrt(self.n_d)
29 |
30 | if mask is not None:
31 | attn_score = attn_score.masked_fill(mask == 0, float("-inf"))
32 |
33 | attn_score = self.softmax(attn_score)
34 | attn_score = attn_score @ V
35 |
36 | return attn_score
37 |
38 |
39 | class MultiHeadAttention(nn.Module):
40 | """多头注意力,包括残差连接和Norm"""
41 | def __init__(self, d_model, n_head, dropout=0.1, bias=True):
42 | """
43 |
44 | :param d_model: embedding dimension of the input vectors
45 | :param n_head:
46 | :param bias:
47 | """
48 | super(MultiHeadAttention, self).__init__()
49 |
50 | if d_model % n_head != 0:
51 | raise ValueError(
52 | "Embedding dim must be divisible by number of heads in {}. Got: embed_dim={} and num_heads={}".format(
53 | self.__class__.__name__, d_model, n_head
54 | )
55 | )
56 |
57 | self.n_head = n_head
58 | self.d_model = d_model
59 | self.n_d = d_model // n_head
60 |
61 | # projection matrices for Q, K, V
62 | self.w_q = nn.Linear(in_features=d_model, out_features=d_model, bias=bias)
63 | self.w_k = nn.Linear(in_features=d_model, out_features=d_model, bias=bias)
64 | self.w_v = nn.Linear(in_features=d_model, out_features=d_model, bias=bias)
65 |
66 | self.get_attn = ScaledDotProductAttention(self.n_d)
67 |
68 | # final projection after the heads are merged back together
69 | self.w_o = nn.Linear(d_model, d_model)
70 |
71 | self.dropout = nn.Dropout(p=dropout)
72 |
73 | self.layer_norm = nn.LayerNorm(d_model)
74 |
75 | def forward(self, x_q, x_k, x_v, mask=None):
76 | residual = x_q
77 | batch, seq_len, dimension = x_q.shape
78 | # project the inputs to the Q, K, V matrices
79 | q, k, v = self.w_q(x_q), self.w_k(x_k), self.w_v(x_v)
80 |
81 | # split into heads: reshape to a 4-D tensor, then permute (0, 1, 2, 3) -> (0, 2, 1, 3)
82 | q = q.view(batch, seq_len, self.n_head, self.n_d).permute(0, 2, 1, 3)
83 | k = k.view(batch, seq_len, self.n_head, self.n_d).permute(0, 2, 1, 3)
84 | v = v.view(batch, seq_len, self.n_head, self.n_d).permute(0, 2, 1, 3)
85 |
86 | attn_score = self.get_attn(q, k, v, mask)
87 |
88 | # permute back, make the memory contiguous, then merge the heads into a 3-D tensor
89 | attn_score = attn_score.permute(0, 2, 1, 3).contiguous().view(batch, seq_len, dimension)
90 |
91 | output = self.w_o(attn_score)
92 | output = self.dropout(output)
93 |
94 | # residual connection and LayerNorm
95 | output = self.layer_norm(output + residual)
96 | return output
97 |
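98 | # A minimal smoke test, added as a sketch; it mirrors the test() in
99 | # about_transformer/attention_is_all_you_need/attention_module.py.
100 | if __name__ == '__main__':
101 |     x = torch.randn(32, 64, 512)   # (batch, seq_len, d_model)
102 |     mha = MultiHeadAttention(d_model=512, n_head=8)
103 |     print(mha(x, x, x).shape)      # torch.Size([32, 64, 512])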
--------------------------------------------------------------------------------
/about_interview/Embedding.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : Embedding.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/31 3:25
7 | # @Classes :
8 | import torch
9 | from torch import nn
10 |
11 |
12 | class PositionalEmbedding(nn.Module):
13 | """位置编码,输入token embedding返回加上位置编码后的总的embedding"""
14 | def __init__(self, d_model, max_len=5000, dropout=0.1):
15 | super(PositionalEmbedding, self).__init__()
16 | # initialise the encoding table
17 | pe = torch.zeros(max_len, d_model)
18 | # in the original paper the positional encoding is computed directly rather than learned,
19 | # so it is registered below as a buffer (moves with .to(device)) instead of a Parameter
20 |
21 | # just follow the formulas: PE(pos, 2i) = sin(pos / 10000^(2i/d_model)), PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model))
22 | # initialise pos
23 | pos = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
24 | # 2i
25 | _2i = torch.arange(0, d_model, 2)
26 |
27 | # even indices
28 | pe[:, 0::2] = torch.sin(pos / (10000 ** (_2i / d_model)))
29 | # odd indices
30 | pe[:, 1::2] = torch.cos(pos / (10000 ** (_2i / d_model)))
31 | self.register_buffer("pe", pe)
32 | self.dropout = nn.Dropout(p=dropout)
33 |
34 | def forward(self, x):
35 | """
36 |
37 | :param x: input token embedding
38 | :return:
39 | """
40 | seq_len = x.shape[1]
41 | x = x + self.pe[:seq_len, :]
42 | return self.dropout(x)
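43 | 
44 | 
45 | # A quick usage sketch (not in the original file): the positional encoding is added
46 | # element-wise on top of the token embedding, so the shape is unchanged.
47 | if __name__ == '__main__':
48 |     tok = torch.randn(2, 10, 512)   # (batch, seq_len, d_model)
49 |     pos_emb = PositionalEmbedding(d_model=512)
50 |     print(pos_emb(tok).shape)       # torch.Size([2, 10, 512])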
--------------------------------------------------------------------------------
/about_interview/FFN.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : FFN.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/31 3:22
7 | # @Classes :
8 | from torch import nn
9 |
10 |
11 | class PositionwiseFeedForwardNet(nn.Module):
12 | """前馈网络,包括后续残差连接与Norm"""
13 | def __init__(self, d_model, hidden, dropout=0.1):
14 | super(PositionwiseFeedForwardNet, self).__init__()
15 | self.fc = nn.Sequential(
16 | nn.Linear(d_model, hidden),
17 | nn.ReLU(),
18 | nn.Dropout(p=dropout),
19 | nn.Linear(hidden, d_model),
20 | nn.Dropout(p=dropout)
21 | )
22 | self.layer_norm = nn.LayerNorm(d_model)
23 |
24 | def forward(self, x):
25 | residual = x
26 | x = self.fc(x)
27 | output = self.layer_norm(x+residual)
28 | return output
29 |
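30 | # A quick usage sketch (not in the original file).
31 | if __name__ == '__main__':
32 |     import torch
33 |     x = torch.randn(2, 10, 512)   # (batch, seq_len, d_model)
34 |     ffn = PositionwiseFeedForwardNet(d_model=512, hidden=2048)
35 |     print(ffn(x).shape)           # torch.Size([2, 10, 512])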
--------------------------------------------------------------------------------
/about_interview/LayerNorm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : LayerNorm.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/31 3:22
7 | # @Classes :
8 | import torch
9 | from torch import nn
10 |
11 |
12 | class LayerNorm(nn.Module):
13 | """也可以直接用nn.LayerNorm"""
14 | def __init__(self, d_model, eps=1e-9):
15 | super(LayerNorm, self).__init__()
16 | # two learnable parameters, a scale and a shift, so that normalisation does not lock the input of the following activation into its (near-)linear region and weaken the non-linearity
17 | self.weight = nn.Parameter(torch.ones(d_model))
18 | self.beta = nn.Parameter(torch.zeros(d_model))
19 | # avoid division by zero
20 | self.eps = eps
21 |
22 | def forward(self, x):
23 | # LayerNorm always normalises over the last dimension
24 | mean = x.mean(-1, keepdim=True)
25 | var = x.var(-1, unbiased=False, keepdim=True)
26 | out = (x - mean) / torch.sqrt(var + self.eps)
27 | return self.weight * out + self.beta
28 |
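29 | # A quick sanity check against nn.LayerNorm, added as a sketch (not in the original file).
30 | if __name__ == '__main__':
31 |     x = torch.randn(2, 10, 512)
32 |     ours = LayerNorm(512)
33 |     ref = nn.LayerNorm(512, eps=1e-9)   # same eps; default weight=1, bias=0
34 |     print(torch.allclose(ours(x), ref(x), atol=1e-5))   # expected: True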
--------------------------------------------------------------------------------
/about_interview/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : __init__.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/31 3:22
7 | # @Classes :
8 |
9 |
10 | if __name__ == '__main__':
11 | pass
12 |
--------------------------------------------------------------------------------
/about_light_net/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : __init__.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/16 21:59
7 | # @Classes :
8 |
9 |
10 | if __name__ == '__main__':
11 | pass
12 |
--------------------------------------------------------------------------------
/about_light_net/mobile_net/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : __init__.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/16 22:00
7 | # @Classes : Note: adapted from the official implementation
8 |
9 |
10 | if __name__ == '__main__':
11 | pass
12 |
--------------------------------------------------------------------------------
/about_transformer/ViT/ViT_model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : ViT_model.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/10 0:38
7 | # @Classes : A simplified ViT model
8 | import torch
9 | import torch.nn as nn
10 |
11 |
12 | class PatchEmbedding(nn.Module):
13 | """
14 | Patch embedding: split the image into patches and embed them
15 | """
16 | def __init__(self, in_channels, patch_size, embed_dim, patch_num, dropout=0.1):
17 | """
18 |
19 | :param in_channels: number of input channels
20 | :param patch_size: patch size; each patch is (patch_size x patch_size)
21 | :param embed_dim: embedding dimension, i.e. the output dimension of the patch convolution, equal to patch_size*patch_size*in_channels
22 | :param patch_num: number of patches
23 | :param dropout: default 0.1
24 | """
25 | super(PatchEmbedding, self).__init__()
26 | # slice patches with a convolution, then flatten
27 | self.get_patch = nn.Sequential(
28 | nn.Conv2d(in_channels=in_channels, out_channels=embed_dim, kernel_size=patch_size, stride=patch_size),
29 | nn.Flatten(2)
30 | )
31 |
32 | # CLS token (randomly initialised); to be concatenated with the patch embeddings its last dimension must equal embed_dim; the first dimension is 1 here and is expanded to the batch size in forward
33 | self.cls_token = nn.Parameter(torch.randn(size=(1, 1, embed_dim)), requires_grad=True)
34 | # positional embedding, also randomly initialised; to be added to the full embedding its second dimension must equal patch_num+1 and its last dimension embed_dim
35 | self.position_embedding = nn.Parameter(torch.randn(size=(1, patch_num+1, embed_dim)), requires_grad=True)
36 | self.dropout = nn.Dropout(p=dropout)
37 |
38 | def forward(self, x):
39 | # slice into patches
40 | x = self.get_patch(x)
41 | # swap the last two dimensions
42 | x = x.permute(0, 2, 1)
43 |
44 | # prepend the CLS token
45 | cls_token = self.cls_token.expand(x.shape[0], -1, -1) # expand the first dimension to the batch size, which is not known until runtime
46 | x = torch.cat([cls_token, x], dim=1) # CLS token first, so it is the token read back as x[:, 0, :] after the encoder
47 | # add the positional embedding
48 | x = x + self.position_embedding
49 |
50 | x = self.dropout(x)
51 | return x
52 |
53 |
54 | class ViT(nn.Module):
55 | """
56 | Build the ViT model
57 | """
58 | def __init__(self, in_channels, patch_size, embed_dim, patch_num, heads_num, activation,
59 | encoders_num, classes_num, dropout=0.1):
60 | """
61 |
62 | :param in_channels:
63 | :param patch_size:
64 | :param embed_dim:
65 | :param patch_num:
66 | :param heads_num: number of heads in multi-head attention
67 | :param activation: activation function
68 | :param encoders_num:
69 | :param classes_num: number of classes
70 | :param dropout:
71 | """
72 | super(ViT, self).__init__()
73 | self.patch_embedding = PatchEmbedding(in_channels, patch_size, embed_dim, patch_num)
74 |
75 | # define the Transformer encoder layer with torch's built-in implementation
76 | encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=heads_num, dropout=dropout, activation=activation,
77 | batch_first=True, norm_first=True)
78 | # stack the encoder layers
79 | self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=encoders_num)
80 |
81 | # MLP classification head
82 | self.MLP = nn.Sequential(
83 | # layer normalisation first
84 | nn.LayerNorm(normalized_shape=embed_dim),
85 | nn.Linear(in_features=embed_dim, out_features=classes_num)
86 | )
87 |
88 | def forward(self, x):
89 | # patch embedding
90 | x = self.patch_embedding(x)
91 | # run through the encoder
92 | x = self.encoder(x)
93 | # classification head: since this is a classification task, only the encoded CLS token (the first position, i.e. index 0 along the sequence dimension) is fed to the MLP
94 | x = self.MLP(x[:, 0, :])
95 | return x
96 |
97 |
98 | def test():
99 | # a random tensor that can be viewed as 3 three-channel images of size 224x224
100 | x = torch.randn(3, 3, 224, 224)
101 | # patch size 16x16, so a 224x224 image yields 14x14 = 196 patches; embed_dim = 16*16*3, 8 heads, 6 encoder layers, assume 10 classes
102 | vit_model = ViT(3, 16, 16*16*3, 14*14, 8, "gelu", 6, 10)
103 | pred = vit_model(x)
104 | print(x.shape)
105 | print(pred.shape)
106 |
107 |
108 | if __name__ == '__main__':
109 | test()
110 |
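111 | # For reference, the shape flow through PatchEmbedding under the test() settings above
112 | # (a sketch added for clarity; batch of 3 RGB 224x224 images, patch_size=16, embed_dim=768):
113 | #   input x:                          (3, 3, 224, 224)
114 | #   after get_patch conv + flatten:   (3, 768, 196)   # 224/16 = 14, 14*14 = 196 patches
115 | #   after permute(0, 2, 1):           (3, 196, 768)
116 | #   after prepending the CLS token:   (3, 197, 768)
117 | #   after adding position_embedding:  (3, 197, 768)   # this is what the encoder sees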
--------------------------------------------------------------------------------
/about_transformer/ViT/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : __init__.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/9 3:57
7 | # @Classes :
8 |
9 |
10 | if __name__ == '__main__':
11 | pass
12 |
--------------------------------------------------------------------------------
/about_transformer/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @Package Name:
5 | # @File Name : __init__.py
6 | # @author : ahua
7 | # @Version : 1.0
8 | # @Start Date : 2024/3/9 3:54
9 | # @Classes :
10 |
11 |
12 | if __name__ == '__main__':
13 | pass
14 |
--------------------------------------------------------------------------------
/about_transformer/attention_is_all_you_need/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : __init__.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/23 23:10
7 | # @Classes :
8 |
9 |
10 | if __name__ == '__main__':
11 | pass
12 |
--------------------------------------------------------------------------------
/about_transformer/attention_is_all_you_need/attention_module.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : attention_module.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/23 23:11
7 | # @Classes : Multi-head attention
8 | import torch
9 | from torch import nn
10 | import math
11 |
12 | from utils_module import LayerNorm
13 |
14 |
15 | class ScaledDotProductAttention(nn.Module):
16 | """根据公式计算QkV"""
17 | def __init__(self, n_d):
18 | """
19 |
20 | :param n_d: dimension per head, used for scaling
21 | """
22 | super(ScaledDotProductAttention, self).__init__()
23 |
24 | self.n_d = n_d
25 | # softmax over the last dimension
26 | self.softmax = nn.Softmax(dim=-1)
27 |
28 | def forward(self, Q, K, V, mask):
29 | # multiply Q by K transposed over dims (2, 3), then scale
30 | attn_score = Q @ K.transpose(2, 3) / math.sqrt(self.n_d)
31 |
32 | if mask is not None:
33 | attn_score = attn_score.masked_fill(mask == 0, float("-inf"))
34 |
35 | attn_score = self.softmax(attn_score)
36 | attn_score = attn_score @ V
37 |
38 | return attn_score
39 |
40 |
41 | class MultiHeadAttention(nn.Module):
42 | """多头注意力,包括残差连接和Norm"""
43 | def __init__(self, d_model, n_head, dropout=0.1, bias=True):
44 | """
45 |
46 | :param d_model: embedding dimension of the input vectors
47 | :param n_head:
48 | :param bias:
49 | """
50 | super(MultiHeadAttention, self).__init__()
51 |
52 | if d_model % n_head != 0:
53 | raise ValueError(
54 | "Embedding dim must be divisible by number of heads in {}. Got: embed_dim={} and num_heads={}".format(
55 | self.__class__.__name__, d_model, n_head
56 | )
57 | )
58 |
59 | self.n_head = n_head
60 | self.d_model = d_model
61 | self.n_d = d_model // n_head
62 |
63 | # projection matrices for Q, K, V
64 | self.w_q = nn.Linear(in_features=d_model, out_features=d_model, bias=bias)
65 | self.w_k = nn.Linear(in_features=d_model, out_features=d_model, bias=bias)
66 | self.w_v = nn.Linear(in_features=d_model, out_features=d_model, bias=bias)
67 |
68 | self.get_attn = ScaledDotProductAttention(self.n_d)
69 |
70 | # final projection after the heads are merged back together
71 | self.w_o = nn.Linear(d_model, d_model)
72 |
73 | self.dropout = nn.Dropout(p=dropout)
74 |
75 | self.layer_norm = LayerNorm(d_model)
76 |
77 | def forward(self, x_q, x_k, x_v, mask=None):
78 | residual = x_q
79 | batch, seq_len, dimension = x_q.shape
80 | # project the inputs to the Q, K, V matrices
81 | q, k, v = self.w_q(x_q), self.w_k(x_k), self.w_v(x_v)
82 |
83 | # split into heads: reshape to a 4-D tensor, then permute (0, 1, 2, 3) -> (0, 2, 1, 3)
84 | q = q.view(batch, seq_len, self.n_head, self.n_d).permute(0, 2, 1, 3)
85 | k = k.view(batch, seq_len, self.n_head, self.n_d).permute(0, 2, 1, 3)
86 | v = v.view(batch, seq_len, self.n_head, self.n_d).permute(0, 2, 1, 3)
87 |
88 | attn_score = self.get_attn(q, k, v, mask)
89 |
90 | # permute back, make the memory contiguous, then merge the heads into a 3-D tensor
91 | attn_score = attn_score.permute(0, 2, 1, 3).contiguous().view(batch, seq_len, dimension)
92 |
93 | output = self.w_o(attn_score)
94 | output = self.dropout(output)
95 |
96 | # residual connection and LayerNorm
97 | output = self.layer_norm(output + residual)
98 | return output
99 |
100 |
101 | def test():
102 | d_model = 1024
103 | n_head = 8
104 |
105 | x = torch.randn(32, 64, 1024) # Batch, Time, Dimension
106 | print(x.shape)
107 |
108 | att_model = MultiHeadAttention(d_model, n_head)
109 | out = att_model(x, x, x)
110 | print(out.shape)
111 |
112 |
113 | if __name__ == '__main__':
114 | test()
115 |
--------------------------------------------------------------------------------
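Usage sketch (not part of the repository): how a causal mask interacts with the module above. It assumes the script is run inside about_transformer/attention_is_all_you_need so the flat import resolves; the sizes are arbitrary.

import torch
from attention_module import MultiHeadAttention

d_model, n_head, seq_len = 64, 4, 10
x = torch.randn(2, seq_len, d_model)  # (batch, seq_len, d_model)

# lower-triangular causal mask: position i may only attend to positions <= i;
# shape (seq_len, seq_len) broadcasts against the (batch, n_head, seq_len, seq_len) scores
causal_mask = torch.tril(torch.ones(seq_len, seq_len, dtype=torch.bool))

mha = MultiHeadAttention(d_model, n_head)
out = mha(x, x, x, mask=causal_mask)
print(out.shape)  # torch.Size([2, 10, 64])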
/about_transformer/attention_is_all_you_need/transformer_decoder.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : transformer_decoder.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/26 0:17
7 | # @Classes :
8 | import torch
9 | from torch import nn
10 |
11 | from attention_module import MultiHeadAttention
12 | from utils_module import PositionwiseFeedForwardNet
13 | from utils_module import PositionalEmbedding
14 |
15 |
16 | class DecoderLayer(nn.Module):
17 | def __init__(self, d_model, n_head, ffn_hidden, dropout=0.1):
18 | super(DecoderLayer, self).__init__()
19 | self.causal_attention = MultiHeadAttention(d_model, n_head, dropout)
20 |
21 | self.cross_attention = MultiHeadAttention(d_model, n_head, dropout)
22 |
23 | self.ffn = PositionwiseFeedForwardNet(d_model, ffn_hidden, dropout)
24 |
25 | def forward(self, dec, enc, causal_mask, padding_mask):
26 | """
27 |
28 | :param dec: input from the decoder side
29 | :param enc: output of the encoder (the memory)
30 | :param causal_mask: lower-triangular mask that stops positions from seeing future information
31 | :param padding_mask: marks the padded positions of the input sequence as not attendable, so the model does not waste attention on meaningless padding during training
32 | :return:
33 | """
34 | x = self.causal_attention(dec, dec, dec, causal_mask)
35 |
36 | x = self.cross_attention(x, enc, enc, padding_mask)
37 |
38 | x = self.ffn(x)
39 |
40 | return x
41 |
42 |
43 | class Decoder(nn.Module):
44 | def __init__(self, dec_vocabulary_size, d_model=512, n_head=8, ffn_hidden=2048, max_len=5000,
45 | n_layer=6, dropout=0.1):
46 | super(Decoder, self).__init__()
47 |
48 | self.token_embedding = nn.Embedding(dec_vocabulary_size, d_model, padding_idx=1)
49 | self.embedding = PositionalEmbedding(d_model, max_len, dropout)
50 |
51 | self.layers = nn.ModuleList(
52 | [DecoderLayer(d_model, n_head, ffn_hidden, dropout) for _ in range(n_layer)]
53 | )
54 |
55 | def forward(self, dec, enc, causal_mask, padding_mask):
56 | dec = self.token_embedding(dec)
57 | dec = self.embedding(dec)
58 |
59 | for layer in self.layers:
60 | dec = layer(dec, enc, causal_mask, padding_mask)
61 |
62 | return dec
63 |
64 |
65 | if __name__ == '__main__':
66 | pass
67 |
--------------------------------------------------------------------------------
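Usage sketch (not part of the repository) for a single DecoderLayer, showing that the encoder memory may have a different length from the decoder input once the K/V projections in attention_module.py are reshaped with their own sequence length. Run from inside the attention_is_all_you_need directory; the padding mask is omitted (None) and the sizes are arbitrary.

import torch
from transformer_decoder import DecoderLayer

d_model, n_head, ffn_hidden = 64, 4, 256
dec_in = torch.randn(2, 7, d_model)    # decoder states, target length 7
enc_out = torch.randn(2, 12, d_model)  # encoder memory, source length 12

# causal mask over the target positions only
causal_mask = torch.tril(torch.ones(7, 7, dtype=torch.bool))

layer = DecoderLayer(d_model, n_head, ffn_hidden)
out = layer(dec_in, enc_out, causal_mask, None)  # no padding mask in this toy example
print(out.shape)  # torch.Size([2, 7, 64])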
/about_transformer/attention_is_all_you_need/transformer_encoder.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : transformer_encoder.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/23 23:14
7 | # @Classes : Encoder
8 | import torch
9 | from torch import nn
10 |
11 | from attention_module import MultiHeadAttention
12 | from utils_module import PositionwiseFeedForwardNet
13 | from utils_module import PositionalEmbedding
14 |
15 |
16 | class EncoderLayer(nn.Module):
17 | def __init__(self, d_model, n_head, ffn_hidden, dropout=0.1):
18 | super(EncoderLayer, self).__init__()
19 | self.attention = MultiHeadAttention(d_model, n_head, dropout)
20 |
21 | self.ffn = PositionwiseFeedForwardNet(d_model, ffn_hidden, dropout)
22 |
23 | def forward(self, x, mask=None):
24 | x = self.attention(x, x, x, mask)
25 | x = self.ffn(x)
26 |
27 | return x
28 |
29 |
30 | class Encoder(nn.Module):
31 | def __init__(self, enc_vocabulary_size, d_model=512, n_head=8, ffn_hidden=2048, max_len=5000,
32 | n_layer=6, dropout=0.1):
33 | super(Encoder, self).__init__()
34 |
35 | self.token_embedding = nn.Embedding(enc_vocabulary_size, d_model, padding_idx=1)
36 | self.embedding = PositionalEmbedding(d_model, max_len, dropout)
37 |
38 | self.layers = nn.ModuleList(
39 | [EncoderLayer(d_model, n_head, ffn_hidden, dropout) for _ in range(n_layer)]
40 | )
41 |
42 | def forward(self, x, padding_mask=None):
43 | x = self.token_embedding(x)
44 | x = self.embedding(x)
45 |
46 | for layer in self.layers:
47 | x = layer(x, padding_mask)
48 | return x
49 |
50 |
51 | if __name__ == '__main__':
52 | pass
53 |
--------------------------------------------------------------------------------
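Usage sketch (not part of the repository) for the Encoder: token ids go in, contextual vectors come out. The pad id 1 matches the hard-coded padding_idx of the embedding; everything else is an arbitrary choice.

import torch
from transformer_encoder import Encoder

vocab_size, pad_idx = 1000, 1
src = torch.randint(2, vocab_size, (2, 20))  # (batch, seq_len) token ids
src[:, -4:] = pad_idx                        # pretend the last 4 positions are padding

# padding mask, True where attention is allowed; (batch, 1, 1, seq_len) broadcasts over heads and queries
padding_mask = (src != pad_idx).unsqueeze(1).unsqueeze(2)

encoder = Encoder(enc_vocabulary_size=vocab_size, d_model=128, n_head=4, ffn_hidden=256, n_layer=2)
out = encoder(src, padding_mask)
print(out.shape)  # torch.Size([2, 20, 128])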
/about_transformer/attention_is_all_you_need/transformer_model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : transformer_model.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/23 23:11
7 | # @Classes : Build the full Transformer
8 | import torch
9 | from torch import nn
10 |
11 | from transformer_encoder import Encoder
12 | from transformer_decoder import Decoder
13 |
14 |
15 | class Transformer(nn.Module):
16 | def __init__(self, src_pad_idx, tgt_pad_idx, enc_vocabulary_size, dec_vocabulary_size, d_model=512,
17 | n_head=8, ffn_hidden=2048, max_len=5000, n_layers=6, dropout=0.1):
18 | """
19 |
20 | :param src_pad_idx: padding token id on the source side
21 | :param tgt_pad_idx: padding token id on the target side
22 | :param enc_vocabulary_size: vocabulary size of the source
23 | :param dec_vocabulary_size: vocabulary size of the target
24 | :param max_len:
25 | :param d_model:
26 | :param n_head:
27 | :param ffn_hidden:
28 | :param n_layers:
29 | :param dropout:
30 | """
31 | super(Transformer, self).__init__()
32 |
33 | self.src_pad_idx = src_pad_idx
34 | self.tgt_pad_idx = tgt_pad_idx
35 |
36 | # encoder stack
37 | self.encoder = Encoder(enc_vocabulary_size, d_model, n_head, ffn_hidden, max_len,
38 | n_layers, dropout)
39 | # decoder stack
40 | self.decoder = Decoder(dec_vocabulary_size, d_model, n_head, ffn_hidden, max_len,
41 | n_layers, dropout)
42 | # output layer: a linear projection onto the target vocabulary
43 | self.fc = nn.Linear(d_model, dec_vocabulary_size)
44 |
45 | def _make_causal_mask(self, q, k):
46 | # take seq_len from dim 1: the mask is applied to the QK^T scores, so its size must match them
47 | len_q, len_k = q.size(1), k.size(1)
48 | # lower-triangular (causal) mask, built on the same device as the inputs
49 | mask = torch.tril(torch.ones(len_q, len_k, dtype=torch.bool, device=q.device))
50 | return mask
51 |
52 | def _make_padding_mask(self, q, k, pad_idx_q, pad_idx_k):
53 | len_q, len_k = q.size(1), k.size(1)
54 |
55 | # the mask should be broadcastable to (batch, n_head, len_q, len_k)
56 | # True where the token is not pad_idx; add two extra dims so it can be expanded over the heads and len_k
57 | q = q.ne(pad_idx_q).unsqueeze(1).unsqueeze(3)
58 | # repeat along the len_k dimension
59 | q = q.repeat(1, 1, 1, len_k)
60 |
61 | k = k.ne(pad_idx_k).unsqueeze(1).unsqueeze(2)
62 | k = k.repeat(1, 1, len_q, 1)
63 |
64 | mask = q & k
65 | return mask
66 |
67 | def forward(self, src, tgt):
68 | # padding mask for the encoder: Q and K both come from the source here
69 | enc_padding_mask = self._make_padding_mask(src, src, self.src_pad_idx, self.src_pad_idx)
70 | # causal mask for the decoder: it must hide future positions and the padding at the same time
71 | dec_causal_mask = self._make_padding_mask(tgt, tgt, self.tgt_pad_idx, self.tgt_pad_idx) & \
72 | self._make_causal_mask(tgt, tgt)
73 | # padding mask for the cross-attention (queries from the target, keys from the source)
74 | cross_padding_mask = self._make_padding_mask(tgt, src, self.tgt_pad_idx, self.src_pad_idx)
75 |
76 | enc = self.encoder(src, enc_padding_mask)
77 | dec = self.decoder(tgt, enc, dec_causal_mask, cross_padding_mask)
78 | output = self.fc(dec)
79 | return output
80 |
81 |
82 | if __name__ == '__main__':
83 | pass
84 |
--------------------------------------------------------------------------------
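End-to-end sketch (not part of the repository): one forward pass of the assembled Transformer on random token ids. Vocabulary sizes, lengths and model width are arbitrary; the pad id 1 matches the padding_idx used by the encoder and decoder embeddings, and the source and target deliberately have different lengths to exercise the cross-attention masks.

import torch
from transformer_model import Transformer

pad_idx = 1
src_vocab, tgt_vocab = 1000, 1200

model = Transformer(src_pad_idx=pad_idx, tgt_pad_idx=pad_idx,
                    enc_vocabulary_size=src_vocab, dec_vocabulary_size=tgt_vocab,
                    d_model=128, n_head=4, ffn_hidden=256, n_layers=2)

src = torch.randint(2, src_vocab, (2, 20))  # (batch, src_len)
tgt = torch.randint(2, tgt_vocab, (2, 15))  # (batch, tgt_len)
src[:, -3:] = pad_idx                       # fake some padding
tgt[:, -2:] = pad_idx

logits = model(src, tgt)
print(logits.shape)  # torch.Size([2, 15, 1200]): per-position scores over the target vocabulary

For training, these logits would typically feed a cross-entropy loss with ignore_index=pad_idx against the shifted target sequence.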
/about_transformer/attention_is_all_you_need/utils_module.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : utils_module.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/23 23:13
7 | # @Classes : LayerNorm, the position-wise feed-forward network and the positional embedding
8 | import torch
9 | from torch import nn
10 |
11 |
12 | class LayerNorm(nn.Module):
13 | """也可以直接用nn.LayerNorm"""
14 | def __init__(self, d_model, eps=1e-9):
15 | super(LayerNorm, self).__init__()
16 | # two learnable parameters, scale (weight) and shift (beta), so the normalised values are not stuck in the activation's linear region, which would weaken its non-linearity
17 | self.weight = nn.Parameter(torch.ones(d_model))
18 | self.beta = nn.Parameter(torch.zeros(d_model))
19 | # keeps the denominator away from zero
20 | self.eps = eps
21 |
22 | def forward(self, x):
23 | # LayerNorm always normalises over the last dimension
24 | mean = x.mean(-1, keepdim=True)
25 | var = x.var(-1, unbiased=False, keepdim=True)
26 | out = (x - mean) / torch.sqrt(var + self.eps)
27 | return self.weight * out + self.beta
28 |
29 |
30 | class PositionwiseFeedForwardNet(nn.Module):
31 | """前馈网络,包括后续残差连接与Norm"""
32 | def __init__(self, d_model, hidden, dropout=0.1):
33 | super(PositionwiseFeedForwardNet, self).__init__()
34 | self.fc = nn.Sequential(
35 | nn.Linear(d_model, hidden),
36 | nn.ReLU(),
37 | nn.Dropout(p=dropout),
38 | nn.Linear(hidden, d_model),
39 | nn.Dropout(p=dropout)
40 | )
41 | self.layer_norm = LayerNorm(d_model)
42 |
43 | def forward(self, x):
44 | residual = x
45 | x = self.fc(x)
46 | output = self.layer_norm(x+residual)
47 | return output
48 |
49 |
50 | class PositionalEmbedding(nn.Module):
51 | """位置编码,输入token embedding返回加上位置编码后的总的embedding"""
52 | def __init__(self, d_model, max_len=5000, dropout=0.1):
53 | super(PositionalEmbedding, self).__init__()
54 | # initialise the encoding table
55 | pe = torch.zeros(max_len, d_model)
56 | # in the original paper the positional encoding is computed directly and never trained,
57 | # so it is registered below as a (non-trainable) buffer rather than as a parameter
58 | 
59 | # just follow the formulas from the paper
60 | # position indices
61 | pos = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
62 | # 2i
63 | _2i = torch.arange(0, d_model, 2)
64 | 
65 | # even dimensions
66 | pe[:, 0::2] = torch.sin(pos / (10000 ** (_2i / d_model)))
67 | # odd dimensions
68 | pe[:, 1::2] = torch.cos(pos / (10000 ** (_2i / d_model)))
69 | self.register_buffer("pe", pe)
70 | self.dropout = nn.Dropout(p=dropout)
71 |
72 | def forward(self, x):
73 | """
74 |
75 | :param x: the input token embedding
76 | :return:
77 | """
78 | seq_len = x.shape[1]
79 | x = x + self.pe[:seq_len, :]
80 | return self.dropout(x)
81 |
82 |
83 | if __name__ == '__main__':
84 | pass
85 |
--------------------------------------------------------------------------------
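Quick sanity check (not part of the repository): the hand-written LayerNorm matches nn.LayerNorm up to the slightly different eps (1e-9 here vs. 1e-5 in PyTorch), and PositionalEmbedding keeps the input shape. A loose tolerance absorbs the eps difference.

import torch
from torch import nn
from utils_module import LayerNorm, PositionalEmbedding

x = torch.randn(2, 5, 16)

ours, ref = LayerNorm(16), nn.LayerNorm(16)
print(torch.allclose(ours(x), ref(x), atol=1e-4))  # expected True: same normalisation, default weight=1, beta=0

pos_emb = PositionalEmbedding(d_model=16, max_len=100, dropout=0.0)
print(pos_emb(x).shape)  # torch.Size([2, 5, 16]): token embedding plus the fixed sinusoidal table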
/about_transformer/efficient_vit/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : __init__.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/9 4:00
7 | # @Classes :
8 |
9 |
10 | if __name__ == '__main__':
11 | pass
12 |
--------------------------------------------------------------------------------
/about_transformer/mobile_vit/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : __init__.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/16 21:56
7 | # @Classes : MobileViT implementation, adapted from the official code released by Apple
8 |
9 |
10 | if __name__ == '__main__':
11 | pass
12 |
--------------------------------------------------------------------------------
/about_transformer/mobile_vit/model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : model.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/16 21:58
7 | # @Classes :
8 | """
9 | original code from apple:
10 | https://github.com/apple/ml-cvnets/blob/main/cvnets/models/classification/mobilevit.py
11 | """
12 |
13 | from typing import Optional, Tuple, Union, Dict
14 | import math
15 | import torch
16 | import torch.nn as nn
17 | from torch import Tensor
18 | from torch.nn import functional as F
19 | import torchvision.models as models
20 |
21 | from transformer_encoder import TransformerEncoder
22 | from model_config import get_config
23 |
24 |
25 | def make_divisible(
26 | v: Union[float, int],
27 | divisor: Optional[int] = 8,
28 | min_value: Optional[Union[float, int]] = None,
29 | ) -> Union[float, int]:
30 | """
31 | This function is taken from the original tf repo.
32 | It ensures that all layers have a channel number that is divisible by 8
33 | It can be seen here:
34 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
35 | :param v:
36 | :param divisor:
37 | :param min_value:
38 | :return:
39 | """
40 | if min_value is None:
41 | min_value = divisor
42 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
43 | # Make sure that round down does not go down by more than 10%.
44 | if new_v < 0.9 * v:
45 | new_v += divisor
46 | return new_v
47 |
48 |
49 | class ConvLayer(nn.Module):
50 | """
51 | Applies a 2D convolution over an input
52 |
53 | Args:
54 | in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H_{in}, W_{in})`
55 | out_channels (int): :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H_{out}, W_{out})`
56 | kernel_size (Union[int, Tuple[int, int]]): Kernel size for convolution.
57 | stride (Union[int, Tuple[int, int]]): Stride for convolution. Default: 1
58 | groups (Optional[int]): Number of groups in convolution. Default: 1
59 | bias (Optional[bool]): Use bias. Default: ``False``
60 | use_norm (Optional[bool]): Use normalization layer after convolution. Default: ``True``
61 | use_act (Optional[bool]): Use activation layer after convolution (or convolution and normalization).
62 | Default: ``True``
63 |
64 | Shape:
65 | - Input: :math:`(N, C_{in}, H_{in}, W_{in})`
66 | - Output: :math:`(N, C_{out}, H_{out}, W_{out})`
67 |
68 | .. note::
69 | For depth-wise convolution, `groups=C_{in}=C_{out}`.
70 | """
71 |
72 | def __init__(
73 | self,
74 | in_channels: int,
75 | out_channels: int,
76 | kernel_size: Union[int, Tuple[int, int]],
77 | stride: Optional[Union[int, Tuple[int, int]]] = 1,
78 | groups: Optional[int] = 1,
79 | bias: Optional[bool] = False,
80 | use_norm: Optional[bool] = True,
81 | use_act: Optional[bool] = True,
82 | ) -> None:
83 | super().__init__()
84 |
85 | if isinstance(kernel_size, int):
86 | kernel_size = (kernel_size, kernel_size)
87 |
88 | if isinstance(stride, int):
89 | stride = (stride, stride)
90 |
91 | assert isinstance(kernel_size, Tuple)
92 | assert isinstance(stride, Tuple)
93 |
94 | padding = (
95 | int((kernel_size[0] - 1) / 2),
96 | int((kernel_size[1] - 1) / 2),
97 | )
98 |
99 | block = nn.Sequential()
100 |
101 | conv_layer = nn.Conv2d(
102 | in_channels=in_channels,
103 | out_channels=out_channels,
104 | kernel_size=kernel_size,
105 | stride=stride,
106 | groups=groups,
107 | padding=padding,
108 | bias=bias
109 | )
110 |
111 | block.add_module(name="conv", module=conv_layer)
112 |
113 | if use_norm:
114 | norm_layer = nn.BatchNorm2d(num_features=out_channels, momentum=0.1)
115 | block.add_module(name="norm", module=norm_layer)
116 |
117 | if use_act:
118 | act_layer = nn.SiLU()
119 | block.add_module(name="act", module=act_layer)
120 |
121 | self.block = block
122 |
123 | def forward(self, x: Tensor) -> Tensor:
124 | return self.block(x)
125 |
126 |
127 | class InvertedResidual(nn.Module):
128 | """
129 | This class implements the inverted residual block, as described in `MobileNetv2 `_ paper
130 |
131 | Args:
132 | in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H_{in}, W_{in})`
133 | out_channels (int): :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H_{out}, W_{out})`
134 | stride (int): Use convolutions with a stride. Default: 1
135 | expand_ratio (Union[int, float]): Expand the input channels by this factor in depth-wise conv
136 | skip_connection (Optional[bool]): Use skip-connection. Default: True
137 |
138 | Shape:
139 | - Input: :math:`(N, C_{in}, H_{in}, W_{in})`
140 | - Output: :math:`(N, C_{out}, H_{out}, W_{out})`
141 |
142 | .. note::
143 | If `in_channels != out_channels` and `stride > 1`, we set `skip_connection=False`
144 |
145 | """
146 |
147 | def __init__(
148 | self,
149 | in_channels: int,
150 | out_channels: int,
151 | stride: int,
152 | expand_ratio: Union[int, float],
153 | skip_connection: Optional[bool] = True,
154 | ) -> None:
155 | assert stride in [1, 2]
156 | hidden_dim = make_divisible(int(round(in_channels * expand_ratio)), 8)
157 |
158 | super().__init__()
159 |
160 | block = nn.Sequential()
161 | if expand_ratio != 1:
162 | block.add_module(
163 | name="exp_1x1",
164 | module=ConvLayer(
165 | in_channels=in_channels,
166 | out_channels=hidden_dim,
167 | kernel_size=1
168 | ),
169 | )
170 |
171 | block.add_module(
172 | name="conv_3x3",
173 | module=ConvLayer(
174 | in_channels=hidden_dim,
175 | out_channels=hidden_dim,
176 | stride=stride,
177 | kernel_size=3,
178 | groups=hidden_dim
179 | ),
180 | )
181 |
182 | block.add_module(
183 | name="red_1x1",
184 | module=ConvLayer(
185 | in_channels=hidden_dim,
186 | out_channels=out_channels,
187 | kernel_size=1,
188 | use_act=False,
189 | use_norm=True,
190 | ),
191 | )
192 |
193 | self.block = block
194 | self.in_channels = in_channels
195 | self.out_channels = out_channels
196 | self.exp = expand_ratio
197 | self.stride = stride
198 | self.use_res_connect = (
199 | self.stride == 1 and in_channels == out_channels and skip_connection
200 | )
201 |
202 | def forward(self, x: Tensor, *args, **kwargs) -> Tensor:
203 | if self.use_res_connect:
204 | return x + self.block(x)
205 | else:
206 | return self.block(x)
207 |
208 |
209 | class MobileViTBlock(nn.Module):
210 | """
211 | This class defines the `MobileViT block `_
212 |
213 | Args:
214 | opts: command line arguments (not used in this standalone version)
215 | in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H, W)`
216 | transformer_dim (int): Input dimension to the transformer unit
217 | ffn_dim (int): Dimension of the FFN block
218 | n_transformer_blocks (int): Number of transformer blocks. Default: 2
219 | head_dim (int): Head dimension in the multi-head attention. Default: 32
220 | attn_dropout (float): Dropout in multi-head attention. Default: 0.0
221 | dropout (float): Dropout rate. Default: 0.0
222 | ffn_dropout (float): Dropout between FFN layers in transformer. Default: 0.0
223 | patch_h (int): Patch height for unfolding operation. Default: 8
224 | patch_w (int): Patch width for unfolding operation. Default: 8
225 | transformer_norm_layer (Optional[str]): Normalization layer in the transformer block. Default: layer_norm
226 | conv_ksize (int): Kernel size to learn local representations in MobileViT block. Default: 3
227 | no_fusion (Optional[bool]): Do not combine the input and output feature maps. Default: False
228 | """
229 |
230 | def __init__(
231 | self,
232 | in_channels: int,
233 | transformer_dim: int,
234 | ffn_dim: int,
235 | n_transformer_blocks: int = 2,
236 | head_dim: int = 32,
237 | attn_dropout: float = 0.0,
238 | dropout: float = 0.0,
239 | ffn_dropout: float = 0.0,
240 | patch_h: int = 8,
241 | patch_w: int = 8,
242 | conv_ksize: Optional[int] = 3,
243 | *args,
244 | **kwargs
245 | ) -> None:
246 | super().__init__()
247 |
248 | conv_3x3_in = ConvLayer(
249 | in_channels=in_channels,
250 | out_channels=in_channels,
251 | kernel_size=conv_ksize,
252 | stride=1
253 | )
254 | conv_1x1_in = ConvLayer(
255 | in_channels=in_channels,
256 | out_channels=transformer_dim,
257 | kernel_size=1,
258 | stride=1,
259 | use_norm=False,
260 | use_act=False
261 | )
262 |
263 | conv_1x1_out = ConvLayer(
264 | in_channels=transformer_dim,
265 | out_channels=in_channels,
266 | kernel_size=1,
267 | stride=1
268 | )
269 | conv_3x3_out = ConvLayer(
270 | in_channels=2 * in_channels,
271 | out_channels=in_channels,
272 | kernel_size=conv_ksize,
273 | stride=1
274 | )
275 |
276 | self.local_rep = nn.Sequential()
277 | self.local_rep.add_module(name="conv_3x3", module=conv_3x3_in)
278 | self.local_rep.add_module(name="conv_1x1", module=conv_1x1_in)
279 |
280 | assert transformer_dim % head_dim == 0
281 | num_heads = transformer_dim // head_dim
282 |
283 | global_rep = [
284 | TransformerEncoder(
285 | embed_dim=transformer_dim,
286 | ffn_latent_dim=ffn_dim,
287 | num_heads=num_heads,
288 | attn_dropout=attn_dropout,
289 | dropout=dropout,
290 | ffn_dropout=ffn_dropout
291 | )
292 | for _ in range(n_transformer_blocks)
293 | ]
294 | global_rep.append(nn.LayerNorm(transformer_dim))
295 | self.global_rep = nn.Sequential(*global_rep)
296 |
297 | self.conv_proj = conv_1x1_out
298 | self.fusion = conv_3x3_out
299 |
300 | self.patch_h = patch_h
301 | self.patch_w = patch_w
302 | self.patch_area = self.patch_w * self.patch_h
303 |
304 | self.cnn_in_dim = in_channels
305 | self.cnn_out_dim = transformer_dim
306 | self.n_heads = num_heads
307 | self.ffn_dim = ffn_dim
308 | self.dropout = dropout
309 | self.attn_dropout = attn_dropout
310 | self.ffn_dropout = ffn_dropout
311 | self.n_blocks = n_transformer_blocks
312 | self.conv_ksize = conv_ksize
313 |
314 | def unfolding(self, x: Tensor) -> Tuple[Tensor, Dict]:
315 | patch_w, patch_h = self.patch_w, self.patch_h
316 | patch_area = patch_w * patch_h
317 | batch_size, in_channels, orig_h, orig_w = x.shape
318 |
319 | new_h = int(math.ceil(orig_h / self.patch_h) * self.patch_h)
320 | new_w = int(math.ceil(orig_w / self.patch_w) * self.patch_w)
321 |
322 | interpolate = False
323 | if new_w != orig_w or new_h != orig_h:
324 | # Note: Padding can be done, but then it needs to be handled in attention function.
325 | x = F.interpolate(x, size=(new_h, new_w), mode="bilinear", align_corners=False)
326 | interpolate = True
327 |
328 | # number of patches along width and height
329 | num_patch_w = new_w // patch_w # n_w
330 | num_patch_h = new_h // patch_h # n_h
331 | num_patches = num_patch_h * num_patch_w # N
332 |
333 | # [B, C, H, W] -> [B * C * n_h, p_h, n_w, p_w]
334 | x = x.reshape(batch_size * in_channels * num_patch_h, patch_h, num_patch_w, patch_w)
335 | # [B * C * n_h, p_h, n_w, p_w] -> [B * C * n_h, n_w, p_h, p_w]
336 | x = x.transpose(1, 2)
337 | # [B * C * n_h, n_w, p_h, p_w] -> [B, C, N, P] where P = p_h * p_w and N = n_h * n_w
338 | x = x.reshape(batch_size, in_channels, num_patches, patch_area)
339 | # [B, C, N, P] -> [B, P, N, C]
340 | x = x.transpose(1, 3)
341 | # [B, P, N, C] -> [BP, N, C]
342 | x = x.reshape(batch_size * patch_area, num_patches, -1)
343 |
344 | info_dict = {
345 | "orig_size": (orig_h, orig_w),
346 | "batch_size": batch_size,
347 | "interpolate": interpolate,
348 | "total_patches": num_patches,
349 | "num_patches_w": num_patch_w,
350 | "num_patches_h": num_patch_h,
351 | }
352 |
353 | return x, info_dict
354 |
355 | def folding(self, x: Tensor, info_dict: Dict) -> Tensor:
356 | n_dim = x.dim()
357 | assert n_dim == 3, "Tensor should be of shape BPxNxC. Got: {}".format(
358 | x.shape
359 | )
360 | # [BP, N, C] --> [B, P, N, C]
361 | x = x.contiguous().view(
362 | info_dict["batch_size"], self.patch_area, info_dict["total_patches"], -1
363 | )
364 |
365 | batch_size, pixels, num_patches, channels = x.size()
366 | num_patch_h = info_dict["num_patches_h"]
367 | num_patch_w = info_dict["num_patches_w"]
368 |
369 | # [B, P, N, C] -> [B, C, N, P]
370 | x = x.transpose(1, 3)
371 | # [B, C, N, P] -> [B*C*n_h, n_w, p_h, p_w]
372 | x = x.reshape(batch_size * channels * num_patch_h, num_patch_w, self.patch_h, self.patch_w)
373 | # [B*C*n_h, n_w, p_h, p_w] -> [B*C*n_h, p_h, n_w, p_w]
374 | x = x.transpose(1, 2)
375 | # [B*C*n_h, p_h, n_w, p_w] -> [B, C, H, W]
376 | x = x.reshape(batch_size, channels, num_patch_h * self.patch_h, num_patch_w * self.patch_w)
377 | if info_dict["interpolate"]:
378 | x = F.interpolate(
379 | x,
380 | size=info_dict["orig_size"],
381 | mode="bilinear",
382 | align_corners=False,
383 | )
384 | return x
385 |
386 | def forward(self, x: Tensor) -> Tensor:
387 | res = x
388 |
389 | fm = self.local_rep(x)
390 |
391 | # convert feature map to patches
392 | patches, info_dict = self.unfolding(fm)
393 |
394 | # learn global representations
395 | for transformer_layer in self.global_rep:
396 | patches = transformer_layer(patches)
397 |
398 | # [B x Patch x Patches x C] -> [B x C x Patches x Patch]
399 | fm = self.folding(x=patches, info_dict=info_dict)
400 |
401 | fm = self.conv_proj(fm)
402 |
403 | fm = self.fusion(torch.cat((res, fm), dim=1))
404 | return fm
405 |
406 |
407 | class MobileViT(nn.Module):
408 | """
409 | This class implements the `MobileViT architecture `_
410 | """
411 |
412 | def __init__(self, model_cfg: Dict, num_classes: int = 1000):
413 | super().__init__()
414 |
415 | image_channels = 3
416 | out_channels = 16
417 |
418 | self.conv_1 = ConvLayer(
419 | in_channels=image_channels,
420 | out_channels=out_channels,
421 | kernel_size=3,
422 | stride=2
423 | )
424 |
425 | self.layer_1, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer1"])
426 | self.layer_2, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer2"])
427 | self.layer_3, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer3"])
428 | self.layer_4, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer4"])
429 | self.layer_5, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer5"])
430 |
431 | exp_channels = min(model_cfg["last_layer_exp_factor"] * out_channels, 960)
432 | self.conv_1x1_exp = ConvLayer(
433 | in_channels=out_channels,
434 | out_channels=exp_channels,
435 | kernel_size=1
436 | )
437 |
438 | self.classifier = nn.Sequential()
439 | self.classifier.add_module(name="global_pool", module=nn.AdaptiveAvgPool2d(1))
440 | self.classifier.add_module(name="flatten", module=nn.Flatten())
441 | if 0.0 < model_cfg["cls_dropout"] < 1.0:
442 | self.classifier.add_module(name="dropout", module=nn.Dropout(p=model_cfg["cls_dropout"]))
443 | self.classifier.add_module(name="fc", module=nn.Linear(in_features=exp_channels, out_features=num_classes))
444 |
445 | # weight init
446 | self.apply(self.init_parameters)
447 |
448 | def _make_layer(self, input_channel, cfg: Dict) -> Tuple[nn.Sequential, int]:
449 | block_type = cfg.get("block_type", "mobilevit")
450 | if block_type.lower() == "mobilevit":
451 | return self._make_mit_layer(input_channel=input_channel, cfg=cfg)
452 | else:
453 | return self._make_mobilenet_layer(input_channel=input_channel, cfg=cfg)
454 |
455 | @staticmethod
456 | def _make_mobilenet_layer(input_channel: int, cfg: Dict) -> Tuple[nn.Sequential, int]:
457 | output_channels = cfg.get("out_channels")
458 | num_blocks = cfg.get("num_blocks", 2)
459 | expand_ratio = cfg.get("expand_ratio", 4)
460 | block = []
461 |
462 | for i in range(num_blocks):
463 | stride = cfg.get("stride", 1) if i == 0 else 1
464 |
465 | layer = InvertedResidual(
466 | in_channels=input_channel,
467 | out_channels=output_channels,
468 | stride=stride,
469 | expand_ratio=expand_ratio
470 | )
471 | block.append(layer)
472 | input_channel = output_channels
473 |
474 | return nn.Sequential(*block), input_channel
475 |
476 | @staticmethod
477 | def _make_mit_layer(input_channel: int, cfg: Dict) -> [nn.Sequential, int]:
478 | stride = cfg.get("stride", 1)
479 | block = []
480 |
481 | if stride == 2:
482 | layer = InvertedResidual(
483 | in_channels=input_channel,
484 | out_channels=cfg.get("out_channels"),
485 | stride=stride,
486 | expand_ratio=cfg.get("mv_expand_ratio", 4)
487 | )
488 |
489 | block.append(layer)
490 | input_channel = cfg.get("out_channels")
491 |
492 | transformer_dim = cfg["transformer_channels"]
493 | ffn_dim = cfg.get("ffn_dim")
494 | num_heads = cfg.get("num_heads", 4)
495 | head_dim = transformer_dim // num_heads
496 |
497 | if transformer_dim % head_dim != 0:
498 | raise ValueError("Transformer input dimension should be divisible by head dimension. "
499 | "Got {} and {}.".format(transformer_dim, head_dim))
500 |
501 | block.append(MobileViTBlock(
502 | in_channels=input_channel,
503 | transformer_dim=transformer_dim,
504 | ffn_dim=ffn_dim,
505 | n_transformer_blocks=cfg.get("transformer_blocks", 1),
506 | patch_h=cfg.get("patch_h", 2),
507 | patch_w=cfg.get("patch_w", 2),
508 | dropout=cfg.get("dropout", 0.1),
509 | ffn_dropout=cfg.get("ffn_dropout", 0.0),
510 | attn_dropout=cfg.get("attn_dropout", 0.1),
511 | head_dim=head_dim,
512 | conv_ksize=3
513 | ))
514 |
515 | return nn.Sequential(*block), input_channel
516 |
517 | @staticmethod
518 | def init_parameters(m):
519 | if isinstance(m, nn.Conv2d):
520 | if m.weight is not None:
521 | nn.init.kaiming_normal_(m.weight, mode="fan_out")
522 | if m.bias is not None:
523 | nn.init.zeros_(m.bias)
524 | elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)):
525 | if m.weight is not None:
526 | nn.init.ones_(m.weight)
527 | if m.bias is not None:
528 | nn.init.zeros_(m.bias)
529 | elif isinstance(m, (nn.Linear,)):
530 | if m.weight is not None:
531 | nn.init.trunc_normal_(m.weight, mean=0.0, std=0.02)
532 | if m.bias is not None:
533 | nn.init.zeros_(m.bias)
534 | else:
535 | pass
536 |
537 | def forward(self, x: Tensor) -> Tensor:
538 | x = self.conv_1(x)
539 | x = self.layer_1(x)
540 | x = self.layer_2(x)
541 |
542 | x = self.layer_3(x)
543 | x = self.layer_4(x)
544 | x = self.layer_5(x)
545 | x = self.conv_1x1_exp(x)
546 | x = self.classifier(x)
547 | return x
548 |
549 |
550 | def mobile_vit_xx_small(num_classes: int = 1000):
551 | # pretrain weight link
552 | # https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_xxs.pt
553 | config = get_config("xx_small")
554 | m = MobileViT(config, num_classes=num_classes)
555 | return m
556 |
557 |
558 | def mobile_vit_x_small(num_classes: int = 1000):
559 | # pretrain weight link
560 | # https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_xs.pt
561 | config = get_config("x_small")
562 | m = MobileViT(config, num_classes=num_classes)
563 | return m
564 |
565 |
566 | def mobile_vit_small(num_classes: int = 1000):
567 | # pretrain weight link
568 | # https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_s.pt
569 | config = get_config("small")
570 | m = MobileViT(config, num_classes=num_classes)
571 | return m
572 |
573 | if __name__ == '__main__':
574 | pass
575 |
--------------------------------------------------------------------------------
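Usage sketch (not part of the repository): build the xx_small variant and run one forward pass. It must be launched from inside about_transformer/mobile_vit so that `from model import ...` picks up this directory's model.py and its local transformer_encoder/model_config imports; the class count is arbitrary.

import torch
from model import mobile_vit_xx_small

net = mobile_vit_xx_small(num_classes=10)
x = torch.randn(1, 3, 224, 224)  # standard ImageNet-style input
print(net(x).shape)              # torch.Size([1, 10])
print(sum(p.numel() for p in net.parameters()) / 1e6, "M parameters")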
/about_transformer/mobile_vit/model_config.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : model_config.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/16 21:58
7 | # @Classes :
8 |
9 | # return the parameter set for a MobileViT model of the requested size
10 | def get_config(mode: str = "xx_small") -> dict:
11 | if mode == "xx_small":
12 | mv2_exp_mult = 2
13 | config = {
14 | "layer1": {
15 | "out_channels": 16,
16 | "expand_ratio": mv2_exp_mult,
17 | "num_blocks": 1,
18 | "stride": 1,
19 | "block_type": "mv2",
20 | },
21 | "layer2": {
22 | "out_channels": 24,
23 | "expand_ratio": mv2_exp_mult,
24 | "num_blocks": 3,
25 | "stride": 2,
26 | "block_type": "mv2",
27 | },
28 | "layer3": { # 28x28
29 | "out_channels": 48,
30 | "transformer_channels": 64,
31 | "ffn_dim": 128,
32 | "transformer_blocks": 2,
33 | "patch_h": 2, # 8,
34 | "patch_w": 2, # 8,
35 | "stride": 2,
36 | "mv_expand_ratio": mv2_exp_mult,
37 | "num_heads": 4,
38 | "block_type": "mobilevit",
39 | },
40 | "layer4": { # 14x14
41 | "out_channels": 64,
42 | "transformer_channels": 80,
43 | "ffn_dim": 160,
44 | "transformer_blocks": 4,
45 | "patch_h": 2, # 4,
46 | "patch_w": 2, # 4,
47 | "stride": 2,
48 | "mv_expand_ratio": mv2_exp_mult,
49 | "num_heads": 4,
50 | "block_type": "mobilevit",
51 | },
52 | "layer5": { # 7x7
53 | "out_channels": 80,
54 | "transformer_channels": 96,
55 | "ffn_dim": 192,
56 | "transformer_blocks": 3,
57 | "patch_h": 2,
58 | "patch_w": 2,
59 | "stride": 2,
60 | "mv_expand_ratio": mv2_exp_mult,
61 | "num_heads": 4,
62 | "block_type": "mobilevit",
63 | },
64 | "last_layer_exp_factor": 4,
65 | "cls_dropout": 0.1
66 | }
67 | elif mode == "x_small":
68 | mv2_exp_mult = 4
69 | config = {
70 | "layer1": {
71 | "out_channels": 32,
72 | "expand_ratio": mv2_exp_mult,
73 | "num_blocks": 1,
74 | "stride": 1,
75 | "block_type": "mv2",
76 | },
77 | "layer2": {
78 | "out_channels": 48,
79 | "expand_ratio": mv2_exp_mult,
80 | "num_blocks": 3,
81 | "stride": 2,
82 | "block_type": "mv2",
83 | },
84 | "layer3": { # 28x28
85 | "out_channels": 64,
86 | "transformer_channels": 96,
87 | "ffn_dim": 192,
88 | "transformer_blocks": 2,
89 | "patch_h": 2,
90 | "patch_w": 2,
91 | "stride": 2,
92 | "mv_expand_ratio": mv2_exp_mult,
93 | "num_heads": 4,
94 | "block_type": "mobilevit",
95 | },
96 | "layer4": { # 14x14
97 | "out_channels": 80,
98 | "transformer_channels": 120,
99 | "ffn_dim": 240,
100 | "transformer_blocks": 4,
101 | "patch_h": 2,
102 | "patch_w": 2,
103 | "stride": 2,
104 | "mv_expand_ratio": mv2_exp_mult,
105 | "num_heads": 4,
106 | "block_type": "mobilevit",
107 | },
108 | "layer5": { # 7x7
109 | "out_channels": 96,
110 | "transformer_channels": 144,
111 | "ffn_dim": 288,
112 | "transformer_blocks": 3,
113 | "patch_h": 2,
114 | "patch_w": 2,
115 | "stride": 2,
116 | "mv_expand_ratio": mv2_exp_mult,
117 | "num_heads": 4,
118 | "block_type": "mobilevit",
119 | },
120 | "last_layer_exp_factor": 4,
121 | "cls_dropout": 0.1
122 | }
123 | elif mode == "small":
124 | mv2_exp_mult = 4
125 | config = {
126 | "layer1": {
127 | "out_channels": 32,
128 | "expand_ratio": mv2_exp_mult,
129 | "num_blocks": 1,
130 | "stride": 1,
131 | "block_type": "mv2",
132 | },
133 | "layer2": {
134 | "out_channels": 64,
135 | "expand_ratio": mv2_exp_mult,
136 | "num_blocks": 3,
137 | "stride": 2,
138 | "block_type": "mv2",
139 | },
140 | "layer3": { # 28x28
141 | "out_channels": 96,
142 | "transformer_channels": 144,
143 | "ffn_dim": 288,
144 | "transformer_blocks": 2,
145 | "patch_h": 2,
146 | "patch_w": 2,
147 | "stride": 2,
148 | "mv_expand_ratio": mv2_exp_mult,
149 | "num_heads": 4,
150 | "block_type": "mobilevit",
151 | },
152 | "layer4": { # 14x14
153 | "out_channels": 128,
154 | "transformer_channels": 192,
155 | "ffn_dim": 384,
156 | "transformer_blocks": 4,
157 | "patch_h": 2,
158 | "patch_w": 2,
159 | "stride": 2,
160 | "mv_expand_ratio": mv2_exp_mult,
161 | "num_heads": 4,
162 | "block_type": "mobilevit",
163 | },
164 | "layer5": { # 7x7
165 | "out_channels": 160,
166 | "transformer_channels": 240,
167 | "ffn_dim": 480,
168 | "transformer_blocks": 3,
169 | "patch_h": 2,
170 | "patch_w": 2,
171 | "stride": 2,
172 | "mv_expand_ratio": mv2_exp_mult,
173 | "num_heads": 4,
174 | "block_type": "mobilevit",
175 | },
176 | "last_layer_exp_factor": 4,
177 | "cls_dropout": 0.1
178 | }
179 | else:
180 | raise NotImplementedError
181 |
182 | for k in ["layer1", "layer2", "layer3", "layer4", "layer5"]:
183 | config[k].update({"dropout": 0.1, "ffn_dropout": 0.0, "attn_dropout": 0.0})
184 |
185 | return config
186 |
187 |
188 | if __name__ == '__main__':
189 | pass
190 |
--------------------------------------------------------------------------------
/about_transformer/mobile_vit/transformer_encoder.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : transformer_encoder.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/16 21:58
7 | # @Classes :
8 | from typing import Optional
9 |
10 | import torch
11 | import torch.nn as nn
12 | from torch import Tensor
13 |
14 |
15 | class MultiHeadAttention(nn.Module):
16 | """
17 | Multi-head self-attention module
18 | Input shape is (N, P, C_in), where N is the batch size, P the number of patches and C_in the input embedding dim
19 | The output shape is identical to the input shape
20 | """
21 |
22 | def __init__(
23 | self,
24 | embed_dim: int,
25 | num_heads: int,
26 | attn_dropout: float = 0.0,
27 | bias: bool = True,
28 | *args,
29 | **kwargs
30 | ) -> None:
31 | super().__init__()
32 | # the input embedding dim must be divisible by num_heads
33 | if embed_dim % num_heads != 0:
34 | raise ValueError(
35 | "Embedding dim must be divisible by number of heads in {}. Got: embed_dim={} and num_heads={}".format(
36 | self.__class__.__name__, embed_dim, num_heads
37 | )
38 | )
39 |
40 | self.qkv_proj = nn.Linear(in_features=embed_dim, out_features=3 * embed_dim, bias=bias)
41 |
42 | self.attn_dropout = nn.Dropout(p=attn_dropout)
43 | self.out_proj = nn.Linear(in_features=embed_dim, out_features=embed_dim, bias=bias)
44 |
45 | self.head_dim = embed_dim // num_heads
46 | self.scaling = self.head_dim ** -0.5
47 | self.softmax = nn.Softmax(dim=-1)
48 | self.num_heads = num_heads
49 | self.embed_dim = embed_dim
50 |
51 | def forward(self, x_q: Tensor) -> Tensor:
52 | # [N, P, C]
53 | b_sz, n_patches, in_channels = x_q.shape
54 |
55 | # self-attention
56 | # [N, P, C] -> [N, P, 3C] -> [N, P, 3, h, c] where C = hc
57 | qkv = self.qkv_proj(x_q).reshape(b_sz, n_patches, 3, self.num_heads, -1)
58 |
59 | # [N, P, 3, h, c] -> [N, h, 3, P, C]
60 | qkv = qkv.transpose(1, 3).contiguous()
61 |
62 | # [N, h, 3, P, C] -> [N, h, P, C] x 3
63 | query, key, value = qkv[:, :, 0], qkv[:, :, 1], qkv[:, :, 2]
64 |
65 | query = query * self.scaling
66 |
67 | # [N h, P, c] -> [N, h, c, P]
68 | key = key.transpose(-1, -2)
69 |
70 | # QK^T
71 | # [N, h, P, c] x [N, h, c, P] -> [N, h, P, P]
72 | attn = torch.matmul(query, key)
73 | attn = self.softmax(attn)
74 | attn = self.attn_dropout(attn)
75 |
76 | # weighted sum
77 | # [N, h, P, P] x [N, h, P, c] -> [N, h, P, c]
78 | out = torch.matmul(attn, value)
79 |
80 | # [N, h, P, c] -> [N, P, h, c] -> [N, P, C]
81 | out = out.transpose(1, 2).reshape(b_sz, n_patches, -1)
82 | out = self.out_proj(out)
83 |
84 | return out
85 |
86 |
87 | class TransformerEncoder(nn.Module):
88 | """
89 | This class defines the pre-norm `Transformer encoder `_
90 | Args:
91 | embed_dim (int): :math:`C_{in}` from an expected input of size :math:`(N, P, C_{in})`
92 | ffn_latent_dim (int): Inner dimension of the FFN
93 | num_heads (int) : Number of heads in multi-head attention. Default: 8
94 | attn_dropout (float): Dropout rate for attention in multi-head attention. Default: 0.0
95 | dropout (float): Dropout rate. Default: 0.0
96 | ffn_dropout (float): Dropout between FFN layers. Default: 0.0
97 |
98 | Shape:
99 | - Input: :math:`(N, P, C_{in})` where :math:`N` is batch size, :math:`P` is number of patches,
100 | and :math:`C_{in}` is input embedding dim
101 | - Output: same shape as the input
102 | """
103 |
104 | def __init__(
105 | self,
106 | embed_dim: int,
107 | ffn_latent_dim: int,
108 | num_heads: Optional[int] = 8,
109 | attn_dropout: Optional[float] = 0.0,
110 | dropout: Optional[float] = 0.0,
111 | ffn_dropout: Optional[float] = 0.0,
112 | *args,
113 | **kwargs
114 | ) -> None:
115 | super().__init__()
116 |
117 | attn_unit = MultiHeadAttention(
118 | embed_dim,
119 | num_heads,
120 | attn_dropout=attn_dropout,
121 | bias=True
122 | )
123 |
124 | self.pre_norm_mha = nn.Sequential(
125 | nn.LayerNorm(embed_dim),
126 | attn_unit,
127 | nn.Dropout(p=dropout)
128 | )
129 |
130 | self.pre_norm_ffn = nn.Sequential(
131 | nn.LayerNorm(embed_dim),
132 | nn.Linear(in_features=embed_dim, out_features=ffn_latent_dim, bias=True),
133 | nn.SiLU(),
134 | nn.Dropout(p=ffn_dropout),
135 | nn.Linear(in_features=ffn_latent_dim, out_features=embed_dim, bias=True),
136 | nn.Dropout(p=dropout)
137 | )
138 | self.embed_dim = embed_dim
139 | self.ffn_dim = ffn_latent_dim
140 | self.ffn_dropout = ffn_dropout
141 | self.std_dropout = dropout
142 |
143 | def forward(self, x: Tensor) -> Tensor:
144 | # multi-head attention
145 | res = x
146 | x = self.pre_norm_mha(x)
147 | x = x + res
148 |
149 | # feed forward network
150 | x = x + self.pre_norm_ffn(x)
151 | return x
152 |
153 |
154 | if __name__ == '__main__':
155 | pass
156 |
--------------------------------------------------------------------------------
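Usage sketch (not part of the repository) for this pre-norm encoder block: it operates on (N, P, C) patch tokens and preserves the shape, which is what MobileViTBlock relies on between unfolding and folding. Sizes are arbitrary.

import torch
from transformer_encoder import TransformerEncoder

x = torch.randn(2, 196, 64)  # (N, P, C): 2 images, 196 patch tokens, embedding dim 64

block = TransformerEncoder(embed_dim=64, ffn_latent_dim=128, num_heads=4)
print(block(x).shape)        # torch.Size([2, 196, 64]): pre-norm attention + FFN keep the shape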
/classic_conv/AlexNet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : AlexNet.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/9 3:54
7 | # @Classes :
8 | import torch
9 | from torch import nn
10 |
11 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
12 |
13 |
14 | class AlexNet(nn.Module):
15 | def __init__(self, num_classes=1000):
16 | """AlexNet网络实现
17 |
18 | :param num_classes: 默认1000,因为原论文参加的竞赛类别就是1000
19 | """
20 | super(AlexNet, self).__init__()
21 | # convolutional layers
22 | self.conv = nn.Sequential(
23 | # LRN has since been shown to bring little benefit, so it is omitted here
24 | # first layer: 3 input channels, 96 output channels, 11x11 kernel
25 | nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2), # input:[3, 224, 224] output:[96, 55, 55]
26 | nn.ReLU(),
27 | nn.MaxPool2d(kernel_size=3, stride=2), # output:[96, 27, 27]
28 |
29 | # second layer: smaller kernel and more output channels, so more features are extracted
30 | nn.Conv2d(96, 256, 5, 1, 2), # output: [256, 27, 27]
31 | nn.ReLU(),
32 | nn.MaxPool2d(3, 2), # output: [256, 13, 13]
33 | # three consecutive conv layers with an even smaller window; all but the last further increase the output channels
34 | # no pooling after the first two of them, so the height and width are kept
35 | nn.Conv2d(256, 384, 3, 1, 1), # output: [384, 13, 13]
36 | nn.ReLU(),
37 | nn.Conv2d(384, 384, 3, 1, 1), # output: [384, 13, 13]
38 | nn.ReLU(),
39 | nn.Conv2d(384, 256, 3, 1, 1), # output: [256, 13, 13]
40 | nn.ReLU(),
41 | nn.MaxPool2d(3, 2) # output: [256, 6, 6]
42 | )
43 | # fully-connected layers
44 | self.fc = nn.Sequential(
45 | # first fully-connected layer: input dim 256 * 6 * 6, output dim 4096
46 | nn.Linear(256 * 6 * 6, 4096),
47 | nn.ReLU(),
48 | nn.Dropout(0.5),
49 | nn.Linear(4096, 4096),
50 | nn.ReLU(),
51 | nn.Dropout(0.5),
52 | # output layer
53 | nn.Linear(4096, num_classes),
54 | )
55 |
56 | # forward pass
57 | def forward(self, img):
58 | feature = self.conv(img)
59 | output = self.fc(feature.view(img.shape[0], -1))
60 | return output
61 |
62 |
63 | def test():
64 | # a random batch that can be read as 3 three-channel images of size 227x227
65 | x = torch.randn(3, 3, 227, 227)
66 | alex_model = AlexNet()
67 | pred = alex_model(x)
68 | print(x.shape)
69 | print(pred.shape)
70 |
71 |
72 | if __name__ == '__main__':
73 | test()
74 |
--------------------------------------------------------------------------------
/classic_conv/SENet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : SENet.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/31 9:34
7 | # @Classes : SEBlock, plus the SE basic block used in SE-ResNet18/34 and the SEBottleNeck used in SE-ResNet50/101/152
8 | # the full SE-ResNet itself is not written out here
9 | import torch
10 | from torch import nn
11 |
12 |
13 | class SEBlock(nn.Module):
14 | def __init__(self, in_channel, r=6):
15 | """
16 |
17 | :param in_channel:
18 | :param r: the reduction ratio r of the fully-connected layers in the paper, i.e. the channel scaling factor
19 | """
20 | super(SEBlock, self).__init__()
21 | # global average pooling (Squeeze)
22 | self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
23 | # two fully-connected layers (Excitation)
24 | self.fc = nn.Sequential(
25 | nn.Linear(in_channel, in_channel // r, bias=False),
26 | nn.ReLU(),
27 | nn.Linear(in_channel // r, in_channel, bias=False),
28 | nn.Sigmoid()
29 | )
30 |
31 | def forward(self, x):
32 | b, c, h, w = x.size()
33 | # Squeeze: produce the channel descriptor, a (b, c) tensor
34 | out = self.avg_pool(x).view(b, c)
35 | # Excitation: produce a weight for every channel
36 | out = self.fc(out).view(b, c, 1, 1)
37 | # reweight the features and return
38 | return x * out.expand_as(x)
39 |
40 |
41 | class BasicBlock(nn.Module):
42 | expansion = 1
43 |
44 | def __init__(self, in_channel, out_channel, stride=1, r=6):
45 | super(BasicBlock, self).__init__()
46 | self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False)
47 | self.bn1 = nn.BatchNorm2d(out_channel)
48 | self.relu = nn.ReLU(inplace=True)
49 |
50 | self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False)
51 | self.bn2 = nn.BatchNorm2d(out_channel)
52 |
53 | self.SE = SEBlock(out_channel, r)
54 |
55 | # 1x1 downsample so the residual can still be added when the shapes differ
56 | if stride != 1 or in_channel != self.expansion * out_channel:
57 | self.down_sample = nn.Sequential(nn.Conv2d(in_channel, self.expansion * out_channel, kernel_size=1, stride=stride, bias=False),
58 | nn.BatchNorm2d(self.expansion * out_channel))
59 | else:
60 | self.down_sample = lambda x: x
61 |
62 | def forward(self, x):
63 | residual = self.down_sample(x)
64 |
65 | out = self.relu(self.bn1(self.conv1(x)))
66 | out = self.bn2(self.conv2(out))
67 |
68 | out = self.SE(out)
69 |
70 | out = residual + out
71 | out = self.relu(out)
72 | return out
73 |
74 |
75 | class SEBottleNeck(nn.Module):
76 | expansion = 4
77 |
78 | def __init__(self, in_channel, out_channel, stride=1, r=6):
79 | super(SEBottleNeck, self).__init__()
80 | self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, bias=False)
81 | self.bn1 = nn.BatchNorm2d(out_channel)
82 | self.relu = nn.ReLU(inplace=True)
83 |
84 | self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False)
85 | self.bn2 = nn.BatchNorm2d(out_channel)
86 |
87 | self.conv3 = nn.Conv2d(out_channel, out_channel * self.expansion, kernel_size=1, bias=False)
88 | self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)
89 |
90 | self.SE = SEBlock(self.expansion * out_channel, r)
91 |
92 | # 1x1 downsample so the residual can still be added when the shapes differ
93 | if stride != 1 or in_channel != self.expansion * out_channel:
94 | self.down_sample = nn.Sequential(
95 | nn.Conv2d(in_channel, self.expansion * out_channel, kernel_size=1, stride=stride, bias=False),
96 | nn.BatchNorm2d(self.expansion * out_channel))
97 | else:
98 | self.down_sample = lambda x: x
99 |
100 | def forward(self, x):
101 | residual = self.down_sample(x)
102 |
103 | out = self.relu(self.bn1(self.conv1(x)))
104 |
105 | out = self.relu(self.bn2(self.conv2(out)))
106 |
107 | out = self.bn3(self.conv3(out))
108 | out = self.SE(out)
109 |
110 | out += residual
111 | out = self.relu(out)
112 |
113 | return out
114 |
115 |
116 | def test():
117 | x = torch.randn(3, 3, 224, 224)
118 | # block = BasicBlock(3, 64)
119 | block = SEBottleNeck(3, 64)
120 | pred = block(x)
121 | print(x.shape)
122 | print(pred.shape)
123 |
124 |
125 | if __name__ == '__main__':
126 | test()
127 |
--------------------------------------------------------------------------------
/classic_conv/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @Package Name:
5 | # @File Name : __init__.py
6 | # @author : ahua
7 | # @Version : 1.0
8 | # @Start Date : 2024/3/9 3:53
9 | # @Classes :
10 |
11 |
12 | if __name__ == '__main__':
13 | pass
14 |
--------------------------------------------------------------------------------
/image_segmentation/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : __init__.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/9 3:59
7 | # @Classes :
8 |
9 |
10 | if __name__ == '__main__':
11 | pass
12 |
--------------------------------------------------------------------------------
/image_segmentation/about_unet/UNet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : UNet.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/16 5:35
7 | # @Classes :
8 | import torch
9 | import torch.nn as nn
10 | import torchvision.transforms.functional as TF
11 |
12 |
13 | class DoubleConv(nn.Module):
14 | """定义连续的俩次卷积"""
15 |
16 | def __init__(self, in_channel, out_channel):
17 | super(DoubleConv, self).__init__()
18 | # the two convolutions
19 | self.d_conv = nn.Sequential(
20 | # unlike the original paper, padding and BatchNorm are added here
21 | nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False),
22 | nn.BatchNorm2d(out_channel),
23 | nn.ReLU(inplace=True),
24 |
25 | nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False),
26 | nn.BatchNorm2d(out_channel),
27 | nn.ReLU(inplace=True),
28 | )
29 |
30 | def forward(self, x):
31 | return self.d_conv(x)
32 |
33 |
34 | class UNet(nn.Module):
35 | def __init__(self, in_channel=3, out_channel=2, features=[64, 128, 256, 512]):
36 | """
37 |
38 | :param in_channel:
39 | :param out_channel:
40 | :param features: channel count at each down-/up-sampling stage
41 | """
42 | super(UNet, self).__init__()
43 | # hold the sequences of up-sampling and down-sampling layers
44 | self.ups = nn.ModuleList()
45 | self.downs = nn.ModuleList()
46 |
47 | # max-pool down-sampling
48 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
49 |
50 | # build the down-sampling path
51 | for feature in features:
52 | self.downs.append(DoubleConv(in_channel, feature))
53 | # the next input channel count becomes the output channel count just produced
54 | in_channel = feature
55 |
56 | # bottleneck double convolution at the very bottom, before up-sampling starts
57 | self.final_up = DoubleConv(features[-1], features[-1]*2)
58 |
59 | # build the up-sampling path over the reversed feature list
60 | for feature in reversed(features):
61 | # transposed-conv up-sampling (kernel 2, stride 2 doubles H and W); channel counts are 2x because of the concatenated skip connections
62 | self.ups.append(nn.ConvTranspose2d(feature * 2, feature, kernel_size=2, stride=2))
63 | self.ups.append(DoubleConv(feature*2, feature))
64 |
65 | # final 1x1 convolution that produces the output map
66 | self.final_conv = nn.Conv2d(features[0], out_channel, kernel_size=1)
67 |
68 | def forward(self, x):
69 | # store the skip connections
70 | skip_connections = []
71 | # down-sampling path
72 | for down in self.downs:
73 | x = down(x)
74 | skip_connections.append(x)
75 | x = self.pool(x)
76 | # bottleneck convolution
77 | x = self.final_up(x)
78 | # reverse the skip connections
79 | skip_connections = skip_connections[::-1]
80 | # up-sampling path
81 | for idx in range(0, len(self.ups), 2):
82 | x = self.ups[idx](x)
83 | skip_connection = skip_connections[idx // 2]
84 | if skip_connection.shape != x.shape:
85 | # the original paper crops skip_connection here; instead, x is resized to match
86 | x = TF.resize(x, size=skip_connection.shape[2:])
87 | x = torch.cat((x, skip_connection), dim=1)
88 | x = self.ups[idx+1](x)
89 | output = self.final_conv(x)
90 | return output
91 |
92 |
93 | def test():
94 | x = torch.randn(3, 3, 572, 572)
95 | model = UNet()
96 | print(x.shape)
97 | print(model(x).shape)
98 |
99 |
100 | if __name__ == '__main__':
101 | test()
102 |
--------------------------------------------------------------------------------
/image_segmentation/about_unet/UNet_pp.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : UNet_pp.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/4 22:52
7 | # @Classes : UNet++ network
8 | from torch import nn
9 | import torch
10 |
11 |
12 | class DoubleConv(nn.Module):
13 | """同UNet定义连续的俩次卷积"""
14 |
15 | def __init__(self, in_channel, out_channel):
16 | super(DoubleConv, self).__init__()
17 | # the two convolutions
18 | self.d_conv = nn.Sequential(
19 | # unlike the original paper, padding and BatchNorm are added here
20 | nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False),
21 | nn.BatchNorm2d(out_channel),
22 | nn.ReLU(inplace=True),
23 |
24 | nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False),
25 | nn.BatchNorm2d(out_channel),
26 | nn.ReLU(inplace=True),
27 | )
28 |
29 | def forward(self, x):
30 | return self.d_conv(x)
31 |
32 |
33 | class UNetPP(nn.Module):
34 | def __init__(self, in_channel=3, out_channel=2, features=[64, 128, 256, 512, 1024], deep_supervision=False):
35 | """
36 |
37 | :param in_channel:
38 | :param out_channel:
39 | :param features: channel count at each sampling depth
40 | :param deep_supervision: whether to use deep supervision
41 | """
42 | super(UNetPP, self).__init__()
43 |
44 | self.deep_supervision = deep_supervision
45 |
46 | # pooling layer for down-sampling
47 | self.pool = nn.MaxPool2d(2, 2)
48 | # bilinear up-sampling; ConvTranspose2d (or ConvTranspose2d followed by interpolation) would also work, plain interpolation is simply the most convenient here
49 | self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
50 |
51 | # the plain UNet down-sampling backbone: the column-0 convolution at every depth
52 | self.conv0_0 = DoubleConv(in_channel, features[0])
53 | self.conv1_0 = DoubleConv(features[0], features[1])
54 | self.conv2_0 = DoubleConv(features[1], features[2])
55 | self.conv3_0 = DoubleConv(features[2], features[3])
56 | self.conv4_0 = DoubleConv(features[3], features[4])
57 |
58 | # the column-1 convolution at every depth
59 | self.conv0_1 = DoubleConv(features[0] + features[1], features[0])
60 | self.conv1_1 = DoubleConv(features[1] + features[2], features[1])
61 | self.conv2_1 = DoubleConv(features[2] + features[3], features[2])
62 | self.conv3_1 = DoubleConv(features[3] + features[4], features[3])
63 |
64 | # the column-2 convolution at every depth
65 | self.conv0_2 = DoubleConv(features[0] * 2 + features[1], features[0])
66 | self.conv1_2 = DoubleConv(features[1] * 2 + features[2], features[1])
67 | self.conv2_2 = DoubleConv(features[2] * 2 + features[3], features[2])
68 |
69 | # the column-3 convolution at every depth
70 | self.conv0_3 = DoubleConv(features[0] * 3 + features[1], features[0])
71 | self.conv1_3 = DoubleConv(features[1] * 3 + features[2], features[1])
72 |
73 | # the column-4 convolution at every depth
74 | self.conv0_4 = DoubleConv(features[0] * 4 + features[1], features[0])
75 |
76 | # segmentation heads; the original paper applies a sigmoid after the deep-supervision heads, while plain UNet has no sigmoid
77 | self.sigmoid = nn.Sigmoid()
78 | if self.deep_supervision:
79 | self.final1 = nn.Conv2d(features[0], out_channel, kernel_size=1)
80 | self.final2 = nn.Conv2d(features[0], out_channel, kernel_size=1)
81 | self.final3 = nn.Conv2d(features[0], out_channel, kernel_size=1)
82 | self.final4 = nn.Conv2d(features[0], out_channel, kernel_size=1)
83 | else:
84 | self.final = nn.Conv2d(features[0], out_channel, kernel_size=1)
85 |
86 | def forward(self, x):
87 | x0_0 = self.conv0_0(x)
88 | x1_0 = self.conv1_0(self.pool(x0_0))
89 | x0_1 = self.conv0_1(torch.cat([x0_0, self.up(x1_0)], 1))
90 |
91 | x2_0 = self.conv2_0(self.pool(x1_0))
92 | x1_1 = self.conv1_1(torch.cat([x1_0, self.up(x2_0)], 1))
93 | x0_2 = self.conv0_2(torch.cat([x0_0, x0_1, self.up(x1_1)], 1))
94 |
95 | x3_0 = self.conv3_0(self.pool(x2_0))
96 | x2_1 = self.conv2_1(torch.cat([x2_0, self.up(x3_0)], 1))
97 | x1_2 = self.conv1_2(torch.cat([x1_0, x1_1, self.up(x2_1)], 1))
98 | x0_3 = self.conv0_3(torch.cat([x0_0, x0_1, x0_2, self.up(x1_2)], 1))
99 |
100 | x4_0 = self.conv4_0(self.pool(x3_0))
101 | x3_1 = self.conv3_1(torch.cat([x3_0, self.up(x4_0)], 1))
102 | x2_2 = self.conv2_2(torch.cat([x2_0, x2_1, self.up(x3_1)], 1))
103 | x1_3 = self.conv1_3(torch.cat([x1_0, x1_1, x1_2, self.up(x2_2)], 1))
104 | x0_4 = self.conv0_4(torch.cat([x0_0, x0_1, x0_2, x0_3, self.up(x1_3)], 1))
105 |
106 | # with deep supervision, return the four segmentation maps
107 | if self.deep_supervision:
108 | output1 = self.final1(x0_1)
109 | output1 = self.sigmoid(output1)
110 | output2 = self.final2(x0_2)
111 | output2 = self.sigmoid(output2)
112 | output3 = self.final3(x0_3)
113 | output3 = self.sigmoid(output3)
114 | output4 = self.final4(x0_4)
115 | output4 = self.sigmoid(output4)
116 | return [output1, output2, output3, output4]
117 |
118 | else:
119 | output = self.final(x0_4)
120 | output = self.sigmoid(output)
121 | return output
122 |
123 |
124 | def test():
125 | x = torch.randn(3, 3, 224, 224)
126 | model = UNetPP()
127 | print(x.shape)
128 | print(model(x).shape)
129 |
130 |
131 | if __name__ == '__main__':
132 | test()
133 |
--------------------------------------------------------------------------------
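Usage sketch (not part of the repository) for the deep-supervision mode, which the test above does not exercise: with deep_supervision=True the forward pass returns one sigmoid map per nested decoder column. Input height and width should be divisible by 16 (four 2x poolings); the sizes are arbitrary.

import torch
from UNet_pp import UNetPP

x = torch.randn(1, 3, 96, 96)

model = UNetPP(in_channel=3, out_channel=2, deep_supervision=True)
outputs = model(x)
for i, out in enumerate(outputs, start=1):
    print(f"output{i}:", out.shape)  # each torch.Size([1, 2, 96, 96])

During training these four maps are usually each compared against the ground truth and the losses averaged.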
/image_segmentation/about_unet/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Project Name: Hand-torn_code
4 | # @File Name : __init__.py
5 | # @author : ahua
6 | # @Start Date : 2024/3/9 3:59
7 | # @Classes :
8 |
9 |
10 | if __name__ == '__main__':
11 | pass
12 |
--------------------------------------------------------------------------------