├── .idea ├── .gitignore ├── Hand-torn_code.iml ├── Hand-torn_code.time ├── deployment.xml ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── README.md ├── about_attention ├── CBAM.py ├── MSCAAttention.py └── __init__.py ├── about_interview ├── Attention.py ├── Embedding.py ├── FFN.py ├── LayerNorm.py └── __init__.py ├── about_light_net ├── __init__.py └── mobile_net │ └── __init__.py ├── about_transformer ├── ViT │ ├── ViT_model.py │ └── __init__.py ├── __init__.py ├── attention_is_all_you_need │ ├── __init__.py │ ├── attention_module.py │ ├── transformer_decoder.py │ ├── transformer_encoder.py │ ├── transformer_model.py │ └── utils_module.py ├── efficient_vit │ └── __init__.py └── mobile_vit │ ├── __init__.py │ ├── model.py │ ├── model_config.py │ └── transformer_encoder.py ├── classic_conv ├── AlexNet.py ├── SENet.py └── __init__.py └── image_segmentation ├── __init__.py └── about_unet ├── UNet.py ├── UNet_pp.py └── __init__.py /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # 默认忽略的文件 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # 基于编辑器的 HTTP 客户端请求 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /.idea/Hand-torn_code.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/Hand-torn_code.time: -------------------------------------------------------------------------------- 1 | "2024-03-09 03:53:05:859",5000 2 | "2024-03-09 03:55:02:941",113000 3 | "2024-03-09 03:57:00:551",101000 4 | "2024-03-09 03:57:33:103",29000 5 | "2024-03-09 03:59:16:425",26000 6 | "2024-03-09 04:00:38:638",78000 7 | "2024-03-09 04:01:52:395",60000 8 | "2024-03-09 04:02:03:162",1000 9 | "2024-03-09 04:02:15:737",6000 10 | "2024-03-09 04:02:58:952",14000 11 | "2024-03-09 04:10:02:728",345000 12 | "2024-03-09 09:15:03:764",0 13 | "2024-03-09 09:41:26:962",5000 14 | "2024-03-09 09:41:32:627",3000 15 | "2024-03-09 09:41:43:947",10000 16 | "2024-03-09 09:43:42:785",116000 17 | "2024-03-09 09:43:56:374",8000 18 | "2024-03-09 09:44:04:365",6000 19 | "2024-03-09 09:44:46:498",41000 20 | "2024-03-09 09:45:06:840",16000 21 | "2024-03-09 09:45:31:951",23000 22 | "2024-03-09 09:51:12:195",314000 23 | "2024-03-09 09:57:37:053",148000 24 | "2024-03-09 10:16:43:362",5000 25 | "2024-03-09 10:42:19:809",300000 26 | "2024-03-09 10:53:51:578",0 27 | "2024-03-09 22:19:44:923",6000 28 | "2024-03-09 22:46:28:385",0 29 | "2024-03-09 23:23:46:074",36000 30 | "2024-03-09 23:23:52:845",1000 31 | "2024-03-09 23:24:14:510",3000 32 | "2024-03-09 23:24:16:569",0 33 | "2024-03-09 23:26:40:834",141000 34 | "2024-03-09 23:27:16:033",3000 35 | "2024-03-09 23:27:46:404",22000 36 | "2024-03-09 23:28:09:755",19000 37 | "2024-03-09 23:28:39:037",27000 38 | "2024-03-09 23:28:46:022",2000 39 | "2024-03-09 23:28:48:156",0 40 | "2024-03-09 23:29:03:303",13000 41 | "2024-03-09 23:29:49:517",8000 42 | "2024-03-09 23:30:12:900",3000 43 | "2024-03-09 23:30:57:849",19000 44 | "2024-03-09 23:31:58:347",49000 45 | "2024-03-09 23:32:03:466",3000 46 | "2024-03-09 23:32:14:726",1000 47 | "2024-03-09 23:32:55:988",2000 48 | "2024-03-09 23:33:03:092",3000 49 | "2024-03-09 23:35:38:720",4000 50 | "2024-03-09 23:36:13:215",10000 51 | "2024-03-09 23:36:39:099",16000 52 | "2024-03-09 23:36:45:571",0 53 | "2024-03-09 23:37:11:020",21000 54 | "2024-03-09 23:44:47:237",69000 55 | "2024-03-09 23:54:07:048",458000 56 | "2024-03-09 23:56:00:398",42000 57 | "2024-03-09 23:56:30:569",0 58 | "2024-03-09 23:57:30:807",56000 59 | "2024-03-09 23:57:41:535",6000 60 | "2024-03-09 23:58:15:676",31000 61 | "2024-03-09 23:59:12:975",24000 62 | "2024-03-09 23:59:36:816",3000 63 | "2024-03-10 00:00:02:445",21000 64 | "2024-03-10 00:00:13:928",3000 65 | "2024-03-10 00:00:23:129",5000 66 | "2024-03-10 00:00:34:165",8000 67 | "2024-03-10 00:04:06:857",12000 68 | "2024-03-10 00:04:12:339",3000 69 | "2024-03-10 00:04:17:164",3000 70 | "2024-03-10 00:19:44:900",8000 71 | "2024-03-10 00:38:46:725",11000 72 | "2024-03-10 00:39:30:834",42000 73 | "2024-03-10 00:46:13:785",7000 74 | "2024-03-10 00:51:29:728",285000 75 | "2024-03-10 00:57:04:525",300000 76 | "2024-03-10 01:07:45:007",0 77 | "2024-03-10 02:22:33:044",2000 78 | "2024-03-10 02:22:36:588",0 79 | "2024-03-10 02:22:37:812",0 80 | "2024-03-10 18:30:34:702",182000 81 | "2024-03-10 18:37:44:385",300000 82 | "2024-03-10 19:03:27:299",0 83 | "2024-03-10 19:03:47:940",12000 84 | "2024-03-10 19:04:00:629",9000 85 | "2024-03-10 19:04:18:785",14000 86 | "2024-03-10 19:04:23:954",4000 87 | "2024-03-10 19:04:37:099",12000 88 | "2024-03-10 19:04:56:027",17000 89 | "2024-03-10 19:05:30:325",31000 90 | "2024-03-10 19:05:38:096",6000 91 | "2024-03-10 19:06:03:694",23000 92 | "2024-03-10 19:06:07:829",2000 93 | "2024-03-10 19:06:10:003",0 94 | "2024-03-10 19:06:31:419",20000 95 | "2024-03-10 19:07:46:839",72000 96 | "2024-03-10 19:08:10:116",20000 97 | "2024-03-10 19:08:39:578",28000 98 | "2024-03-10 19:08:57:380",6000 99 | "2024-03-10 19:09:35:283",10000 100 | "2024-03-10 19:11:38:062",14000 101 | "2024-03-10 19:11:52:494",3000 102 | "2024-03-10 19:14:03:552",36000 103 | "2024-03-10 19:15:21:821",8000 104 | "2024-03-10 19:15:37:653",8000 105 | "2024-03-10 19:16:08:753",26000 106 | "2024-03-10 19:16:54:659",19000 107 | "2024-03-10 19:17:11:201",10000 108 | "2024-03-10 19:19:06:883",7000 109 | "2024-03-10 19:20:06:457",52000 110 | "2024-03-10 19:20:39:975",27000 111 | "2024-03-10 19:21:10:843",18000 112 | "2024-03-10 19:23:59:212",162000 113 | "2024-03-10 19:24:49:081",44000 114 | "2024-03-10 19:25:51:113",58000 115 | "2024-03-10 19:26:22:162",28000 116 | "2024-03-10 19:27:24:245",28000 117 | "2024-03-10 19:28:54:057",79000 118 | "2024-03-10 19:29:09:801",14000 119 | "2024-03-10 19:29:51:900",36000 120 | "2024-03-10 19:30:04:062",11000 121 | "2024-03-10 19:30:28:475",14000 122 | "2024-03-10 19:31:05:054",35000 123 | "2024-03-10 19:31:38:256",31000 124 | "2024-03-10 19:31:51:786",12000 125 | "2024-03-10 19:32:05:575",9000 126 | "2024-03-10 19:32:15:097",7000 127 | "2024-03-10 19:32:36:242",17000 128 | "2024-03-10 19:33:31:456",44000 129 | "2024-03-10 19:34:03:603",30000 130 | "2024-03-10 19:34:30:426",23000 131 | "2024-03-10 19:34:35:534",2000 132 | "2024-03-10 19:34:45:348",8000 133 | "2024-03-10 19:35:06:853",15000 134 | "2024-03-10 19:35:23:158",15000 135 | "2024-03-10 19:35:43:527",17000 136 | "2024-03-10 19:35:51:459",5000 137 | "2024-03-10 19:36:18:817",7000 138 | "2024-03-10 19:37:10:786",31000 139 | "2024-03-10 19:37:32:919",11000 140 | "2024-03-10 19:37:49:591",8000 141 | "2024-03-10 19:38:35:077",36000 142 | "2024-03-10 19:38:40:737",4000 143 | "2024-03-10 19:39:05:987",24000 144 | "2024-03-10 19:39:14:796",7000 145 | "2024-03-10 19:39:19:590",3000 146 | "2024-03-10 19:39:46:000",15000 147 | "2024-03-10 19:40:28:150",40000 148 | "2024-03-10 19:41:09:397",38000 149 | "2024-03-10 19:42:38:967",86000 150 | "2024-03-10 19:43:04:788",23000 151 | "2024-03-10 19:43:48:509",17000 152 | "2024-03-10 19:45:00:516",69000 153 | "2024-03-10 19:46:26:137",81000 154 | "2024-03-10 19:47:54:268",83000 155 | "2024-03-10 19:48:14:119",5000 156 | "2024-03-10 19:50:22:354",19000 157 | "2024-03-10 19:51:46:056",36000 158 | "2024-03-10 19:51:55:949",0 159 | "2024-03-10 19:51:59:555",0 160 | "2024-03-10 19:52:05:431",0 161 | "2024-03-10 19:52:17:630",4000 162 | "2024-03-10 19:52:27:433",5000 163 | "2024-03-10 19:52:31:786",2000 164 | "2024-03-10 19:52:36:417",2000 165 | "2024-03-10 19:53:34:133",13000 166 | "2024-03-10 19:55:18:907",2000 167 | "2024-03-10 19:55:37:713",15000 168 | "2024-03-10 19:55:43:179",0 169 | "2024-03-10 19:56:36:904",51000 170 | "2024-03-10 19:57:09:477",30000 171 | "2024-03-10 19:57:14:506",0 172 | "2024-03-10 19:57:39:789",23000 173 | "2024-03-10 19:57:54:958",3000 174 | "2024-03-10 19:58:28:592",18000 175 | "2024-03-10 19:58:51:918",17000 176 | "2024-03-10 19:59:03:065",6000 177 | "2024-03-10 20:02:53:853",2000 178 | "2024-03-10 21:49:27:681",71000 179 | "2024-03-10 21:49:39:761",0 180 | "2024-03-10 21:49:50:302",6000 181 | "2024-03-10 21:50:31:795",14000 182 | "2024-03-10 23:58:06:595",15000 183 | "2024-03-11 01:32:52:033",0 184 | "2024-03-15 08:22:14:592",4000 185 | "2024-03-15 08:23:45:643",26000 186 | "2024-03-15 08:25:39:518",111000 187 | "2024-03-15 08:31:27:783",98000 188 | "2024-03-15 08:31:48:971",8000 189 | "2024-03-15 08:32:26:129",26000 190 | "2024-03-15 08:36:20:237",227000 191 | "2024-03-15 08:36:34:374",0 192 | "2024-03-15 08:36:35:774",0 193 | "2024-03-15 08:37:07:695",30000 194 | "2024-03-15 08:39:04:526",7000 195 | "2024-03-15 08:39:11:457",5000 196 | "2024-03-15 23:23:07:318",0 197 | "2024-03-16 05:35:00:928",17000 198 | "2024-03-16 05:35:05:573",2000 199 | "2024-03-16 05:35:42:361",30000 200 | "2024-03-16 05:36:38:597",54000 201 | "2024-03-16 05:36:41:002",0 202 | "2024-03-16 05:36:57:333",15000 203 | "2024-03-16 05:37:47:268",48000 204 | "2024-03-16 05:38:15:974",15000 205 | "2024-03-16 05:39:37:849",74000 206 | "2024-03-16 05:40:09:779",6000 207 | "2024-03-16 05:40:21:110",7000 208 | "2024-03-16 05:42:08:069",102000 209 | "2024-03-16 05:42:52:371",15000 210 | "2024-03-16 05:43:47:429",29000 211 | "2024-03-16 05:44:27:926",24000 212 | "2024-03-16 05:45:06:173",36000 213 | "2024-03-16 05:46:50:781",31000 214 | "2024-03-16 05:48:17:151",10000 215 | "2024-03-16 05:49:18:603",13000 216 | "2024-03-16 05:54:29:726",298000 217 | "2024-03-16 05:57:34:523",3000 218 | "2024-03-16 05:57:42:514",3000 219 | "2024-03-16 05:58:21:967",26000 220 | "2024-03-16 06:02:45:037",71000 221 | "2024-03-16 06:02:50:739",4000 222 | "2024-03-16 06:03:07:197",14000 223 | "2024-03-16 06:04:55:954",106000 224 | "2024-03-16 06:06:24:347",30000 225 | "2024-03-16 06:07:50:117",80000 226 | "2024-03-16 06:08:07:376",15000 227 | "2024-03-16 06:08:12:489",3000 228 | "2024-03-16 06:08:51:723",33000 229 | "2024-03-16 06:09:16:772",19000 230 | "2024-03-16 06:10:23:972",37000 231 | "2024-03-16 06:11:21:116",39000 232 | "2024-03-16 06:12:21:875",46000 233 | "2024-03-16 06:12:47:806",17000 234 | "2024-03-16 06:13:33:034",43000 235 | "2024-03-16 06:14:19:896",44000 236 | "2024-03-16 06:15:24:768",22000 237 | "2024-03-16 06:15:47:827",12000 238 | "2024-03-16 06:15:56:268",7000 239 | "2024-03-16 06:16:44:668",39000 240 | "2024-03-16 06:17:15:990",29000 241 | "2024-03-16 06:17:41:260",14000 242 | "2024-03-16 06:19:35:140",79000 243 | "2024-03-16 06:19:54:445",17000 244 | "2024-03-16 06:21:29:244",3000 245 | "2024-03-16 07:17:06:691",5000 246 | "2024-03-16 07:17:08:602",0 247 | "2024-03-16 07:18:31:830",81000 248 | "2024-03-16 07:21:19:094",153000 249 | "2024-03-16 07:22:51:156",82000 250 | "2024-03-16 07:24:15:508",72000 251 | "2024-03-16 07:25:24:382",48000 252 | "2024-03-16 07:25:31:358",0 253 | "2024-03-16 07:28:36:820",169000 254 | "2024-03-16 07:28:38:867",1000 255 | "2024-03-16 07:30:42:061",21000 256 | "2024-03-16 07:31:25:353",39000 257 | "2024-03-16 07:31:45:181",12000 258 | "2024-03-16 07:31:47:652",2000 259 | "2024-03-16 07:32:11:945",2000 260 | "2024-03-16 07:33:45:988",16000 261 | "2024-03-16 07:40:21:405",24000 262 | "2024-03-16 07:41:23:985",23000 263 | "2024-03-16 07:42:27:475",32000 264 | "2024-03-16 07:43:24:928",55000 265 | "2024-03-16 07:43:48:120",0 266 | "2024-03-16 07:45:22:589",91000 267 | "2024-03-16 07:48:21:006",137000 268 | "2024-03-16 07:48:28:694",3000 269 | "2024-03-16 07:54:08:241",11000 270 | "2024-03-16 07:54:22:241",5000 271 | "2024-03-16 07:55:17:262",3000 272 | "2024-03-16 07:55:49:740",3000 273 | "2024-03-16 07:56:43:005",3000 274 | "2024-03-16 07:56:55:914",1000 275 | "2024-03-16 07:57:16:819",3000 276 | "2024-03-16 07:57:43:806",9000 277 | "2024-03-16 07:58:04:486",2000 278 | "2024-03-16 07:59:22:296",34000 279 | "2024-03-16 08:00:13:959",6000 280 | "2024-03-16 08:00:50:497",14000 281 | "2024-03-16 08:04:51:791",1000 282 | "2024-03-16 08:05:20:420",22000 283 | "2024-03-16 08:05:30:851",6000 284 | "2024-03-16 08:05:41:697",5000 285 | "2024-03-16 08:05:50:974",6000 286 | "2024-03-16 08:07:34:897",12000 287 | "2024-03-16 08:07:50:491",0 288 | "2024-03-16 08:07:57:176",5000 289 | "2024-03-16 21:56:51:786",26000 290 | "2024-03-16 21:57:33:097",38000 291 | "2024-03-16 21:58:02:673",13000 292 | "2024-03-16 21:58:11:220",6000 293 | "2024-03-16 21:58:14:134",1000 294 | "2024-03-16 21:58:26:069",8000 295 | "2024-03-16 21:58:40:929",10000 296 | "2024-03-16 21:59:13:498",1000 297 | "2024-03-16 21:59:40:721",26000 298 | "2024-03-16 22:00:05:615",22000 299 | "2024-03-16 22:05:42:506",332000 300 | "2024-03-16 22:49:25:416",0 301 | "2024-03-16 22:49:37:603",0 302 | "2024-03-16 23:55:48:814",111000 303 | "2024-03-16 23:56:05:131",2000 304 | "2024-03-16 23:56:11:567",0 305 | "2024-03-17 00:05:24:535",5000 306 | "2024-03-17 00:09:32:743",228000 307 | "2024-03-17 00:12:40:856",152000 308 | "2024-03-17 00:13:11:407",21000 309 | "2024-03-17 00:14:54:236",68000 310 | "2024-03-23 23:10:49:244",20000 311 | "2024-03-23 23:11:05:096",13000 312 | "2024-03-23 23:11:21:092",13000 313 | "2024-03-23 23:11:30:619",7000 314 | "2024-03-23 23:11:36:629",2000 315 | "2024-03-23 23:11:59:468",10000 316 | "2024-03-23 23:12:01:683",0 317 | "2024-03-23 23:12:05:694",1000 318 | "2024-03-23 23:12:17:478",11000 319 | "2024-03-23 23:12:26:569",6000 320 | "2024-03-23 23:12:30:799",2000 321 | "2024-03-23 23:12:45:955",0 322 | "2024-03-23 23:13:09:345",22000 323 | "2024-03-23 23:13:11:633",0 324 | "2024-03-23 23:13:27:276",8000 325 | "2024-03-23 23:14:02:321",25000 326 | "2024-03-23 23:14:04:642",0 327 | "2024-03-23 23:15:29:992",73000 328 | "2024-03-23 23:15:56:834",22000 329 | "2024-03-23 23:17:09:364",33000 330 | "2024-03-23 23:17:59:260",2000 331 | "2024-03-23 23:20:35:560",152000 332 | "2024-03-23 23:20:50:728",0 333 | "2024-03-23 23:20:54:537",2000 334 | "2024-03-23 23:21:00:463",0 335 | "2024-03-23 23:21:17:148",8000 336 | "2024-03-23 23:21:49:946",26000 337 | "2024-03-23 23:22:04:048",4000 338 | "2024-03-23 23:22:21:259",13000 339 | "2024-03-23 23:23:41:993",7000 340 | "2024-03-23 23:24:06:027",14000 341 | "2024-03-23 23:24:49:821",34000 342 | "2024-03-23 23:25:28:575",28000 343 | "2024-03-23 23:25:38:254",5000 344 | "2024-03-23 23:26:45:514",21000 345 | "2024-03-23 23:27:47:927",2000 346 | "2024-03-23 23:28:32:506",5000 347 | "2024-03-23 23:30:15:911",24000 348 | "2024-03-23 23:33:28:829",18000 349 | "2024-03-23 23:34:06:636",15000 350 | "2024-03-23 23:34:11:073",2000 351 | "2024-03-23 23:34:50:320",8000 352 | "2024-03-23 23:35:34:823",0 353 | "2024-03-23 23:35:41:044",3000 354 | "2024-03-23 23:36:19:799",34000 355 | "2024-03-23 23:37:10:619",46000 356 | "2024-03-23 23:37:18:151",2000 357 | "2024-03-23 23:37:57:300",35000 358 | "2024-03-23 23:37:58:715",0 359 | "2024-03-23 23:38:29:266",29000 360 | "2024-03-23 23:38:30:470",0 361 | "2024-03-23 23:39:14:277",31000 362 | "2024-03-23 23:40:07:984",47000 363 | "2024-03-23 23:40:20:406",10000 364 | "2024-03-23 23:40:54:560",12000 365 | "2024-03-23 23:41:06:695",10000 366 | "2024-03-23 23:41:09:233",1000 367 | "2024-03-23 23:41:27:772",13000 368 | "2024-03-23 23:41:53:605",5000 369 | "2024-03-23 23:44:49:647",50000 370 | "2024-03-24 00:01:19:806",32000 371 | "2024-03-24 00:01:27:557",1000 372 | "2024-03-24 00:08:41:973",17000 373 | "2024-03-24 00:08:45:532",2000 374 | "2024-03-24 00:11:46:098",12000 375 | "2024-03-24 00:12:55:046",26000 376 | "2024-03-24 00:13:04:087",7000 377 | "2024-03-24 00:13:10:533",5000 378 | "2024-03-24 00:13:22:836",1000 379 | "2024-03-24 00:13:25:362",1000 380 | "2024-03-24 00:13:57:754",2000 381 | "2024-03-24 00:14:02:375",1000 382 | "2024-03-24 00:14:09:275",2000 383 | "2024-03-24 00:14:28:082",18000 384 | "2024-03-24 00:14:35:980",2000 385 | "2024-03-24 00:14:37:581",0 386 | "2024-03-24 00:14:38:287",0 387 | "2024-03-24 00:14:46:682",1000 388 | "2024-03-24 00:15:07:161",2000 389 | "2024-03-24 00:15:12:873",0 390 | "2024-03-24 00:18:44:478",203000 391 | "2024-03-24 00:19:26:858",39000 392 | "2024-03-24 00:20:01:693",25000 393 | "2024-03-24 00:20:29:170",14000 394 | "2024-03-24 00:20:38:890",3000 395 | "2024-03-24 00:21:32:825",11000 396 | "2024-03-24 00:21:52:142",2000 397 | "2024-03-24 00:21:56:222",1000 398 | "2024-03-24 00:22:18:167",1000 399 | "2024-03-24 00:22:20:452",1000 400 | "2024-03-24 00:22:26:058",1000 401 | "2024-03-24 00:23:26:082",43000 402 | "2024-03-24 00:25:02:722",21000 403 | "2024-03-24 00:25:32:127",23000 404 | "2024-03-24 00:25:35:657",0 405 | "2024-03-24 00:26:00:750",13000 406 | "2024-03-24 00:26:08:563",3000 407 | "2024-03-24 00:26:23:762",12000 408 | "2024-03-24 00:27:15:287",46000 409 | "2024-03-24 00:27:39:345",10000 410 | "2024-03-24 00:29:29:852",15000 411 | "2024-03-24 00:29:50:968",0 412 | "2024-03-24 00:33:25:077",9000 413 | "2024-03-24 00:33:36:061",2000 414 | "2024-03-24 00:33:49:567",13000 415 | "2024-03-24 00:34:42:861",2000 416 | "2024-03-24 00:36:21:539",68000 417 | "2024-03-24 00:36:29:649",4000 418 | "2024-03-24 00:38:22:403",32000 419 | "2024-03-24 00:38:54:873",0 420 | "2024-03-24 00:38:58:720",3000 421 | "2024-03-24 00:39:33:379",27000 422 | "2024-03-24 00:39:52:457",0 423 | "2024-03-24 00:43:24:427",1000 424 | "2024-03-24 00:55:26:651",2000 425 | "2024-03-24 00:55:36:480",4000 426 | "2024-03-24 01:03:06:732",439000 427 | "2024-03-24 01:03:08:173",0 428 | "2024-03-24 01:03:12:054",2000 429 | "2024-03-24 01:03:40:675",2000 430 | "2024-03-24 01:05:01:248",53000 431 | "2024-03-24 01:10:53:244",122000 432 | "2024-03-24 01:11:25:637",6000 433 | "2024-03-24 01:11:35:409",5000 434 | "2024-03-24 01:11:46:672",7000 435 | "2024-03-24 01:13:11:260",80000 436 | "2024-03-24 09:56:42:710",300000 437 | "2024-03-24 10:10:41:347",0 438 | "2024-03-25 22:38:24:807",32000 439 | "2024-03-25 22:38:28:791",2000 440 | "2024-03-25 22:38:32:847",4000 441 | "2024-03-25 22:41:34:037",178000 442 | "2024-03-25 23:01:46:232",53000 443 | "2024-03-25 23:26:06:823",147000 444 | "2024-03-25 23:27:32:519",84000 445 | "2024-03-25 23:38:26:508",420000 446 | "2024-03-25 23:40:58:164",141000 447 | "2024-03-25 23:41:37:488",36000 448 | "2024-03-25 23:42:17:290",35000 449 | "2024-03-25 23:45:24:789",181000 450 | "2024-03-25 23:46:47:056",80000 451 | "2024-03-25 23:50:59:364",239000 452 | "2024-03-25 23:52:47:504",42000 453 | "2024-03-25 23:53:36:396",49000 454 | "2024-03-25 23:54:23:661",9000 455 | "2024-03-25 23:55:22:993",53000 456 | "2024-03-25 23:55:48:866",20000 457 | "2024-03-25 23:57:07:561",23000 458 | "2024-03-26 00:00:19:909",16000 459 | "2024-03-26 00:00:23:576",0 460 | "2024-03-26 00:00:25:090",0 461 | "2024-03-26 00:01:04:859",19000 462 | "2024-03-26 00:01:17:700",1000 463 | "2024-03-26 00:01:47:502",9000 464 | "2024-03-26 00:01:51:643",1000 465 | "2024-03-26 00:08:03:208",340000 466 | "2024-03-26 00:08:15:123",5000 467 | "2024-03-26 00:09:09:956",7000 468 | "2024-03-26 00:12:04:110",7000 469 | "2024-03-26 00:16:43:449",17000 470 | "2024-03-26 00:17:55:636",42000 471 | "2024-03-26 00:18:04:944",7000 472 | "2024-03-26 00:18:39:599",24000 473 | "2024-03-26 00:18:59:721",14000 474 | "2024-03-26 00:19:13:150",8000 475 | "2024-03-26 00:19:31:416",9000 476 | "2024-03-26 00:21:53:829",139000 477 | "2024-03-26 00:22:48:934",45000 478 | "2024-03-26 00:23:03:786",12000 479 | "2024-03-26 00:23:18:224",13000 480 | "2024-03-26 00:23:52:087",3000 481 | "2024-03-26 00:24:14:257",11000 482 | "2024-03-26 00:24:32:645",1000 483 | "2024-03-26 00:24:34:293",0 484 | "2024-03-26 00:25:01:932",12000 485 | "2024-03-26 00:28:01:059",146000 486 | "2024-03-26 00:28:46:533",35000 487 | "2024-03-26 00:30:09:177",11000 488 | "2024-03-26 00:30:59:606",5000 489 | "2024-03-26 00:31:21:848",9000 490 | "2024-03-26 00:31:55:206",5000 491 | "2024-03-26 00:33:13:834",14000 492 | "2024-03-26 00:33:16:405",0 493 | "2024-03-26 00:33:24:903",6000 494 | "2024-03-26 00:34:24:846",11000 495 | "2024-03-26 00:35:08:757",10000 496 | "2024-03-26 00:40:02:886",4000 497 | "2024-03-26 00:40:12:728",5000 498 | "2024-03-26 00:40:18:645",1000 499 | "2024-03-26 00:40:43:553",9000 500 | "2024-03-26 00:41:06:024",6000 501 | "2024-03-26 00:41:13:597",2000 502 | "2024-03-26 00:41:17:694",0 503 | "2024-03-26 00:41:23:086",4000 504 | "2024-03-26 00:41:47:508",12000 505 | "2024-03-26 00:41:53:403",3000 506 | "2024-03-26 00:41:57:200",2000 507 | "2024-03-26 00:42:58:086",14000 508 | "2024-03-26 00:43:22:539",21000 509 | "2024-03-26 00:44:34:583",0 510 | "2024-03-26 00:44:45:686",10000 511 | "2024-03-26 00:48:09:536",193000 512 | "2024-03-26 00:49:23:971",50000 513 | "2024-03-26 00:49:29:433",1000 514 | "2024-03-26 00:50:03:880",30000 515 | "2024-03-26 00:50:35:257",29000 516 | "2024-03-26 00:51:21:246",28000 517 | "2024-03-26 00:51:46:897",3000 518 | "2024-03-26 00:52:47:488",18000 519 | "2024-03-26 00:52:50:585",0 520 | "2024-03-26 00:52:59:858",4000 521 | "2024-03-26 00:53:39:446",27000 522 | "2024-03-26 00:53:45:854",4000 523 | "2024-03-26 00:55:24:565",22000 524 | "2024-03-26 00:55:52:243",3000 525 | "2024-03-26 00:57:06:102",0 526 | "2024-03-26 00:59:17:463",4000 527 | "2024-03-26 00:59:32:253",13000 528 | "2024-03-26 00:59:43:184",5000 529 | "2024-03-26 01:00:10:926",15000 530 | "2024-03-26 01:03:11:598",173000 531 | "2024-03-26 01:03:42:307",2000 532 | "2024-03-26 01:04:19:368",35000 533 | "2024-03-26 01:04:45:445",6000 534 | "2024-03-26 01:08:59:941",9000 535 | "2024-03-26 01:11:54:200",16000 536 | "2024-03-26 01:13:27:235",33000 537 | "2024-03-26 01:14:15:231",5000 538 | "2024-03-26 01:15:56:777",65000 539 | "2024-03-26 01:16:45:035",38000 540 | "2024-03-26 01:20:12:805",1000 541 | "2024-03-26 01:21:37:724",77000 542 | "2024-03-26 01:21:50:475",10000 543 | "2024-03-26 01:22:10:650",17000 544 | "2024-03-26 01:22:13:328",0 545 | "2024-03-26 01:22:16:618",0 546 | "2024-03-26 01:24:37:465",138000 547 | "2024-03-26 01:26:07:955",75000 548 | "2024-03-26 01:26:13:432",0 549 | "2024-03-26 01:27:05:237",44000 550 | "2024-03-26 01:27:34:382",28000 551 | "2024-03-26 01:27:39:927",2000 552 | "2024-03-26 01:28:19:813",9000 553 | "2024-03-26 01:29:14:707",29000 554 | "2024-03-26 01:30:08:459",15000 555 | "2024-03-26 01:32:58:950",134000 556 | "2024-03-26 01:33:39:548",3000 557 | "2024-03-26 01:34:47:351",9000 558 | "2024-03-26 01:35:37:423",44000 559 | "2024-03-26 01:36:52:232",53000 560 | "2024-03-26 01:37:23:094",21000 561 | "2024-03-26 01:38:05:220",8000 562 | "2024-03-26 01:38:33:939",9000 563 | "2024-03-26 01:39:05:087",13000 564 | "2024-03-26 01:39:45:587",8000 565 | "2024-03-26 01:41:16:758",63000 566 | "2024-03-26 01:44:22:606",0 567 | "2024-03-26 01:44:35:027",7000 568 | "2024-03-26 01:44:58:592",8000 569 | "2024-03-26 01:46:42:630",21000 570 | "2024-03-26 01:47:28:788",43000 571 | "2024-03-26 01:49:29:491",103000 572 | "2024-03-26 01:51:35:115",101000 573 | "2024-03-26 01:52:21:898",37000 574 | "2024-03-26 01:52:38:285",3000 575 | "2024-03-26 01:52:58:611",18000 576 | "2024-03-26 01:54:46:335",2000 577 | "2024-03-26 01:55:56:740",45000 578 | "2024-03-26 01:56:06:408",2000 579 | "2024-03-26 01:56:16:378",7000 580 | "2024-03-26 01:57:26:021",18000 581 | "2024-03-26 01:58:17:105",17000 582 | "2024-03-26 01:58:41:604",8000 583 | "2024-03-26 01:59:26:196",24000 584 | "2024-03-26 02:00:54:683",18000 585 | "2024-03-26 02:02:21:480",68000 586 | "2024-03-26 02:04:25:207",97000 587 | "2024-03-26 02:04:27:900",0 588 | "2024-03-26 02:05:20:084",35000 589 | "2024-03-26 02:08:16:154",75000 590 | "2024-03-26 02:25:05:159",148000 591 | "2024-03-26 02:25:40:426",8000 592 | "2024-03-26 02:26:04:404",10000 593 | "2024-03-26 02:26:17:357",8000 594 | "2024-03-26 02:29:18:216",175000 595 | "2024-03-26 02:32:39:138",192000 596 | "2024-03-26 02:36:34:037",170000 597 | "2024-03-26 02:37:07:717",11000 598 | "2024-03-26 02:38:12:531",0 599 | "2024-03-26 02:39:27:160",11000 600 | "2024-03-26 02:43:30:449",241000 601 | "2024-03-26 02:58:06:467",772000 602 | "2024-03-26 02:58:35:292",1000 603 | "2024-03-26 06:55:27:482",2000 604 | "2024-03-26 06:55:29:583",0 605 | "2024-03-26 06:55:36:831",4000 606 | "2024-03-26 06:55:52:775",13000 607 | "2024-03-26 06:56:46:106",0 608 | "2024-03-26 06:56:47:196",0 609 | "2024-03-26 06:58:07:712",4000 610 | "2024-03-26 07:00:40:328",6000 611 | "2024-03-26 07:00:45:056",4000 612 | "2024-03-26 07:01:13:256",5000 613 | "2024-03-26 07:02:09:852",38000 614 | "2024-03-26 07:02:18:843",3000 615 | "2024-03-26 07:41:16:294",1000 616 | "2024-03-26 07:45:32:104",3000 617 | "2024-03-26 08:07:38:369",1000 618 | "2024-03-26 08:07:45:799",0 619 | "2024-03-26 08:28:52:384",1000 620 | "2024-03-26 08:39:14:769",4000 621 | "2024-03-26 08:47:59:998",4000 622 | "2024-03-26 08:48:03:881",0 623 | "2024-03-26 09:24:07:763",2000 624 | "2024-03-26 09:31:06:957",30000 625 | "2024-03-26 09:31:40:292",26000 626 | "2024-03-26 09:38:17:276",13000 627 | "2024-03-26 09:38:31:706",0 628 | "2024-03-26 09:39:12:866",39000 629 | "2024-03-26 09:39:31:012",9000 630 | "2024-03-26 09:39:41:460",3000 631 | "2024-03-26 09:40:43:051",4000 632 | "2024-03-26 09:40:48:197",2000 633 | "2024-03-26 09:42:09:644",13000 634 | "2024-03-26 09:42:23:531",1000 635 | "2024-03-26 09:42:27:193",1000 636 | "2024-03-26 09:44:10:646",11000 637 | "2024-03-26 09:44:45:162",30000 638 | "2024-03-26 09:47:39:913",164000 639 | "2024-03-26 09:51:52:580",4000 640 | "2024-03-26 22:27:07:026",0 641 | "2024-03-26 22:46:21:596",3000 642 | "2024-03-26 22:58:15:519",315000 643 | "2024-03-26 23:03:41:149",0 644 | "2024-03-27 08:44:21:211",1000 645 | "2024-03-27 08:49:29:069",300000 646 | "2024-03-27 08:51:45:502",24000 647 | "2024-03-27 09:00:57:346",2000 648 | "2024-03-27 09:09:22:731",2000 649 | "2024-03-27 09:27:28:892",13000 650 | "2024-03-31 03:08:12:488",24000 651 | "2024-03-31 03:08:14:581",0 652 | "2024-03-31 03:09:02:055",22000 653 | "2024-03-31 03:09:29:893",18000 654 | "2024-03-31 03:09:38:535",4000 655 | "2024-03-31 03:09:47:497",4000 656 | "2024-03-31 03:09:57:468",5000 657 | "2024-03-31 03:10:13:855",11000 658 | "2024-03-31 03:11:01:227",23000 659 | "2024-03-31 03:12:34:665",71000 660 | "2024-03-31 03:13:59:460",66000 661 | "2024-03-31 03:16:57:755",136000 662 | "2024-03-31 03:17:48:363",38000 663 | "2024-03-31 03:18:15:422",0 664 | "2024-03-31 03:18:29:586",10000 665 | "2024-03-31 03:20:38:967",14000 666 | "2024-03-31 03:20:55:856",0 667 | "2024-03-31 03:22:07:970",68000 668 | "2024-03-31 03:22:11:667",1000 669 | "2024-03-31 03:22:20:058",6000 670 | "2024-03-31 03:22:28:288",6000 671 | "2024-03-31 03:22:42:773",13000 672 | "2024-03-31 03:25:58:535",192000 673 | "2024-03-31 03:26:30:212",29000 674 | "2024-03-31 03:26:48:526",3000 675 | "2024-03-31 03:27:32:567",42000 676 | "2024-03-31 03:29:12:195",26000 677 | "2024-03-31 03:29:23:232",0 678 | "2024-03-31 03:29:53:039",27000 679 | "2024-03-31 03:30:00:396",3000 680 | "2024-03-31 03:30:16:495",14000 681 | "2024-03-31 03:32:37:215",2000 682 | "2024-03-31 05:03:09:091",50000 683 | "2024-03-31 05:03:22:640",12000 684 | "2024-03-31 05:04:06:367",38000 685 | "2024-03-31 05:04:29:194",9000 686 | "2024-03-31 05:05:15:947",7000 687 | "2024-03-31 05:05:18:786",1000 688 | "2024-03-31 05:06:34:839",56000 689 | "2024-03-31 05:07:01:988",22000 690 | "2024-03-31 05:07:14:089",0 691 | "2024-03-31 05:07:21:053",5000 692 | "2024-03-31 05:25:26:519",1000 693 | "2024-03-31 05:25:34:319",0 694 | "2024-03-31 08:14:18:333",24000 695 | "2024-03-31 08:14:52:593",9000 696 | "2024-03-31 09:34:05:823",10000 697 | "2024-03-31 09:34:09:928",2000 698 | "2024-03-31 09:34:25:105",8000 699 | "2024-03-31 09:35:15:193",8000 700 | "2024-03-31 09:35:18:653",2000 701 | "2024-03-31 09:35:22:743",3000 702 | "2024-03-31 09:35:32:767",2000 703 | "2024-03-31 09:36:16:195",17000 704 | "2024-03-31 09:36:30:208",7000 705 | "2024-03-31 09:36:40:228",7000 706 | "2024-03-31 09:37:13:490",25000 707 | "2024-03-31 09:38:02:115",37000 708 | "2024-03-31 09:38:21:674",16000 709 | "2024-03-31 09:38:23:190",0 710 | "2024-03-31 09:38:31:850",3000 711 | "2024-03-31 09:38:44:369",8000 712 | "2024-03-31 09:40:13:133",86000 713 | "2024-03-31 09:41:48:501",87000 714 | "2024-03-31 09:42:13:830",17000 715 | "2024-03-31 09:42:25:410",10000 716 | "2024-03-31 09:42:49:775",18000 717 | "2024-03-31 09:44:38:852",98000 718 | "2024-03-31 09:48:40:651",37000 719 | "2024-03-31 09:48:47:392",4000 720 | "2024-03-31 09:49:10:869",19000 721 | "2024-03-31 09:49:28:181",1000 722 | "2024-03-31 09:49:33:447",3000 723 | "2024-03-31 09:52:18:522",0 724 | "2024-03-31 09:55:15:583",141000 725 | "2024-03-31 09:55:32:388",3000 726 | "2024-03-31 09:55:39:231",4000 727 | "2024-03-31 09:56:06:725",20000 728 | "2024-03-31 09:56:17:671",8000 729 | "2024-03-31 09:56:39:053",19000 730 | "2024-03-31 09:59:59:251",184000 731 | "2024-03-31 10:00:24:860",2000 732 | "2024-03-31 10:01:16:273",44000 733 | "2024-03-31 10:03:58:049",135000 734 | "2024-03-31 10:05:05:730",12000 735 | "2024-03-31 10:05:09:150",0 736 | "2024-03-31 10:07:00:848",15000 737 | "2024-03-31 10:08:04:582",28000 738 | "2024-03-31 10:08:36:858",28000 739 | "2024-03-31 10:09:09:598",24000 740 | "2024-03-31 10:09:21:420",1000 741 | "2024-03-31 10:10:06:701",40000 742 | "2024-03-31 10:13:03:605",174000 743 | "2024-03-31 10:14:00:481",30000 744 | "2024-03-31 10:14:20:878",0 745 | "2024-03-31 10:14:31:870",8000 746 | "2024-03-31 10:15:23:643",4000 747 | "2024-03-31 10:15:36:210",9000 748 | -------------------------------------------------------------------------------- /.idea/deployment.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Record my favorite and useful deep learning models encountered in the learning process, including but not limited to classic convolutional neural networks (e.g. AlexNet,ResNet), attention-mechanism related modules, transformer related models (e.g. ViT,Efficient ViT), etc. 2 | 3 | I will continue to update more models as I learn. All models are based on the pytorch implementation and use cuda version 11.8. Another important point is that a large number of implemented models are available in the pytorch framework and transformers library. Reading the source code will improve your coding skills. 4 | 5 | The model code has been updated: 6 | 7 | + [image_segmentation:](https://github.com/anshilaoliu/Hand-torn_code/tree/master/image_segmentation) 8 | + [about_unet:](https://github.com/anshilaoliu/Hand-torn_code/tree/master/image_segmentation/about_unet) 9 | + [UNet](https://github.com/anshilaoliu/Hand-torn_code/blob/master/image_segmentation/about_unet/UNet.py) 10 | + [UNet_pp](https://github.com/anshilaoliu/Hand-torn_code/blob/master/image_segmentation/about_unet/UNet_pp.py) 11 | + [about_transformer:](https://github.com/anshilaoliu/Hand-torn_code/tree/master/about_transformer) 12 | + [Transformer](https://github.com/anshilaoliu/Hand-torn_code/tree/master/about_transformer/attention_is_all_you_need) 13 | + [ViT_model](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_transformer/ViT/ViT_model.py) 14 | + [mobile_vit](https://github.com/anshilaoliu/Hand-torn_code/tree/master/about_transformer/mobile_vit) 15 | + [about_attention:](https://github.com/anshilaoliu/Hand-torn_code/tree/master/about_attention) 16 | + [MSCAAttention](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_attention/MSCAAttention.py) 17 | + [CBAM](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_attention/CBAM.py) 18 | + [classic_conv:](https://github.com/anshilaoliu/Hand-torn_code/tree/master/classic_conv) 19 | + [AlexNet](https://github.com/anshilaoliu/Hand-torn_code/blob/master/classic_conv/AlexNet.py) 20 | + [SENet](https://github.com/anshilaoliu/Hand-torn_code/blob/master/classic_conv/SENet.py) 21 | + [about_interview](https://github.com/anshilaoliu/Hand-torn_code/tree/master/about_interview) 22 | + [Attention](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_interview/Attention.py) 23 | + [Embedding](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_interview/Embedding.py) 24 | + [FFN](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_interview/FFN.py) 25 | + [LayerNorm](https://github.com/anshilaoliu/Hand-torn_code/blob/master/about_interview/LayerNorm.py) 26 | 27 | -------------------------------------------------------------------------------- /about_attention/CBAM.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : CBAM.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/31 3:08 7 | # @Classes : 8 | import torch 9 | import torch.nn as nn 10 | 11 | 12 | class ChannelAttention(nn.Module): 13 | """通道注意力""" 14 | def __init__(self, in_planes, ratio=8): 15 | super(ChannelAttention, self).__init__() 16 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 17 | self.max_pool = nn.AdaptiveMaxPool2d(1) 18 | 19 | self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False) 20 | self.relu1 = nn.ReLU() 21 | self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False) 22 | 23 | self.sigmoid = nn.Sigmoid() 24 | 25 | def forward(self, x): 26 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 27 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 28 | out = avg_out + max_out 29 | return self.sigmoid(out) 30 | 31 | 32 | class SpatialAttention(nn.Module): 33 | """空间注意力""" 34 | def __init__(self, kernel_size=7): 35 | super(SpatialAttention, self).__init__() 36 | 37 | assert kernel_size in (3, 7), 'kernel size must be 3 or 7' 38 | padding = 3 if kernel_size == 7 else 1 39 | self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) 40 | self.sigmoid = nn.Sigmoid() 41 | 42 | def forward(self, x): 43 | avg_out = torch.mean(x, dim=1, keepdim=True) 44 | max_out, _ = torch.max(x, dim=1, keepdim=True) 45 | x = torch.cat([avg_out, max_out], dim=1) 46 | x = self.conv1(x) 47 | return self.sigmoid(x) 48 | 49 | 50 | class CBAMBlock(nn.Module): 51 | def __init__(self, channel, ratio=8, kernel_size=7): 52 | super(CBAMBlock, self).__init__() 53 | self.channel_attention = ChannelAttention(channel, ratio=ratio) 54 | self.spatial_attention = SpatialAttention(kernel_size=kernel_size) 55 | # 根据你的实际需求来考虑是否残差连接与下采样 56 | self.channel_down = nn.Conv2d(channel * 2, channel, kernel_size=1) 57 | 58 | def forward(self, x): 59 | residual = x 60 | x = x * self.channel_attention(x) 61 | x = x * self.spatial_attention(x) 62 | 63 | x = torch.cat((x, residual), dim=1) 64 | x = self.channel_down(x) 65 | return x 66 | 67 | 68 | if __name__ == '__main__': 69 | pass 70 | -------------------------------------------------------------------------------- /about_attention/MSCAAttention.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : MSCAAttention.py 5 | # @author : LJH 6 | # @Start Date : 2024/3/4 23:17 7 | # @Classes : MSCAAttention(Multi-Scale Channel Attention)实现,该注意力机制可以增强神经网络在特定通道和空间维度上的感知能力(自动学习特定通道和空间位置的重要性),从而有助于提取更加丰富和有用的特征 8 | import torch 9 | import torch.nn as nn 10 | from mmengine.model import BaseModule 11 | from mmcv.cnn import build_activation_layer 12 | 13 | 14 | # Source from https://github.com/haoshao-nku/medical_seg 15 | # 提出的注意力的实现模块 16 | class MSCAAttention(BaseModule): 17 | """Multi-Scale Convolutional Attention(MSCA)模块. 18 | 多尺度特征提取:通过多个卷积核大小和填充的卷积操作,以提取不同尺度的特征信息。 19 | 这些卷积操作包括一个具有较大卷积核的初始卷积 (self.conv0) 和多个后续的卷积操作(self.conv0_1,self.conv0_2,self.conv1_1,self.conv1_2,self.conv2_1,self.conv2_2),每个都针对不同的核大小和填充。 20 | 通道混合:在提取多尺度特征之后,通过对这些特征进行通道混合来整合不同尺度的信息。通道混合操作由最后一个卷积层 self.conv3 完成。 21 | 卷积注意力:最后,通过将通道混合后的特征与输入特征进行逐元素乘法,实现了一种卷积注意力机制。这意味着模块通过对不同通道的特征赋予不同的权重来选择性地强调或抑制输入特征。 22 | """ 23 | def __init__(self, 24 | channels, 25 | kernel_sizes=[5, [1, 7], [1, 11], [1, 21]], 26 | paddings=[2, [0, 3], [0, 5], [0, 10]]): 27 | """ 28 | 29 | :param channels: 通道数. 30 | :param kernel_sizes: 注意力核大小. 默认: [5, [1, 7], [1, 11], [1, 21]]. 31 | :param paddings: 注意力模块中相应填充值的个数. 32 | 默认: [2, [0, 3], [0, 5], [0, 10]]. 33 | """ 34 | super().__init__() 35 | self.conv0 = nn.Conv2d( 36 | channels, 37 | channels, 38 | kernel_size=kernel_sizes[0], 39 | padding=paddings[0], 40 | groups=channels) 41 | for i, (kernel_size, 42 | padding) in enumerate(zip(kernel_sizes[1:], paddings[1:])): 43 | kernel_size_ = [kernel_size, kernel_size[::-1]] 44 | padding_ = [padding, padding[::-1]] 45 | conv_name = [f'conv{i}_1', f'conv{i}_2'] 46 | for i_kernel, i_pad, i_conv in zip(kernel_size_, padding_, 47 | conv_name): 48 | self.add_module( 49 | i_conv, 50 | nn.Conv2d( 51 | channels, 52 | channels, 53 | tuple(i_kernel), 54 | padding=i_pad, 55 | groups=channels)) 56 | self.conv3 = nn.Conv2d(channels, channels, 1) 57 | 58 | def forward(self, x): 59 | u = x.clone() 60 | 61 | attn = self.conv0(x) 62 | 63 | # 多尺度特征提取 64 | attn_0 = self.conv0_1(attn) 65 | attn_0 = self.conv0_2(attn_0) 66 | 67 | attn_1 = self.conv1_1(attn) 68 | attn_1 = self.conv1_2(attn_1) 69 | 70 | attn_2 = self.conv2_1(attn) 71 | attn_2 = self.conv2_2(attn_2) 72 | 73 | attn = attn + attn_0 + attn_1 + attn_2 74 | # 通道融合(也是通过1x1卷积) 75 | attn = self.conv3(attn) 76 | 77 | # Convolutional Attention 78 | x = attn * u 79 | 80 | return x 81 | 82 | 83 | # 原论文模型中带有封装MSCAAttention,可用于参考作者怎么使用这个注意力模块 84 | class MSCASpatialAttention(BaseModule): 85 | """ 86 | Spatial Attention Module in Multi-Scale Convolutional Attention Module,多尺度卷积注意力模块中的空间注意模块 87 | 先过1x1卷积,gelu激活后过注意力,再过一次1x1卷积,最后和跳跃连接 88 | """ 89 | 90 | def __init__(self, 91 | in_channels, 92 | attention_kernel_sizes=[5, [1, 7], [1, 11], [1, 21]], 93 | attention_kernel_paddings=[2, [0, 3], [0, 5], [0, 10]], 94 | act_cfg=dict(type='GELU')): 95 | """ 96 | 97 | :param in_channels: 通道数. 98 | :param attention_kernel_sizes (list): 注意力核大小. 默认: [5, [1, 7], [1, 11], [1, 21]]. 99 | :param attention_kernel_paddings (list): 注意力模块中相应填充值的个数. 100 | :param act_cfg (list): 注意力模块中相应填充值的个数. 101 | """ 102 | super().__init__() 103 | self.proj_1 = nn.Conv2d(in_channels, in_channels, 1) 104 | self.activation = build_activation_layer(act_cfg) 105 | self.spatial_gating_unit = MSCAAttention(in_channels, 106 | attention_kernel_sizes, 107 | attention_kernel_paddings) 108 | self.proj_2 = nn.Conv2d(in_channels, in_channels, 1) 109 | 110 | def forward(self, x): 111 | # 跳跃连接 112 | shorcut = x.clone() 113 | # 先过1x1卷积 114 | x = self.proj_1(x) 115 | # 激活 116 | x = self.activation(x) 117 | # 过MSCAAttention 118 | x = self.spatial_gating_unit(x) 119 | # 1x1卷积 120 | x = self.proj_2(x) 121 | # 残差融合 122 | x = x + shorcut 123 | return x 124 | 125 | 126 | def test(): 127 | x = torch.rand(3, 64, 32, 32) 128 | model = MSCASpatialAttention(in_channels=64) 129 | pred = model(x) 130 | print(x.shape) 131 | print(pred.shape) 132 | 133 | 134 | if __name__ == '__main__': 135 | test() 136 | -------------------------------------------------------------------------------- /about_attention/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @Package Name: 5 | # @File Name : __init__.py.py 6 | # @author : ahua 7 | # @Version : 1.0 8 | # @Start Date : 2024/3/9 3:54 9 | # @Classes : 10 | 11 | 12 | if __name__ == '__main__': 13 | pass 14 | -------------------------------------------------------------------------------- /about_interview/Attention.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : Attention.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/31 3:22 7 | # @Classes : 8 | import torch 9 | from torch import nn 10 | import math 11 | 12 | 13 | class ScaledDotProductAttention(nn.Module): 14 | """根据公式计算QkV""" 15 | def __init__(self, n_d): 16 | """ 17 | 18 | :param n_d: 每个头的dim,用于scaling 19 | """ 20 | super(ScaledDotProductAttention, self).__init__() 21 | 22 | self.n_d = n_d 23 | # 在最后一个维度上进行softmax 24 | self.softmax = nn.Softmax(dim=-1) 25 | 26 | def forward(self, Q, K, V, mask): 27 | # q和k关于(2,3)维度的转置相乘并scaling 28 | attn_score = Q @ K.transpose(2, 3) / math.sqrt(self.n_d) 29 | 30 | if mask is not None: 31 | attn_score = attn_score.masked_fill(mask == 0, float("-inf")) 32 | 33 | attn_score = self.softmax(attn_score) 34 | attn_score = attn_score @ V 35 | 36 | return attn_score 37 | 38 | 39 | class MultiHeadAttention(nn.Module): 40 | """多头注意力,包括残差连接和Norm""" 41 | def __init__(self, d_model, n_head, dropout=0.1, bias=True): 42 | """ 43 | 44 | :param d_model: 输入向量embedding维度 45 | :param n_head: 46 | :param bias: 47 | """ 48 | super(MultiHeadAttention, self).__init__() 49 | 50 | if d_model % n_head != 0: 51 | raise ValueError( 52 | "Embedding dim must be divisible by number of heads in {}. Got: embed_dim={} and num_heads={}".format( 53 | self.__class__.__name__, d_model, n_head 54 | ) 55 | ) 56 | 57 | self.n_head = n_head 58 | self.d_model = d_model 59 | self.n_d = d_model // n_head 60 | 61 | # 投影映射矩阵 62 | self.w_q = nn.Linear(in_features=d_model, out_features=d_model, bias=bias) 63 | self.w_k = nn.Linear(in_features=d_model, out_features=d_model, bias=bias) 64 | self.w_v = nn.Linear(in_features=d_model, out_features=d_model, bias=bias) 65 | 66 | self.get_attn = ScaledDotProductAttention(self.n_d) 67 | 68 | # 最后多头合并之后再做一次映射 69 | self.w_o = nn.Linear(d_model, d_model) 70 | 71 | self.dropout = nn.Dropout(p=dropout) 72 | 73 | self.layer_norm = nn.LayerNorm(d_model) 74 | 75 | def forward(self, x_q, x_k, x_v, mask=None): 76 | residual = x_q 77 | batch, seq_len, dimension = x_q.shape 78 | # 映射得到QKV矩阵 79 | q, k, v = self.w_q(x_q), self.w_k(x_k), self.w_v(x_v) 80 | 81 | # 拆分为四维张量后将(0,1,2,3)reshape为(0,2,1,3) 82 | q = q.view(batch, seq_len, self.n_head, self.n_d).permute(0, 2, 1, 3) 83 | k = k.view(batch, seq_len, self.n_head, self.n_d).permute(0, 2, 1, 3) 84 | v = v.view(batch, seq_len, self.n_head, self.n_d).permute(0, 2, 1, 3) 85 | 86 | attn_score = self.get_attn(q, k, v, mask) 87 | 88 | # 重新排列维度,保证内存连续型后改变为三维张量 89 | attn_score = attn_score.permute(0, 2, 1, 3).contiguous().view(batch, seq_len, dimension) 90 | 91 | output = self.w_o(attn_score) 92 | output = self.dropout(output) 93 | 94 | # 残差连接和Norm 95 | output = self.layer_norm(output + residual) 96 | return output 97 | -------------------------------------------------------------------------------- /about_interview/Embedding.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : Embedding.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/31 3:25 7 | # @Classes : 8 | import torch 9 | from torch import nn 10 | 11 | 12 | class PositionalEmbedding(nn.Module): 13 | """位置编码,输入token embedding返回加上位置编码后的总的embedding""" 14 | def __init__(self, d_model, max_len=5000, dropout=0.1): 15 | super(PositionalEmbedding, self).__init__() 16 | # 初始化编码 17 | self.pe = torch.zeros(max_len, d_model) 18 | # 原始论文中位置编码是直接算的,不用训练 19 | self.pe.requires_grad_(False) 20 | 21 | # 照着公式敲就行了 22 | # 初始化pos 23 | pos = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 24 | # 2i 25 | _2i = torch.arange(0, d_model, 2) 26 | 27 | # 偶数计算 28 | self.pe[:, 0::2] = torch.sin(pos / (10000 ** (_2i / d_model))) 29 | # 奇数计算 30 | self.pe[:, 1::2] = torch.cos(pos / (10000 ** (_2i / d_model))) 31 | 32 | self.dropout = nn.Dropout(p=dropout) 33 | 34 | def forward(self, x): 35 | """ 36 | 37 | :param x: 输入的token embedding 38 | :return: 39 | """ 40 | seq_len = x.shape[1] 41 | x = x + self.pe[:seq_len, :] 42 | return self.dropout(x) -------------------------------------------------------------------------------- /about_interview/FFN.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : FFN.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/31 3:22 7 | # @Classes : 8 | from torch import nn 9 | 10 | 11 | class PositionwiseFeedForwardNet(nn.Module): 12 | """前馈网络,包括后续残差连接与Norm""" 13 | def __init__(self, d_model, hidden, dropout=0.1): 14 | super(PositionwiseFeedForwardNet, self).__init__() 15 | self.fc = nn.Sequential( 16 | nn.Linear(d_model, hidden), 17 | nn.ReLU(), 18 | nn.Dropout(p=dropout), 19 | nn.Linear(hidden, d_model), 20 | nn.Dropout(p=dropout) 21 | ) 22 | self.layer_norm = nn.LayerNorm(d_model) 23 | 24 | def forward(self, x): 25 | residual = x 26 | x = self.fc(x) 27 | output = self.layer_norm(x+residual) 28 | return output 29 | -------------------------------------------------------------------------------- /about_interview/LayerNorm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : LayerNorm.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/31 3:22 7 | # @Classes : 8 | import torch 9 | from torch import nn 10 | 11 | 12 | class LayerNorm(nn.Module): 13 | """也可以直接用nn.LayerNorm""" 14 | def __init__(self, d_model, eps=1e-9): 15 | super(LayerNorm, self).__init__() 16 | # 俩个参数,权重和偏置,防止输入激活函数的线性表示部分导致非线性效果不佳 17 | self.weight = nn.Parameter(torch.ones(d_model)) 18 | self.beta = nn.Parameter(torch.zeros(d_model)) 19 | # 防止分母0 20 | self.eps = eps 21 | 22 | def forward(self, x): 23 | # LayerNorm全都是对最后一维进行归一化 24 | mean = x.mean(-1, keepdim=True) 25 | var = x.var(-1, unbiased=False, keepdim=True) 26 | out = (x - mean) / torch.sqrt(var + self.eps) 27 | return self.weight * out + self.beta 28 | -------------------------------------------------------------------------------- /about_interview/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : __init__.py.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/31 3:22 7 | # @Classes : 8 | 9 | 10 | if __name__ == '__main__': 11 | pass 12 | -------------------------------------------------------------------------------- /about_light_net/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : __init__.py.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/16 21:59 7 | # @Classes : 8 | 9 | 10 | if __name__ == '__main__': 11 | pass 12 | -------------------------------------------------------------------------------- /about_light_net/mobile_net/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : __init__.py.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/16 22:00 7 | # @Classes : 注:参考自官方代码 8 | 9 | 10 | if __name__ == '__main__': 11 | pass 12 | -------------------------------------------------------------------------------- /about_transformer/ViT/ViT_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : ViT_model.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/10 0:38 7 | # @Classes : 简易版ViT模型 8 | import torch 9 | import torch.nn as nn 10 | 11 | 12 | class PatchEmbedding(nn.Module): 13 | """ 14 | 切patch 15 | """ 16 | def __init__(self, in_channels, patch_size, embed_dim, patch_num, dropout=0.1): 17 | """ 18 | 19 | :param in_channels: 输入通道数 20 | :param patch_size:小方块大小,即每个小方块大小是 (patch_size x patch_size) 21 | :param embed_dim:embedding维度,也是卷积切分后的输出维度,等于patch_size*patch_size*in_channels 22 | :param patch_num:patch个数 23 | :param dropout:默认0.1 24 | """ 25 | super(PatchEmbedding, self).__init__() 26 | # 卷积切patch,并拉平 27 | self.get_patch = nn.Sequential( 28 | nn.Conv2d(in_channels=in_channels, out_channels=embed_dim, kernel_size=patch_size, stride=patch_size), 29 | nn.Flatten(2) 30 | ) 31 | 32 | # 加入CLS Token(随机初始化),为了能和patch的embedding拼在一起,第三个维度应该一致,第一维先默认使用1初始化,forward实例化时再扩充对齐 33 | self.cls_token = nn.Parameter(torch.randn(size=(1, 1, embed_dim)), requires_grad=True) 34 | # 加入位置编码,也是随机初始化,为了能和总的embedding加在一起,第二个维度应该等于patch_num+1,同样第三个维度等于embed_dim 35 | self.position_embedding = nn.Parameter(torch.randn(size=(1, patch_num+1, embed_dim)), requires_grad=True) 36 | self.dropout = nn.Dropout(p=dropout) 37 | 38 | def forward(self, x): 39 | # 切 40 | x = self.get_patch(x) 41 | # 交换后俩维 42 | x = x.permute(0, 2, 1) 43 | 44 | # 拼cls token 45 | cls_token = self.cls_token.expand(x.shape[0], -1, -1) # 第一维扩充对齐,因为第一维一般是是batch_size,运行前是不确定batch_size大小的 46 | x = torch.cat([x, cls_token], dim=1) 47 | # 加position_embedding 48 | x = x + self.position_embedding 49 | 50 | x = self.dropout(x) 51 | return x 52 | 53 | 54 | class ViT(nn.Module): 55 | """ 56 | ViT模型构建 57 | """ 58 | def __init__(self, in_channels, patch_size, embed_dim, patch_num, heads_num, activation, 59 | encoders_num, classes_num, dropout=0.1): 60 | """ 61 | 62 | :param in_channels: 63 | :param patch_size: 64 | :param embed_dim: 65 | :param patch_num: 66 | :param heads_num: 多头注意力中的头 67 | :param activation: 激活方式 68 | :param encoders_num: 69 | :param classes_num: 类别数 70 | :param dropout: 71 | """ 72 | super(ViT, self).__init__() 73 | self.patch_embedding = PatchEmbedding(in_channels, patch_size, embed_dim, patch_num) 74 | 75 | # 用torch封装好的定义Transformer中的Encoder layer 76 | encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=heads_num, dropout=dropout, activation=activation, 77 | batch_first=True, norm_first=True) 78 | # Encoder layer装入 79 | self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=encoders_num) 80 | 81 | # MLP分类头 82 | self.MLP = nn.Sequential( 83 | # 先做层归一化 84 | nn.LayerNorm(normalized_shape=embed_dim), 85 | nn.Linear(in_features=embed_dim, out_features=classes_num) 86 | ) 87 | 88 | def forward(self, x): 89 | # 切 90 | x = self.patch_embedding(x) 91 | # 过encoder 92 | x = self.encoder(x) 93 | # 过MLP,因为是分类任务,只取了编码后的 CLS token(即第一个位置,也即只取第二维中索引为0的所有数据)作为输入 94 | x = self.MLP(x[:, 0, :]) 95 | return x 96 | 97 | 98 | def test(): 99 | # 随机生成一组张量,可视为3张3通道照片,尺寸224x224 100 | x = torch.randn(3, 3, 224, 224) 101 | # 切成16x16个块,每个块14x14大小,转化维度后相当于切14x14=196个patch,8个头6个Encoder,类别假设是10 102 | vit_model = ViT(3, 16, 16*16*3, 14*14, 8, "gelu", 6, 10) 103 | pred = vit_model(x) 104 | print(x.shape) 105 | print(pred.shape) 106 | 107 | 108 | if __name__ == '__main__': 109 | test() 110 | -------------------------------------------------------------------------------- /about_transformer/ViT/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : __init__.py.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/9 3:57 7 | # @Classes : 8 | 9 | 10 | if __name__ == '__main__': 11 | pass 12 | -------------------------------------------------------------------------------- /about_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @Package Name: 5 | # @File Name : __init__.py.py 6 | # @author : ahua 7 | # @Version : 1.0 8 | # @Start Date : 2024/3/9 3:54 9 | # @Classes : 10 | 11 | 12 | if __name__ == '__main__': 13 | pass 14 | -------------------------------------------------------------------------------- /about_transformer/attention_is_all_you_need/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : __init__.py.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/23 23:10 7 | # @Classes : 8 | 9 | 10 | if __name__ == '__main__': 11 | pass 12 | -------------------------------------------------------------------------------- /about_transformer/attention_is_all_you_need/attention_module.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : attention_module.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/23 23:11 7 | # @Classes : 多头注意力 8 | import torch 9 | from torch import nn 10 | import math 11 | 12 | from utils_module import LayerNorm 13 | 14 | 15 | class ScaledDotProductAttention(nn.Module): 16 | """根据公式计算QkV""" 17 | def __init__(self, n_d): 18 | """ 19 | 20 | :param n_d: 每个头的dim,用于scaling 21 | """ 22 | super(ScaledDotProductAttention, self).__init__() 23 | 24 | self.n_d = n_d 25 | # 在最后一个维度上进行softmax 26 | self.softmax = nn.Softmax(dim=-1) 27 | 28 | def forward(self, Q, K, V, mask): 29 | # q和k关于(2,3)维度的转置相乘并scaling 30 | attn_score = Q @ K.transpose(2, 3) / math.sqrt(self.n_d) 31 | 32 | if mask is not None: 33 | attn_score = attn_score.masked_fill(mask == 0, float("-inf")) 34 | 35 | attn_score = self.softmax(attn_score) 36 | attn_score = attn_score @ V 37 | 38 | return attn_score 39 | 40 | 41 | class MultiHeadAttention(nn.Module): 42 | """多头注意力,包括残差连接和Norm""" 43 | def __init__(self, d_model, n_head, dropout=0.1, bias=True): 44 | """ 45 | 46 | :param d_model: 输入向量embedding维度 47 | :param n_head: 48 | :param bias: 49 | """ 50 | super(MultiHeadAttention, self).__init__() 51 | 52 | if d_model % n_head != 0: 53 | raise ValueError( 54 | "Embedding dim must be divisible by number of heads in {}. Got: embed_dim={} and num_heads={}".format( 55 | self.__class__.__name__, d_model, n_head 56 | ) 57 | ) 58 | 59 | self.n_head = n_head 60 | self.d_model = d_model 61 | self.n_d = d_model // n_head 62 | 63 | # 投影映射矩阵 64 | self.w_q = nn.Linear(in_features=d_model, out_features=d_model, bias=bias) 65 | self.w_k = nn.Linear(in_features=d_model, out_features=d_model, bias=bias) 66 | self.w_v = nn.Linear(in_features=d_model, out_features=d_model, bias=bias) 67 | 68 | self.get_attn = ScaledDotProductAttention(self.n_d) 69 | 70 | # 最后多头合并之后再做一次映射 71 | self.w_o = nn.Linear(d_model, d_model) 72 | 73 | self.dropout = nn.Dropout(p=dropout) 74 | 75 | self.layer_norm = LayerNorm(d_model) 76 | 77 | def forward(self, x_q, x_k, x_v, mask=None): 78 | residual = x_q 79 | batch, seq_len, dimension = x_q.shape 80 | # 映射得到QKV矩阵 81 | q, k, v = self.w_q(x_q), self.w_k(x_k), self.w_v(x_v) 82 | 83 | # 拆分为四维张量后将(0,1,2,3)reshape为(0,2,1,3) 84 | q = q.view(batch, seq_len, self.n_head, self.n_d).permute(0, 2, 1, 3) 85 | k = k.view(batch, seq_len, self.n_head, self.n_d).permute(0, 2, 1, 3) 86 | v = v.view(batch, seq_len, self.n_head, self.n_d).permute(0, 2, 1, 3) 87 | 88 | attn_score = self.get_attn(q, k, v, mask) 89 | 90 | # 重新排列维度,保证内存连续型后改变为三维张量 91 | attn_score = attn_score.permute(0, 2, 1, 3).contiguous().view(batch, seq_len, dimension) 92 | 93 | output = self.w_o(attn_score) 94 | output = self.dropout(output) 95 | 96 | # 残差连接和Norm 97 | output = self.layer_norm(output + residual) 98 | return output 99 | 100 | 101 | def test(): 102 | d_model = 1024 103 | n_head = 8 104 | 105 | x = torch.randn(32, 64, 1024) # Batch, Time, Dimension 106 | print(x.shape) 107 | 108 | att_model = MultiHeadAttention(d_model, n_head) 109 | out = att_model(x, x, x) 110 | print(out.shape) 111 | 112 | 113 | if __name__ == '__main__': 114 | test() 115 | -------------------------------------------------------------------------------- /about_transformer/attention_is_all_you_need/transformer_decoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : transformer_decoder.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/26 0:17 7 | # @Classes : 8 | import torch 9 | from torch import nn 10 | 11 | from attention_module import MultiHeadAttention 12 | from utils_module import PositionwiseFeedForwardNet 13 | from utils_module import PositionalEmbedding 14 | 15 | 16 | class DecoderLayer(nn.Module): 17 | def __init__(self, d_model, n_head, ffn_hidden, dropout=0.1): 18 | super(DecoderLayer, self).__init__() 19 | self.causal_attention = MultiHeadAttention(d_model, n_head, dropout) 20 | 21 | self.cross_attention = MultiHeadAttention(d_model, n_head, dropout) 22 | 23 | self.ffn = PositionwiseFeedForwardNet(d_model, ffn_hidden, dropout) 24 | 25 | def forward(self, dec, enc, causal_mask, padding_mask): 26 | """ 27 | 28 | :param dec: 来自decoder的输入 29 | :param enc: 来自encoder的输出 30 | :param causal_mask: 下三角掩码,防止看见未来信息 31 | :param padding_mask: 将输入序列中的填充部分标记为不可关注,防止模型在训练过程中对这些无意义的padding部分进行不必要的关注 32 | :return: 33 | """ 34 | x = self.causal_attention(dec, dec, dec, causal_mask) 35 | 36 | x = self.cross_attention(x, enc, enc, padding_mask) 37 | 38 | x = self.ffn(x) 39 | 40 | return x 41 | 42 | 43 | class Decoder(nn.Module): 44 | def __init__(self, dec_vocabulary_size, d_model=512, n_head=8, ffn_hidden=2048, max_len=5000, 45 | n_layer=6, dropout=0.1): 46 | super(Decoder, self).__init__() 47 | 48 | self.token_embedding = nn.Embedding(dec_vocabulary_size, d_model, padding_idx=1) 49 | self.embedding = PositionalEmbedding(d_model, max_len, dropout) 50 | 51 | self.layers = nn.ModuleList( 52 | [DecoderLayer(d_model, n_head, ffn_hidden, dropout) for _ in range(n_layer)] 53 | ) 54 | 55 | def forward(self, dec, enc, causal_mask, padding_mask): 56 | dec = self.token_embedding(dec) 57 | dec = self.embedding(dec) 58 | 59 | for layer in self.layers: 60 | dec = layer(dec, enc, causal_mask, padding_mask) 61 | 62 | return dec 63 | 64 | 65 | if __name__ == '__main__': 66 | pass 67 | -------------------------------------------------------------------------------- /about_transformer/attention_is_all_you_need/transformer_encoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : transformer_encoder.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/23 23:14 7 | # @Classes : Encoder 8 | import torch 9 | from torch import nn 10 | 11 | from attention_module import MultiHeadAttention 12 | from utils_module import PositionwiseFeedForwardNet 13 | from utils_module import PositionalEmbedding 14 | 15 | 16 | class EncoderLayer(nn.Module): 17 | def __init__(self, d_model, n_head, ffn_hidden, dropout=0.1): 18 | super(EncoderLayer, self).__init__() 19 | self.attention = MultiHeadAttention(d_model, n_head, dropout) 20 | 21 | self.ffn = PositionwiseFeedForwardNet(d_model, ffn_hidden, dropout) 22 | 23 | def forward(self, x, mask=None): 24 | x = self.attention(x, x, x, mask) 25 | x = self.ffn(x) 26 | 27 | return x 28 | 29 | 30 | class Encoder(nn.Module): 31 | def __init__(self, enc_vocabulary_size, d_model=512, n_head=8, ffn_hidden=2048, max_len=5000, 32 | n_layer=6, dropout=0.1): 33 | super(Encoder, self).__init__() 34 | 35 | self.token_embedding = nn.Embedding(enc_vocabulary_size, d_model, padding_idx=1) 36 | self.embedding = PositionalEmbedding(d_model, max_len, dropout) 37 | 38 | self.layers = nn.ModuleList( 39 | [EncoderLayer(d_model, n_head, ffn_hidden, dropout) for _ in range(n_layer)] 40 | ) 41 | 42 | def forward(self, x, padding_mask=None): 43 | x = self.token_embedding(x) 44 | x = self.embedding(x) 45 | 46 | for layer in self.layers: 47 | x = layer(x, padding_mask) 48 | return x 49 | 50 | 51 | if __name__ == '__main__': 52 | pass 53 | -------------------------------------------------------------------------------- /about_transformer/attention_is_all_you_need/transformer_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : transformer_model.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/23 23:11 7 | # @Classes : 构建Transformer 8 | import torch 9 | from torch import nn 10 | 11 | from transformer_encoder import Encoder 12 | from transformer_decoder import Decoder 13 | 14 | 15 | class Transformer(nn.Module): 16 | def __init__(self, src_pad_idx, tgt_pad_idx, enc_vocabulary_size, dec_vocabulary_size, d_model=512, 17 | n_head=8, ffn_hidden=2048, max_len=5000, n_layers=6, dropout=0.1): 18 | """ 19 | 20 | :param src_pad_idx: source的pad标识符 21 | :param tgt_pad_idx: target的pad的标识符 22 | :param enc_vocabulary_size: source的词汇表大小 23 | :param dec_vocabulary_size: target的词汇表大小 24 | :param max_len: 25 | :param d_model: 26 | :param n_head: 27 | :param ffn_hidden: 28 | :param n_layers: 29 | :param dropout: 30 | """ 31 | super(Transformer, self).__init__() 32 | 33 | self.src_pad_idx = src_pad_idx 34 | self.tgt_pad_idx = tgt_pad_idx 35 | 36 | # Encoder层 37 | self.encoder = Encoder(enc_vocabulary_size, d_model, n_head, ffn_hidden, max_len, 38 | n_layers, dropout) 39 | # Decoder层 40 | self.decoder = Decoder(dec_vocabulary_size, d_model, n_head, ffn_hidden, max_len, 41 | n_layers, dropout) 42 | # 输出层,做一个线性映射 43 | self.fc = nn.Linear(d_model, dec_vocabulary_size) 44 | 45 | def _make_casual_mask(self, q, k): 46 | # 获取第二维的seq_len, 因为是QK相乘再做mask,所以mask大小应符合QK 47 | len_q, len_k = q.size(1), k.size(1) 48 | # 生成三角mask矩阵 49 | mask = torch.tril(torch.ones(len_q, len_k)).type(torch.BoolTensor) 50 | return mask 51 | 52 | def _make_padding_mask(self, q, k, pad_idx_q, pad_idx_k): 53 | len_q, len_k = q.size(1), k.size(1) 54 | 55 | # mask矩阵大小应为(Batch, seq_len, len_q, len_k) 56 | # 不等于pad_idx时设置为True,并增加俩个维度seq_len和len_k 57 | q = q.ne(pad_idx_q).unsqueeze(1).unsqueeze(3) 58 | # 在len_k维上做重复补全 59 | q = q.repeat(1, 1, 1, len_k) 60 | 61 | k = k.ne(pad_idx_k).unsqueeze(1).unsqueeze(2) 62 | k = k.repeat(1, 1, len_q, 1) 63 | 64 | mask = q & k 65 | return mask 66 | 67 | def forward(self, src, tgt): 68 | # Encoder的padding_mask,此时QK都来自source 69 | enc_padding_mask = self._make_padding_mask(src, src, self.src_pad_idx, self.src_pad_idx) 70 | # Decoder的因果mask,不仅要考虑不给看未来还要考虑padding 71 | dec_casual_mask = self._make_padding_mask(tgt, tgt, self.tgt_pad_idx, self.tgt_pad_idx) * \ 72 | self._make_casual_mask(tgt, tgt) 73 | # 交叉注意力的padding_mask 74 | cross_padding_mask = self._make_padding_mask(tgt, src, self.tgt_pad_idx, self.src_pad_idx) 75 | 76 | enc = self.encoder(src, enc_padding_mask) 77 | dec = self.decoder(tgt, enc, dec_casual_mask, cross_padding_mask) 78 | output = self.fc(dec) 79 | return output 80 | 81 | 82 | if __name__ == '__main__': 83 | pass 84 | -------------------------------------------------------------------------------- /about_transformer/attention_is_all_you_need/utils_module.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : utils_module.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/23 23:13 7 | # @Classes : LayerNorm和前馈网络以及positional embedding 8 | import torch 9 | from torch import nn 10 | 11 | 12 | class LayerNorm(nn.Module): 13 | """也可以直接用nn.LayerNorm""" 14 | def __init__(self, d_model, eps=1e-9): 15 | super(LayerNorm, self).__init__() 16 | # 俩个参数,权重和偏置,防止输入激活函数的线性表示部分导致非线性效果不佳 17 | self.weight = nn.Parameter(torch.ones(d_model)) 18 | self.beta = nn.Parameter(torch.zeros(d_model)) 19 | # 防止分母0 20 | self.eps = eps 21 | 22 | def forward(self, x): 23 | # LayerNorm全都是对最后一维进行归一化 24 | mean = x.mean(-1, keepdim=True) 25 | var = x.var(-1, unbiased=False, keepdim=True) 26 | out = (x - mean) / torch.sqrt(var + self.eps) 27 | return self.weight * out + self.beta 28 | 29 | 30 | class PositionwiseFeedForwardNet(nn.Module): 31 | """前馈网络,包括后续残差连接与Norm""" 32 | def __init__(self, d_model, hidden, dropout=0.1): 33 | super(PositionwiseFeedForwardNet, self).__init__() 34 | self.fc = nn.Sequential( 35 | nn.Linear(d_model, hidden), 36 | nn.ReLU(), 37 | nn.Dropout(p=dropout), 38 | nn.Linear(hidden, d_model), 39 | nn.Dropout(p=dropout) 40 | ) 41 | self.layer_norm = LayerNorm(d_model) 42 | 43 | def forward(self, x): 44 | residual = x 45 | x = self.fc(x) 46 | output = self.layer_norm(x+residual) 47 | return output 48 | 49 | 50 | class PositionalEmbedding(nn.Module): 51 | """位置编码,输入token embedding返回加上位置编码后的总的embedding""" 52 | def __init__(self, d_model, max_len=5000, dropout=0.1): 53 | super(PositionalEmbedding, self).__init__() 54 | # 初始化编码 55 | self.pe = torch.zeros(max_len, d_model) 56 | # 原始论文中位置编码是直接算的,不用训练 57 | self.pe.requires_grad_(False) 58 | 59 | # 照着公式敲就行了 60 | # 初始化pos 61 | pos = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 62 | # 2i 63 | _2i = torch.arange(0, d_model, 2) 64 | 65 | # 偶数计算 66 | self.pe[:, 0::2] = torch.sin(pos / (10000 ** (_2i / d_model))) 67 | # 奇数计算 68 | self.pe[:, 1::2] = torch.cos(pos / (10000 ** (_2i / d_model))) 69 | 70 | self.dropout = nn.Dropout(p=dropout) 71 | 72 | def forward(self, x): 73 | """ 74 | 75 | :param x: 输入的token embedding 76 | :return: 77 | """ 78 | seq_len = x.shape[1] 79 | x = x + self.pe[:seq_len, :] 80 | return self.dropout(x) 81 | 82 | 83 | if __name__ == '__main__': 84 | pass 85 | -------------------------------------------------------------------------------- /about_transformer/efficient_vit/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : __init__.py.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/9 4:00 7 | # @Classes : 8 | 9 | 10 | if __name__ == '__main__': 11 | pass 12 | -------------------------------------------------------------------------------- /about_transformer/mobile_vit/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : __init__.py.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/16 21:56 7 | # @Classes : MobileViT实现,参考自苹果给出的官方代码 8 | 9 | 10 | if __name__ == '__main__': 11 | pass 12 | -------------------------------------------------------------------------------- /about_transformer/mobile_vit/model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : model.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/16 21:58 7 | # @Classes : 8 | """ 9 | original code from apple: 10 | https://github.com/apple/ml-cvnets/blob/main/cvnets/models/classification/mobilevit.py 11 | """ 12 | 13 | from typing import Optional, Tuple, Union, Dict 14 | import math 15 | import torch 16 | import torch.nn as nn 17 | from torch import Tensor 18 | from torch.nn import functional as F 19 | import torchvision.models as models 20 | 21 | from transformer_encoder import TransformerEncoder 22 | from model_config import get_config 23 | 24 | 25 | def make_divisible( 26 | v: Union[float, int], 27 | divisor: Optional[int] = 8, 28 | min_value: Optional[Union[float, int]] = None, 29 | ) -> Union[float, int]: 30 | """ 31 | This function is taken from the original tf repo. 32 | It ensures that all layers have a channel number that is divisible by 8 33 | It can be seen here: 34 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 35 | :param v: 36 | :param divisor: 37 | :param min_value: 38 | :return: 39 | """ 40 | if min_value is None: 41 | min_value = divisor 42 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 43 | # Make sure that round down does not go down by more than 10%. 44 | if new_v < 0.9 * v: 45 | new_v += divisor 46 | return new_v 47 | 48 | 49 | class ConvLayer(nn.Module): 50 | """ 51 | Applies a 2D convolution over an input 52 | 53 | Args: 54 | in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H_{in}, W_{in})` 55 | out_channels (int): :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H_{out}, W_{out})` 56 | kernel_size (Union[int, Tuple[int, int]]): Kernel size for convolution. 57 | stride (Union[int, Tuple[int, int]]): Stride for convolution. Default: 1 58 | groups (Optional[int]): Number of groups in convolution. Default: 1 59 | bias (Optional[bool]): Use bias. Default: ``False`` 60 | use_norm (Optional[bool]): Use normalization layer after convolution. Default: ``True`` 61 | use_act (Optional[bool]): Use activation layer after convolution (or convolution and normalization). 62 | Default: ``True`` 63 | 64 | Shape: 65 | - Input: :math:`(N, C_{in}, H_{in}, W_{in})` 66 | - Output: :math:`(N, C_{out}, H_{out}, W_{out})` 67 | 68 | .. note:: 69 | For depth-wise convolution, `groups=C_{in}=C_{out}`. 70 | """ 71 | 72 | def __init__( 73 | self, 74 | in_channels: int, 75 | out_channels: int, 76 | kernel_size: Union[int, Tuple[int, int]], 77 | stride: Optional[Union[int, Tuple[int, int]]] = 1, 78 | groups: Optional[int] = 1, 79 | bias: Optional[bool] = False, 80 | use_norm: Optional[bool] = True, 81 | use_act: Optional[bool] = True, 82 | ) -> None: 83 | super().__init__() 84 | 85 | if isinstance(kernel_size, int): 86 | kernel_size = (kernel_size, kernel_size) 87 | 88 | if isinstance(stride, int): 89 | stride = (stride, stride) 90 | 91 | assert isinstance(kernel_size, Tuple) 92 | assert isinstance(stride, Tuple) 93 | 94 | padding = ( 95 | int((kernel_size[0] - 1) / 2), 96 | int((kernel_size[1] - 1) / 2), 97 | ) 98 | 99 | block = nn.Sequential() 100 | 101 | conv_layer = nn.Conv2d( 102 | in_channels=in_channels, 103 | out_channels=out_channels, 104 | kernel_size=kernel_size, 105 | stride=stride, 106 | groups=groups, 107 | padding=padding, 108 | bias=bias 109 | ) 110 | 111 | block.add_module(name="conv", module=conv_layer) 112 | 113 | if use_norm: 114 | norm_layer = nn.BatchNorm2d(num_features=out_channels, momentum=0.1) 115 | block.add_module(name="norm", module=norm_layer) 116 | 117 | if use_act: 118 | act_layer = nn.SiLU() 119 | block.add_module(name="act", module=act_layer) 120 | 121 | self.block = block 122 | 123 | def forward(self, x: Tensor) -> Tensor: 124 | return self.block(x) 125 | 126 | 127 | class InvertedResidual(nn.Module): 128 | """ 129 | This class implements the inverted residual block, as described in `MobileNetv2 `_ paper 130 | 131 | Args: 132 | in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H_{in}, W_{in})` 133 | out_channels (int): :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H_{out}, W_{out)` 134 | stride (int): Use convolutions with a stride. Default: 1 135 | expand_ratio (Union[int, float]): Expand the input channels by this factor in depth-wise conv 136 | skip_connection (Optional[bool]): Use skip-connection. Default: True 137 | 138 | Shape: 139 | - Input: :math:`(N, C_{in}, H_{in}, W_{in})` 140 | - Output: :math:`(N, C_{out}, H_{out}, W_{out})` 141 | 142 | .. note:: 143 | If `in_channels =! out_channels` and `stride > 1`, we set `skip_connection=False` 144 | 145 | """ 146 | 147 | def __init__( 148 | self, 149 | in_channels: int, 150 | out_channels: int, 151 | stride: int, 152 | expand_ratio: Union[int, float], 153 | skip_connection: Optional[bool] = True, 154 | ) -> None: 155 | assert stride in [1, 2] 156 | hidden_dim = make_divisible(int(round(in_channels * expand_ratio)), 8) 157 | 158 | super().__init__() 159 | 160 | block = nn.Sequential() 161 | if expand_ratio != 1: 162 | block.add_module( 163 | name="exp_1x1", 164 | module=ConvLayer( 165 | in_channels=in_channels, 166 | out_channels=hidden_dim, 167 | kernel_size=1 168 | ), 169 | ) 170 | 171 | block.add_module( 172 | name="conv_3x3", 173 | module=ConvLayer( 174 | in_channels=hidden_dim, 175 | out_channels=hidden_dim, 176 | stride=stride, 177 | kernel_size=3, 178 | groups=hidden_dim 179 | ), 180 | ) 181 | 182 | block.add_module( 183 | name="red_1x1", 184 | module=ConvLayer( 185 | in_channels=hidden_dim, 186 | out_channels=out_channels, 187 | kernel_size=1, 188 | use_act=False, 189 | use_norm=True, 190 | ), 191 | ) 192 | 193 | self.block = block 194 | self.in_channels = in_channels 195 | self.out_channels = out_channels 196 | self.exp = expand_ratio 197 | self.stride = stride 198 | self.use_res_connect = ( 199 | self.stride == 1 and in_channels == out_channels and skip_connection 200 | ) 201 | 202 | def forward(self, x: Tensor, *args, **kwargs) -> Tensor: 203 | if self.use_res_connect: 204 | return x + self.block(x) 205 | else: 206 | return self.block(x) 207 | 208 | 209 | class MobileViTBlock(nn.Module): 210 | """ 211 | This class defines the `MobileViT block `_ 212 | 213 | Args: 214 | opts: command line arguments 215 | in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H, W)` 216 | transformer_dim (int): Input dimension to the transformer unit 217 | ffn_dim (int): Dimension of the FFN block 218 | n_transformer_blocks (int): Number of transformer blocks. Default: 2 219 | head_dim (int): Head dimension in the multi-head attention. Default: 32 220 | attn_dropout (float): Dropout in multi-head attention. Default: 0.0 221 | dropout (float): Dropout rate. Default: 0.0 222 | ffn_dropout (float): Dropout between FFN layers in transformer. Default: 0.0 223 | patch_h (int): Patch height for unfolding operation. Default: 8 224 | patch_w (int): Patch width for unfolding operation. Default: 8 225 | transformer_norm_layer (Optional[str]): Normalization layer in the transformer block. Default: layer_norm 226 | conv_ksize (int): Kernel size to learn local representations in MobileViT block. Default: 3 227 | no_fusion (Optional[bool]): Do not combine the input and output feature maps. Default: False 228 | """ 229 | 230 | def __init__( 231 | self, 232 | in_channels: int, 233 | transformer_dim: int, 234 | ffn_dim: int, 235 | n_transformer_blocks: int = 2, 236 | head_dim: int = 32, 237 | attn_dropout: float = 0.0, 238 | dropout: float = 0.0, 239 | ffn_dropout: float = 0.0, 240 | patch_h: int = 8, 241 | patch_w: int = 8, 242 | conv_ksize: Optional[int] = 3, 243 | *args, 244 | **kwargs 245 | ) -> None: 246 | super().__init__() 247 | 248 | conv_3x3_in = ConvLayer( 249 | in_channels=in_channels, 250 | out_channels=in_channels, 251 | kernel_size=conv_ksize, 252 | stride=1 253 | ) 254 | conv_1x1_in = ConvLayer( 255 | in_channels=in_channels, 256 | out_channels=transformer_dim, 257 | kernel_size=1, 258 | stride=1, 259 | use_norm=False, 260 | use_act=False 261 | ) 262 | 263 | conv_1x1_out = ConvLayer( 264 | in_channels=transformer_dim, 265 | out_channels=in_channels, 266 | kernel_size=1, 267 | stride=1 268 | ) 269 | conv_3x3_out = ConvLayer( 270 | in_channels=2 * in_channels, 271 | out_channels=in_channels, 272 | kernel_size=conv_ksize, 273 | stride=1 274 | ) 275 | 276 | self.local_rep = nn.Sequential() 277 | self.local_rep.add_module(name="conv_3x3", module=conv_3x3_in) 278 | self.local_rep.add_module(name="conv_1x1", module=conv_1x1_in) 279 | 280 | assert transformer_dim % head_dim == 0 281 | num_heads = transformer_dim // head_dim 282 | 283 | global_rep = [ 284 | TransformerEncoder( 285 | embed_dim=transformer_dim, 286 | ffn_latent_dim=ffn_dim, 287 | num_heads=num_heads, 288 | attn_dropout=attn_dropout, 289 | dropout=dropout, 290 | ffn_dropout=ffn_dropout 291 | ) 292 | for _ in range(n_transformer_blocks) 293 | ] 294 | global_rep.append(nn.LayerNorm(transformer_dim)) 295 | self.global_rep = nn.Sequential(*global_rep) 296 | 297 | self.conv_proj = conv_1x1_out 298 | self.fusion = conv_3x3_out 299 | 300 | self.patch_h = patch_h 301 | self.patch_w = patch_w 302 | self.patch_area = self.patch_w * self.patch_h 303 | 304 | self.cnn_in_dim = in_channels 305 | self.cnn_out_dim = transformer_dim 306 | self.n_heads = num_heads 307 | self.ffn_dim = ffn_dim 308 | self.dropout = dropout 309 | self.attn_dropout = attn_dropout 310 | self.ffn_dropout = ffn_dropout 311 | self.n_blocks = n_transformer_blocks 312 | self.conv_ksize = conv_ksize 313 | 314 | def unfolding(self, x: Tensor) -> Tuple[Tensor, Dict]: 315 | patch_w, patch_h = self.patch_w, self.patch_h 316 | patch_area = patch_w * patch_h 317 | batch_size, in_channels, orig_h, orig_w = x.shape 318 | 319 | new_h = int(math.ceil(orig_h / self.patch_h) * self.patch_h) 320 | new_w = int(math.ceil(orig_w / self.patch_w) * self.patch_w) 321 | 322 | interpolate = False 323 | if new_w != orig_w or new_h != orig_h: 324 | # Note: Padding can be done, but then it needs to be handled in attention function. 325 | x = F.interpolate(x, size=(new_h, new_w), mode="bilinear", align_corners=False) 326 | interpolate = True 327 | 328 | # number of patches along width and height 329 | num_patch_w = new_w // patch_w # n_w 330 | num_patch_h = new_h // patch_h # n_h 331 | num_patches = num_patch_h * num_patch_w # N 332 | 333 | # [B, C, H, W] -> [B * C * n_h, p_h, n_w, p_w] 334 | x = x.reshape(batch_size * in_channels * num_patch_h, patch_h, num_patch_w, patch_w) 335 | # [B * C * n_h, p_h, n_w, p_w] -> [B * C * n_h, n_w, p_h, p_w] 336 | x = x.transpose(1, 2) 337 | # [B * C * n_h, n_w, p_h, p_w] -> [B, C, N, P] where P = p_h * p_w and N = n_h * n_w 338 | x = x.reshape(batch_size, in_channels, num_patches, patch_area) 339 | # [B, C, N, P] -> [B, P, N, C] 340 | x = x.transpose(1, 3) 341 | # [B, P, N, C] -> [BP, N, C] 342 | x = x.reshape(batch_size * patch_area, num_patches, -1) 343 | 344 | info_dict = { 345 | "orig_size": (orig_h, orig_w), 346 | "batch_size": batch_size, 347 | "interpolate": interpolate, 348 | "total_patches": num_patches, 349 | "num_patches_w": num_patch_w, 350 | "num_patches_h": num_patch_h, 351 | } 352 | 353 | return x, info_dict 354 | 355 | def folding(self, x: Tensor, info_dict: Dict) -> Tensor: 356 | n_dim = x.dim() 357 | assert n_dim == 3, "Tensor should be of shape BPxNxC. Got: {}".format( 358 | x.shape 359 | ) 360 | # [BP, N, C] --> [B, P, N, C] 361 | x = x.contiguous().view( 362 | info_dict["batch_size"], self.patch_area, info_dict["total_patches"], -1 363 | ) 364 | 365 | batch_size, pixels, num_patches, channels = x.size() 366 | num_patch_h = info_dict["num_patches_h"] 367 | num_patch_w = info_dict["num_patches_w"] 368 | 369 | # [B, P, N, C] -> [B, C, N, P] 370 | x = x.transpose(1, 3) 371 | # [B, C, N, P] -> [B*C*n_h, n_w, p_h, p_w] 372 | x = x.reshape(batch_size * channels * num_patch_h, num_patch_w, self.patch_h, self.patch_w) 373 | # [B*C*n_h, n_w, p_h, p_w] -> [B*C*n_h, p_h, n_w, p_w] 374 | x = x.transpose(1, 2) 375 | # [B*C*n_h, p_h, n_w, p_w] -> [B, C, H, W] 376 | x = x.reshape(batch_size, channels, num_patch_h * self.patch_h, num_patch_w * self.patch_w) 377 | if info_dict["interpolate"]: 378 | x = F.interpolate( 379 | x, 380 | size=info_dict["orig_size"], 381 | mode="bilinear", 382 | align_corners=False, 383 | ) 384 | return x 385 | 386 | def forward(self, x: Tensor) -> Tensor: 387 | res = x 388 | 389 | fm = self.local_rep(x) 390 | 391 | # convert feature map to patches 392 | patches, info_dict = self.unfolding(fm) 393 | 394 | # learn global representations 395 | for transformer_layer in self.global_rep: 396 | patches = transformer_layer(patches) 397 | 398 | # [B x Patch x Patches x C] -> [B x C x Patches x Patch] 399 | fm = self.folding(x=patches, info_dict=info_dict) 400 | 401 | fm = self.conv_proj(fm) 402 | 403 | fm = self.fusion(torch.cat((res, fm), dim=1)) 404 | return fm 405 | 406 | 407 | class MobileViT(nn.Module): 408 | """ 409 | This class implements the `MobileViT architecture `_ 410 | """ 411 | 412 | def __init__(self, model_cfg: Dict, num_classes: int = 1000): 413 | super().__init__() 414 | 415 | image_channels = 3 416 | out_channels = 16 417 | 418 | self.conv_1 = ConvLayer( 419 | in_channels=image_channels, 420 | out_channels=out_channels, 421 | kernel_size=3, 422 | stride=2 423 | ) 424 | 425 | self.layer_1, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer1"]) 426 | self.layer_2, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer2"]) 427 | self.layer_3, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer3"]) 428 | self.layer_4, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer4"]) 429 | self.layer_5, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg["layer5"]) 430 | 431 | exp_channels = min(model_cfg["last_layer_exp_factor"] * out_channels, 960) 432 | self.conv_1x1_exp = ConvLayer( 433 | in_channels=out_channels, 434 | out_channels=exp_channels, 435 | kernel_size=1 436 | ) 437 | 438 | self.classifier = nn.Sequential() 439 | self.classifier.add_module(name="global_pool", module=nn.AdaptiveAvgPool2d(1)) 440 | self.classifier.add_module(name="flatten", module=nn.Flatten()) 441 | if 0.0 < model_cfg["cls_dropout"] < 1.0: 442 | self.classifier.add_module(name="dropout", module=nn.Dropout(p=model_cfg["cls_dropout"])) 443 | self.classifier.add_module(name="fc", module=nn.Linear(in_features=exp_channels, out_features=num_classes)) 444 | 445 | # weight init 446 | self.apply(self.init_parameters) 447 | 448 | def _make_layer(self, input_channel, cfg: Dict) -> Tuple[nn.Sequential, int]: 449 | block_type = cfg.get("block_type", "mobilevit") 450 | if block_type.lower() == "mobilevit": 451 | return self._make_mit_layer(input_channel=input_channel, cfg=cfg) 452 | else: 453 | return self._make_mobilenet_layer(input_channel=input_channel, cfg=cfg) 454 | 455 | @staticmethod 456 | def _make_mobilenet_layer(input_channel: int, cfg: Dict) -> Tuple[nn.Sequential, int]: 457 | output_channels = cfg.get("out_channels") 458 | num_blocks = cfg.get("num_blocks", 2) 459 | expand_ratio = cfg.get("expand_ratio", 4) 460 | block = [] 461 | 462 | for i in range(num_blocks): 463 | stride = cfg.get("stride", 1) if i == 0 else 1 464 | 465 | layer = InvertedResidual( 466 | in_channels=input_channel, 467 | out_channels=output_channels, 468 | stride=stride, 469 | expand_ratio=expand_ratio 470 | ) 471 | block.append(layer) 472 | input_channel = output_channels 473 | 474 | return nn.Sequential(*block), input_channel 475 | 476 | @staticmethod 477 | def _make_mit_layer(input_channel: int, cfg: Dict) -> [nn.Sequential, int]: 478 | stride = cfg.get("stride", 1) 479 | block = [] 480 | 481 | if stride == 2: 482 | layer = InvertedResidual( 483 | in_channels=input_channel, 484 | out_channels=cfg.get("out_channels"), 485 | stride=stride, 486 | expand_ratio=cfg.get("mv_expand_ratio", 4) 487 | ) 488 | 489 | block.append(layer) 490 | input_channel = cfg.get("out_channels") 491 | 492 | transformer_dim = cfg["transformer_channels"] 493 | ffn_dim = cfg.get("ffn_dim") 494 | num_heads = cfg.get("num_heads", 4) 495 | head_dim = transformer_dim // num_heads 496 | 497 | if transformer_dim % head_dim != 0: 498 | raise ValueError("Transformer input dimension should be divisible by head dimension. " 499 | "Got {} and {}.".format(transformer_dim, head_dim)) 500 | 501 | block.append(MobileViTBlock( 502 | in_channels=input_channel, 503 | transformer_dim=transformer_dim, 504 | ffn_dim=ffn_dim, 505 | n_transformer_blocks=cfg.get("transformer_blocks", 1), 506 | patch_h=cfg.get("patch_h", 2), 507 | patch_w=cfg.get("patch_w", 2), 508 | dropout=cfg.get("dropout", 0.1), 509 | ffn_dropout=cfg.get("ffn_dropout", 0.0), 510 | attn_dropout=cfg.get("attn_dropout", 0.1), 511 | head_dim=head_dim, 512 | conv_ksize=3 513 | )) 514 | 515 | return nn.Sequential(*block), input_channel 516 | 517 | @staticmethod 518 | def init_parameters(m): 519 | if isinstance(m, nn.Conv2d): 520 | if m.weight is not None: 521 | nn.init.kaiming_normal_(m.weight, mode="fan_out") 522 | if m.bias is not None: 523 | nn.init.zeros_(m.bias) 524 | elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)): 525 | if m.weight is not None: 526 | nn.init.ones_(m.weight) 527 | if m.bias is not None: 528 | nn.init.zeros_(m.bias) 529 | elif isinstance(m, (nn.Linear,)): 530 | if m.weight is not None: 531 | nn.init.trunc_normal_(m.weight, mean=0.0, std=0.02) 532 | if m.bias is not None: 533 | nn.init.zeros_(m.bias) 534 | else: 535 | pass 536 | 537 | def forward(self, x: Tensor) -> Tensor: 538 | x = self.conv_1(x) 539 | x = self.layer_1(x) 540 | x = self.layer_2(x) 541 | 542 | x = self.layer_3(x) 543 | x = self.layer_4(x) 544 | x = self.layer_5(x) 545 | x = self.conv_1x1_exp(x) 546 | x = self.classifier(x) 547 | return x 548 | 549 | 550 | def mobile_vit_xx_small(num_classes: int = 1000): 551 | # pretrain weight link 552 | # https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_xxs.pt 553 | config = get_config("xx_small") 554 | m = MobileViT(config, num_classes=num_classes) 555 | return m 556 | 557 | 558 | def mobile_vit_x_small(num_classes: int = 1000): 559 | # pretrain weight link 560 | # https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_xs.pt 561 | config = get_config("x_small") 562 | m = MobileViT(config, num_classes=num_classes) 563 | return m 564 | 565 | 566 | def mobile_vit_small(num_classes: int = 1000): 567 | # pretrain weight link 568 | # https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_s.pt 569 | config = get_config("small") 570 | m = MobileViT(config, num_classes=num_classes) 571 | return m 572 | 573 | if __name__ == '__main__': 574 | pass 575 | -------------------------------------------------------------------------------- /about_transformer/mobile_vit/model_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : model_config.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/16 21:58 7 | # @Classes : 8 | 9 | # 针对不同大小的MobileViT模型,返回不同参数 10 | def get_config(mode: str = "xxs") -> dict: 11 | if mode == "xx_small": 12 | mv2_exp_mult = 2 13 | config = { 14 | "layer1": { 15 | "out_channels": 16, 16 | "expand_ratio": mv2_exp_mult, 17 | "num_blocks": 1, 18 | "stride": 1, 19 | "block_type": "mv2", 20 | }, 21 | "layer2": { 22 | "out_channels": 24, 23 | "expand_ratio": mv2_exp_mult, 24 | "num_blocks": 3, 25 | "stride": 2, 26 | "block_type": "mv2", 27 | }, 28 | "layer3": { # 28x28 29 | "out_channels": 48, 30 | "transformer_channels": 64, 31 | "ffn_dim": 128, 32 | "transformer_blocks": 2, 33 | "patch_h": 2, # 8, 34 | "patch_w": 2, # 8, 35 | "stride": 2, 36 | "mv_expand_ratio": mv2_exp_mult, 37 | "num_heads": 4, 38 | "block_type": "mobilevit", 39 | }, 40 | "layer4": { # 14x14 41 | "out_channels": 64, 42 | "transformer_channels": 80, 43 | "ffn_dim": 160, 44 | "transformer_blocks": 4, 45 | "patch_h": 2, # 4, 46 | "patch_w": 2, # 4, 47 | "stride": 2, 48 | "mv_expand_ratio": mv2_exp_mult, 49 | "num_heads": 4, 50 | "block_type": "mobilevit", 51 | }, 52 | "layer5": { # 7x7 53 | "out_channels": 80, 54 | "transformer_channels": 96, 55 | "ffn_dim": 192, 56 | "transformer_blocks": 3, 57 | "patch_h": 2, 58 | "patch_w": 2, 59 | "stride": 2, 60 | "mv_expand_ratio": mv2_exp_mult, 61 | "num_heads": 4, 62 | "block_type": "mobilevit", 63 | }, 64 | "last_layer_exp_factor": 4, 65 | "cls_dropout": 0.1 66 | } 67 | elif mode == "x_small": 68 | mv2_exp_mult = 4 69 | config = { 70 | "layer1": { 71 | "out_channels": 32, 72 | "expand_ratio": mv2_exp_mult, 73 | "num_blocks": 1, 74 | "stride": 1, 75 | "block_type": "mv2", 76 | }, 77 | "layer2": { 78 | "out_channels": 48, 79 | "expand_ratio": mv2_exp_mult, 80 | "num_blocks": 3, 81 | "stride": 2, 82 | "block_type": "mv2", 83 | }, 84 | "layer3": { # 28x28 85 | "out_channels": 64, 86 | "transformer_channels": 96, 87 | "ffn_dim": 192, 88 | "transformer_blocks": 2, 89 | "patch_h": 2, 90 | "patch_w": 2, 91 | "stride": 2, 92 | "mv_expand_ratio": mv2_exp_mult, 93 | "num_heads": 4, 94 | "block_type": "mobilevit", 95 | }, 96 | "layer4": { # 14x14 97 | "out_channels": 80, 98 | "transformer_channels": 120, 99 | "ffn_dim": 240, 100 | "transformer_blocks": 4, 101 | "patch_h": 2, 102 | "patch_w": 2, 103 | "stride": 2, 104 | "mv_expand_ratio": mv2_exp_mult, 105 | "num_heads": 4, 106 | "block_type": "mobilevit", 107 | }, 108 | "layer5": { # 7x7 109 | "out_channels": 96, 110 | "transformer_channels": 144, 111 | "ffn_dim": 288, 112 | "transformer_blocks": 3, 113 | "patch_h": 2, 114 | "patch_w": 2, 115 | "stride": 2, 116 | "mv_expand_ratio": mv2_exp_mult, 117 | "num_heads": 4, 118 | "block_type": "mobilevit", 119 | }, 120 | "last_layer_exp_factor": 4, 121 | "cls_dropout": 0.1 122 | } 123 | elif mode == "small": 124 | mv2_exp_mult = 4 125 | config = { 126 | "layer1": { 127 | "out_channels": 32, 128 | "expand_ratio": mv2_exp_mult, 129 | "num_blocks": 1, 130 | "stride": 1, 131 | "block_type": "mv2", 132 | }, 133 | "layer2": { 134 | "out_channels": 64, 135 | "expand_ratio": mv2_exp_mult, 136 | "num_blocks": 3, 137 | "stride": 2, 138 | "block_type": "mv2", 139 | }, 140 | "layer3": { # 28x28 141 | "out_channels": 96, 142 | "transformer_channels": 144, 143 | "ffn_dim": 288, 144 | "transformer_blocks": 2, 145 | "patch_h": 2, 146 | "patch_w": 2, 147 | "stride": 2, 148 | "mv_expand_ratio": mv2_exp_mult, 149 | "num_heads": 4, 150 | "block_type": "mobilevit", 151 | }, 152 | "layer4": { # 14x14 153 | "out_channels": 128, 154 | "transformer_channels": 192, 155 | "ffn_dim": 384, 156 | "transformer_blocks": 4, 157 | "patch_h": 2, 158 | "patch_w": 2, 159 | "stride": 2, 160 | "mv_expand_ratio": mv2_exp_mult, 161 | "num_heads": 4, 162 | "block_type": "mobilevit", 163 | }, 164 | "layer5": { # 7x7 165 | "out_channels": 160, 166 | "transformer_channels": 240, 167 | "ffn_dim": 480, 168 | "transformer_blocks": 3, 169 | "patch_h": 2, 170 | "patch_w": 2, 171 | "stride": 2, 172 | "mv_expand_ratio": mv2_exp_mult, 173 | "num_heads": 4, 174 | "block_type": "mobilevit", 175 | }, 176 | "last_layer_exp_factor": 4, 177 | "cls_dropout": 0.1 178 | } 179 | else: 180 | raise NotImplementedError 181 | 182 | for k in ["layer1", "layer2", "layer3", "layer4", "layer5"]: 183 | config[k].update({"dropout": 0.1, "ffn_dropout": 0.0, "attn_dropout": 0.0}) 184 | 185 | return config 186 | 187 | 188 | if __name__ == '__main__': 189 | pass 190 | -------------------------------------------------------------------------------- /about_transformer/mobile_vit/transformer_encoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : transformer_encoder.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/16 21:58 7 | # @Classes : 8 | from typing import Optional 9 | 10 | import torch 11 | import torch.nn as nn 12 | from torch import Tensor 13 | 14 | 15 | class MultiHeadAttention(nn.Module): 16 | """ 17 | 构建多头注意力或者交叉注意力模块 18 | 输入维度为(N, P, C_{in}),N是batch size,P是patch数,C_{in}是输入的embedding dim 19 | 输出维度与输入维度一致 20 | """ 21 | 22 | def __init__( 23 | self, 24 | embed_dim: int, 25 | num_heads: int, 26 | attn_dropout: float = 0.0, 27 | bias: bool = True, 28 | *args, 29 | **kwargs 30 | ) -> None: 31 | super().__init__() 32 | # 输入的embedding dim必须能被num_heads整除 33 | if embed_dim % num_heads != 0: 34 | raise ValueError( 35 | "Embedding dim must be divisible by number of heads in {}. Got: embed_dim={} and num_heads={}".format( 36 | self.__class__.__name__, embed_dim, num_heads 37 | ) 38 | ) 39 | 40 | self.qkv_proj = nn.Linear(in_features=embed_dim, out_features=3 * embed_dim, bias=bias) 41 | 42 | self.attn_dropout = nn.Dropout(p=attn_dropout) 43 | self.out_proj = nn.Linear(in_features=embed_dim, out_features=embed_dim, bias=bias) 44 | 45 | self.head_dim = embed_dim // num_heads 46 | self.scaling = self.head_dim ** -0.5 47 | self.softmax = nn.Softmax(dim=-1) 48 | self.num_heads = num_heads 49 | self.embed_dim = embed_dim 50 | 51 | def forward(self, x_q: Tensor) -> Tensor: 52 | # [N, P, C] 53 | b_sz, n_patches, in_channels = x_q.shape 54 | 55 | # self-attention 56 | # [N, P, C] -> [N, P, 3C] -> [N, P, 3, h, c] where C = hc 57 | qkv = self.qkv_proj(x_q).reshape(b_sz, n_patches, 3, self.num_heads, -1) 58 | 59 | # [N, P, 3, h, c] -> [N, h, 3, P, C] 60 | qkv = qkv.transpose(1, 3).contiguous() 61 | 62 | # [N, h, 3, P, C] -> [N, h, P, C] x 3 63 | query, key, value = qkv[:, :, 0], qkv[:, :, 1], qkv[:, :, 2] 64 | 65 | query = query * self.scaling 66 | 67 | # [N h, P, c] -> [N, h, c, P] 68 | key = key.transpose(-1, -2) 69 | 70 | # QK^T 71 | # [N, h, P, c] x [N, h, c, P] -> [N, h, P, P] 72 | attn = torch.matmul(query, key) 73 | attn = self.softmax(attn) 74 | attn = self.attn_dropout(attn) 75 | 76 | # weighted sum 77 | # [N, h, P, P] x [N, h, P, c] -> [N, h, P, c] 78 | out = torch.matmul(attn, value) 79 | 80 | # [N, h, P, c] -> [N, P, h, c] -> [N, P, C] 81 | out = out.transpose(1, 2).reshape(b_sz, n_patches, -1) 82 | out = self.out_proj(out) 83 | 84 | return out 85 | 86 | 87 | class TransformerEncoder(nn.Module): 88 | """ 89 | This class defines the pre-norm `Transformer encoder `_ 90 | Args: 91 | embed_dim (int): :math:`C_{in}` from an expected input of size :math:`(N, P, C_{in})` 92 | ffn_latent_dim (int): Inner dimension of the FFN 93 | num_heads (int) : Number of heads in multi-head attention. Default: 8 94 | attn_dropout (float): Dropout rate for attention in multi-head attention. Default: 0.0 95 | dropout (float): Dropout rate. Default: 0.0 96 | ffn_dropout (float): Dropout between FFN layers. Default: 0.0 97 | 98 | Shape: 99 | - Input: :math:`(N, P, C_{in})` where :math:`N` is batch size, :math:`P` is number of patches, 100 | and :math:`C_{in}` is input embedding dim 101 | - Output: same shape as the input 102 | """ 103 | 104 | def __init__( 105 | self, 106 | embed_dim: int, 107 | ffn_latent_dim: int, 108 | num_heads: Optional[int] = 8, 109 | attn_dropout: Optional[float] = 0.0, 110 | dropout: Optional[float] = 0.0, 111 | ffn_dropout: Optional[float] = 0.0, 112 | *args, 113 | **kwargs 114 | ) -> None: 115 | super().__init__() 116 | 117 | attn_unit = MultiHeadAttention( 118 | embed_dim, 119 | num_heads, 120 | attn_dropout=attn_dropout, 121 | bias=True 122 | ) 123 | 124 | self.pre_norm_mha = nn.Sequential( 125 | nn.LayerNorm(embed_dim), 126 | attn_unit, 127 | nn.Dropout(p=dropout) 128 | ) 129 | 130 | self.pre_norm_ffn = nn.Sequential( 131 | nn.LayerNorm(embed_dim), 132 | nn.Linear(in_features=embed_dim, out_features=ffn_latent_dim, bias=True), 133 | nn.SiLU(), 134 | nn.Dropout(p=ffn_dropout), 135 | nn.Linear(in_features=ffn_latent_dim, out_features=embed_dim, bias=True), 136 | nn.Dropout(p=dropout) 137 | ) 138 | self.embed_dim = embed_dim 139 | self.ffn_dim = ffn_latent_dim 140 | self.ffn_dropout = ffn_dropout 141 | self.std_dropout = dropout 142 | 143 | def forward(self, x: Tensor) -> Tensor: 144 | # multi-head attention 145 | res = x 146 | x = self.pre_norm_mha(x) 147 | x = x + res 148 | 149 | # feed forward network 150 | x = x + self.pre_norm_ffn(x) 151 | return x 152 | 153 | 154 | if __name__ == '__main__': 155 | pass 156 | -------------------------------------------------------------------------------- /classic_conv/AlexNet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : AlexNet.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/9 3:54 7 | # @Classes : 8 | import torch 9 | from torch import nn 10 | 11 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 12 | 13 | 14 | class AlexNet(nn.Module): 15 | def __init__(self, num_classes=1000): 16 | """AlexNet网络实现 17 | 18 | :param num_classes: 默认1000,因为原论文参加的竞赛类别就是1000 19 | """ 20 | super(AlexNet, self).__init__() 21 | # 卷积层 22 | self.conv = nn.Sequential( 23 | # 由于LRN层已经证明无用,所以这里不写LRN 24 | # 第一层,输入通道数3,输出通道数96,使用11x11大小的卷积核 25 | nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2), # input:[3, 224, 224] output:[96, 55, 55] 26 | nn.ReLU(), 27 | nn.MaxPool2d(kernel_size=3, stride=2), # output:[96, 27, 27] 28 | 29 | # 第二层,开始减小卷积核大小且增大输出通道数,从而提取更多特征 30 | nn.Conv2d(96, 256, 5, 1, 2), # output: [256, 27, 27] 31 | nn.ReLU(), 32 | nn.MaxPool2d(3, 2), # output: [256, 13, 13] 33 | # 连续3个卷积层,且使用更小的卷积窗口。除了最后的卷积层外,进一步增大了输出通道数。 34 | # 前两个卷积层后不使用池化层来减小输入的高和宽 35 | nn.Conv2d(256, 384, 3, 1, 1), # output: [384, 13, 13] 36 | nn.ReLU(), 37 | nn.Conv2d(384, 384, 3, 1, 1), # output: [384, 13, 13] 38 | nn.ReLU(), 39 | nn.Conv2d(384, 256, 3, 1, 1), # output: [256, 13, 13] 40 | nn.ReLU(), 41 | nn.MaxPool2d(3, 2) # output: [256, 6, 6] 42 | ) 43 | # 全连接层 44 | self.fc = nn.Sequential( 45 | # 第一个全连接层,输入维度是256*,输出维度是4096 46 | nn.Linear(256 * 6 * 6, 4096), 47 | nn.ReLU(), 48 | nn.Dropout(0.5), 49 | nn.Linear(4096, 4096), 50 | nn.ReLU(), 51 | nn.Dropout(0.5), 52 | # 输出层 53 | nn.Linear(4096, num_classes), 54 | ) 55 | 56 | # 前向传播 57 | def forward(self, img): 58 | feature = self.conv(img) 59 | output = self.fc(feature.view(img.shape[0], -1)) 60 | return output 61 | 62 | 63 | def test(): 64 | # 随机生成一组张量,可视为3张3通道照片,尺寸227x227 65 | x = torch.randn(3, 3, 227, 227) 66 | alex_model = AlexNet() 67 | pred = alex_model(x) 68 | print(x.shape) 69 | print(pred.shape) 70 | 71 | 72 | if __name__ == '__main__': 73 | test() 74 | -------------------------------------------------------------------------------- /classic_conv/SENet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : SENet.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/31 9:34 7 | # @Classes : 搭建SEBlock,以及用于SE-ResNet18/34的SEBasicBlock、用于SE-ResNet50/101/152的SEBottleNeck 8 | # 具体SE-ResNet这里就不写了 9 | import torch 10 | from torch import nn 11 | 12 | 13 | class SEBlock(nn.Module): 14 | def __init__(self, in_channel, r=6): 15 | """ 16 | 17 | :param in_channel: 18 | :param r: 论文中全连接层的r,即通道数缩放因子 19 | """ 20 | super(SEBlock, self).__init__() 21 | # 全局平均池化(Squeeze) 22 | self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) 23 | # 两个全连接层(Excitation) 24 | self.fc = nn.Sequential( 25 | nn.Linear(in_channel, in_channel // r, bias=False), 26 | nn.ReLU(), 27 | nn.Linear(in_channel // r, in_channel, bias=False), 28 | nn.Sigmoid() 29 | ) 30 | 31 | def forward(self, x): 32 | b, c, h, w = x.size() 33 | # Squeeze,得到通道描述符,即(b, c)张量 34 | out = self.avg_pool(x).view(b, c) 35 | # Excitation,得到每个通道的权重 36 | out = self.fc(out).view(b, c, 1, 1) 37 | # 特征加权后输出 38 | return x * out.expand_as(x) 39 | 40 | 41 | class BasicBlock(nn.Module): 42 | expansion = 1 43 | 44 | def __init__(self, in_channel, out_channel, stride=1, r=6): 45 | super(BasicBlock, self).__init__() 46 | self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False) 47 | self.bn1 = nn.BatchNorm2d(out_channel) 48 | self.relu = nn.ReLU(inplace=True) 49 | 50 | self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False) 51 | self.bn2 = nn.BatchNorm2d(out_channel) 52 | 53 | self.SE = SEBlock(out_channel, r) 54 | 55 | # 防止无法连接,进行1x1下采样 56 | if stride != 1 or in_channel != self.expansion * out_channel: 57 | self.down_sample = nn.Sequential(nn.Conv2d(in_channel, self.expansion * out_channel, kernel_size=1, stride=stride, bias=False), 58 | nn.BatchNorm2d(self.expansion * out_channel)) 59 | else: 60 | self.down_sample = lambda x: x 61 | 62 | def forward(self, x): 63 | residual = self.down_sample(x) 64 | 65 | out = self.relu(self.bn1(self.conv1(x))) 66 | out = self.bn2(self.conv2(out)) 67 | 68 | out = self.SE(out) 69 | 70 | out = residual + out 71 | out = self.relu(out) 72 | return out 73 | 74 | 75 | class SEBottleNeck(nn.Module): 76 | expansion = 4 77 | 78 | def __init__(self, in_channel, out_channel, stride=1, r=6): 79 | super(SEBottleNeck, self).__init__() 80 | self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, bias=False) 81 | self.bn1 = nn.BatchNorm2d(out_channel) 82 | self.relu = nn.ReLU(inplace=True) 83 | 84 | self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False) 85 | self.bn2 = nn.BatchNorm2d(out_channel) 86 | 87 | self.conv3 = nn.Conv2d(out_channel, out_channel * self.expansion, kernel_size=1, bias=False) 88 | self.bn3 = nn.BatchNorm2d(out_channel * self.expansion) 89 | 90 | self.SE = SEBlock(self.expansion * out_channel, r) 91 | 92 | # 防止无法连接,进行1x1下采样 93 | if stride != 1 or in_channel != self.expansion * out_channel: 94 | self.down_sample = nn.Sequential( 95 | nn.Conv2d(in_channel, self.expansion * out_channel, kernel_size=1, stride=stride, bias=False), 96 | nn.BatchNorm2d(self.expansion * out_channel)) 97 | else: 98 | self.down_sample = lambda x: x 99 | 100 | def forward(self, x): 101 | residual = self.down_sample(x) 102 | 103 | out = self.relu(self.bn1(self.conv1(x))) 104 | 105 | out = self.relu(self.bn2(self.conv2(out))) 106 | 107 | out = self.bn3(self.conv3(out)) 108 | out = self.SE(out) 109 | 110 | out += residual 111 | out = self.relu(out) 112 | 113 | return out 114 | 115 | 116 | def test(): 117 | x = torch.randn(3, 3, 224, 224) 118 | # block = BasicBlock(3, 64) 119 | block = SEBottleNeck(3, 64) 120 | pred = block(x) 121 | print(x.shape) 122 | print(pred.shape) 123 | 124 | 125 | if __name__ == '__main__': 126 | test() 127 | -------------------------------------------------------------------------------- /classic_conv/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @Package Name: 5 | # @File Name : __init__.py.py 6 | # @author : ahua 7 | # @Version : 1.0 8 | # @Start Date : 2024/3/9 3:53 9 | # @Classes : 10 | 11 | 12 | if __name__ == '__main__': 13 | pass 14 | -------------------------------------------------------------------------------- /image_segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : __init__.py.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/9 3:59 7 | # @Classes : 8 | 9 | 10 | if __name__ == '__main__': 11 | pass 12 | -------------------------------------------------------------------------------- /image_segmentation/about_unet/UNet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : UNet.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/16 5:35 7 | # @Classes : 8 | import torch 9 | import torch.nn as nn 10 | import torchvision.transforms.functional as TF 11 | 12 | 13 | class DoubleConv(nn.Module): 14 | """定义连续的俩次卷积""" 15 | 16 | def __init__(self, in_channel, out_channel): 17 | super(DoubleConv, self).__init__() 18 | # 俩次卷积 19 | self.d_conv = nn.Sequential( 20 | # 相比原论文,这里加入了padding与BN 21 | nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False), 22 | nn.BatchNorm2d(out_channel), 23 | nn.ReLU(inplace=True), 24 | 25 | nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False), 26 | nn.BatchNorm2d(out_channel), 27 | nn.ReLU(inplace=True), 28 | ) 29 | 30 | def forward(self, x): 31 | return self.d_conv(x) 32 | 33 | 34 | class UNet(nn.Module): 35 | def __init__(self, in_channel=3, out_channel=2, features=[64, 128, 256, 512]): 36 | """ 37 | 38 | :param in_channel: 39 | :param out_channel: 40 | :param features: 各个采样后对应的通道数 41 | """ 42 | super(UNet, self).__init__() 43 | # 记录一系列上采样和下采样操作层 44 | self.ups = nn.ModuleList() 45 | self.downs = nn.ModuleList() 46 | 47 | # 最大池化下采样 48 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 49 | 50 | # 加入 51 | for feature in features: 52 | self.downs.append(DoubleConv(in_channel, feature)) 53 | # 下次的输入通道数变为刚刚的输出通道数 54 | in_channel = feature 55 | 56 | # 上采样最下面的一步俩次卷积 57 | self.final_up = DoubleConv(features[-1], features[-1]*2) 58 | 59 | # 上采样逆置list 60 | for feature in reversed(features): 61 | # 转置卷积上采样,因为进行了拼接,所以输入通道数x2 62 | self.ups.append(nn.ConvTranspose2d(feature * 2, feature, kernel_size=3, stride=1, padding=1)) 63 | self.ups.append(DoubleConv(feature*2, feature)) 64 | 65 | # 最后出结果的1x1卷积 66 | self.final_conv = nn.Conv2d(features[0], out_channel, kernel_size=1) 67 | 68 | def forward(self, x): 69 | # 记录跳跃连接 70 | skip_connections = [] 71 | # 下采样 72 | for down in self.downs: 73 | x = down(x) 74 | skip_connections.append(x) 75 | x = self.pool(x) 76 | # 最下层卷积 77 | x = self.final_up(x) 78 | # 逆置跳跃连接 79 | skip_connections = skip_connections[::-1] 80 | # 上采样 81 | for idx in range(0, len(self.ups), 2): 82 | x = self.ups[idx](x) 83 | skip_connection = skip_connections[idx // 2] 84 | if skip_connection.shape != x.shape: 85 | # 原论文中这里是对skip_connection做裁剪,这里对x做resize 86 | x = TF.resize(x, size=skip_connection.shape[2:]) 87 | x = torch.cat((x, skip_connection), dim=1) 88 | x = self.ups[idx+1](x) 89 | output = self.final_conv(x) 90 | return output 91 | 92 | 93 | def test(): 94 | x = torch.randn(3, 3, 572, 572) 95 | model = UNet() 96 | print(x.shape) 97 | print(model(x).shape) 98 | 99 | 100 | if __name__ == '__main__': 101 | test() 102 | -------------------------------------------------------------------------------- /image_segmentation/about_unet/UNet_pp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : UNet_pp.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/4 22:52 7 | # @Classes : UNet++网络 8 | from torch import nn 9 | import torch 10 | 11 | 12 | class DoubleConv(nn.Module): 13 | """同UNet定义连续的俩次卷积""" 14 | 15 | def __init__(self, in_channel, out_channel): 16 | super(DoubleConv, self).__init__() 17 | # 俩次卷积 18 | self.d_conv = nn.Sequential( 19 | # 相比原论文,这里加入了padding与BN 20 | nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False), 21 | nn.BatchNorm2d(out_channel), 22 | nn.ReLU(inplace=True), 23 | 24 | nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False), 25 | nn.BatchNorm2d(out_channel), 26 | nn.ReLU(inplace=True), 27 | ) 28 | 29 | def forward(self, x): 30 | return self.d_conv(x) 31 | 32 | 33 | class UNetPP(nn.Module): 34 | def __init__(self, in_channel=3, out_channel=2, features=[64, 128, 256, 512, 1024], deep_supervision=False): 35 | """ 36 | 37 | :param in_channel: 38 | :param out_channel: 39 | :param features: 各个采样后对应的通道数 40 | :param deep_supervision: 是否使用深度监督 41 | """ 42 | super(UNetPP, self).__init__() 43 | 44 | self.deep_supervision = deep_supervision 45 | 46 | # 下采样的池化层 47 | self.pool = nn.MaxPool2d(2, 2) 48 | # 双线性插值进行上采样,也可以通过ConvTranspose2d或者先ConvTranspose2d后插值实现,这里为了方便直接插值 49 | self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 50 | 51 | # 原始UNet的下采样层,每个下采样层的第0层卷积 52 | self.conv0_0 = DoubleConv(in_channel, features[0]) 53 | self.conv1_0 = DoubleConv(features[0], features[1]) 54 | self.conv2_0 = DoubleConv(features[1], features[2]) 55 | self.conv3_0 = DoubleConv(features[2], features[3]) 56 | self.conv4_0 = DoubleConv(features[3], features[4]) 57 | 58 | # 每个下采样层的第一层卷积 59 | self.conv0_1 = DoubleConv(features[0] + features[1], features[0]) 60 | self.conv1_1 = DoubleConv(features[1] + features[2], features[1]) 61 | self.conv2_1 = DoubleConv(features[2] + features[3], features[2]) 62 | self.conv3_1 = DoubleConv(features[3] + features[4], features[3]) 63 | 64 | # 每个下采样层的第二层卷积 65 | self.conv0_2 = DoubleConv(features[0] * 2 + features[1], features[0]) 66 | self.conv1_2 = DoubleConv(features[1] * 2 + features[2], features[1]) 67 | self.conv2_2 = DoubleConv(features[2] * 2 + features[3], features[2]) 68 | 69 | # 每个下采样层的第三层卷积 70 | self.conv0_3 = DoubleConv(features[0] * 3 + features[1], features[0]) 71 | self.conv1_3 = DoubleConv(features[1] * 3 + features[2], features[1]) 72 | 73 | # 每个下采样层的第四层卷积 74 | self.conv0_4 = DoubleConv(features[0] * 4 + features[1], features[0]) 75 | 76 | # 分割头,作者原论文写了深度监督之后还过sigmoid,但是UNet没有sigmoid 77 | self.sigmoid = nn.Sigmoid() 78 | if self.deep_supervision: 79 | self.final1 = nn.Conv2d(features[0], out_channel, kernel_size=1) 80 | self.final2 = nn.Conv2d(features[0], out_channel, kernel_size=1) 81 | self.final3 = nn.Conv2d(features[0], out_channel, kernel_size=1) 82 | self.final4 = nn.Conv2d(features[0], out_channel, kernel_size=1) 83 | else: 84 | self.final = nn.Conv2d(features[0], out_channel, kernel_size=1) 85 | 86 | def forward(self, x): 87 | x0_0 = self.conv0_0(x) 88 | x1_0 = self.conv1_0(self.pool(x0_0)) 89 | x0_1 = self.conv0_1(torch.cat([x0_0, self.up(x1_0)], 1)) 90 | 91 | x2_0 = self.conv2_0(self.pool(x1_0)) 92 | x1_1 = self.conv1_1(torch.cat([x1_0, self.up(x2_0)], 1)) 93 | x0_2 = self.conv0_2(torch.cat([x0_0, x0_1, self.up(x1_1)], 1)) 94 | 95 | x3_0 = self.conv3_0(self.pool(x2_0)) 96 | x2_1 = self.conv2_1(torch.cat([x2_0, self.up(x3_0)], 1)) 97 | x1_2 = self.conv1_2(torch.cat([x1_0, x1_1, self.up(x2_1)], 1)) 98 | x0_3 = self.conv0_3(torch.cat([x0_0, x0_1, x0_2, self.up(x1_2)], 1)) 99 | 100 | x4_0 = self.conv4_0(self.pool(x3_0)) 101 | x3_1 = self.conv3_1(torch.cat([x3_0, self.up(x4_0)], 1)) 102 | x2_2 = self.conv2_2(torch.cat([x2_0, x2_1, self.up(x3_1)], 1)) 103 | x1_3 = self.conv1_3(torch.cat([x1_0, x1_1, x1_2, self.up(x2_2)], 1)) 104 | x0_4 = self.conv0_4(torch.cat([x0_0, x0_1, x0_2, x0_3, self.up(x1_3)], 1)) 105 | 106 | # 使用深度监督,返回四个分割图 107 | if self.deep_supervision: 108 | output1 = self.final1(x0_1) 109 | output1 = self.sigmoid(output1) 110 | output2 = self.final2(x0_2) 111 | output2 = self.sigmoid(output2) 112 | output3 = self.final3(x0_3) 113 | output3 = self.sigmoid(output3) 114 | output4 = self.final4(x0_4) 115 | output4 = self.sigmoid(output4) 116 | return [output1, output2, output3, output4] 117 | 118 | else: 119 | output = self.final(x0_4) 120 | output = self.sigmoid(output) 121 | return output 122 | 123 | 124 | def test(): 125 | x = torch.randn(3, 3, 224, 224) 126 | model = UNetPP() 127 | print(x.shape) 128 | print(model(x).shape) 129 | 130 | 131 | if __name__ == '__main__': 132 | test() 133 | -------------------------------------------------------------------------------- /image_segmentation/about_unet/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Project Name: Hand-torn_code 4 | # @File Name : __init__.py.py 5 | # @author : ahua 6 | # @Start Date : 2024/3/9 3:59 7 | # @Classes : 8 | 9 | 10 | if __name__ == '__main__': 11 | pass 12 | --------------------------------------------------------------------------------