├── .idea
├── dictionaries
│ └── lxt.xml
├── fuse_seg.iml
├── inspectionProfiles
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
├── vcs.xml
└── workspace.xml
├── README.md
└── model
├── __init__.py
├── deeplab_resnet.py
├── large_kernel.py
├── large_kernel_exfuse.py
├── seg_resnet.py
└── seg_resnext.py
/.idea/dictionaries/lxt.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/.idea/fuse_seg.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | BashSupport
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
115 |
116 |
117 |
118 | SynchronizedBatchNorm2d
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 | true
140 | DEFINITION_ORDER
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 |
430 |
431 |
432 |
433 | 1526212737885
434 |
435 |
436 | 1526212737885
437 |
438 |
439 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
463 |
464 |
465 |
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 |
475 |
476 |
477 |
478 |
479 |
480 |
481 |
482 |
483 |
484 |
485 |
486 |
487 |
488 |
489 |
490 |
491 |
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 |
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 |
517 |
518 |
519 |
520 |
521 |
522 |
523 |
524 |
525 |
526 |
527 |
528 |
529 |
530 |
531 |
532 |
533 |
534 |
535 |
536 |
537 |
538 |
539 |
540 |
541 |
542 |
543 |
544 |
545 |
546 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # fuse_seg
2 | PyTorch implementation of the papers:
3 | 1. Enhancing Feature Fusion for Semantic Segmentation (Face++) [paper](https://arxiv.org/abs/1804.03821)
4 | 2. Large Kernel Matters (Face++) [paper](https://arxiv.org/abs/1703.02719)
5 | All models are based on ResNet-101 (MIT pretrained models).
6 |
--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/fuse_seg_pytorch/942f1cd34cc5d0864d91327595ebcbdef1c628d5/model/__init__.py
--------------------------------------------------------------------------------
/model/deeplab_resnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Author: Xiangtai(lxtpku@pku.edu.cn)
4 | # this file contains the baseline resnet(encoder), baseline pspnet(decoder) segmentation models
5 | # and mul grid bottleneck modules which can be used for deeplab models
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 |
10 | import model.seg_resnet as resnet
11 | import model.seg_resnext as resnext
12 |
13 |
14 | # this is for encoder part
15 | # resnet encoder
class Resnet(nn.Module):
    """Wrap a pretrained ResNet trunk, dropping its AvgPool and FC head.

    Exposes the three-conv stem (conv/bn/relu x3 + maxpool) followed by
    the four residual stages of the wrapped network.
    """

    def __init__(self, orig_resnet):
        super(Resnet, self).__init__()
        # Re-register every feature-extraction module of the original net
        # (everything except the classifier head).
        for attr in ('conv1', 'bn1', 'relu1',
                     'conv2', 'bn2', 'relu2',
                     'conv3', 'bn3', 'relu3',
                     'maxpool', 'layer1', 'layer2', 'layer3', 'layer4'):
            setattr(self, attr, getattr(orig_resnet, attr))

    def forward(self, x, return_feature_maps=False):
        """Run the trunk.

        Returns the final stage output, or — when ``return_feature_maps``
        is true — the list of all four stage outputs (layer1..layer4).
        """
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)

        conv_out = []
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
            conv_out.append(x)

        return conv_out if return_feature_maps else x
52 |
53 | # dilated resnet encoder
class ResnetDilated(nn.Module):
    """ResNet trunk converted into a dilated variant.

    Strided convolutions in the last stage(s) of the pretrained network
    are rewritten in place into dilated convolutions, so the output
    stride becomes ``dilate_scale`` (8 or 16) instead of 32 while the
    pretrained weights remain usable.
    """

    def __init__(self, orig_resnet, dilate_scale=8):
        super(ResnetDilated, self).__init__()
        from functools import partial

        # Rewrite strides/dilations of the deep stages in place.
        if dilate_scale == 8:
            orig_resnet.layer3.apply(partial(self._nostride_dilate, dilate=2))
            orig_resnet.layer4.apply(partial(self._nostride_dilate, dilate=4))
        elif dilate_scale == 16:
            orig_resnet.layer4.apply(partial(self._nostride_dilate, dilate=2))

        # Keep everything of the pretrained net except AvgPool and FC.
        for attr in ('conv1', 'bn1', 'relu1',
                     'conv2', 'bn2', 'relu2',
                     'conv3', 'bn3', 'relu3',
                     'maxpool', 'layer1', 'layer2', 'layer3', 'layer4'):
            setattr(self, attr, getattr(orig_resnet, attr))

    def _nostride_dilate(self, m, dilate):
        """Convert one module: drop stride-2, add dilation to 3x3 convs."""
        # Only convolution modules are touched; BN/ReLU pass through.
        if m.__class__.__name__.find('Conv') == -1:
            return
        if m.stride == (2, 2):
            # Strided conv: remove the stride; a 3x3 kernel gets half the
            # dilation (the stride removal already doubles resolution).
            m.stride = (1, 1)
            if m.kernel_size == (3, 3):
                m.dilation = (dilate // 2, dilate // 2)
                m.padding = (dilate // 2, dilate // 2)
        elif m.kernel_size == (3, 3):
            # Ordinary 3x3 conv: apply the full dilation rate.
            m.dilation = (dilate, dilate)
            m.padding = (dilate, dilate)

    def forward(self, x, return_feature_maps=False):
        """Run the trunk; optionally return all four stage outputs."""
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)

        conv_out = []
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
            conv_out.append(x)

        return conv_out if return_feature_maps else x
115 |
116 |
117 | # this is for decoder part
118 | # last conv, bilinear upsample
class C1BilinearDeepSup(nn.Module):
    """Single-conv decoder head with deep supervision, bilinear upsampling.

    Training returns ``(log_softmax_main, log_softmax_aux)``; inference
    (``use_softmax=True``) returns upsampled softmax probabilities only.
    """

    def __init__(self, num_class=150, fc_dim=2048, use_softmax=False):
        super(C1BilinearDeepSup, self).__init__()
        self.use_softmax = use_softmax

        self.cbr = conv3x3_bn_relu(fc_dim, fc_dim // 4, 1)
        self.cbr_deepsup = conv3x3_bn_relu(fc_dim // 2, fc_dim // 4, 1)

        # 1x1 classifiers for the main and the deep-supervision branches.
        self.conv_last = nn.Conv2d(fc_dim // 4, num_class, 1, 1, 0)
        self.conv_last_deepsup = nn.Conv2d(fc_dim // 4, num_class, 1, 1, 0)

    def forward(self, conv_out, segSize=None):
        x = self.conv_last(self.cbr(conv_out[-1]))

        if self.use_softmax:  # is True during inference
            x = nn.functional.upsample(x, size=segSize, mode='bilinear')
            return nn.functional.softmax(x, dim=1)

        # Deep supervision: auxiliary prediction from the penultimate map.
        aux = self.conv_last_deepsup(self.cbr_deepsup(conv_out[-2]))

        x = nn.functional.log_softmax(x, dim=1)
        aux = nn.functional.log_softmax(aux, dim=1)

        return (x, aux)
151 |
152 |
153 | # last conv, bilinear upsample
class C1Bilinear(nn.Module):
    """Single-conv decoder head (no deep supervision), bilinear upsampling."""

    def __init__(self, num_class=150, fc_dim=2048, use_softmax=False):
        super(C1Bilinear, self).__init__()
        self.use_softmax = use_softmax
        self.cbr = conv3x3_bn_relu(fc_dim, fc_dim // 4, 1)
        # 1x1 classifier producing per-class scores.
        self.conv_last = nn.Conv2d(fc_dim // 4, num_class, 1, 1, 0)

    def forward(self, conv_out, segSize=None):
        x = self.conv_last(self.cbr(conv_out[-1]))

        if self.use_softmax:  # is True during inference
            # Inference: probabilities at the requested output size.
            x = nn.functional.upsample(x, size=segSize, mode='bilinear')
            x = nn.functional.softmax(x, dim=1)
        else:
            # Training: log-probabilities for an NLL-style loss.
            x = nn.functional.log_softmax(x, dim=1)

        return x
176 |
177 |
178 | # pyramid pooling, bilinear upsample
class PPMBilinear(nn.Module):
    """Pyramid-pooling (PSPNet-style) decoder head with bilinear upsampling."""

    def __init__(self, num_class=150, fc_dim=4096,
                 use_softmax=False, pool_scales=(1, 2, 3, 6)):
        super(PPMBilinear, self).__init__()
        self.use_softmax = use_softmax

        # One adaptive-pool + 1x1-conv branch per pyramid scale.
        self.ppm = nn.ModuleList([
            nn.Sequential(
                nn.AdaptiveAvgPool2d(scale),
                nn.Conv2d(fc_dim, 512, kernel_size=1, bias=False),
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True))
            for scale in pool_scales])

        self.conv_last = nn.Sequential(
            nn.Conv2d(fc_dim + len(pool_scales) * 512, 512,
                      kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.1),
            nn.Conv2d(512, num_class, kernel_size=1)
        )

    def forward(self, conv_out, segSize=None):
        conv5 = conv_out[-1]
        height, width = conv5.size(2), conv5.size(3)

        # Concatenate the raw features with every upsampled pyramid branch.
        pooled = [conv5]
        for branch in self.ppm:
            pooled.append(nn.functional.upsample(
                branch(conv5), (height, width), mode='bilinear'))
        x = self.conv_last(torch.cat(pooled, 1))

        if self.use_softmax:  # is True during inference
            x = nn.functional.upsample(x, size=segSize, mode='bilinear')
            x = nn.functional.softmax(x, dim=1)
        else:
            x = nn.functional.log_softmax(x, dim=1)
        return x
224 |
225 |
226 | # pyramid pooling, bilinear upsample
class PPMBilinearDeepsup(nn.Module):
    """Pyramid-pooling decoder head with a deep-supervision branch.

    Training returns ``(log_softmax_main, log_softmax_aux)``; inference
    (``use_softmax=True``) returns upsampled softmax probabilities only.
    """

    def __init__(self, num_class=150, fc_dim=4096,
                 use_softmax=False, pool_scales=(1, 2, 3, 6)):
        super(PPMBilinearDeepsup, self).__init__()
        self.use_softmax = use_softmax

        # One adaptive-pool + 1x1-conv branch per pyramid scale.
        branches = []
        for scale in pool_scales:
            branches.append(nn.Sequential(
                nn.AdaptiveAvgPool2d(scale),
                nn.Conv2d(fc_dim, 512, kernel_size=1, bias=False),
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True)
            ))
        self.ppm = nn.ModuleList(branches)
        self.cbr_deepsup = conv3x3_bn_relu(fc_dim // 2, fc_dim // 4, 1)

        self.conv_last = nn.Sequential(
            nn.Conv2d(fc_dim + len(pool_scales) * 512, 512,
                      kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.1),
            nn.Conv2d(512, num_class, kernel_size=1)
        )
        self.conv_last_deepsup = nn.Conv2d(fc_dim // 4, num_class, 1, 1, 0)
        self.dropout_deepsup = nn.Dropout2d(0.1)

    def forward(self, conv_out, segSize=None):
        conv5 = conv_out[-1]
        height, width = conv5.size(2), conv5.size(3)

        pooled = [conv5]
        for branch in self.ppm:
            pooled.append(nn.functional.upsample(
                branch(conv5), (height, width), mode='bilinear'))
        x = self.conv_last(torch.cat(pooled, 1))

        if self.use_softmax:  # is True during inference
            x = nn.functional.upsample(x, size=segSize, mode='bilinear')
            return nn.functional.softmax(x, dim=1)

        # Deep supervision: auxiliary head on the penultimate stage.
        aux = self.cbr_deepsup(conv_out[-2])
        aux = self.dropout_deepsup(aux)
        aux = self.conv_last_deepsup(aux)

        x = nn.functional.log_softmax(x, dim=1)
        aux = nn.functional.log_softmax(aux, dim=1)

        return (x, aux)
284 |
285 |
286 | # upernet
class UPerNet(nn.Module):
    """Unified Perceptual Parsing (UPerNet) decoder head.

    Applies a PSP-style pyramid pooling module (PPM) to the deepest
    encoder feature map, then runs an FPN top-down pathway over the
    earlier stages and fuses all pyramid levels for the final per-pixel
    classification.

    Args:
        num_class: number of output classes.
        fc_dim: channel count of the deepest encoder feature map.
        use_softmax: if True (inference), upsample to ``segSize`` and emit
            softmax probabilities; otherwise emit log-softmax at feature
            resolution.
        pool_scales: output sizes of the adaptive average pools in the PPM.
        fpn_inplanes: channel counts of the encoder stages fed to the FPN.
        fpn_dim: common channel width used throughout the FPN.
    """

    def __init__(self, num_class=150, fc_dim=4096,
                 use_softmax=False, pool_scales=(1, 2, 3, 6),
                 fpn_inplanes=(256,512,1024,2048), fpn_dim=256):
        super(UPerNet, self).__init__()
        self.use_softmax = use_softmax

        # PPM Module
        self.ppm_pooling = []
        self.ppm_conv = []

        for scale in pool_scales:
            self.ppm_pooling.append(nn.AdaptiveAvgPool2d(scale))
            self.ppm_conv.append(nn.Sequential(
                nn.Conv2d(fc_dim, 512, kernel_size=1, bias=False),
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True)
            ))
        self.ppm_pooling = nn.ModuleList(self.ppm_pooling)
        self.ppm_conv = nn.ModuleList(self.ppm_conv)
        self.ppm_last_conv = conv3x3_bn_relu(fc_dim + len(pool_scales)*512, fpn_dim, 1)

        # FPN Module
        # Lateral 1x1 convs bringing each encoder stage to fpn_dim channels.
        self.fpn_in = []
        for fpn_inplane in fpn_inplanes[:-1]: # skip the top layer
            self.fpn_in.append(nn.Sequential(
                nn.Conv2d(fpn_inplane, fpn_dim, kernel_size=1, bias=False),
                nn.BatchNorm2d(fpn_dim),
                nn.ReLU(inplace=True)
            ))
        self.fpn_in = nn.ModuleList(self.fpn_in)

        # Post-merge 3x3 convs, one per FPN level below the top.
        self.fpn_out = []
        for i in range(len(fpn_inplanes) - 1): # skip the top layer
            self.fpn_out.append(nn.Sequential(
                conv3x3_bn_relu(fpn_dim, fpn_dim, 1),
            ))
        self.fpn_out = nn.ModuleList(self.fpn_out)

        # Final fusion of all pyramid levels followed by the classifier.
        self.conv_last = nn.Sequential(
            conv3x3_bn_relu(len(fpn_inplanes) * fpn_dim, fpn_dim, 1),
            nn.Conv2d(fpn_dim, num_class, kernel_size=1)
        )

    def forward(self, conv_out, segSize=None):
        conv5 = conv_out[-1]

        input_size = conv5.size()
        # PPM: concatenate raw features with every upsampled pyramid branch.
        ppm_out = [conv5]
        for pool_scale, pool_conv in zip(self.ppm_pooling, self.ppm_conv):
            ppm_out.append(pool_conv(nn.functional.upsample(
                pool_scale(conv5),
                (input_size[2], input_size[3]),
                mode='bilinear')))
        ppm_out = torch.cat(ppm_out, 1)
        f = self.ppm_last_conv(ppm_out)

        # Top-down pathway: start from the PPM output and successively
        # merge with the lateral features of the earlier (finer) stages.
        fpn_feature_list = [f]
        for i in reversed(range(len(conv_out) - 1)):
            conv_x = conv_out[i]
            conv_x = self.fpn_in[i](conv_x) # lateral branch

            f = nn.functional.upsample(f, size=conv_x.size()[2:], mode='bilinear') # top-down branch
            f = conv_x + f

            fpn_feature_list.append(self.fpn_out[i](f))

        fpn_feature_list.reverse() # [P2 - P5]
        # Upsample every level to the finest resolution and concatenate.
        output_size = fpn_feature_list[0].size()[2:]
        fusion_list = [fpn_feature_list[0]]
        for i in range(1, len(fpn_feature_list)):
            fusion_list.append(nn.functional.upsample(
                fpn_feature_list[i],
                output_size,
                mode='bilinear'))
        fusion_out = torch.cat(fusion_list, 1)
        x = self.conv_last(fusion_out)

        if self.use_softmax:  # is True during inference
            x = nn.functional.upsample(x, size=segSize, mode='bilinear')
            x = nn.functional.softmax(x, dim=1)
            return x

        x = nn.functional.log_softmax(x, dim=1)

        return x
373 |
374 |
375 |
def conv3x3(in_planes, out_planes, stride=1, has_bias=False):
    """Return a 3x3 convolution with unit padding; bias off by default."""
    conv = nn.Conv2d(in_planes, out_planes, kernel_size=3,
                     stride=stride, padding=1, bias=has_bias)
    return conv
380 |
381 |
def conv3x3_bn_relu(in_planes, out_planes, stride=1):
    """3x3 conv -> BatchNorm -> ReLU, packaged as a single Sequential."""
    layers = (
        conv3x3(in_planes, out_planes, stride),
        nn.BatchNorm2d(out_planes),
        nn.ReLU(inplace=True),
    )
    return nn.Sequential(*layers)
388 |
389 | # this is used to build the different models, both encoder and decoder
# this is used to build the different models, both encoder and decoder
class ModelBuilder():
    """Factory for encoder (backbone) and decoder (head) networks."""

    # custom weights initialization
    def weights_init(self, m):
        """Initialize one module: Kaiming-normal for convs, constants for
        BatchNorm, small Gaussian for linear layers."""
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            # kaiming_normal (no underscore) is deprecated/removed in
            # modern PyTorch; kaiming_normal_ is the in-place equivalent.
            nn.init.kaiming_normal_(m.weight.data)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.fill_(1.)
            m.bias.data.fill_(1e-4)
        elif classname.find('Linear') != -1:
            m.weight.data.normal_(0.0, 0.0001)

    def build_encoder(self, arch='resnet50_dilated8', fc_dim=512, weights=''):
        """Construct a backbone encoder.

        Args:
            arch: architecture name, e.g. 'resnet101_dilated8'.
            fc_dim: unused here; kept for interface compatibility.
            weights: checkpoint path; when empty, ImageNet-pretrained
                weights are loaded instead.

        Returns:
            The encoder nn.Module.

        Raises:
            NotImplementedError: for resnet34 variants (no pretrained
                model available).
            Exception: for unknown architecture names.
        """
        # Load ImageNet-pretrained weights only when no checkpoint is given.
        pretrained = len(weights) == 0
        if arch in ('resnet34', 'resnet34_dilated8', 'resnet34_dilated16'):
            # resnet34 variants are declared but not supported; the
            # original code raised before ever constructing them.
            raise NotImplementedError
        elif arch == 'resnet50':
            orig_resnet = resnet.__dict__['resnet50'](pretrained=pretrained)
            net_encoder = Resnet(orig_resnet)
        elif arch == 'resnet50_dilated8':
            orig_resnet = resnet.__dict__['resnet50'](pretrained=pretrained)
            net_encoder = ResnetDilated(orig_resnet,
                                        dilate_scale=8)
        elif arch == 'resnet50_dilated16':
            orig_resnet = resnet.__dict__['resnet50'](pretrained=pretrained)
            net_encoder = ResnetDilated(orig_resnet,
                                        dilate_scale=16)
        elif arch == 'resnet101':
            orig_resnet = resnet.__dict__['resnet101'](pretrained=pretrained)
            net_encoder = Resnet(orig_resnet)
        elif arch == 'resnet101_dilated8':
            orig_resnet = resnet.__dict__['resnet101'](pretrained=pretrained)
            net_encoder = ResnetDilated(orig_resnet,
                                        dilate_scale=8)
        elif arch == 'resnet101_dilated16':
            orig_resnet = resnet.__dict__['resnet101'](pretrained=pretrained)
            net_encoder = ResnetDilated(orig_resnet,
                                        dilate_scale=16)
        elif arch == 'resnext101':
            orig_resnext = resnext.__dict__['resnext101'](pretrained=pretrained)
            net_encoder = Resnet(orig_resnext) # we can still use class Resnet
        else:
            raise Exception('Architecture undefined!')

        # net_encoder.apply(self.weights_init)
        if len(weights) > 0:
            print('Loading weights for net_encoder')
            net_encoder.load_state_dict(
                torch.load(weights, map_location=lambda storage, loc: storage), strict=False)
        return net_encoder

    def build_decoder(self, arch='ppm_bilinear_deepsup',
                      fc_dim=512, num_class=150,
                      weights='', use_softmax=False):
        """Construct a decoder head.

        Args:
            arch: head name ('c1_bilinear', 'ppm_bilinear', 'upernet', ...).
            fc_dim: channel count of the encoder's deepest feature map.
            num_class: number of output classes.
            weights: optional checkpoint path to load.
            use_softmax: inference mode flag passed to the head.

        Raises:
            Exception: for unknown architecture names.
        """
        if arch == 'c1_bilinear_deepsup':
            net_decoder = C1BilinearDeepSup(
                num_class=num_class,
                fc_dim=fc_dim,
                use_softmax=use_softmax)
        elif arch == 'c1_bilinear':
            net_decoder = C1Bilinear(
                num_class=num_class,
                fc_dim=fc_dim,
                use_softmax=use_softmax)
        elif arch == 'ppm_bilinear':
            net_decoder = PPMBilinear(
                num_class=num_class,
                fc_dim=fc_dim,
                use_softmax=use_softmax)
        elif arch == 'ppm_bilinear_deepsup':
            net_decoder = PPMBilinearDeepsup(
                num_class=num_class,
                fc_dim=fc_dim,
                use_softmax=use_softmax)
        elif arch == 'upernet_lite':
            net_decoder = UPerNet(
                num_class=num_class,
                fc_dim=fc_dim,
                use_softmax=use_softmax,
                fpn_dim=256)
        elif arch == 'upernet':
            net_decoder = UPerNet(
                num_class=num_class,
                fc_dim=fc_dim,
                use_softmax=use_softmax,
                fpn_dim=512)
        else:
            raise Exception('Architecture undefined!')

        # Decoders are trained from scratch, so always initialize them.
        net_decoder.apply(self.weights_init)
        if len(weights) > 0:
            print('Loading weights for net_decoder')
            net_decoder.load_state_dict(
                torch.load(weights, map_location=lambda storage, loc: storage), strict=False)
        return net_decoder
497 |
498 |
499 |
500 | # this is used to build deeplabv3, deeplabv3+
501 |
502 | class _ConvBatchNormReluBlock(nn.Sequential):
503 | def __init__(self, inplanes, outplanes, kernel_size, stride, padding, dilation, relu=True):
504 | super(_ConvBatchNormReluBlock, self).__init__()
505 | self.add_module("cov", nn.Conv2d(in_channels=inplanes,out_channels=outplanes,
506 | kernel_size=kernel_size, stride=stride, padding = padding,
507 | dilation = dilation, bias=False))
508 | self.add_module("bn", nn.BatchNorm2d(num_features=outplanes, momentum=0.999, affine=True))
509 | if relu:
510 | self.add_module("relu", nn.ReLU())
511 | def forward(self, x):
512 | return super(_ConvBatchNormReluBlock, self).forward(x)
513 |
class _ResidualBlockMulGrid(nn.Sequential):
    """
    Residual block with multi-grid dilation rates; note: best model -> (1, 2, 1).

    Three bottlenecks share `dilation`, each scaled by the corresponding
    multi-grid factor; only the first may change stride/channels.
    `layers` is accepted but unused (kept for interface compatibility).
    """

    def __init__(self, layers, inplanes, midplanes, outplanes, stride, dilation, mulgrid=(1, 2, 1)):
        # Default is a tuple rather than a list to avoid the shared
        # mutable-default-argument pitfall; callers may still pass a list.
        super(_ResidualBlockMulGrid, self).__init__()
        self.add_module("block1", _Bottleneck(inplanes, midplanes, outplanes, stride, dilation * mulgrid[0], True))
        self.add_module("block2", _Bottleneck(outplanes, midplanes, outplanes, stride, dilation * mulgrid[1], False))
        self.add_module("block3", _Bottleneck(outplanes, midplanes, outplanes, stride, dilation * mulgrid[2], False))
525 |
class _Bottleneck(nn.Sequential):
    """1x1 reduce -> 3x3 dilated -> 1x1 increase bottleneck with a residual add.

    When `downsample` is true the skip path is projected with a strided
    1x1 conv; otherwise the input is added directly.
    """

    def __init__(self, inplanes, midplanes, outplanes, stride, dilation, downsample):
        super(_Bottleneck, self).__init__()
        self.reduce = _ConvBatchNormReluBlock(inplanes, midplanes, 1, stride, 0, 1)
        self.conv3x3 = _ConvBatchNormReluBlock(midplanes, midplanes, 3, 1, dilation, dilation)
        self.increase = _ConvBatchNormReluBlock(midplanes, outplanes, 1, 1, 0, 1, relu=False)
        self.downsample = downsample
        if downsample:
            # Projection shortcut to match channels/stride of the main path.
            self.proj = _ConvBatchNormReluBlock(inplanes, outplanes, 1, stride, 0, 1, relu=False)

    def forward(self, x):
        out = self.increase(self.conv3x3(self.reduce(x)))
        shortcut = self.proj(x) if self.downsample else x
        return F.relu(out + shortcut)
544 |
545 |
546 |
547 |
548 |
if __name__ == '__main__':
    # Smoke test for the model builder (requires a CUDA device).
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(
        arch="resnet101_dilated8"
    ).cuda()
    # `Variable(..., volatile=True)` was removed in PyTorch 0.4 and now
    # raises at runtime; torch.no_grad() is the supported replacement.
    test_input = torch.randn(1, 3, 1024, 512).cuda()
    with torch.no_grad():
        out = net_encoder.forward(test_input)
    print(out[0].size())
--------------------------------------------------------------------------------
/model/large_kernel.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Implementation of Large Kernel Matters Paper (face++)
4 | # Author: Xiangtai(lxtpku@pku.edu.cn)
5 |
6 | import torch
7 | from torch import nn
8 |
9 |
10 | from model.deeplab_resnet import ModelBuilder
11 |
12 |
13 | class _BoundaryRefineModule(nn.Module):
14 | def __init__(self, dim):
15 | super(_BoundaryRefineModule, self).__init__()
16 | self.relu = nn.ReLU(inplace=True)
17 | self.conv1 = nn.Conv2d(dim, dim, kernel_size=3, padding=1)
18 | self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, padding=1)
19 |
20 | def forward(self, x):
21 | residual = self.conv1(x)
22 | residual = self.relu(residual)
23 | residual = self.conv2(residual)
24 | out = x + residual
25 | return out
26 |
27 |
28 | class _GlobalConvModule(nn.Module):
29 | def __init__(self, in_dim, out_dim, kernel_size):
30 | super(_GlobalConvModule, self).__init__()
31 | pad0 = (kernel_size[0] - 1) / 2
32 | pad1 = (kernel_size[1] - 1) / 2
33 | # kernel size had better be odd number so as to avoid alignment error
34 | super(_GlobalConvModule, self).__init__()
35 | self.conv_l1 = nn.Conv2d(in_dim, out_dim, kernel_size=(kernel_size[0], 1),
36 | padding=(pad0, 0))
37 | self.conv_l2 = nn.Conv2d(out_dim, out_dim, kernel_size=(1, kernel_size[1]),
38 | padding=(0, pad1))
39 | self.conv_r1 = nn.Conv2d(in_dim, out_dim, kernel_size=(1, kernel_size[1]),
40 | padding=(0, pad1))
41 | self.conv_r2 = nn.Conv2d(out_dim, out_dim, kernel_size=(kernel_size[0], 1),
42 | padding=(pad0, 0))
43 |
44 | def forward(self, x):
45 | x_l = self.conv_l1(x)
46 | x_l = self.conv_l2(x_l)
47 | x_r = self.conv_r1(x)
48 | x_r = self.conv_r2(x_r)
49 | x = x_l + x_r
50 | return x
51 |
52 |
class GCN(nn.Module):
    """Global Convolutional Network ("Large Kernel Matters") segmentation model.

    The encoder stages come from a pretrained ResNet-101 (via ModelBuilder);
    each stage output passes through a Global Convolution Module (GCM) and
    Boundary Refinement (BR) blocks, and the per-class score maps are merged
    top-down with learned 2x deconvolutions.

    Args:
        num_classes: number of segmentation classes.
        kernel_size: side of the large (k x k) GCM kernel.
    """

    def __init__(self, num_classes, kernel_size=7):
        super(GCN, self).__init__()
        # Pretrained backbone; ModelBuilder loads ImageNet weights.
        self.resnet_features = ModelBuilder().build_encoder("resnet101")
        # NOTE(review): the stem wires conv1/bn1/relu1 straight to
        # conv3/bn3/relu3, skipping conv2/bn2/relu2 of the 3-conv stem —
        # confirm this is intentional against the encoder definition.
        self.layer0 = nn.Sequential(self.resnet_features.conv1, self.resnet_features.bn1,
                                    self.resnet_features.relu1, self.resnet_features.conv3,
                                    self.resnet_features.bn3, self.resnet_features.relu3
                                    )
        self.layer1 = nn.Sequential(self.resnet_features.maxpool, self.resnet_features.layer1)
        self.layer2 = self.resnet_features.layer2
        self.layer3 = self.resnet_features.layer3
        self.layer4 = self.resnet_features.layer4

        # One GCM per encoder stage, all projecting to num_classes channels.
        self.gcm1 = _GlobalConvModule(2048, num_classes, (kernel_size, kernel_size))
        self.gcm2 = _GlobalConvModule(1024, num_classes, (kernel_size, kernel_size))
        self.gcm3 = _GlobalConvModule(512, num_classes, (kernel_size, kernel_size))
        self.gcm4 = _GlobalConvModule(256, num_classes, (kernel_size, kernel_size))

        # Boundary refinement: brm1-4 after the GCMs, brm5-9 along the decoder.
        self.brm1 = _BoundaryRefineModule(num_classes)
        self.brm2 = _BoundaryRefineModule(num_classes)
        self.brm3 = _BoundaryRefineModule(num_classes)
        self.brm4 = _BoundaryRefineModule(num_classes)
        self.brm5 = _BoundaryRefineModule(num_classes)
        self.brm6 = _BoundaryRefineModule(num_classes)
        self.brm7 = _BoundaryRefineModule(num_classes)
        self.brm8 = _BoundaryRefineModule(num_classes)
        self.brm9 = _BoundaryRefineModule(num_classes)

        # Learned 2x upsampling stages of the decoder.
        self.deconv1 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, padding=1, bias=False)
        self.deconv2 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, padding=1, bias=False)
        self.deconv3 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, padding=1, bias=False)
        self.deconv4 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, padding=1, bias=False)
        self.deconv5 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, padding=1, bias=False)

    def forward(self, x):
        """Return a score map at input resolution.

        The spatial sizes in the comments assume a 512 x 512 input.
        """
        # suppose input = x , if x 512
        f0 = self.layer0(x)  # 256
        f1 = self.layer1(f0)  # 128
        f2 = self.layer2(f1)  # 64
        f3 = self.layer3(f2)  # 32
        f4 = self.layer4(f3)  # 16

        # Per-stage score maps, each refined once.
        gcfm1 = self.brm1(self.gcm1(f4))  # 16
        gcfm2 = self.brm2(self.gcm2(f3))  # 32
        gcfm3 = self.brm3(self.gcm3(f2))  # 64
        gcfm4 = self.brm4(self.gcm4(f1))  # 128

        # Top-down fusion: upsample, add the next finer score map, refine.
        fs1 = self.brm5(self.deconv1(gcfm1) + gcfm2)  # 32
        fs2 = self.brm6(self.deconv2(fs1) + gcfm3)  # 64
        fs3 = self.brm7(self.deconv3(fs2) + gcfm4)  # 128
        fs4 = self.brm8(self.deconv4(fs3))  # 256
        out = self.brm9(self.deconv5(fs4))

        return out

    def freeze_bn(self):
        """Put every BatchNorm2d layer into eval mode (freeze batch stats)."""
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
112 |
if __name__ == '__main__':
    # Quick GPU smoke test of the GCN model.
    model = GCN(20).cuda()
    model.freeze_bn()
    model.eval()
    # `Variable(..., volatile=True)` was removed in PyTorch 0.4 and now
    # raises at runtime; torch.no_grad() is the supported replacement.
    image = torch.randn(1, 3, 512, 512).cuda()
    with torch.no_grad():
        print(model(image).size())
--------------------------------------------------------------------------------
/model/large_kernel_exfuse.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # Implementation of ExFuse: Enhancing Feature Fusion for Semantic Segmentation Paper (face++)
4 | # Author: Xiangtai(lxtpku@pku.edu.cn)
5 | # ###########
6 | # backbone GCN framework(large_kernel.py) and ResNext101 (Resnet) as pretrained model
7 | # Layer Rearrangement (LR) (0.8%): re-arrange the layer in the resnet model
8 | # Semantic Supervision (SS) (1.1%): used when training the model on the ImageNet
9 | # assign auxiliary supervisions directly to the early stages of the encoder network
10 | # Semantic Embedding Branch (SEB) (0.7%)
11 | # Explicit Channel Resolution Embedding (ECRE) (0.5%)
12 | # Densely Adjacent Prediction (0.6%)
13 |
14 | # ###########
15 |
16 | import torch
17 | from torch import nn
18 |
19 | from model.deeplab_resnet import ModelBuilder
20 |
21 | from .large_kernel import _GlobalConvModule
22 |
23 |
class SEB(nn.Module):
    """Semantic Embedding Branch (ExFuse).

    ``forward`` takes a pair ``(x1, x2)``: the higher-level map ``x2`` is
    convolved, upsampled 2x and multiplied element-wise into the
    lower-level map ``x1``.
    """

    def __init__(self, in_channels, out_channels):
        super(SEB, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.upsample = nn.Upsample(scale_factor=2, mode="bilinear")

    def forward(self, x):
        low, high = x
        gate = self.upsample(self.conv(high))
        return low * gate
33 |
34 |
35 |
class GCNFuse(nn.Module):
    """ExFuse-style segmentation network (GCN + SEB + ECRE + DAP).

    Encoder: ResNet-101 (deep-stem variant) built by ``ModelBuilder``.
    Decoder: Global Convolution (GCN) heads on each encoder stage,
    Semantic Embedding Branches (SEB) fusing deep into shallow features,
    Explicit Channel Resolution Embedding (ECRE, a PixelShuffle upsample)
    on the deepest head, and Densely Adjacent Prediction (DAP) on the
    final score map.

    Args:
        configer: unused here; kept for interface compatibility.
        kernel_size: kernel size of the global convolution modules.
        dap_k: neighborhood size of the densely adjacent prediction.
    """

    def __init__(self, configer=None, kernel_size=7, dap_k=3):
        super(GCNFuse, self).__init__()
        self.num_classes = 20
        num_classes = self.num_classes
        self.resnet_features = ModelBuilder().build_encoder("resnet101")
        # NOTE(review): the stem's conv2/bn2/relu2 are skipped here; shapes
        # still line up (conv2 is 64->64) but confirm this is intentional.
        self.layer0 = nn.Sequential(self.resnet_features.conv1, self.resnet_features.bn1,
                                    self.resnet_features.relu1, self.resnet_features.conv3,
                                    self.resnet_features.bn3, self.resnet_features.relu3
                                    )
        self.layer1 = nn.Sequential(self.resnet_features.maxpool, self.resnet_features.layer1)
        self.layer2 = self.resnet_features.layer2
        self.layer3 = self.resnet_features.layer3
        self.layer4 = self.resnet_features.layer4

        # gcm1 emits num_classes * 4 channels so PixelShuffle(2) (ECRE)
        # folds them back to num_classes while doubling the resolution.
        self.gcm1 = _GlobalConvModule(2048, num_classes * 4, (kernel_size, kernel_size))
        self.gcm2 = _GlobalConvModule(1024, num_classes, (kernel_size, kernel_size))
        self.gcm3 = _GlobalConvModule(512, num_classes * dap_k**2, (kernel_size, kernel_size))
        self.gcm4 = _GlobalConvModule(256, num_classes * dap_k**2, (kernel_size, kernel_size))

        self.deconv1 = nn.ConvTranspose2d(num_classes, num_classes * dap_k**2, kernel_size=4, stride=2, padding=1, bias=False)
        self.deconv2 = nn.ConvTranspose2d(num_classes, num_classes * dap_k**2, kernel_size=4, stride=2, padding=1, bias=False)
        self.deconv3 = nn.ConvTranspose2d(num_classes * dap_k**2, num_classes * dap_k**2, kernel_size=4, stride=2, padding=1, bias=False)
        self.deconv4 = nn.ConvTranspose2d(num_classes * dap_k**2, num_classes * dap_k**2, kernel_size=4, stride=2, padding=1, bias=False)
        self.deconv5 = nn.ConvTranspose2d(num_classes * dap_k**2, num_classes * dap_k**2, kernel_size=4, stride=2, padding=1, bias=False)

        self.ecre = nn.PixelShuffle(2)

        self.seb1 = SEB(2048, 1024)
        self.seb2 = SEB(3072, 512)
        self.seb3 = SEB(3584, 256)

        self.upsample2 = nn.Upsample(scale_factor=2, mode="bilinear")
        self.upsample4 = nn.Upsample(scale_factor=4, mode="bilinear")

        # DAP: shuffle the k^2 per-class channels into space, then average
        # the k x k neighborhood back to one prediction per class.
        self.DAP = nn.Sequential(
            nn.PixelShuffle(dap_k),
            nn.AvgPool2d((dap_k, dap_k))
        )

    def forward(self, x):
        # Spatial sizes below assume a 512x512 input.
        f0 = self.layer0(x)   # 256
        f1 = self.layer1(f0)  # 128
        f2 = self.layer2(f1)  # 64
        f3 = self.layer3(f2)  # 32
        f4 = self.layer4(f3)  # 16
        x = self.gcm1(f4)
        out1 = self.ecre(x)
        seb1 = self.seb1([f3, f4])
        gcn1 = self.gcm2(seb1)

        seb2 = self.seb2([f2, torch.cat([f3, self.upsample2(f4)], dim=1)])
        gcn2 = self.gcm3(seb2)

        seb3 = self.seb3([f1, torch.cat([f2, self.upsample2(f3), self.upsample4(f4)], dim=1)])
        gcn3 = self.gcm4(seb3)

        # Progressive top-down decoding; each deconv doubles resolution.
        y = self.deconv2(gcn1 + out1)
        y = self.deconv3(gcn2 + y)
        y = self.deconv4(gcn3 + y)
        y = self.deconv5(y)
        y = self.DAP(y)
        return y

    def freeze_bn(self):
        """Put every BatchNorm layer in eval mode (freeze running stats)."""
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
109 |
110 |
if __name__ == '__main__':
    model = GCNFuse(20).cuda()
    model.freeze_bn()
    model.eval()
    # torch.no_grad() replaces the deprecated `volatile=True` Variable flag.
    with torch.no_grad():
        image = torch.randn(1, 3, 512, 512).cuda()
        # forward() returns a single DAP-refined score map, not a tuple,
        # so unpacking two results here would raise at runtime.
        res = model(image)
    print(res.size())
--------------------------------------------------------------------------------
/model/seg_resnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | #resnet50 and resnet 101
4 | import os
5 | import sys
6 | import torch
7 | import torch.nn as nn
8 | import math
9 |
10 | try:
11 | from urllib import urlretrieve
12 | except ImportError:
13 | from urllib.request import urlretrieve
14 |
15 |
16 | __all__ = ['ResNet', 'resnet50', 'resnet101'] # resnet101 is coming soon!
17 |
18 |
# URLs of ImageNet-pretrained weights published with the MIT Scene Parsing
# benchmark; consumed by load_url() below.
model_urls = {
    'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth',
    'resnet101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth'
}
23 |
24 |
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution with padding 1."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
29 |
30 |
class BasicBlock(nn.Module):
    """Two 3x3 convolutions with an identity (or projected) shortcut."""

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Project the shortcut only when channel/stride mismatch requires it.
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        return self.relu(out + shortcut)
61 |
62 |
class Bottleneck(nn.Module):
    """1x1 reduce -> 3x3 -> 1x1 expand (4x) bottleneck with residual add."""

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Project the shortcut only when channel/stride mismatch requires it.
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        return self.relu(out + shortcut)
100 |
101 |
class ResNet(nn.Module):
    """ResNet classifier with a deep stem (three 3x3 convs instead of 7x7)."""

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 128
        super(ResNet, self).__init__()
        # Deep stem: 3x3/s2 -> 3x3 -> 3x3, ending at 128 channels.
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # He initialization for convs; unit gamma / zero beta for BN.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack `blocks` residual units; project the shortcut when needed."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        units = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        units.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*units)

    def forward(self, x):
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        x = self.avgpool(x)
        return self.fc(x.view(x.size(0), -1))
166 |
167 |
168 |
def resnet50(pretrained=False, **kwargs):
    """Construct a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on Places
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        weights = load_url(model_urls['resnet50'])
        model.load_state_dict(weights, strict=False)
    return model
179 |
180 |
def resnet101(pretrained=False, **kwargs):
    """Construct a ResNet-101 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on Places
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        weights = load_url(model_urls['resnet101'])
        model.load_state_dict(weights, strict=False)
    return model
191 |
192 |
def load_url(url, model_dir='./models/backbones/pretrained', map_location=None):
    """Download a checkpoint from *url* (if not already cached) and load it.

    Args:
        url: HTTP URL of the .pth checkpoint.
        model_dir: local cache directory. The previous default was a
            hard-coded (and misspelled) personal home directory,
            '/home/xiangtai/projec/pretrained', which breaks on any other
            machine; a relative path matching seg_resnext.py is used instead.
        map_location: forwarded to torch.load().

    Returns:
        The object deserialized by torch.load() (a state dict here).
    """
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    filename = url.split('/')[-1]
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file):
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        urlretrieve(url, cached_file)
    return torch.load(cached_file, map_location=map_location)
202 |
203 |
if __name__ == '__main__':
    # Smoke test: build (and download) the pretrained network, show its layout.
    net = resnet101(pretrained=True)
    print(net)
--------------------------------------------------------------------------------
/model/seg_resnext.py:
--------------------------------------------------------------------------------
1 | # synchronized batchnorm version of resnext101
2 | import os
3 | import sys
4 | import torch
5 | import torch.nn as nn
6 | import math
7 |
8 | try:
9 | from urllib import urlretrieve
10 | except ImportError:
11 | from urllib.request import urlretrieve
12 |
13 |
14 | __all__ = ['ResNeXt', 'resnext101'] # support resnext 101
15 |
# NOTE: this pretrained-weight URL is not usable for now.
model_urls = {
    'resnext101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext101-imagenet.pth'
}
20 |
21 |
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution, padding 1, no bias."""
    return nn.Conv2d(in_planes, out_planes,
                     kernel_size=3,
                     stride=stride,
                     padding=1,
                     bias=False)
26 |
27 |
class GroupBottleneck(nn.Module):
    """ResNeXt unit: 1x1 reduce -> 3x3 grouped conv -> 1x1 expand (2x)."""

    expansion = 2

    def __init__(self, inplanes, planes, stride=1, groups=1, downsample=None):
        super(GroupBottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 2)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Project the shortcut only when channel/stride mismatch requires it.
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        return self.relu(out + shortcut)
65 |
66 |
class ResNeXt(nn.Module):
    """ResNeXt classifier with a deep stem (three 3x3 convs instead of 7x7)."""

    def __init__(self, block, layers, groups=32, num_classes=1000):
        self.inplanes = 128
        super(ResNeXt, self).__init__()
        # Deep stem: 3x3/s2 -> 3x3 -> 3x3, ending at 128 channels.
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 128, layers[0], groups=groups)
        self.layer2 = self._make_layer(block, 256, layers[1], stride=2, groups=groups)
        self.layer3 = self._make_layer(block, 512, layers[2], stride=2, groups=groups)
        self.layer4 = self._make_layer(block, 1024, layers[3], stride=2, groups=groups)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(1024 * block.expansion, num_classes)

        # He initialization (fan-out corrected for grouped convs);
        # unit gamma / zero beta for BN.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels // m.groups
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, groups=1):
        """Stack `blocks` grouped residual units; project the shortcut when needed."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        units = [block(self.inplanes, planes, stride, groups, downsample)]
        self.inplanes = planes * block.expansion
        units.extend(block(self.inplanes, planes, groups=groups)
                     for _ in range(1, blocks))
        return nn.Sequential(*units)

    def forward(self, x):
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        x = self.avgpool(x)
        return self.fc(x.view(x.size(0), -1))
131 |
132 |
def resnext101(pretrained=False, **kwargs):
    """Construct a ResNeXt-101 (32 groups) model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on Places
    """
    model = ResNeXt(GroupBottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        weights = load_url(model_urls['resnext101'])
        model.load_state_dict(weights, strict=False)
    return model
143 |
144 |
def load_url(url, model_dir='./models/backbones/pretrained', map_location=None):
    """Return torch.load() of the checkpoint at *url*, downloading it into
    *model_dir* on first use and reusing the cached copy afterwards."""
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    cached_file = os.path.join(model_dir, url.split('/')[-1])
    if not os.path.exists(cached_file):
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        urlretrieve(url, cached_file)
    return torch.load(cached_file, map_location=map_location)
154 |
if __name__ == '__main__':
    # Smoke test: build (and download) the pretrained network, show its layout.
    net = resnext101(pretrained=True)
    print(net)
--------------------------------------------------------------------------------