├── README.md
├── caffe_converter
    └── convert_from_caffe.py
├── data
    ├── VOC_dataset.py
    ├── __init__.py
    ├── base_data_loader.py
    ├── base_dataset.py
    ├── custom_dataset_data_loader.py
    └── data_loader.py
├── models
    ├── Deeplab.py
    ├── Resnet_Deeplab.py
    ├── VGG_Deeplab.py
    ├── __init__.py
    ├── base_model.py
    ├── losses.py
    ├── model_utils.py
    └── models.py
├── options
    ├── __init__.py
    ├── base_options.py
    ├── test_options.py
    └── train_options.py
├── scripts
    ├── test.sh
    └── train.sh
├── test.py
├── test_ops.py
├── train.py
└── utils
    ├── __init__.py
    ├── gradcheck.py
    ├── html.py
    ├── util.py
    └── visualizer.py


/README.md:
--------------------------------------------------------------------------------
1 | This is a pytorch implementation for DeepLab.
2 | 
3 | To convert pretrained model from caffe, please take a look at `caffe_converter/convert_from_caffe.py`
4 | 


--------------------------------------------------------------------------------
/caffe_converter/convert_from_caffe.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import os.path as osp
 4 | import caffe
 5 | import torch
 6 | import numpy as np
 7 | import torchvision
 8 | from models import Deeplab
 9 | from collections import OrderedDict
10 | import caffe.proto.caffe_pb2 as caffe_pb2
11 | model = open('downloaded_caffemodel.caffemodel', 'rb')
12 | net_param = caffe_pb2.NetParameter()
13 | net_param.ParseFromString(model.read())
14 | assert (net_param.layer[42].name =='pool6_1x1_norm')
15 | 
16 | caffe_prototxt = 'train.prototxt'  # NOQA
17 | caffe_model_path = 'train_iter_20000.caffemodel'
18 | 
19 | caffe.set_mode_cpu()
20 | caffe_model = caffe.Net(caffe_prototxt, caffe_model_path, caffe.TEST)
21 | caffe_model.forward()
22 | 
23 | torch_model = Deeplab.Deeplab_SS(None, 20, False, vgg=True) # torchvision.models.vgg16()
24 | torch_model_params = torch_model.named_parameters()
25 | W = caffe_model.params['conv1_1'][0].data
26 | print np.mean(W)
27 | newdict  = OrderedDict()
28 | for name, p1 in caffe_model.params.iteritems():
29 |     try:
30 |         p2 = torch_model_params.next()
31 |         print('%s: %s -> %s %s' % (name, p1[0].data.shape, p2[0], p2[1].data.size()))
32 |         p2[1].data = torch.from_numpy(p1[0].data).float()
33 |         print(np.mean(p1[0].data))
34 |         if len(p1) == 2:
35 |             p2 = torch_model_params.next()
36 |             print('%s: %s ->%s %s' % (name, p1[1].data.shape, p2[0], p2[1].data.size()))
37 |             p2[1].data = torch.from_numpy(p1[1].data)
38 |             print(np.mean(p1[1].data),)
39 |     except StopIteration:
40 |         break
41 | 
42 | torch_model_path = 'DeepLab_VGG_caffe.pth'
43 | torch.save(torch_model.state_dict(), torch_model_path)
44 | 
45 | 


--------------------------------------------------------------------------------
/data/VOC_dataset.py:
--------------------------------------------------------------------------------
 1 | import os.path
 2 | import numpy as np
 3 | import torchvision
 4 | import torchvision.transforms as transforms
 5 | import torch
 6 | import h5py
 7 | import time
 8 | from data.base_dataset import *
 9 | from PIL import Image
10 | import math, random
11 | 
12 | 
13 | def make_dataset_fromlst(listfilename):
14 |     """
15 |     NYUlist format:
16 |     imagepath seglabelpath depthpath HHApath
17 |     """
18 |     images = []
19 |     segs = []
20 | 
21 |     with open(listfilename) as f:
22 |         content = f.readlines()
23 |         for x in content:
24 |             imgname, segname = x.strip().split(' ')
25 |             images += [imgname]
26 |             segs += [segname]
27 | 
28 |     return {'images':images, 'segs':segs}
29 | 
30 | class VOCDataset(BaseDataset):
31 |     def initialize(self, opt):
32 |         self.opt = opt
33 |         np.random.seed(8964)
34 |         self.paths_dict = make_dataset_fromlst(opt.list)
35 |         self.len = len(self.paths_dict['images'])
36 |         self.datafile = 'VOC_dataset.py'
37 | 
38 |     def __getitem__(self, index):
39 |         #self.paths['images'][index]
40 |         # print self.opt.scale,self.opt.flip,self.opt.crop,self.opt.colorjitter
41 |         img = np.asarray(Image.open(self.paths_dict['images'][index]))
42 |         seg = np.asarray(Image.open(self.paths_dict['segs'][index])).astype(np.uint8)
43 |         # print(np.unique(seg))
44 | 
45 |         params = get_params(self.opt, seg.shape)
46 |         seg_tensor_tranformed = transform(seg, params, normalize=False,method='nearest',istrain=self.opt.isTrain)
47 |         if self.opt.inputmode == 'bgr-mean':
48 |             img_tensor_tranformed = transform(img, params, normalize=False, istrain=self.opt.isTrain, option=1)
49 |         else:
50 |             img_tensor_tranformed = transform(img, params, istrain=self.opt.isTrain, option=1)
51 |         return {'image':img_tensor_tranformed,
52 |                 'seg': seg_tensor_tranformed,
53 |                 'imgpath': self.paths_dict['segs'][index]}
54 | 
55 |     def __len__(self):
56 |         return self.len
57 | 
58 |     def name(self):
59 |         return 'VOCDataset'
60 | 
61 | class VOCDataset_val(BaseDataset):
62 |     def initialize(self, opt):
63 |         self.opt = opt
64 |         self.paths_dict = make_dataset_fromlst(opt.vallist)
65 |         self.len = len(self.paths_dict['images'])
66 | 
67 |     def __getitem__(self, index):
68 |         img = np.asarray(Image.open(self.paths_dict['images'][index]))#.astype(np.uint8)
69 |         seg = np.asarray(Image.open(self.paths_dict['segs'][index])).astype(np.uint8)
70 | 
71 |         params = get_params(self.opt, seg.shape, test=True)
72 |         seg_tensor_tranformed = transform(seg, params, normalize=False,method='nearest',istrain=self.opt.isTrain)
73 |         if self.opt.inputmode == 'bgr-mean':
74 |             img_tensor_tranformed = transform(img, params, normalize=False, istrain=self.opt.isTrain, option=1)
75 |         else:
76 |             img_tensor_tranformed = transform(img, params, istrain=self.opt.isTrain, option=1)
77 | 
78 |         return {'image':img_tensor_tranformed,
79 |                 'seg': seg_tensor_tranformed,
80 |                 'imgpath': self.paths_dict['segs'][index]}
81 | 
82 |     def __len__(self):
83 |         return self.len
84 | 
85 |     def name(self):
86 |         return 'VOCDataset_val'
87 | 
88 | 
89 | 


--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laughtervv/Deeplab-Pytorch/deb98bd27922241070d04b6ab6fa094981c3b827/data/__init__.py


--------------------------------------------------------------------------------
/data/base_data_loader.py:
--------------------------------------------------------------------------------
 1 | 
 2 | class BaseDataLoader():
 3 |     def __init__(self):
 4 |         pass
 5 |     
 6 |     def initialize(self, opt):
 7 |         self.opt = opt
 8 |         pass
 9 | 
10 |     def load_data():
11 |         return None
12 | 
13 |         
14 |         
15 | 


--------------------------------------------------------------------------------
/data/base_dataset.py:
--------------------------------------------------------------------------------
  1 | import torch.utils.data as data
  2 | from PIL import Image
  3 | import torchvision.transforms as transforms
  4 | import numpy as np
  5 | import torch
  6 | import cv2
  7 | import random
  8 | 
  9 | class BaseDataset(data.Dataset):
 10 |     def __init__(self):
 11 |         super(BaseDataset, self).__init__()
 12 | 
 13 |     def name(self):
 14 |         return 'BaseDataset'
 15 | 
 16 |     def initialize(self, opt):
 17 |         pass
 18 | 
 19 | def get_params(opt, size, test=False):
 20 |     h, w = size
 21 |     if opt.scale and test==False:
 22 |         scale = random.uniform(0.76, 1.75)
 23 |         new_h = h * scale
 24 |         new_w = (new_h * w // h)
 25 | 
 26 |         new_h = int(round(new_h / 8) * 8)
 27 |         new_w = int(round(new_w / 8) * 8)
 28 | 
 29 |     else:
 30 |         new_h = h
 31 |         new_w = w
 32 |         # new_h = int(round(h / 8) * 8)
 33 |         # new_w = int(round(w / 8) * 8)
 34 | 
 35 |     if opt.flip and test==False:
 36 |         flip = random.random() > 0.5
 37 |     else:
 38 |         flip = False
 39 | 
 40 |     crop = False
 41 |     x1 = x2 = y1 = y2 = 0
 42 |     if opt.crop and test==False:
 43 |         # if new_h > 241 and new_w > 321: #424
 44 |         if opt.batchSize > 1:
 45 |             cropsizeh = 321
 46 |             cropsizew = 421#(cropsizeh * new_w // new_h)
 47 |         else:
 48 |             cropscale = random.uniform(0.6,.9)
 49 |             cropsizeh = int (new_h * cropscale)
 50 |             cropsizew = int (new_w * cropscale)
 51 |             # print cropsizeh,cropsizew,new_h,new_w
 52 |         x1 = random.randint(0, np.maximum(0, new_w - cropsizew))
 53 |         y1 = random.randint(0, np.maximum(0, new_h - cropsizeh))
 54 |         x2 = x1 + cropsizew -1
 55 |         y2 = y1 + cropsizeh -1
 56 |         crop = True
 57 | 
 58 |         # if opt.batchSize > 1:
 59 |         #     print cropsizew,cropsizeh
 60 |     if opt.colorjitter and test==False:
 61 |         colorjitter = True
 62 |     else:
 63 |         colorjitter = False
 64 |     return {'scale': (new_w, new_h),
 65 |             'flip': flip,
 66 |             'crop_pos': (x1, x2, y1, y2),
 67 |             'crop': crop,
 68 |             'colorjitter': colorjitter}
 69 | 
 70 | def get_params_sunrgbd(opt, size, test=False, maxcrop=0.8, maxscale=1.75):
 71 |     h, w = size
 72 |     if opt.scale and test==False:
 73 |         scale = random.uniform(0.76, maxscale)
 74 |         new_h = h * scale
 75 |         new_w = (new_h * w // h)
 76 | 
 77 |         new_h = int(round(new_h / 8) * 8)
 78 |         new_w = int(round(new_w / 8) * 8)
 79 | 
 80 |     else:
 81 |         new_h = h
 82 |         new_w = w
 83 |         # new_h = int(round(h / 8) * 8)
 84 |         # new_w = int(round(w / 8) * 8)
 85 | 
 86 |     if opt.flip and test==False:
 87 |         flip = random.random() > 0.5
 88 |     else:
 89 |         flip = False
 90 | 
 91 |     crop = False
 92 |     x1 = x2 = y1 = y2 = 0
 93 |     if opt.crop and test==False:
 94 |         # if new_h > 241 and new_w > 321: #424
 95 |         if opt.batchSize > 1:
 96 |             cropsizeh = 321
 97 |             cropsizew = 421#(cropsizeh * new_w // new_h)
 98 |         else:
 99 |             cropscale = random.uniform(0.6,maxcrop)
100 |             cropsizeh = int (new_h * cropscale)
101 |             cropsizew = int (new_w * cropscale)
102 |             # print cropsizeh,cropsizew,new_h,new_w
103 |         x1 = random.randint(0, np.maximum(0, new_w - cropsizew))
104 |         y1 = random.randint(0, np.maximum(0, new_h - cropsizeh))
105 |         x2 = x1 + cropsizew -1
106 |         y2 = y1 + cropsizeh -1
107 |         crop = True
108 | 
109 |         # if opt.batchSize > 1:
110 |         #     print cropsizew,cropsizeh
111 |     if opt.colorjitter and test==False:
112 |         colorjitter = True
113 |     else:
114 |         colorjitter = False
115 |     return {'scale': (new_w, new_h),
116 |             'flip': flip,
117 |             'crop_pos': (x1, x2, y1, y2),
118 |             'crop': crop,
119 |             'colorjitter': colorjitter}
120 | 
121 | def transform(numpyarray, params, normalize=True, method='linear', istrain=True, colorjitter=False, option=0):
122 |     # print params['crop'],params['colorjitter'],params['flip']
123 |     if method == 'linear':
124 |         numpyarray = cv2.resize(numpyarray, (params['scale'][0], params['scale'][1]), interpolation=cv2.INTER_LINEAR)
125 |     else:
126 |         numpyarray = cv2.resize(numpyarray, (params['scale'][0], params['scale'][1]), interpolation=cv2.INTER_NEAREST)
127 | 
128 |     if istrain:
129 |         if params['crop']:
130 |             # print (numpyarray.shape,params['crop_pos'])
131 |             numpyarray = numpyarray[params['crop_pos'][2]:params['crop_pos'][3],
132 |                                     params['crop_pos'][0]:params['crop_pos'][1],
133 |                                     ...]
134 |         if params['flip']:
135 |             numpyarray = numpyarray[:,
136 |                                     ::-1,
137 |                                     ...]
138 | 
139 |         if option==1:
140 |             if colorjitter and params['colorjitter'] and random.random() > 0.1:
141 |                 # numpyarray += np.random.rand() * 30 - 15
142 |                 # numpyarray[numpyarray > 255] = 255
143 |                 # numpyarray[numpyarray < 0] = 0
144 |                 hsv = cv2.cvtColor(numpyarray, cv2.COLOR_BGR2HSV)
145 |                 hsv[:, :, 0] += np.random.rand() * 70 - 35
146 |                 hsv[:, :, 1] += np.random.rand() * 0.3 - 0.15
147 |                 hsv[:, :, 2] += np.random.rand() * 50 - 25
148 |                 hsv[:, :, 0] = np.clip(hsv[:, :, 0], 0, 360.)
149 |                 hsv[:, :, 1] = np.clip(hsv[:, :, 1], 0, 1.)
150 |                 hsv[:, :, 2] = np.clip(hsv[:, :, 2], 0, 255.)
151 |                 numpyarray = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
152 |                 # print numpyarray.shape
153 | 
154 |     if option == 1:
155 |         if not normalize:
156 |             numpyarray = numpyarray - np.asarray([122.675,116.669,104.008])
157 |             numpyarray = numpyarray.transpose((2, 0, 1))[::-1,:,:].astype(np.float32)
158 |         else:
159 |             numpyarray = numpyarray.transpose((2, 0, 1)).astype(np.float32)/255.
160 | 
161 |     if option == 2:
162 |         if not normalize:
163 |             numpyarray = numpyarray - np.asarray([132.431, 94.076, 118.477])
164 |             numpyarray = numpyarray.transpose((2, 0, 1))[::-1,:,:].astype(np.float32)
165 |         else:
166 |             numpyarray = numpyarray.transpose((2, 0, 1)).astype(np.float32)/255.
167 | 
168 |     if len(numpyarray.shape) == 3:
169 |         torchtensor = torch.from_numpy(numpyarray.copy()).float()#.div(255)
170 |     else:
171 |         torchtensor = torch.from_numpy(np.expand_dims(numpyarray,axis=0).copy())
172 | 
173 |     if normalize:
174 |         # torchtensor = torchtensor.div(255)
175 |         normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
176 |                                          std=[0.229, 0.224, 0.225])
177 |         torchtensor = normalize(torchtensor)
178 | 
179 |     return torchtensor
180 | 
181 | 
182 | 


--------------------------------------------------------------------------------
/data/custom_dataset_data_loader.py:
--------------------------------------------------------------------------------
 1 | import torch.utils.data
 2 | from .base_data_loader import BaseDataLoader
 3 | 
 4 | 
 5 | def CreateDataset(opt):
 6 |     dataset = None
 7 |     if opt.dataset_mode == 'nyuv2':
 8 |         # from data.nyuv2_dataset import NYUDataset
 9 |         from data.nyuv2_dataset_crop import NYUDataset,NYUDataset_val
10 |         dataset = NYUDataset()
11 |         if opt.vallist!='':
12 |             dataset_val = NYUDataset_val()
13 |         else:
14 |             dataset_val = None
15 |     elif opt.dataset_mode == 'voc':
16 |         from data.VOC_dataset import VOCDataset,VOCDataset_val
17 |         dataset = VOCDataset()
18 |         if opt.vallist!='':
19 |             dataset_val = VOCDataset_val()
20 |         else:
21 |             dataset_val = None
22 | 
23 |     elif opt.dataset_mode == 'sunrgbd':
24 |         from data.sunrgbd_dataset import SUNRGBDDataset,SUNRGBDDataset_val
25 |         dataset = SUNRGBDDataset()
26 |         if opt.vallist!='':
27 |             dataset_val = SUNRGBDDataset_val()
28 |         else:
29 |             dataset_val = None
30 | 
31 |     elif opt.dataset_mode == 'stanfordindoor':
32 |         from data.stanfordindoor_dataset import StanfordIndoorDataset, StanfordIndoorDataset_val
33 |         dataset = StanfordIndoorDataset()
34 |         if opt.vallist!='':
35 |             dataset_val = StanfordIndoorDataset_val()
36 |         else:
37 |             dataset_val = None
38 | 
39 |     print("dataset [%s] was created" % (dataset.name()))
40 |     dataset.initialize(opt)
41 |     if dataset_val != None:
42 |         dataset_val.initialize(opt)
43 |     return dataset,dataset_val
44 | 
45 | class CustomDatasetDataLoader(BaseDataLoader):
46 |     def name(self):
47 |         return 'CustomDatasetDataLoader'
48 | 
49 |     def initialize(self, opt):
50 |         BaseDataLoader.initialize(self, opt)
51 |         self.dataset, self.dataset_val = CreateDataset(opt)
52 |         self.dataloader = torch.utils.data.DataLoader(
53 |             self.dataset,
54 |             batch_size=opt.batchSize,
55 |             shuffle=not opt.serial_batches,
56 |             num_workers=int(opt.nThreads))
57 |         if self.dataset_val != None:
58 |             self.dataloader_val = torch.utils.data.DataLoader(
59 |                 self.dataset_val,
60 |                 batch_size=1,
61 |                 shuffle=False,
62 |                 num_workers=int(opt.nThreads))
63 |         else:
64 |             self.dataloader_val = None
65 | 
66 | 
67 |     def load_data(self):
68 |         return self.dataloader, self.dataloader_val
69 | 
70 |     def __len__(self):
71 |         return min(len(self.dataset), self.opt.max_dataset_size)
72 | 


--------------------------------------------------------------------------------
/data/data_loader.py:
--------------------------------------------------------------------------------
1 | 
2 | def CreateDataLoader(opt):
3 |     from data.custom_dataset_data_loader import CustomDatasetDataLoader
4 |     data_loader = CustomDatasetDataLoader()
5 |     print(data_loader.name())
6 |     data_loader.initialize(opt)
7 |     return data_loader
8 | 
9 | 


--------------------------------------------------------------------------------
/models/Deeplab.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import math
  3 | import torch.utils.model_zoo as model_zoo
  4 | import torch
  5 | from .base_model import BaseModel
  6 | import numpy as np
  7 | from . import losses
  8 | import shutil
  9 | from utils.util import *
 10 | from torch.autograd import Variable
 11 | from collections import OrderedDict
 12 | from tensorboardX import SummaryWriter
 13 | import os
 14 | import VGG_Deeplab as VGG_Deeplab
 15 | 
 16 | 
 17 | class Deeplab_VGG(nn.Module):
 18 |     def __init__(self, num_classes, depthconv=False):
 19 |         super(Deeplab_VGG,self).__init__()
 20 |         self.Scale = VGG_Deeplab.vgg16(num_classes=num_classes,depthconv=depthconv)
 21 | 
 22 |     def forward(self,x, depth=None):
 23 |         output = self.Scale(x,depth) # for original scale
 24 |         return output
 25 | 
 26 | #------------------------------------------------------#
 27 | 
 28 | class Deeplab_Solver(BaseModel):
 29 |     def __init__(self, opt, dataset=None, encoder='VGG'):
 30 |         BaseModel.initialize(self, opt)
 31 |         self.encoder = encoder
 32 |         if encoder == 'VGG':
 33 |             self.model = Deeplab_VGG(self.opt.label_nc, self.opt.depthconv)
 34 | 
 35 |         if self.opt.isTrain:
 36 |             self.criterionSeg = torch.nn.CrossEntropyLoss(ignore_index=255).cuda()
 37 |             # self.criterionSeg = torch.nn.CrossEntropyLoss(ignore_index=255).cuda()
 38 |             # self.criterionSeg = nn.NLLLoss2d(ignore_index=255)#.cuda()
 39 | 
 40 |             if encoder == 'VGG':
 41 |                 self.optimizer = torch.optim.SGD([{'params': self.model.Scale.get_1x_lr_params_NOscale(), 'lr': self.opt.lr},
 42 |                                                  {'params': self.model.Scale.get_10x_lr_params(), 'lr': self.opt.lr},
 43 |                                                  {'params': self.model.Scale.get_2x_lr_params_NOscale(), 'lr': self.opt.lr, 'weight_decay': 0.},
 44 |                                                  {'params': self.model.Scale.get_20x_lr_params(), 'lr': self.opt.lr, 'weight_decay': 0.}
 45 |                                                   ],
 46 |                                                  lr=self.opt.lr, momentum=self.opt.momentum, weight_decay=self.opt.wd)
 47 | 
 48 |             # self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.opt.lr, momentum=self.opt.momentum, weight_decay=self.opt.wd)
 49 | 
 50 |             self.old_lr = self.opt.lr
 51 |             self.averageloss = []
 52 |             # copy scripts
 53 |             self.model_path = './models' #os.path.dirname(os.path.realpath(__file__))
 54 |             self.data_path = './data' #os.path.dirname(os.path.realpath(__file__))
 55 |             shutil.copyfile(os.path.join(self.model_path, 'Deeplab.py'), os.path.join(self.model_dir, 'Deeplab.py'))
 56 | 
 57 |             if encoder == 'VGG':
 58 |                 shutil.copyfile(os.path.join(self.model_path, 'VGG_Deeplab.py'), os.path.join(self.model_dir, 'VGG_Deeplab.py'))
 59 |             shutil.copyfile(os.path.join(self.model_path, 'model_utils.py'), os.path.join(self.model_dir, 'model_utils.py'))
 60 |             shutil.copyfile(os.path.join(self.data_path, dataset.datafile), os.path.join(self.model_dir, dataset.datafile))
 61 |             shutil.copyfile(os.path.join(self.data_path, 'base_dataset.py'), os.path.join(self.model_dir, 'base_dataset.py'))
 62 | 
 63 |             self.writer = SummaryWriter(self.tensorborad_dir)
 64 |             self.counter = 0
 65 | 
 66 |         if not self.isTrain or self.opt.continue_train:
 67 |             if self.opt.pretrained_model!='':
 68 |                 self.load_pretrained_network(self.model, self.opt.pretrained_model, self.opt.which_epoch, strict=False)
 69 |                 print("Successfully loaded from pretrained model with given path!")
 70 |             else:
 71 |                 self.load()
 72 |                 print("Successfully loaded model, continue training....!")
 73 | 
 74 |         self.model.cuda()
 75 |         self.normweightgrad=0.
 76 |         # if len(opt.gpu_ids):#opt.isTrain and
 77 |         #     self.model = torch.nn.DataParallel(self.model, device_ids=opt.gpu_ids)
 78 | 
 79 |     def forward(self, data, isTrain=True):
 80 |         self.model.zero_grad()
 81 | 
 82 |         self.image = Variable(data['image'], volatile=not isTrain).cuda()
 83 |         if 'depth' in data.keys():
 84 |             self.depth = Variable(data['depth'], volatile=not isTrain).cuda()
 85 |         else:
 86 |             self.depth = None
 87 |         if data['seg'] is not None:
 88 |             self.seggt = Variable(data['seg'], volatile=not isTrain).cuda()
 89 |         else:
 90 |             self.seggt = None
 91 | 
 92 |         input_size = self.image.size()
 93 | 
 94 |         self.segpred = self.model(self.image,self.depth)
 95 |         self.segpred = nn.functional.upsample(self.segpred, size=(input_size[2], input_size[3]), mode='bilinear')
 96 |         # self.segpred = nn.functional.log_softmax(nn.functional.upsample(self.segpred, size=(input_size[2], input_size[3]), mode='bilinear'))
 97 | 
 98 |         if self.opt.isTrain:
 99 |             self.loss = self.criterionSeg(self.segpred, torch.squeeze(self.seggt,1).long())
100 |             self.averageloss += [self.loss.data[0]]
101 | 
102 |         segpred = self.segpred.max(1, keepdim=True)[1]
103 |         return self.seggt, segpred
104 | 
105 | 
106 |     def backward(self, step, total_step):
107 |         self.loss.backward()
108 |         self.optimizer.step()
109 |         # print self.model.Scale.classifier.fc6_2.weight.grad.mean().data.cpu().numpy()
110 |         # self.normweightgrad +=self.model.Scale.classifier.norm.scale.grad.mean().data.cpu().numpy()
111 |         # print self.normweightgrad#self.model.Scale.classifier.norm.scale.grad.mean().data.cpu().numpy()
112 |         if step % self.opt.iterSize  == 0:
113 |             self.update_learning_rate(step, total_step)
114 |             trainingavgloss = np.mean(self.averageloss)
115 |             if self.opt.verbose:
116 |                 print ('  Iter: %d, Loss: %f' % (step, trainingavgloss) )
117 | 
118 |     def get_visuals(self, step):
119 |         ############## Display results and errors ############
120 |         if self.opt.isTrain:
121 |             self.trainingavgloss = np.mean(self.averageloss)
122 |             if self.opt.verbose:
123 |                 print ('  Iter: %d, Loss: %f' % (step, self.trainingavgloss) )
124 |             self.writer.add_scalar(self.opt.name+'/trainingloss/', self.trainingavgloss, step)
125 |             self.averageloss = []
126 | 
127 |         if self.depth is not None:
128 |             return OrderedDict([('image', tensor2im(self.image.data[0], inputmode=self.opt.inputmode)),
129 |                                 ('depth', tensor2im(self.depth.data[0], inputmode='divstd-mean')),
130 |                                 ('segpred', tensor2label(self.segpred.data[0], self.opt.label_nc)),
131 |                                 ('seggt', tensor2label(self.seggt.data[0], self.opt.label_nc))])
132 |         else:
133 |             return OrderedDict([('image', tensor2im(self.image.data[0], inputmode=self.opt.inputmode)),
134 |                                 ('segpred', tensor2label(self.segpred.data[0], self.opt.label_nc)),
135 |                                 ('seggt', tensor2label(self.seggt.data[0], self.opt.label_nc))])
136 | 
137 |     def update_tensorboard(self, data, step):
138 |         if self.opt.isTrain:
139 |             self.writer.add_scalar(self.opt.name+'/Accuracy/', data[0], step)
140 |             self.writer.add_scalar(self.opt.name+'/Accuracy_Class/', data[1], step)
141 |             self.writer.add_scalar(self.opt.name+'/Mean_IoU/', data[2], step)
142 |             self.writer.add_scalar(self.opt.name+'/FWAV_Accuracy/', data[3], step)
143 | 
144 |             self.trainingavgloss = np.mean(self.averageloss)
145 |             self.writer.add_scalars(self.opt.name+'/loss', {"train": self.trainingavgloss,
146 |                                                              "val": np.mean(self.averageloss)}, step)
147 | 
148 |             self.writer.add_scalars('trainingavgloss/', {self.opt.name: self.trainingavgloss}, step)
149 |             self.writer.add_scalars('valloss/', {self.opt.name: np.mean(self.averageloss)}, step)
150 |             self.writer.add_scalars('val_MeanIoU/', {self.opt.name: data[2]}, step)
151 | 
152 |             file_name = os.path.join(self.save_dir, 'MIoU.txt')
153 |             with open(file_name, 'wt') as opt_file:
154 |                 opt_file.write('%f\n' % (data[2]))
155 |             # self.writer.add_scalars('losses/'+self.opt.name, {"train": self.trainingavgloss,
156 |             #                                                  "val": np.mean(self.averageloss)}, step)
157 |             self.averageloss = []
158 | 
159 |     def save(self, which_epoch):
160 |         # self.save_network(self.netG, 'G', which_epoch, self.gpu_ids)
161 |         self.save_network(self.model, 'net', which_epoch, self.gpu_ids)
162 | 
163 |     def load(self):
164 |         self.load_network(self.model, 'net',self.opt.which_epoch)
165 | 
166 |     def update_learning_rate(self, step, total_step):
167 | 
168 |         lr = max(self.opt.lr * ((1 - float(step) / total_step) ** (self.opt.lr_power)), 1e-6)
169 | 
170 |         # drop_ratio = (1. * float(total_step - step) / (total_step - step + 1)) ** self.opt.lr_power
171 |         # lr = self.old_lr * drop_ratio
172 | 
173 |         self.writer.add_scalar(self.opt.name+'/Learning_Rate/', lr, step)
174 | 
175 | 	self.optimizer.param_groups[0]['lr'] = lr
176 | 	self.optimizer.param_groups[1]['lr'] = lr
177 | 	self.optimizer.param_groups[2]['lr'] = lr
178 | 	self.optimizer.param_groups[3]['lr'] = lr
179 | 	# self.optimizer.param_groups[0]['lr'] = lr
180 | 	# self.optimizer.param_groups[1]['lr'] = lr*10
181 | 	# self.optimizer.param_groups[2]['lr'] = lr*2 #* 100
182 | 	# self.optimizer.param_groups[3]['lr'] = lr*20
183 | 	# self.optimizer.param_groups[4]['lr'] = lr*100
184 | 
185 | 
186 |         # torch.nn.utils.clip_grad_norm(self.model.Scale.get_1x_lr_params_NOscale(), 1.)
187 |         # torch.nn.utils.clip_grad_norm(self.model.Scale.get_10x_lr_params(), 1.)
188 | 
189 |         if self.opt.verbose:
190 |             print('     update learning rate: %f -> %f' % (self.old_lr, lr))
191 |         self.old_lr = lr
192 | 
193 | 
194 | 


--------------------------------------------------------------------------------
/models/Resnet_Deeplab.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import math
  3 | import torch.utils.model_zoo as model_zoo
  4 | import torch
  5 | from .base_model import BaseModel, load_pretrained_model
  6 | import numpy as np
  7 | import shutil
  8 | from utils import util
  9 | from collections import OrderedDict
 10 | from tensorboardX import SummaryWriter
 11 | import os
 12 | from torch.autograd import Variable
 13 | from .ops.depthconv.modules import DepthConv
 14 | from .ops.depthavgpooling.modules import Depthavgpooling
 15 | 
 16 | 
 17 | affine_par = True
 18 | 
 19 | 
 20 | def outS(i):
 21 |     i = int(i)
 22 |     i = (i+1)/2
 23 |     i = int(np.ceil((i+1)/2.0))
 24 |     i = (i+1)/2
 25 |     return i
 26 | 
 27 | def conv3x3(in_planes, out_planes, stride=1):
 28 |     "3x3 convolution with padding"
 29 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
 30 |                      padding=1, bias=False)
 31 | 
 32 | 
 33 | class BasicBlock(nn.Module):
 34 |     expansion = 1
 35 | 
 36 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 37 |         super(BasicBlock, self).__init__()
 38 |         self.conv1 = conv3x3(inplanes, planes, stride)
 39 |         self.bn1 = nn.BatchNorm2d(planes, affine = affine_par)
 40 |         self.relu = nn.ReLU(inplace=True)
 41 |         self.conv2 = conv3x3(planes, planes)
 42 |         self.bn2 = nn.BatchNorm2d(planes, affine = affine_par)
 43 |         self.downsample = downsample
 44 |         self.stride = stride
 45 | 
 46 |     def forward(self, x):
 47 |         residual = x
 48 | 
 49 |         out = self.conv1(x)
 50 |         out = self.bn1(out)
 51 |         out = self.relu(out)
 52 | 
 53 |         out = self.conv2(out)
 54 |         out = self.bn2(out)
 55 | 
 56 |         if self.downsample is not None:
 57 |             residual = self.downsample(x)
 58 | 
 59 |         out += residual
 60 |         out = self.relu(out)
 61 | 
 62 |         return out
 63 | 
 64 | 
 65 | class Bottleneck(nn.Module):
 66 |     expansion = 4
 67 | 
 68 |     def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None):
 69 |         super(Bottleneck, self).__init__()
 70 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change
 71 |         self.bn1 = nn.BatchNorm2d(planes,affine = affine_par)
 72 |         # for i in self.bn1.parameters():
 73 |         #     i.requires_grad = False
 74 | 
 75 |         padding = dilation
 76 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change
 77 |                                padding=padding, bias=False, dilation = dilation)
 78 |         self.bn2 = nn.BatchNorm2d(planes,affine = affine_par)
 79 |         # for i in self.bn2.parameters():
 80 |         #     i.requires_grad = False
 81 |         self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
 82 |         self.bn3 = nn.BatchNorm2d(planes * 4, affine = affine_par)
 83 |         # for i in self.bn3.parameters():
 84 |         #     i.requires_grad = False
 85 |         self.relu = nn.ReLU(inplace=True)
 86 |         self.downsample = downsample
 87 |         self.stride = stride
 88 | 
 89 | 
 90 |     def forward(self, x):
 91 |         residual = x
 92 | 
 93 |         out = self.conv1(x)
 94 |         out = self.bn1(out)
 95 |         out = self.relu(out)
 96 | 
 97 |         out = self.conv2(out)
 98 |         out = self.bn2(out)
 99 |         out = self.relu(out)
100 | 
101 |         out = self.conv3(out)
102 |         out = self.bn3(out)
103 | 
104 |         if self.downsample is not None:
105 |             residual = self.downsample(x)
106 | 
107 |         out += residual
108 |         out = self.relu(out)
109 | 
110 |         return out
111 | 
112 | 
113 | class DepthConvBottleneck(nn.Module):
114 |     expansion = 4
115 | 
116 |     def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1):
117 |         super(DepthConvBottleneck, self).__init__()
118 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
119 |         self.bn1 = nn.BatchNorm2d(planes)
120 |         # for i in self.bn1.parameters():
121 |         #     i.requires_grad = False
122 | 
123 |         padding = dilation
124 |         self.conv2 = DepthConv(planes,planes,kernel_size=3,stride=stride,padding=padding, dilation = dilation, bias=False)
125 |             # nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
126 |             #                    padding=1, bias=False)
127 |         self.bn2 = nn.BatchNorm2d(planes)
128 |         # for i in self.bn2.parameters():
129 |         #     i.requires_grad = False
130 |         self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
131 |         self.bn3 = nn.BatchNorm2d(planes * 4)
132 |         # for i in self.bn3.parameters():
133 |         #     i.requires_grad = False
134 |         self.relu = nn.ReLU(inplace=True)
135 |         self.downsample = downsample
136 |         self.stride = stride
137 | 
138 |     def forward(self, x_depth):
139 |         x, depth = x_depth
140 |         residual = x
141 | 
142 |         out = self.conv1(x)
143 |         out = self.bn1(out)
144 |         out = self.relu(out)
145 | 
146 |         # print('d',out.size(), depth.size())
147 |         out = self.conv2(out,depth)
148 |         out = self.bn2(out)
149 |         out = self.relu(out)
150 | 
151 |         out = self.conv3(out)
152 |         out = self.bn3(out)
153 | 
154 |         if self.downsample is not None:
155 |             residual = self.downsample(x)
156 | 
157 |         out += residual
158 |         out = self.relu(out)
159 | 
160 |         return out
161 | 
162 | class Classifier_Module(nn.Module):
163 | 
164 |     def __init__(self, dilation_series, padding_series, num_classes, inplanes, depthconv=False):
165 |         super(Classifier_Module, self).__init__()
166 |         self.conv2d_list = nn.ModuleList()
167 |         for dilation, padding in zip(dilation_series, padding_series):
168 |             if depthconv:
169 |                 conv = DepthConv(inplanes, num_classes, kernel_size=3, stride=1, padding=padding, dilation=dilation, bias= True)
170 |             else:
171 |                 conv = nn.Conv2d(inplanes, num_classes, kernel_size=3, stride=1, padding=padding, dilation=dilation, bias = True)
172 |             self.conv2d_list.append(conv)
173 | 
174 |         for m in self.conv2d_list:
175 |             m.weight.data.normal_(0, 0.01)
176 | 
177 |     def forward(self, x):
178 |         out = self.conv2d_list[0](x)
179 |         for i in range(len(self.conv2d_list)-1):
180 |             out += self.conv2d_list[i+1](x)
181 |         return out
182 | 
183 | class Residual_Covolution(nn.Module):
184 |     def __init__(self, icol, ocol, num_classes):
185 |         super(Residual_Covolution, self).__init__()
186 |         self.conv1 = nn.Conv2d(icol, ocol, kernel_size=3, stride=1, padding=12, dilation=12, bias=True)
187 |         self.conv2 = nn.Conv2d(ocol, num_classes, kernel_size=3, stride=1, padding=12, dilation=12, bias=True)
188 |         self.conv3 = nn.Conv2d(num_classes, ocol, kernel_size=1, stride=1, padding=0, dilation=1, bias=True)
189 |         self.conv4 = nn.Conv2d(ocol, icol, kernel_size=1, stride=1, padding=0, dilation=1, bias=True)
190 |         self.relu = nn.ReLU(inplace=True)
191 | 
192 |     def forward(self, x):
193 |         dow1 = self.conv1(x)
194 |         dow1 = self.relu(dow1)
195 |         seg = self.conv2(dow1)
196 |         inc1 = self.conv3(seg)
197 |         add1 = dow1 + self.relu(inc1)
198 |         inc2 = self.conv4(add1)
199 |         out = x + self.relu(inc2)
200 |         return out, seg
201 | 
202 | class Residual_Refinement_Module(nn.Module):
203 | 
204 |     def __init__(self, num_classes):
205 |         super(Residual_Refinement_Module, self).__init__()
206 |         self.RC1 = Residual_Covolution(2048, 512, num_classes)
207 |         self.RC2 = Residual_Covolution(2048, 512, num_classes)
208 | 
209 |     def forward(self, x):
210 |         x, seg1 = self.RC1(x)
211 |         _, seg2 = self.RC2(x)
212 |         return [seg1, seg1+seg2]
213 | 
214 | class ResNet_Refine(nn.Module):
215 |     def __init__(self, block, layers, num_classes, depthconv=False):
216 |         self.inplanes = 64
217 |         super(ResNet_Refine, self).__init__()
218 |         self.depthconv = depthconv
219 |         if depthconv:
220 |             self.conv1 = DepthConv(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
221 |         else:
222 |             self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
223 | 
224 |         # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
225 |         self.bn1 = nn.BatchNorm2d(64, affine = affine_par)
226 |         # for i in self.bn1.parameters():
227 |         #     i.requires_grad = False
228 |         self.relu = nn.ReLU(inplace=True)
229 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) # change
230 |         # self.layer1 = self._make_layer(block, 64, layers[0])
231 |         if depthconv:
232 |             self.downsample_depth1 = nn.AvgPool2d(5,padding=1,stride=4)
233 |             self.layer1 = self._make_layer_depthconv(64, layers[0])
234 |         else:
235 |             self.layer1 = self._make_layer(block, 64, layers[0])
236 | 
237 |         if depthconv:
238 |             self.downsample_depth2 = nn.AvgPool2d(3,padding=1,stride=2)
239 |             self.layer2 = self._make_layer_depthconv(128, layers[1], stride=2)
240 |         else:
241 |             self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
242 | 
243 |         if depthconv:
244 |             # self.downsample_depth3 = nn.AvgPool2d(3,padding=1,stride=2)
245 |             self.layer3 = self._make_layer_depthconv(256, layers[2], stride=1, dilation=2)
246 |         else:
247 |             self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2)
248 | 
249 |         if depthconv:
250 |             # self.downsample_depth4 = nn.AvgPool2d(3,padding=1,stride=2)
251 |             self.layer4 = self._make_layer_depthconv(512, layers[3], stride=1, dilation=4)
252 |         else:
253 |             self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4)
254 | 
255 |         self.dropout = nn.Dropout()
256 |         self.layer5 = Residual_Refinement_Module(num_classes)
257 | 
258 |         for m in self.modules():
259 |             if isinstance(m, nn.Conv2d):
260 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
261 |                 m.weight.data.normal_(0, 0.01)
262 |             elif isinstance(m, nn.BatchNorm2d):
263 |                 m.weight.data.fill_(1)
264 |                 m.bias.data.zero_()
265 |                 # for i in m.parameters():
266 |                 #     i.requires_grad = False
267 | 
268 |     def _make_layer(self, block, planes, blocks, stride=1, dilation=1):
269 |         downsample = None
270 |         if stride != 1 or self.inplanes != planes * block.expansion or dilation == 2 or dilation == 4:
271 |             downsample = nn.Sequential(
272 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
273 |                           kernel_size=1, stride=stride, bias=False),
274 |                 nn.BatchNorm2d(planes * block.expansion,affine = affine_par))
275 |         # for i in downsample._modules['1'].parameters():
276 |         #     i.requires_grad = False
277 |         layers = []
278 |         layers.append(block(self.inplanes, planes, stride,dilation=dilation, downsample=downsample))
279 |         self.inplanes = planes * block.expansion
280 |         for i in range(1, blocks):
281 |             layers.append(block(self.inplanes, planes, dilation=dilation))
282 | 
283 |         return nn.Sequential(*layers)
284 | 
285 |     def _make_layer_depthconv(self, planes, blocks, stride=1, dilation = 1):
286 |         downsample = None
287 |         block = DepthConvBottleneck
288 |         if stride != 1 or self.inplanes != planes * block.expansion:
289 |             downsample = nn.Sequential(
290 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
291 |                           kernel_size=1, stride=stride, bias=False),
292 |                 nn.BatchNorm2d(planes * block.expansion),
293 |             )
294 |         layers = []
295 |         layers.append(block(self.inplanes, planes, stride, downsample,dilation=dilation))
296 |         self.inplanes = planes * block.expansion
297 |         for i in range(1, blocks):
298 |             layers.append(block(self.inplanes, planes))
299 | 
300 |         return nn.Sequential(*layers)
301 | 
302 |     def forward(self, x, depth=None):
303 |         if self.depthconv:
304 |             x = self.conv1(x,depth)
305 |         else:
306 |             x = self.conv1(x)
307 | 
308 |         x = self.bn1(x)
309 |         x = self.relu(x)
310 |         x = self.maxpool(x)
311 | 
312 |         if self.depthconv:
313 |             depth = self.downsample_depth1(depth)
314 |             x,depth = self.layer1((x,depth))
315 |         else:
316 |             x = self.layer1(x)
317 | 
318 |         if self.depthconv:
319 |             depth = self.downsample_depth2(depth)
320 |             x,depth = self.layer2((x,depth))
321 |         else:
322 |             x = self.layer2(x)
323 |         if self.depthconv:
324 |             depth = self.downsample_depth3(depth)
325 |             x,_ = self.layer3((x,depth))
326 |         else:
327 |             x = self.layer3(x)
328 |         if self.depthconv:
329 |             x,_ = self.layer4((x,depth))
330 |         else:
331 |             x = self.layer4(x)
332 |         x = self.dropout(x)
333 |         x = self.layer5(x)
334 | 
335 |         return x
336 | 
337 | class ResNet(nn.Module):
338 |     def __init__(self, block, layers, num_classes, depthconv=False, globalpooling=False, pretrain=False):
339 |         self.inplanes = 64
340 |         super(ResNet, self).__init__()
341 |         self.depthconv = depthconv
342 |         if depthconv:
343 |             self.conv1 = DepthConv(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
344 |         else:
345 |             self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
346 | 
347 |         self.bn1 = nn.BatchNorm2d(64, affine = affine_par)
348 |         # for i in self.bn1.parameters():
349 |         #     i.requires_grad = False
350 |         self.relu = nn.ReLU(inplace=True)
351 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) # change
352 | 
353 |         if depthconv:
354 |             self.layer1 = self._make_layer_depthconv(block, 64, layers[0])
355 |             self.downsample_depth1 = nn.AvgPool2d(5,padding=1,stride=4)
356 |         else:
357 |             self.layer1 = self._make_layer(block, 64, layers[0])
358 | 
359 |         if depthconv:
360 |             self.layer2 = self._make_layer_depthconv(block, 128, layers[1], stride=2)
361 |             self.downsample_depth2 = nn.AvgPool2d(3,padding=1,stride=2)
362 |         else:
363 |             self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
364 | 
365 | 
366 |         if depthconv:
367 |             self.layer3 = self._make_layer_depthconv(block, 256, layers[2], stride=1, dilation=2)
368 |             self.downsample_depth3 = nn.AvgPool2d(3,padding=1,stride=2)
369 |         else:
370 |             self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2)
371 | 
372 |         if depthconv:
373 |             # self.downsample_depth4 = nn.AvgPool2d(3,padding=1,stride=2)
374 |             self.layer4 = self._make_layer_depthconv(block, 512, layers[3], stride=1, dilation=4)
375 |         else:
376 |             self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4)
377 |         # self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4)
378 |         self.globalpooling = globalpooling
379 |         if globalpooling:
380 |             self.globalpooling = nn.AdaptiveMaxPool2d((1,1))
381 |             self.inplanes *= 2
382 |         self.dropout = nn.Dropout()
383 |         self.layer5 = self._make_pred_layer(Classifier_Module, [12],[12],num_classes,self.inplanes)
384 |         # self.layer5 = self._make_pred_layer(Classifier_Module, [6,12,18,24],[6,12,18,24],num_classes,self.inplanes)
385 | 
386 |         self.pool5a = nn.AvgPool2d(kernel_size=3, stride=1,padding=1)
387 |         self.pool5a_d = Depthavgpooling(kernel_size=3, stride=1,padding=1)
388 | 
389 |         for m in self.modules():
390 |             if isinstance(m, nn.Conv2d):
391 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
392 |                 # m.weight.data.normal_(0, math.sqrt(2. / n))#(0, 0.01)#
393 |                 torch.nn.init.xavier_uniform(m.weight)
394 |             elif isinstance(m, nn.BatchNorm2d):
395 |                 if affine_par:
396 |                     m.weight.data.fill_(1)
397 |                     m.bias.data.zero_()
398 |                 for i in m.parameters():
399 |                     i.requires_grad = False
400 |         if pretrain:
401 |             load_pretrained_model(self,
402 |                                   model_zoo.load_url('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth'),
403 |                                   False)
404 | 
405 |     def _make_layer(self, block, planes, blocks, stride=1, dilation=1):
406 |         downsample = None
407 |         if stride != 1 or self.inplanes != planes * block.expansion or dilation == 2 or dilation == 4:
408 |             downsample = nn.Sequential(
409 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
410 |                           kernel_size=1, stride=stride, bias=False),
411 |                 nn.BatchNorm2d(planes * block.expansion,affine = affine_par))
412 |         # for i in downsample._modules['1'].parameters():
413 |         #     i.requires_grad = False
414 |         layers = []
415 |         layers.append(block(self.inplanes, planes, stride,dilation=dilation, downsample=downsample))
416 |         self.inplanes = planes * block.expansion
417 |         for i in range(1, blocks):
418 |             layers.append(block(self.inplanes, planes, dilation=dilation))
419 | 
420 |         return nn.Sequential(*layers)
421 | 
422 |     def _make_layer_depthconv(self, block,  planes, blocks, stride=1, dilation=1):
423 |         downsample = None
424 |         # block = DepthConvBottleneck
425 |         if stride != 1 or self.inplanes != planes * block.expansion:
426 |             downsample = nn.Sequential(
427 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
428 |                           kernel_size=1, stride=stride, bias=False),
429 |                 nn.BatchNorm2d(planes * block.expansion),
430 |             )
431 |         layers = []
432 |         layers.append(DepthConvBottleneck(self.inplanes, planes, stride, downsample,dilation=dilation))
433 |         self.inplanes = planes * block.expansion
434 |         for i in range(1, blocks):
435 |             layers.append(block(self.inplanes, planes))
436 | 
437 |         return nn.Sequential(*layers)
438 | 
439 |     def _make_pred_layer(self,block, dilation_series, padding_series,num_classes,inplanes):
440 |         return block(dilation_series,padding_series,num_classes,inplanes)
441 | 
442 |     def forward(self, x, depth=None):
443 |         # print self.layer3._modules.values()[13].bn2.running_mean
444 |         if self.depthconv:
445 |             x = self.conv1(x,depth)
446 |         else:
447 |             x = self.conv1(x)
448 |         x = self.bn1(x)
449 |         x = self.relu(x)
450 |         x = self.maxpool(x)
451 | 
452 |         if self.depthconv:
453 |             depth = self.downsample_depth1(depth)
454 |             x = self.layer1((x,depth))
455 |         else:
456 |             x = self.layer1(x)
457 | 
458 |         if self.depthconv:
459 |             # print ('o',x.size(), depth.size())
460 |             # depth = self.downsample_depth2(depth)
461 |             x = self.layer2((x,depth))
462 |         else:
463 |             x = self.layer2(x)
464 | 
465 |         if self.depthconv:
466 |             depth = self.downsample_depth3(depth)
467 |             x = self.layer3((x,depth))
468 |         else:
469 |             x = self.layer3(x)
470 | 
471 |         if self.depthconv:
472 |             x = self.layer4((x,depth))
473 |         else:
474 |             x = self.layer4(x)
475 | 
476 |         if self.globalpooling:
477 |             x_size = x.size()
478 |             globalpool = self.globalpooling(x).repeat(1,1,x_size[2],x_size[3])
479 |             x = torch.cat([x,globalpool], 1)
480 |         x = self.dropout(x)
481 |         x = self.layer5(x)
482 |         if self.depthconv:
483 |             x = self.pool5a_d(x,depth)
484 |         else:
485 |             x = self.pool5a(x)
486 | 
487 | 
488 |         return x
489 | 
490 |     def get_1x_lr_params_NOscale(self):
491 |         """
492 |         This generator returns all the parameters of the net except for
493 |         the last classification layer. Note that for each batchnorm layer,
494 |         requires_grad is set to False in deeplab_resnet.py, therefore this function does not return
495 |         any batchnorm parameter
496 |         """
497 |         b = []
498 | 
499 |         b.append(self.conv1)
500 |         b.append(self.bn1)
501 |         b.append(self.layer1)
502 |         b.append(self.layer2)
503 |         b.append(self.layer3)
504 |         b.append(self.layer4)
505 | 
506 |         for i in range(len(b)):
507 |             for j in b[i].modules():
508 |                 if isinstance(j, nn.Conv2d):
509 |                     for k in j.parameters():
510 |                         if k.requires_grad:
511 |                             yield k
512 | 
513 |     def get_bn_params(self):
514 |         """
515 |         This generator returns all the parameters of the net except for
516 |         the last classification layer. Note that for each batchnorm layer,
517 |         requires_grad is set to False in deeplab_resnet.py, therefore this function does not return
518 |         any batchnorm parameter
519 |         """
520 |         b = []
521 | 
522 |         b.append(self.conv1)
523 |         b.append(self.bn1)
524 |         b.append(self.layer1)
525 |         b.append(self.layer2)
526 |         b.append(self.layer3)
527 |         b.append(self.layer4)
528 |         b.append(self.layer5)
529 | 
530 |         for i in range(len(b)):
531 |             for j in b[i].modules():
532 |                 if isinstance(j, nn.BatchNorm2d):
533 |                     for n, k in j.named_parameters():
534 |                         # print n
535 |                         if k.requires_grad:
536 |                             yield k
537 | 
538 |     def get_10x_lr_params(self):
539 |         """
540 |         This generator returns all the parameters for the last layer of the net,
541 |         which does the classification of pixel into classes
542 |         """
543 |         b = []
544 |         b.append(self.layer5)
545 | 
546 |         # for j in range(len(b)):
547 |         #     for i in b[j]:
548 |         #         yield i
549 | 
550 |         for i in range(len(b)):
551 |             for j in b[i].modules():
552 |                 if isinstance(j, nn.Conv2d):
553 |                     if j.weight is not None:
554 |                         if j.weight.requires_grad:
555 |                             yield j.weight
556 |                 # for k in j.parameters():
557 |                 #     if k.requires_grad:
558 |                 #         yield k
559 | 
560 |     def get_20x_lr_params(self):
561 |         """
562 |         This generator returns all the parameters for the last layer of the net,
563 |         which does the classification of pixel into classes
564 |         """
565 |         b = []
566 |         b.append(self.layer5)
567 | 
568 |         # for j in range(len(b)):
569 |         #     for i in b[j]:
570 |         #         yield i
571 | 
572 |         for i in range(len(b)):
573 |             for j in b[i].modules():
574 |                 if isinstance(j, nn.Conv2d):
575 |                     if j.bias is not None:
576 |                         if j.bias.requires_grad:
577 |                             yield j.bias


--------------------------------------------------------------------------------
/models/VGG_Deeplab.py:
--------------------------------------------------------------------------------
  1 | from model_utils import *
  2 | import torch.nn as nn
  3 | import torch.utils.model_zoo as model_zoo
  4 | import math
  5 | from .ops.depthconv.modules import DepthConv
  6 | from .ops.depthavgpooling.modules import Depthavgpooling
  7 | import torch
  8 | import torchvision
  9 | 
 10 | __all__ = [
 11 |     'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
 12 |     'vgg19_bn', 'vgg19',
 13 | ]
 14 | 
 15 | 
 16 | model_urls = {
 17 |     'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
 18 |     'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
 19 |     'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
 20 |     'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
 21 |     'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
 22 |     'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
 23 |     'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
 24 |     'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
 25 | }
 26 | 
 27 | 
 28 | cfg = {
 29 |  # name:c1_1 c1_2     c2_1 c2_2      c3_1 c3_2 c3_3      c4_1 c4_2 c4_3      c5_1 c5_2 c5_3
 30 |  # dilation:                                                                   2    2    2
 31 |     'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
 32 | }
 33 | 
 34 | depth_cfg = {
 35 |     'D': [0,3,6,10,14],
 36 | }
 37 | 
 38 | 
 39 | class ConvModule(nn.Module):
 40 | 
 41 |     def __init__(self, inplanes, planes, kernel_size=3, stride=1, padding=1, dilation=1,
 42 |                  bn=False,
 43 |                  maxpool=False, pool_kernel=3, pool_stride=2, pool_pad=1):
 44 |         super(ConvModule, self).__init__()
 45 |         conv2d = nn.Conv2d(inplanes,planes,kernel_size=kernel_size,stride=stride,padding=padding,dilation=dilation)
 46 |         layers = []
 47 |         if bn:
 48 |             layers += [nn.BatchNorm2d(planes), nn.ReLU(inplace=True)]
 49 |         else:
 50 |             layers += [nn.ReLU(inplace=True)]
 51 |         if maxpool:
 52 |             layers += [nn.MaxPool2d(kernel_size=pool_kernel, stride=pool_stride,padding=pool_pad)]
 53 | 
 54 |         self.layers = nn.Sequential(*([conv2d]+layers))
 55 |     def forward(self, x):
 56 |         # x = self.conv2d(x)
 57 |         x = self.layers(x)
 58 |         return x
 59 | 
 60 | class DepthConvModule(nn.Module):
 61 | 
 62 |     def __init__(self, inplanes, planes, kernel_size=3, stride=1, padding=1, dilation=1,bn=False):
 63 |         super(DepthConvModule, self).__init__()
 64 | 
 65 |         conv2d = DepthConv(inplanes,planes,kernel_size=kernel_size,stride=stride,padding=padding,dilation=dilation)
 66 |         layers = []
 67 |         if bn:
 68 |             layers += [nn.BatchNorm2d(planes), nn.ReLU(inplace=True)]
 69 |         else:
 70 |             layers += [nn.ReLU(inplace=True)]
 71 |         self.layers = nn.Sequential(*([conv2d]+layers))#(*layers)
 72 | 
 73 |     def forward(self, x, depth):
 74 | 
 75 |         for im,module in enumerate(self.layers._modules.values()):
 76 |             if im==0:
 77 |                 x = module(x,depth)
 78 |             else:
 79 |                 x = module(x)
 80 |         # x = self.conv2d(x, depth)
 81 |         # x = self.layers(x)
 82 |         return x
 83 | 
 84 | 
 85 | class VGG_layer2(nn.Module):
 86 | 
 87 |     def __init__(self, batch_norm=False, depthconv=False):
 88 |         super(VGG_layer2, self).__init__()
 89 |         in_channels = 3
 90 |         self.depthconv = depthconv
 91 |         # if self.depthconv:
 92 |         #     self.conv1_1_depthconvweight = 1.#nn.Parameter(torch.ones(1))
 93 |         #     self.conv1_1 = DepthConvModule(3, 64, bn=batch_norm)
 94 |         # else:
 95 |         self.conv1_1 = ConvModule(3, 64, bn=batch_norm)
 96 |         self.conv1_2 = ConvModule(64, 64, bn=batch_norm, maxpool=True)
 97 | 
 98 |         # if self.depthconv:
 99 |         # self.conv2_1_depthconvweight = 1.#nn.Parameter(torch.ones(1))
100 |         self.downsample_depth2_1 = nn.AvgPool2d(3,padding=1,stride=2)
101 |         #     self.conv2_1 = DepthConvModule(64, 128, bn=batch_norm)
102 |         # else:
103 |         self.conv2_1 = ConvModule(64, 128, bn=batch_norm)
104 |         self.conv2_2 = ConvModule(128, 128, bn=batch_norm, maxpool=True)
105 | 
106 |         # if self.depthconv:
107 |         #     self.conv3_1_depthconvweight = 1.#nn.Parameter(torch.ones(1))
108 |         self.downsample_depth3_1 = nn.AvgPool2d(3,padding=1,stride=2)
109 |         #     self.conv3_1 = DepthConvModule(128, 256, bn=batch_norm)
110 |         # else:
111 |         self.conv3_1 = ConvModule(128, 256, bn=batch_norm)
112 |         self.conv3_2 = ConvModule(256, 256, bn=batch_norm)
113 |         self.conv3_3 = ConvModule(256, 256, bn=batch_norm, maxpool=True)
114 | 
115 |         if self.depthconv:
116 |             self.conv4_1_depthconvweight = 1.#nn.Parameter(torch.ones(1))
117 |             self.downsample_depth4_1 = nn.AvgPool2d(3,padding=1,stride=2)
118 |             self.conv4_1 = DepthConvModule(256, 512, bn=batch_norm)
119 |         else:
120 |             self.conv4_1 = ConvModule(256, 512, bn=batch_norm)
121 |         self.conv4_2 = ConvModule(512, 512, bn=batch_norm)
122 |         self.conv4_3 = ConvModule(512, 512, bn=batch_norm,
123 |                                   maxpool=True, pool_kernel=3, pool_stride=1, pool_pad=1)
124 | 
125 |         if self.depthconv:
126 |             self.conv5_1_depthconvweight = 1.#nn.Parameter(torch.ones(1))
127 |             self.conv5_1 = DepthConvModule(512, 512, bn=batch_norm,dilation=2,padding=2)
128 |         else:
129 |             self.conv5_1 = ConvModule(512, 512, bn=batch_norm, dilation=2, padding=2)
130 |         self.conv5_2 = ConvModule(512, 512, bn=batch_norm, dilation=2, padding=2)
131 |         self.conv5_3 = ConvModule(512, 512, bn=batch_norm, dilation=2, padding=2,
132 |                                   maxpool=True, pool_kernel=3, pool_stride=1, pool_pad=1)
133 |         self.pool5a = nn.AvgPool2d(kernel_size=3, stride=1,padding=1)
134 |         # self.pool5a = nn.AvgPool2d(kernel_size=3, stride=1,padding=1)
135 | 
136 |     def forward(self, x, depth=None):
137 |         # print x.size()
138 |         # if self.depthconv:
139 |         #     # print self.conv1_1_depthconvweight
140 |         #     x = self.conv1_1(x,self.conv1_1_depthconvweight * depth)
141 |         # else:
142 |         x = self.conv1_1(x)
143 |         x = self.conv1_2(x)
144 |         # if self.depthconv:
145 |         depth = self.downsample_depth2_1(depth)
146 |         #     x = self.conv2_1(x, self.conv2_1_depthconvweight * depth)
147 |         # else:
148 |         x = self.conv2_1(x)
149 |         x = self.conv2_2(x)
150 |         # if self.depthconv:
151 |         depth = self.downsample_depth3_1(depth)
152 |         #     x = self.conv3_1(x, self.conv3_1_depthconvweight * depth)
153 |         # else:
154 |         x = self.conv3_1(x)
155 |         x = self.conv3_2(x)
156 |         x = self.conv3_3(x)
157 |         if self.depthconv:
158 |             depth = self.downsample_depth4_1(depth)
159 |             x = self.conv4_1(x, self.conv4_1_depthconvweight * depth)
160 |         else:
161 |             x = self.conv4_1(x)
162 |         x = self.conv4_2(x)
163 |         x = self.conv4_3(x)
164 |         if self.depthconv:
165 |             x = self.conv5_1(x, self.conv5_1_depthconvweight * depth)
166 |         else:
167 |             x = self.conv5_1(x)
168 |         x = self.conv5_2(x)
169 |         x = self.conv5_3(x)
170 |         x = self.pool5a(x)
171 |         return x,depth
172 | 
173 | class VGG_layer(nn.Module):
174 | 
175 |     def __init__(self, batch_norm=False, depthconv=False):
176 |         super(VGG_layer, self).__init__()
177 |         in_channels = 3
178 |         self.depthconv = depthconv
179 |         if self.depthconv:
180 |             self.conv1_1_depthconvweight = 1.#nn.Parameter(torch.ones(1))
181 |             self.conv1_1 = DepthConvModule(3, 64, bn=batch_norm)
182 |         else:
183 |             self.conv1_1 = ConvModule(3, 64, bn=batch_norm)
184 |         self.conv1_2 = ConvModule(64, 64, bn=batch_norm, maxpool=True)
185 | 
186 |         if self.depthconv:
187 |             self.conv2_1_depthconvweight = 1.#nn.Parameter(torch.ones(1))
188 |             self.downsample_depth2_1 = nn.AvgPool2d(3,padding=1,stride=2)
189 |             self.conv2_1 = DepthConvModule(64, 128, bn=batch_norm)
190 |         else:
191 |             self.conv2_1 = ConvModule(64, 128, bn=batch_norm)
192 |         self.conv2_2 = ConvModule(128, 128, bn=batch_norm, maxpool=True)
193 | 
194 |         if self.depthconv:
195 |             self.conv3_1_depthconvweight = 1.#nn.Parameter(torch.ones(1))
196 |             self.downsample_depth3_1 = nn.AvgPool2d(3,padding=1,stride=2)
197 |             self.conv3_1 = DepthConvModule(128, 256, bn=batch_norm)
198 |         else:
199 |             self.conv3_1 = ConvModule(128, 256, bn=batch_norm)
200 |         self.conv3_2 = ConvModule(256, 256, bn=batch_norm)
201 |         self.conv3_3 = ConvModule(256, 256, bn=batch_norm, maxpool=True)
202 | 
203 |         if self.depthconv:
204 |             self.conv4_1_depthconvweight = 1.#nn.Parameter(torch.ones(1))
205 |             self.downsample_depth4_1 = nn.AvgPool2d(3,padding=1,stride=2)
206 |             self.conv4_1 = DepthConvModule(256, 512, bn=batch_norm)
207 |         else:
208 |             self.conv4_1 = ConvModule(256, 512, bn=batch_norm)
209 |         self.conv4_2 = ConvModule(512, 512, bn=batch_norm)
210 |         self.conv4_3 = ConvModule(512, 512, bn=batch_norm,
211 |                                   maxpool=True, pool_kernel=3, pool_stride=1, pool_pad=1)
212 | 
213 |         if self.depthconv:
214 |             self.conv5_1_depthconvweight = 1.#nn.Parameter(torch.ones(1))
215 |             self.conv5_1 = DepthConvModule(512, 512, bn=batch_norm,dilation=2,padding=2)
216 |         else:
217 |             self.conv5_1 = ConvModule(512, 512, bn=batch_norm, dilation=2, padding=2)
218 |         self.conv5_2 = ConvModule(512, 512, bn=batch_norm, dilation=2, padding=2)
219 |         self.conv5_3 = ConvModule(512, 512, bn=batch_norm, dilation=2, padding=2,
220 |                                   maxpool=True, pool_kernel=3, pool_stride=1, pool_pad=1)
221 |         self.pool5a = nn.AvgPool2d(kernel_size=3, stride=1,padding=1)
222 |         self.pool5a_d = Depthavgpooling(kernel_size=3, stride=1,padding=1)
223 | 
224 |     def forward(self, x, depth=None):
225 |         # print x.size()
226 |         if self.depthconv:
227 |             # print self.conv1_1_depthconvweight
228 |             x = self.conv1_1(x,self.conv1_1_depthconvweight * depth)
229 |         else:
230 |             x = self.conv1_1(x)
231 |         x = self.conv1_2(x)
232 |         if self.depthconv:
233 |             depth = self.downsample_depth2_1(depth)
234 |             x = self.conv2_1(x, self.conv2_1_depthconvweight * depth)
235 |         else:
236 |             x = self.conv2_1(x)
237 |         # print 'xxxxxx',x.size()
238 |         x = self.conv2_2(x)
239 |         if self.depthconv:
240 |             depth = self.downsample_depth3_1(depth)
241 |             x = self.conv3_1(x, self.conv3_1_depthconvweight * depth)
242 |         else:
243 |             x = self.conv3_1(x)
244 |         x = self.conv3_2(x)
245 |         x = self.conv3_3(x)
246 |         if self.depthconv:
247 |             depth = self.downsample_depth4_1(depth)
248 |             # print (depth.mean(),depth.max(),depth.min())
249 |             # torchvision.utils.save_image(depth.data, 'depth.png')
250 |             x = self.conv4_1(x, self.conv4_1_depthconvweight * depth)
251 |         else:
252 |             x = self.conv4_1(x)
253 |         x = self.conv4_2(x)
254 |         x = self.conv4_3(x)
255 |         if self.depthconv:
256 |             x = self.conv5_1(x, self.conv5_1_depthconvweight * depth)
257 |         else:
258 |             x = self.conv5_1(x)
259 |         x = self.conv5_2(x)
260 |         x = self.conv5_3(x)
261 |         # x = self.pool5a(x,depth)
262 |         if self.depthconv:
263 |             x = self.pool5a_d(x,depth)
264 |         else:
265 |             x = self.pool5a(x)
266 | 
267 |         return x, depth
268 | 
269 | def make_layers(cfg, depth_cfg=[], batch_norm=False, depthconv=False):
270 |     layers = []
271 |     in_channels = 3
272 |     for iv, v in enumerate(cfg):
273 |         if v == 'M':
274 |             layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
275 |         else:
276 |             if depthconv and iv in depth_cfg:
277 |                 conv2d = DepthConv(in_channels, v, kernel_size=3, padding=1)
278 |             else:
279 |                 conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
280 |             if batch_norm:
281 |                 layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
282 |             else:
283 |                 layers += [conv2d, nn.ReLU(inplace=True)]
284 |             in_channels = v
285 |     return nn.Sequential(*layers)
286 | 
287 | class Classifier_Module(nn.Module):
288 | 
289 |     def __init__(self, num_classes, inplanes, depthconv=False):
290 |         super(Classifier_Module, self).__init__()
291 |         # [6, 12, 18, 24]
292 |         self.depthconv = depthconv
293 |         if depthconv:
294 |             self.fc6_1_depthconvweight = 1.#nn.Parameter(torch.ones(1))
295 |             self.fc6_1 = DepthConv(inplanes, 1024, kernel_size=3, stride=1, padding=6, dilation=6)  # fc6
296 |         else:
297 |             self.fc6_1 = nn.Conv2d(inplanes, 1024, kernel_size=3, stride=1, padding=6, dilation=6)  # fc6
298 | 
299 |         self.fc7_1 = nn.Sequential(
300 |             *[nn.ReLU(True), nn.Dropout(),
301 |               nn.Conv2d(1024, 1024, kernel_size=1, stride=1), nn.ReLU(True), nn.Dropout()])  # fc7
302 |         self.fc8_1 = nn.Conv2d(1024, num_classes, kernel_size=1, stride=1, bias=True)  # fc8
303 | 
304 |         if depthconv:
305 |             self.fc6_2_depthconvweight = 1.#nn.Parameter(torch.ones(1))
306 |             self.fc6_2 = DepthConv(inplanes, 1024, kernel_size=3, stride=1, padding=12, dilation=12)  # fc6
307 |         else:
308 |             self.fc6_2 = nn.Conv2d(inplanes, 1024, kernel_size=3, stride=1, padding=12, dilation=12)  # fc6
309 | 
310 |         self.fc7_2 = nn.Sequential(
311 |             *[nn.ReLU(True), nn.Dropout(),
312 |               nn.Conv2d(1024, 1024, kernel_size=1, stride=1), nn.ReLU(True), nn.Dropout()])  # fc7
313 |         self.fc8_2 = nn.Conv2d(1024, num_classes, kernel_size=1, stride=1, bias=True)  # fc8
314 | 
315 |         if depthconv:
316 |             self.fc6_3_depthconvweight = 1.#nn.Parameter(torch.ones(1))
317 |             self.fc6_3 = DepthConv(inplanes, 1024, kernel_size=3, stride=1, padding=18, dilation=18)  # fc6
318 |         else:
319 |             self.fc6_3 = nn.Conv2d(inplanes, 1024, kernel_size=3, stride=1, padding=18, dilation=18)  # fc6
320 | 
321 |         self.fc7_3 = nn.Sequential(
322 |             *[nn.ReLU(True), nn.Dropout(),
323 |               nn.Conv2d(1024, 1024, kernel_size=1, stride=1), nn.ReLU(True), nn.Dropout()])  # fc7
324 |         self.fc8_3 = nn.Conv2d(1024, num_classes, kernel_size=1, stride=1, bias=True)  # fc8
325 | 
326 |         if depthconv:
327 |             self.fc6_4_depthconvweight = 1.#nn.Parameter(torch.ones(1))
328 |             self.fc6_4 = DepthConv(inplanes, 1024, kernel_size=3, stride=1, padding=24, dilation=24)  # fc6
329 |         else:
330 |             self.fc6_4 = nn.Conv2d(inplanes, 1024, kernel_size=3, stride=1, padding=24, dilation=24)  # fc6
331 | 
332 |         self.fc7_4 = nn.Sequential(
333 |             *[nn.ReLU(True), nn.Dropout(),
334 |               nn.Conv2d(1024, 1024, kernel_size=1, stride=1), nn.ReLU(True), nn.Dropout()])  # fc7
335 |         self.fc8_4 = nn.Conv2d(1024, num_classes, kernel_size=1, stride=1, bias=True)  # fc8
336 | 
337 |     def forward(self, x, depth=None):
338 |         if self.depthconv:
339 |             out1 = self.fc6_1(x, self.fc6_1_depthconvweight * depth)
340 |         else:
341 |             out1 = self.fc6_1(x)
342 |         out1 = self.fc7_1(out1)
343 |         out1 = self.fc8_1(out1)
344 | 
345 |         if self.depthconv:
346 |             out2 = self.fc6_2(x, self.fc6_2_depthconvweight * depth)
347 |         else:
348 |             out2 = self.fc6_2(x)
349 |         out2 = self.fc7_2(out2)
350 |         out2 = self.fc8_2(out2)
351 | 
352 |         if self.depthconv:
353 |             out3 = self.fc6_3(x, self.fc6_3_depthconvweight * depth)
354 |         else:
355 |             out3 = self.fc6_3(x)
356 |         out3 = self.fc7_3(out3)
357 |         out3 = self.fc8_3(out3)
358 | 
359 |         if self.depthconv:
360 |             out4 = self.fc6_4(x, self.fc6_4_depthconvweight * depth)
361 |         else:
362 |             out4 = self.fc6_4(x)
363 |         out4 = self.fc7_4(out4)
364 |         out4 = self.fc8_4(out4)
365 | 
366 |         return out1+out2+out3+out4
367 | 
368 | class Classifier_Module2(nn.Module):
369 | 
370 |     def __init__(self, num_classes, inplanes, depthconv=False):
371 |         super(Classifier_Module2, self).__init__()
372 |         # [6, 12, 18, 24]
373 |         self.depthconv = depthconv
374 |         if depthconv:
375 |             self.fc6_2_depthconvweight = 1.#nn.Parameter(torch.ones(1))
376 |             self.fc6_2 = DepthConv(inplanes, 1024, kernel_size=3, stride=1, padding=12, dilation=12)
377 |             self.downsample_depth = None
378 |         else:
379 |             self.downsample_depth = nn.AvgPool2d(9,padding=1,stride=8)
380 |             self.fc6_2 = nn.Conv2d(inplanes, 1024, kernel_size=3, stride=1, padding=12, dilation=12)  # fc6
381 | 
382 |         self.fc7_2 = nn.Sequential(
383 |             *[nn.ReLU(True), nn.Dropout(),
384 |               nn.Conv2d(1024, 1024, kernel_size=1, stride=1), nn.ReLU(True), nn.Dropout()])  # fc7
385 | 
386 |         # self.globalpooling = DepthGlobalPool(1024,3)#
387 |         # self.fc8_2 = nn.Conv2d(1024+3, num_classes, kernel_size=1, stride=1, bias=True)  # fc8
388 | 
389 |         self.globalpooling = nn.AdaptiveAvgPool2d((1, 1))#nn.AvgPool2d((54,71))#
390 |         self.dropout = nn.Dropout(0.3)
391 |         # self.norm = CaffeNormalize(1024)#LayerNorm(1024)#nn.InstanceNorm2d(1024).use_running_stats(mode=False)
392 |         self.fc8_2 = nn.Conv2d(2048, num_classes, kernel_size=1, stride=1, bias=True)  # fc8
393 | 
394 |     def forward(self, x, depth=None):
395 |         if self.depthconv:
396 |             out2 = self.fc6_2(x, self.fc6_2_depthconvweight * depth)
397 |         else:
398 |             out2 = self.fc6_2(x)
399 |         out2 = self.fc7_2(out2)
400 |         out2_size = out2.size()
401 | 
402 |         #global pooling
403 |         globalpool = self.globalpooling(out2)
404 |         # globalpool = self.dropout(self.norm(globalpool))
405 |         globalpool = self.dropout(globalpool)#self.norm(globalpool))
406 |         upsample = nn.Upsample((out2_size[2],out2_size[3]), mode='bilinear')#scale_factor=8)
407 |         globalpool = upsample(globalpool)
408 | 
409 |         #global pooling with depth
410 |         # globalpool = self.globalpooling(out2,depth)
411 | 
412 | 
413 |         # print globalpool.mean()
414 |         out2 = torch.cat([out2, globalpool], 1)
415 |         out2 = self.fc8_2(out2)
416 |         # print out2.size()
417 |         return out2
418 | 
419 | class VGG(nn.Module):
420 | 
421 |     def __init__(self, num_classes=20, init_weights=True, depthconv=False,bn=False):
422 |         super(VGG, self).__init__()
423 |         self.features = VGG_layer(batch_norm=bn,depthconv=depthconv)
424 |         self.classifier = Classifier_Module2(num_classes,512,depthconv=depthconv)
425 | 
426 |         if init_weights:
427 |             self._initialize_weights()
428 | 
429 |     def forward(self, x, depth=None):
430 |         x,depth = self.features(x,depth)
431 |         x = self.classifier(x,depth)
432 |         return x
433 | 
434 |     def _initialize_weights(self):
435 |         for m in self.modules():
436 |             if isinstance(m, nn.Conv2d):
437 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
438 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
439 |                 if m.bias is not None:
440 |                     m.bias.data.zero_()
441 |             elif isinstance(m, nn.BatchNorm2d):
442 |                 m.weight.data.fill_(1)
443 |                 m.bias.data.zero_()
444 |             elif isinstance(m, nn.Linear):
445 |                 m.weight.data.normal_(0, 0.01)
446 |                 m.bias.data.zero_()
447 | 
448 |     def get_normalize_params(self):
449 |         b=[]
450 |         b.append(self.classifier.norm)
451 |         for i in b:
452 |             if isinstance(i, CaffeNormalize):
453 |                 yield i.scale
454 | 
455 |     def get_1x_lr_params_NOscale(self):
456 |         """
457 |         This generator returns all the parameters of the net except for
458 |         the last classification layer. Note that for each batchnorm layer,
459 |         requires_grad is set to False in deeplab_resnet.py, therefore this function does not return
460 |         any batchnorm parameter
461 |         """
462 |         b = []
463 | 
464 |         b.append(self.features.conv1_1)
465 |         b.append(self.features.conv1_2)
466 |         b.append(self.features.conv2_1)
467 |         b.append(self.features.conv2_2)
468 |         b.append(self.features.conv3_1)
469 |         b.append(self.features.conv3_2)
470 |         b.append(self.features.conv3_3)
471 |         b.append(self.features.conv4_1)
472 |         b.append(self.features.conv4_2)
473 |         b.append(self.features.conv4_3)
474 |         b.append(self.features.conv5_1)
475 |         b.append(self.features.conv5_2)
476 |         b.append(self.features.conv5_3)
477 |         # b.append(self.classifier.fc6_1)
478 |         b.append(self.classifier.fc6_2)
479 |         # b.append(self.classifier.norm)
480 |         # b.append(self.classifier.fc6_3)
481 |         # b.append(self.classifier.fc6_4)
482 |         # b.append(self.classifier.fc7_1)
483 |         b.append(self.classifier.fc7_2)
484 |         # b.append(self.classifier.fc7_3)
485 |         # b.append(self.classifier.fc7_4)
486 | 
487 |         for i in range(len(b)):
488 |             for j in b[i].modules():
489 |                 if isinstance(j, nn.Conv2d):
490 |                     if j.weight.requires_grad:
491 |                         yield j.weight
492 |                 elif isinstance(j, DepthConv):
493 |                     if j.weight.requires_grad:
494 |                         yield j.weight
495 | 
496 | 
497 |     def get_2x_lr_params_NOscale(self):
498 |         """
499 |         This generator returns all the parameters of the net except for
500 |         the last classification layer. Note that for each batchnorm layer,
501 |         requires_grad is set to False in deeplab_resnet.py, therefore this function does not return
502 |         any batchnorm parameter
503 |         """
504 |         b = []
505 | 
506 |         b.append(self.features.conv1_1)
507 |         b.append(self.features.conv1_2)
508 |         b.append(self.features.conv2_1)
509 |         b.append(self.features.conv2_2)
510 |         b.append(self.features.conv3_1)
511 |         b.append(self.features.conv3_2)
512 |         b.append(self.features.conv3_3)
513 |         b.append(self.features.conv4_1)
514 |         b.append(self.features.conv4_2)
515 |         b.append(self.features.conv4_3)
516 |         b.append(self.features.conv5_1)
517 |         b.append(self.features.conv5_2)
518 |         b.append(self.features.conv5_3)
519 |         # b.append(self.classifier.fc6_1)
520 |         b.append(self.classifier.fc6_2)
521 |         # b.append(self.classifier.fc6_3)
522 |         # b.append(self.classifier.fc6_4)
523 |         # b.append(self.classifier.fc7_1)
524 |         b.append(self.classifier.fc7_2)
525 |         # b.append(self.classifier.globalpooling.model)
526 |         # b.append(self.classifier.fc7_3)
527 |         # b.append(self.classifier.fc7_4)
528 | 
529 |         for i in range(len(b)):
530 |             for j in b[i].modules():
531 |                 if isinstance(j, nn.Conv2d):
532 |                     if j.bias is not None:
533 |                         if j.bias.requires_grad:
534 |                             yield j.bias
535 |                 elif isinstance(j, DepthConv):
536 |                     if j.bias is not None:
537 |                         if j.bias.requires_grad:
538 |                             yield j.bias
539 | 
540 | 
541 |     def get_10x_lr_params(self):
542 |         """
543 |         This generator returns all the parameters for the last layer of the net,
544 |         which does the classification of pixel into classes
545 |         """
546 |         b = []
547 |         # b.append(self.classifier.fc8_1.weight)
548 |         b.append(self.classifier.fc8_2.weight)
549 |         # b.append(self.classifier.globalpooling.model.weight)
550 |         # b.append(self.classifier.fc8_3.weight)
551 |         # b.append(self.classifier.fc8_4.weight)
552 | 
553 |         for i in b:
554 |             yield i
555 |         # for j in range(len(b)):
556 |         #     for i in b[j]:
557 |         #         yield i
558 | 
559 |     def get_20x_lr_params(self):
560 |         """
561 |         This generator returns all the parameters for the last layer of the net,
562 |         which does the classification of pixel into classes
563 |         """
564 |         b = []
565 |         # b.append(self.classifier.fc8_1.bias)
566 |         b.append(self.classifier.fc8_2.bias)
567 |         # b.append(self.classifier.globalpooling.model.bias)
568 |         # b.append(self.classifier.fc8_3.bias)
569 |         # b.append(self.classifier.fc8_4.bias)
570 | 
571 |         for i in b:
572 |             yield i
573 |         # for j in range(len(b)):
574 |         #     for i in b[j]:
575 |         #         yield i
576 | 
577 |     def get_100x_lr_params(self):
578 |         """
579 |         This generator returns all the parameters for the last layer of the net,
580 |         which does the classification of pixel into classes
581 |         """
582 |         b = []
583 |         b.append(self.features.conv1_1_depthconvweight)
584 |         b.append(self.features.conv2_1_depthconvweight)
585 |         b.append(self.features.conv3_1_depthconvweight)
586 |         b.append(self.features.conv4_1_depthconvweight)
587 |         b.append(self.features.conv5_1_depthconvweight)
588 |         b.append(self.classifier.fc6_1_depthconvweight)
589 |         b.append(self.classifier.fc6_2_depthconvweight)
590 |         b.append(self.classifier.fc6_3_depthconvweight)
591 |         b.append(self.classifier.fc6_4_depthconvweight)
592 | 
593 |         for j in range(len(b)):
594 |             yield b[j]
595 |             # for i in b[j]:
596 |             #     yield i
597 | 
598 | 
599 | 
600 | def vgg16(pretrained=False, **kwargs):
601 |     """VGG 16-layer model (configuration "D")
602 |     Args:
603 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
604 |     """
605 |     if pretrained:
606 |         kwargs['init_weights'] = False
607 |     model = VGG(bn=False,**kwargs)
608 |     if pretrained:
609 |         model.load_state_dict(model_zoo.load_url(model_urls['vgg16']))
610 |     return model
611 | 
612 | 
613 | def vgg16_bn(pretrained=False, **kwargs):
614 |     """VGG 16-layer model (configuration "D") with batch normalization
615 |     Args:
616 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
617 |     """
618 |     if pretrained:
619 |         kwargs['init_weights'] = False
620 |     model = VGG(bn=True,**kwargs)
621 |     if pretrained:
622 |         model.load_state_dict(model_zoo.load_url(model_urls['vgg16_bn']))
623 |     return model
624 | 
625 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laughtervv/Deeplab-Pytorch/deb98bd27922241070d04b6ab6fa094981c3b827/models/__init__.py


--------------------------------------------------------------------------------
/models/base_model.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | from utils import util
  4 | import torch
  5 | 
  6 | def load_pretrained_model(net, state_dict, strict=True):
  7 |     """Copies parameters and buffers from :attr:`state_dict` into
  8 |     this module and its descendants. If :attr:`strict` is ``True`` then
  9 |     the keys of :attr:`state_dict` must exactly match the keys returned
 10 |     by this module's :func:`state_dict()` function.
 11 | 
 12 |     Arguments:
 13 |         state_dict (dict): A dict containing parameters and
 14 |             persistent buffers.
 15 |         strict (bool): Strictly enforce that the keys in :attr:`state_dict`
 16 |             match the keys returned by this module's `:func:`state_dict()`
 17 |             function.
 18 |     """
 19 |     own_state = net.state_dict()
 20 |     # print state_dict.keys()
 21 |     # print own_state.keys()
 22 |     for name, param in state_dict.items():
 23 |         if name in own_state:
 24 |             # print name, np.mean(param.numpy())
 25 |             if isinstance(param, torch.nn.Parameter):
 26 |                 # backwards compatibility for serialized parameters
 27 |                 param = param.data
 28 |             if strict:
 29 |                 try:
 30 |                     own_state[name].copy_(param)
 31 |                 except Exception:
 32 |                     raise RuntimeError('While copying the parameter named {}, '
 33 |                                        'whose dimensions in the model are {} and '
 34 |                                        'whose dimensions in the checkpoint are {}.'
 35 |                                        .format(name, own_state[name].size(), param.size()))
 36 |             else:
 37 |                 try:
 38 |                     own_state[name].copy_(param)
 39 |                 except Exception:
 40 |                     print('Ignoring Error: While copying the parameter named {}, '
 41 |                                        'whose dimensions in the model are {} and '
 42 |                                        'whose dimensions in the checkpoint are {}.'
 43 |                                        .format(name, own_state[name].size(), param.size()))
 44 | 
 45 |         elif strict:
 46 |             raise KeyError('unexpected key "{}" in state_dict'
 47 |                            .format(name))
 48 |     if strict:
 49 |         missing = set(own_state.keys()) - set(state_dict.keys())
 50 |         if len(missing) > 0:
 51 |             raise KeyError('missing keys in state_dict: "{}"'.format(missing))
 52 | 
 53 | 
 54 | class BaseModel():
 55 | 
 56 |     def name(self):
 57 |         return 'BaseModel'
 58 | 
 59 |     def initialize(self, opt):
 60 |         self.opt = opt
 61 |         self.training = opt.isTrain
 62 |         self.gpu_ids = opt.gpu_ids
 63 |         self.isTrain = opt.isTrain
 64 |         self.num_classes = opt.label_nc
 65 |         self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor
 66 |         self.save_dir = os.path.join(opt.checkpoints_dir, opt.name)
 67 |         self.tensorborad_dir = os.path.join(self.opt.checkpoints_dir, 'tensorboard', opt.dataset_mode)
 68 |         self.model_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name, 'model')
 69 |         util.mkdirs([self.tensorborad_dir, self.model_dir])
 70 | 
 71 |     def set_input(self, input):
 72 |         self.input = input
 73 | 
 74 |     def forward(self):
 75 |         pass
 76 | 
 77 |     # used in test time, no backprop
 78 |     def test(self):
 79 |         pass
 80 | 
 81 |     def get_image_paths(self):
 82 |         pass
 83 | 
 84 |     def optimize_parameters(self):
 85 |         pass
 86 | 
 87 |     def get_current_visuals(self):
 88 |         return self.input
 89 | 
 90 |     def save(self, label):
 91 |         pass
 92 | 
 93 |     # helper saving function that can be used by subclasses
 94 |     def save_network(self, network, network_label, epoch_label, gpu_ids):
 95 |         save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
 96 |         save_path = os.path.join(self.model_dir, save_filename)
 97 |         torch.save(network.cpu().state_dict(), save_path)
 98 |         if len(gpu_ids) and torch.cuda.is_available():
 99 |             network.cuda()
100 | 
101 |     # helper loading function that can be used by subclasses
102 |     def load_network(self, network, network_label, epoch_label, save_dir=''):
103 |         save_filename = '%s_net_%s.pth' % (epoch_label,network_label)
104 |         if not save_dir:
105 |             save_dir = self.model_dir
106 |         save_path = os.path.join(save_dir, save_filename)        
107 |         if not os.path.isfile(save_path):
108 |             print('%s not exists yet!' % save_path)
109 |         else:
110 |             #network.load_state_dict(torch.load(save_path))
111 |             try:
112 |                 # print torch.load(save_path).keys()
113 |                 # print network.state_dict()['Scale.features.conv2_1_depthconvweight']
114 |                 network.load_state_dict(torch.load(save_path))
115 |             except:   
116 |                 pretrained_dict = torch.load(save_path)                
117 |                 model_dict = network.state_dict()
118 |                 try:
119 |                     pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}                    
120 |                     network.load_state_dict(pretrained_dict)
121 |                     print('Pretrained network has excessive layers; Only loading layers that are used' )
122 |                 except:
123 |                     print('Pretrained network has fewer layers; The following are not initialized:' )
124 |                     # from sets import Set
125 |                     # not_initialized = Set()
126 |                     for k, v in pretrained_dict.items():                      
127 |                         if v.size() == model_dict[k].size():
128 |                             model_dict[k] = v
129 |                     not_initialized=[]
130 |                     # print(pretrained_dict.keys())
131 |                     # print(model_dict.keys())
132 |                     for k, v in model_dict.items():
133 |                         if k not in pretrained_dict or v.size() != pretrained_dict[k].size():
134 |                             not_initialized+=[k]#[k.split('.')[0]]
135 |                     print(sorted(not_initialized))
136 |                     network.load_state_dict(model_dict)                  
137 | 
138 |     def update_learning_rate():
139 |         pass
140 | 
141 | 
142 |     def load_pretrained_network(self, network, pretraineddir, epoch_label,strict=True):
143 |         save_filename = '%s.pth' % (epoch_label)
144 |         save_path = os.path.join(pretraineddir, save_filename)
145 |         load_dict = torch.load(save_path, map_location=lambda storage, loc: storage)
146 |         # print (load_dict.values().size())
147 |         load_pretrained_model(network,load_dict,strict)
148 | 


--------------------------------------------------------------------------------
/models/losses.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | from torch.autograd import Variable
 5 | from distutils.version import LooseVersion
 6 | 
 7 | class CrossEntropyLoss2d(nn.Module):
 8 |     def __init__(self, weight=None, size_average=False, ignore_index=255):
 9 |         super(CrossEntropyLoss2d, self).__init__()
10 |         self.nll_loss = nn.NLLLoss2d(weight, size_average, ignore_index)
11 | 
12 |     def forward(self, inputs, targets):
13 |         return self.nll_loss(F.log_softmax(inputs), targets)
14 | 
15 | def cross_entropy2d(input, target, weight=None, size_average=True):
16 |     # input: (n, c, h, w), target: (n, h, w)
17 |     n, c, h, w = input.size()
18 |     # log_p: (n, c, h, w)
19 |     if LooseVersion(torch.__version__) < LooseVersion('0.3'):
20 |         # ==0.2.X
21 |         log_p = F.log_softmax(input).cuda()
22 |     else:
23 |         # >=0.3
24 |         log_p = F.log_softmax(input, dim=1).cuda()
25 |     # log_p: (n*h*w, c)
26 |     log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous()
27 |     log_p = log_p[target.view(n, h, w, 1).repeat(1, 1, 1, c) >= 0]
28 |     log_p = log_p.view(-1, c)
29 |     # target: (n*h*w,)
30 |     # mask = (target != 255)
31 |     # target = target[mask]
32 |     loss = F.nll_loss(log_p, target, weight=weight, size_average=False, ignore_index=255).cuda()
33 |     if size_average:
34 |         loss /= (n*h*w)
35 |     return loss
36 | 
37 | class FocalLoss2d(nn.Module):
38 |     def __init__(self, gamma=2., weight=None, size_average=True, ignore_index=255):
39 |         super(FocalLoss2d, self).__init__()
40 |         self.gamma = gamma
41 |         self.nll_loss = nn.NLLLoss2d(weight, size_average, ignore_index)
42 | 
43 |     def forward(self, inputs, targets):
44 |         return self.nll_loss((1 - F.softmax(inputs)) ** self.gamma * F.log_softmax(inputs), targets)
45 | 
46 | 
47 | class FocalLoss(nn.Module):
48 |     """
49 |         This criterion is a implemenation of Focal Loss, which is proposed in
50 |         Focal Loss for Dense Object Detection.
51 | 
52 |             Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class])
53 | 
54 |         The losses are averaged across observations for each minibatch.
55 |         Args:
56 |             alpha(1D Tensor, Variable) : the scalar factor for this criterion
57 |             gamma(float, double) : gamma > 0
58 |             size_average(bool): size_average(bool): By default, the losses are averaged over observations for each minibatch.
59 |                                 However, if the field size_average is set to False, the losses are
60 |                                 instead summed for each minibatch.
61 |     """
62 | 
63 |     def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
64 |         super(FocalLoss, self).__init__()
65 |         if alpha is None:
66 |             self.alpha = Variable(torch.ones(class_num+1))
67 |         else:
68 |             if isinstance(alpha, Variable):
69 |                 self.alpha = alpha
70 |             else:
71 |                 self.alpha = Variable(alpha)
72 |         self.gamma = gamma
73 |         self.class_num = class_num
74 |         self.size_average = size_average
75 | 
76 |     def forward(self, inputs, targets):  # variables
77 |         P = F.softmax(inputs)
78 | 
79 |         b,c,h,w = inputs.size()
80 |         class_mask = Variable(torch.zeros([b,c+1,h,w]).cuda())
81 |         class_mask.scatter_(1, targets.long(), 1.)
82 |         class_mask = class_mask[:,:-1,:,:]
83 | 
84 |         if inputs.is_cuda and not self.alpha.is_cuda:
85 |             self.alpha = self.alpha.cuda()
86 |         # print('alpha',self.alpha.size())
87 |         alpha = self.alpha[targets.data.view(-1)].view_as(targets)
88 |         # print (alpha.size(),class_mask.size(),P.size())
89 |         probs = (P * class_mask).sum(1)  # + 1e-6#.view(-1, 1)
90 |         log_p = probs.log()
91 | 
92 |         batch_loss = -alpha * (torch.pow((1 - probs), self.gamma)) * log_p
93 | 
94 |         if self.size_average:
95 |             loss = batch_loss.mean()
96 |         else:
97 |             loss = batch_loss.sum()
98 |         return loss
99 | 


--------------------------------------------------------------------------------
/models/model_utils.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.autograd import Variable
  4 | import numpy as np
  5 | import torchvision
  6 | import time
  7 | 
  8 | class LayerNorm(nn.Module):
  9 | 
 10 |     def __init__(self, features, eps=1e-6, gamma=1.,beta=0.,learnable=False):
 11 |         super(LayerNorm,self).__init__()
 12 |         if learnable:
 13 |             self.gamma = nn.Parameter(torch.ones(features))
 14 |             self.beta = nn.Parameter(torch.zeros(features))
 15 |         else:
 16 |             self.gamma = gamma
 17 |             self.beta = beta
 18 | 
 19 |         self.eps = eps
 20 | 
 21 |     def forward(self, x):
 22 |         x_size = x.size()
 23 |         mean = x.view(x_size[0],x_size[1],x_size[2]*x_size[3]).mean(2)\
 24 |             .view(x_size[0],x_size[1],1,1).repeat(1, 1, x_size[2], x_size[3])
 25 |         std = x.view(x_size[0],x_size[1],x_size[2]*x_size[3]).std(2)\
 26 |             .view(x_size[0],x_size[1],1,1).repeat(1, 1, x_size[2], x_size[3])
 27 |         # print 'mean',mean.size(),'x',x_size
 28 |         return self.gamma * (x - mean) / (std + self.eps) + self.beta
 29 | 
 30 | class CaffeNormalize(nn.Module):
 31 | 
 32 |     def __init__(self, features, eps=1e-7):
 33 |         super(CaffeNormalize,self).__init__()
 34 |         self.scale = nn.Parameter(10.*torch.ones(features))#, requires_grad=False)
 35 |         self.eps = eps
 36 | 
 37 |     def forward(self, x):
 38 |         # print self.scale
 39 |         x_size = x.size()
 40 |         norm = x.norm(2,dim=1,keepdim=True)#.detach()
 41 |         #print norm.data.cpu().numpy(),self.scale.mean().data.cpu().numpy()#,self.scale.grad.mean().data.cpu().numpy()
 42 |         x = x.div(norm+self.eps)
 43 | 
 44 |         return x.mul(self.scale.view(1, x_size[1], 1, 1))
 45 | 
 46 | 
 47 | class DepthGlobalPool(nn.Module):
 48 |     def __init__(self, n_features, n_out):
 49 |         super(DepthGlobalPool, self).__init__()
 50 |         self.model = nn.Conv2d(n_features, n_out, kernel_size=1, padding=0)
 51 |         self.pool = nn.AdaptiveAvgPool2d((1, 1))
 52 | 
 53 |         self.norm = CaffeNormalize(n_out)
 54 |         self.dropout = nn.Dropout(0.3)
 55 | 
 56 |         n = self.model.kernel_size[0] * self.model.kernel_size[1] * self.model.out_channels
 57 |         self.model.weight.data.normal_(0, np.sqrt(2. / n))
 58 |         if self.model.bias is not None:
 59 |             self.model.bias.data.zero_()
 60 | 
 61 |     def forward(self, features, depth, depthpool=False):
 62 |         # features = self.pool(self.model(features))
 63 |         out2_size = features.size()
 64 |         features = self.model(features)
 65 | 
 66 |         if isinstance(depth, Variable) and depthpool:
 67 |             outfeatures = features.clone()
 68 |             n_c = features.size()[1]
 69 | 
 70 |             # depth-wise average pooling
 71 |             # depthclone = depth.clone()
 72 |             depth = depth.data.cpu().numpy()
 73 |             _, depth_bin = np.histogram(depth)
 74 | 
 75 |             bin_low = depth_bin[0]
 76 |             for bin_high in depth_bin[1:]:
 77 |                 indices = ((depth <= bin_high) & (depth >= bin_low)).nonzero()
 78 |                 if indices[0].shape[0] != 0:
 79 |                     for j in range(n_c):
 80 |                         output_ins = features[indices[0], indices[1] + j, indices[2], indices[3]]
 81 |                         mean_feat = torch.mean(output_ins).expand_as(output_ins)
 82 |                         outfeatures[indices[0], indices[1] + j, indices[2], indices[3]] = mean_feat  # torch.mean(output_ins)
 83 |                     bin_low = bin_high
 84 | 
 85 |             # outfeatures = self.norm(outfeatures)
 86 |             outfeatures = self.dropout(outfeatures)
 87 | 
 88 |             # bin_low = depth_bin[0]
 89 |             # for bin_high in depth_bin[1:]:
 90 |             #     indices = ((depth <= bin_high) & (depth >= bin_low)).nonzero()
 91 |             #     if indices[0].shape[0] != 0:
 92 |             #         output_ins = features[indices[0], indices[1], indices[2], indices[3]]
 93 |             #         mean_feat = torch.mean(output_ins).expand_as(output_ins)
 94 |             #         depthclone[indices[0], indices[1], indices[2], indices[3]] = mean_feat
 95 |             #         bin_low = bin_high
 96 |             #
 97 |             # upsample = nn.UpsamplingBilinear2d(scale_factor=8)
 98 |             # torchvision.utils.save_image(upsample(depthclone).data, 'depth_feature1.png', normalize=True, range=(0, 1))
 99 |             # outfeatures = self.dropout(outfeatures)
100 |         else:
101 |             features = self.pool(features)
102 |             # features = self.norm(features)
103 |             outfeatures = self.dropout(features)
104 |             self.upsample = nn.UpsamplingBilinear2d((out2_size[2],out2_size[3]))
105 |             outfeatures = self.upsample(outfeatures)
106 | 
107 |         return outfeatures
108 | 


--------------------------------------------------------------------------------
/models/models.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | def create_model(opt, dataset=None):
 4 | 
 5 |     if opt.model == 'DeeplabVGG':
 6 |         from .Deeplab import Deeplab_Solver
 7 |         model = Deeplab_Solver(opt, dataset)
 8 |     elif opt.model == 'DeeplabVGG_HHA':
 9 |         from .Deeplab_HHA import Deeplab_HHA_Solver
10 |         model = Deeplab_HHA_Solver(opt, dataset)
11 |     elif opt.model == 'DeeplabResnet':
12 |         from .Deeplab import Deeplab_Solver
13 |         model = Deeplab_Solver(opt, dataset,'Resnet')
14 | 
15 |     print("model [%s] was created" % (model.name()))
16 | 
17 |     return model
18 | 


--------------------------------------------------------------------------------
/options/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laughtervv/Deeplab-Pytorch/deb98bd27922241070d04b6ab6fa094981c3b827/options/__init__.py


--------------------------------------------------------------------------------
/options/base_options.py:
--------------------------------------------------------------------------------
  1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
  2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
  3 | import argparse
  4 | import os
  5 | from utils import util
  6 | import torch
  7 | 
  8 | class BaseOptions():
  9 |     def __init__(self):
 10 |         self.parser = argparse.ArgumentParser()
 11 |         self.initialized = False
 12 | 
 13 |     def initialize(self):
 14 |         # experiment specifics
 15 |         self.parser.add_argument('--name', type=str, default='label2city', help='name of the experiment. It decides where to store samples and models')
 16 |         self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0  0,1,2, 0,2. use -1 for CPU')
 17 |         self.parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here')
 18 |         self.parser.add_argument('--model', type=str, default='DeeplabVGG', help='model: DeeplabVGG, DeeplabVGG_HHA')
 19 |         self.parser.add_argument('--encoder', type=str, default='resnet50_dilated8', help='pretrained_model')
 20 |         self.parser.add_argument('--decoder', type=str, default='psp_bilinear', help='pretrained_model')
 21 |         self.parser.add_argument('--depthconv', action='store_true', help='if specified, use depthconv')
 22 |         self.parser.add_argument('--depthglobalpool', action='store_true', help='if specified, use global pooling with depth')
 23 |         self.parser.add_argument('--pretrained_model', type=str, default='', help='pretrained_model')
 24 |         self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
 25 |         self.parser.add_argument('--pretrained_model_HHA', type=str, default='', help='pretrained_model')
 26 |         self.parser.add_argument('--which_epoch_HHA', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
 27 |         self.parser.add_argument('--pretrained_model_rgb', type=str, default='', help='pretrained_model')
 28 |         self.parser.add_argument('--which_epoch_rgb', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
 29 | 
 30 |         # input/output sizes
 31 |         self.parser.add_argument('--batchSize', type=int, default=1, help='input batch size')
 32 |         self.parser.add_argument('--fineSize', type=str, default='480,640', help='then crop to this size')
 33 |         self.parser.add_argument('--label_nc', type=int, default=40, help='# of input image channels')
 34 | 
 35 |         # for setting inputs
 36 |         self.parser.add_argument('--dataroot', type=str, default='',
 37 |                                  help='chooses how datasets are loaded. [nyuv2]')
 38 |         self.parser.add_argument('--dataset_mode', type=str, default='nyuv2',
 39 |                                  help='chooses how datasets are loaded. [nyuv2]')
 40 |         self.parser.add_argument('--list', type=str, default='', help='image and seg mask list file')
 41 |         self.parser.add_argument('--vallist', type=str, default='', help='image and seg mask list file')
 42 | 
 43 |         # for data augmentation
 44 |         self.parser.add_argument('--flip', action='store_true',help='if specified, flip the images for data argumentation')
 45 |         self.parser.add_argument('--scale', action='store_true',help='if specified, scale the images for data argumentation')
 46 |         self.parser.add_argument('--crop', action='store_true',help='if specified, crop the images for data argumentation')
 47 |         self.parser.add_argument('--colorjitter', action='store_true',help='if specified, crop the images for data argumentation')
 48 |         self.parser.add_argument('--inputmode', default='bgr-mean', type=str, help='input image normalize option: bgr-mean, divstd-mean')
 49 | 
 50 |         self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly')
 51 |         self.parser.add_argument('--nThreads', default=1, type=int, help='# threads for loading data')
 52 |         self.parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.')
 53 | 
 54 |         # for displays
 55 |         self.parser.add_argument('--display_winsize', type=int, default=512,  help='display window size')
 56 |         self.parser.add_argument('--tf_log', action='store_true', help='if specified, use tensorboard logging. Requires tensorflow installed')
 57 |         self.parser.add_argument('--verbose', action='store_true', help='if specified, print loss while training')
 58 | 
 59 | 
 60 |     def parse(self, save=True):
 61 |         if not self.initialized:
 62 |             self.initialize()
 63 |         self.opt = self.parser.parse_args()
 64 |         self.opt.isTrain = self.isTrain   # train or test
 65 | 
 66 |         str_ids = self.opt.gpu_ids.split(',')
 67 |         self.opt.gpu_ids = []
 68 |         for str_id in str_ids:
 69 |             id = int(str_id)
 70 |             if id >= 0:
 71 |                 self.opt.gpu_ids.append(id)
 72 | 
 73 |         str_sizes = self.opt.fineSize.split(',')
 74 |         self.opt.fineSize = []
 75 |         for str_size in str_sizes:
 76 |             size_ = int(str_size)
 77 |             if size_ >= 0:
 78 |                 self.opt.fineSize.append(size_)
 79 | 
 80 |         # set gpu ids
 81 |         if len(self.opt.gpu_ids) > 0:
 82 |             torch.cuda.set_device(self.opt.gpu_ids[0])
 83 | 
 84 |         args = vars(self.opt)
 85 | 
 86 |         print('------------ Options -------------')
 87 |         for k, v in sorted(args.items()):
 88 |             print('%s: %s' % (str(k), str(v)))
 89 |         print('-------------- End ----------------')
 90 | 
 91 |         # save to the disk        
 92 |         expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name)
 93 |         util.mkdirs(expr_dir)
 94 |         if save:
 95 |             file_name = os.path.join(expr_dir, 'opt.txt')
 96 |             with open(file_name, 'wt') as opt_file:
 97 |                 opt_file.write('------------ Options -------------\n')
 98 |                 for k, v in sorted(args.items()):
 99 |                     opt_file.write('%s: %s\n' % (str(k), str(v)))
100 |                 opt_file.write('-------------- End ----------------\n')
101 |         return self.opt
102 | 


--------------------------------------------------------------------------------
/options/test_options.py:
--------------------------------------------------------------------------------
 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
 3 | from .base_options import BaseOptions
 4 | 
 5 | class TestOptions(BaseOptions):
 6 |     def initialize(self):
 7 |         BaseOptions.initialize(self)
 8 |         self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.')
 9 |         self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.')
10 |         self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')
11 |         self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc')
12 |         self.parser.add_argument('--how_many', type=int, default=20, help='how many test images to run')
13 |         self.parser.add_argument('--cluster_path', type=str, default='features_clustered_010.npy', help='the path for clustered results of encoded features')
14 |         self.isTrain = False
15 | 


--------------------------------------------------------------------------------
/options/train_options.py:
--------------------------------------------------------------------------------
 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 
 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
 3 | from .base_options import BaseOptions
 4 | 
 5 | class TrainOptions(BaseOptions):
 6 |     def initialize(self):
 7 |         BaseOptions.initialize(self)
 8 | 
 9 |         # for displays
10 |         self.parser.add_argument('--display_freq', type=int, default=100, help='frequency of showing training results on screen')
11 |         self.parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console')
12 |         self.parser.add_argument('--save_latest_freq', type=int, default=1000, help='frequency of saving the latest results')
13 |         self.parser.add_argument('--save_epoch_freq', type=int, default=10, help='frequency of saving checkpoints at the end of epochs')        
14 |         self.parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/')
15 |         self.parser.add_argument('--debug', action='store_true', help='only do one epoch and displays at each iteration')
16 | 
17 |         # for training
18 |         self.parser.add_argument('--loadfroms', action='store_true', help='continue training: load from 32s or 16s')
19 |         self.parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model')
20 |         self.parser.add_argument('--use_softmax', action='store_true', help='if specified use softmax loss, otherwise log-softmax')
21 |         self.parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc')
22 |         self.parser.add_argument('--nepochs', type=int, default=100, help='# of iter at starting learning rate')
23 |         self.parser.add_argument('--iterSize', type=int, default=10, help='# of iter at starting learning rate')
24 |         self.parser.add_argument('--maxbatchsize', type=int, default=-1, help='# of iter at starting learning rate')
25 |         self.parser.add_argument('--warmup_iters', type=int, default=500, help='# of iter at starting learning rate')
26 |         self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam')
27 |         self.parser.add_argument('--lr', type=float, default=0.00025, help='initial learning rate for adam')
28 |         self.parser.add_argument('--lr_power', type=float, default=0.9, help='power of learning rate policy')
29 |         self.parser.add_argument('--momentum', type=float, default=0.9, help='momentum for sgd')
30 |         self.parser.add_argument('--wd', type=float, default=0.0004, help='weight decay for sgd')
31 | 
32 |         self.isTrain = True
33 | 


--------------------------------------------------------------------------------
/scripts/test.sh:
--------------------------------------------------------------------------------
1 | python test.py \
2 | --gpu_ids 2 \
3 | --name nyuv2_VGGdeeplab_depthconv \
4 | --dataset_mode nyuv2 \
5 | --depthconv \
6 | --list dataset/lists/nyuv2/test.lst \
7 | --how_many 0
8 | 


--------------------------------------------------------------------------------
/scripts/train.sh:
--------------------------------------------------------------------------------
1 | python train.py \
2 | --name nyuv2_VGGdeeplab_depthconv \
3 | --dataset_mode nyuv2 \
4 | --flip --scale --crop --colorjitter \
5 | --depthconv \
6 | --list ./lists/train.lst \
7 | --vallist ./lists/val.lst \
8 | --continue_train
9 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | from collections import OrderedDict
 4 | from options.test_options import TestOptions
 5 | from data.data_loader import CreateDataLoader
 6 | from models.models import create_model
 7 | import utils.util as util
 8 | from utils.visualizer import Visualizer
 9 | from utils import html
10 | from torch.autograd import Variable
11 | 
12 | opt = TestOptions().parse(save=False)
13 | opt.nThreads = 1   
14 | opt.batchSize = 1  
15 | opt.serial_batches = True  # no shuffle
16 | 
17 | data_loader = CreateDataLoader(opt)
18 | dataset, _ = data_loader.load_data()
19 | model = create_model(opt,data_loader.dataset)
20 | visualizer = Visualizer(opt)
21 | # create website
22 | web_dir = os.path.join(opt.results_dir, opt.name, '%s_%s' % (opt.phase, opt.which_epoch))
23 | webpage = html.HTML(web_dir, '%s: %s' % (opt.name, pt.which_epoch))
24 | # test
25 | 
26 | 
27 | label_trues, label_preds = [], []
28 | 
29 | model.model.eval()
30 | tic = time.time()
31 | 
32 | accs=[]
33 | for i, data in enumerate(dataset):
34 |     if i >= opt.how_many and opt.how_many!=0:
35 |         break
36 |     seggt, segpred = model.forward(data,False)
37 |     print time.time() - tic
38 |     tic = time.time()
39 | 
40 |     seggt = seggt.data.cpu().numpy()
41 |     segpred = segpred.data.cpu().numpy()
42 | 
43 |     label_trues.append(seggt)
44 |     label_preds.append(segpred)
45 | 
46 |     visuals = model.get_visuals(i)
47 |     img_path = data['imgpath']
48 |     print('process image... %s' % img_path)
49 |     visualizer.save_images(webpage, visuals, img_path)
50 | 
51 | metrics0 = util.label_accuracy_score(
52 |     label_trues, label_preds, n_class=opt.label_nc, returniu=True)
53 | metrics = np.array(metrics0[:4])
54 | metrics *= 100
55 | print('''\
56 |         Accuracy: {0}
57 |         Accuracy Class: {1}
58 |         Mean IU: {2}
59 |         FWAV Accuracy: {3}'''.format(*metrics))
60 | 
61 | webpage.save()
62 | 


--------------------------------------------------------------------------------
/test_ops.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.nn.modules.utils import _single, _pair, _triple
  4 | from torch.autograd import Variable
  5 | 
  6 | from utils.gradcheck import gradcheck
  7 | from models.ops.depthconv.functions.depthconv import DepthconvFunction
  8 | 
  9 | 
 10 | N, inC, inH, inW = 4, 2, 8, 8
 11 | kH, kW = 3, 3
 12 | pad, stride, dilation = 0, 1, 1
 13 | 
 14 | offC = 1 * 2 * kH * kW
 15 | 
 16 | outC = 1
 17 | outH = (inH + 2 * pad - (dilation * (kH - 1) + 1)) // stride + 1
 18 | outW = (inW + 2 * pad - (dilation * (kW - 1) + 1)) // stride + 1
 19 | 
 20 | conv_offset2d = DepthconvFunction(
 21 |         padding=(pad, pad),
 22 |         stride=(stride, stride),
 23 |         dilation=(dilation, dilation), bias=True)
 24 | conv2d = F.ConvNd(_pair(stride), _pair(pad), _pair(dilation), False,
 25 |                _pair(0), 1, torch.backends.cudnn.benchmark, torch.backends.cudnn.enabled)
 26 | offset = Variable(
 27 |         torch.ones(N, 1, inH, inW).cuda(),
 28 |         requires_grad=False)
 29 | input = Variable(
 30 |         torch.rand(N, inC, inH, inW).cuda(),
 31 |         requires_grad=True)
 32 | input2 = Variable(input.data.clone(),
 33 |         requires_grad=True)
 34 | weight = Variable(
 35 |         10*torch.rand(outC, inC, kH, kW).cuda(),
 36 |         requires_grad=True)
 37 | weight2 = Variable(weight.data.clone(),
 38 |         requires_grad=True)
 39 | bias = Variable(torch.rand(outC).cuda(),requires_grad=True)
 40 | bias2 = Variable(bias.data.clone(),
 41 |         requires_grad=True)
 42 | grad = Variable(
 43 |         torch.rand(N, outC, 6, 6).cuda(),
 44 |         requires_grad=True)
 45 | 
 46 | print bias
 47 | out1 = conv_offset2d(input, offset, weight, bias)
 48 | out2 = conv2d(input2, weight2, bias2)
 49 | print (out1-out2).sum()
 50 | 
 51 | out1.backward(grad)
 52 | out2.backward(grad)
 53 | 
 54 | 
 55 | print (weight.grad-weight2.grad).sum()
 56 | print ('input.grad',input.grad.sum())
 57 | print ('input.grad',input2.grad.sum())
 58 | print (input.grad-input2.grad).sum()
 59 | print (bias.grad-bias2.grad).sum()
 60 | 
 61 | 
 62 | # print bias.data.cpu().numpy().dtype
 63 | 
 64 | # print("pass gradcheck: {}".format(gradcheck(conv_offset2d, (input, offset, weight, bias))))
 65 | # print("pass gradcheck: {}".format(gradcheck(conv2d, (input, weight,None))))
 66 | 
 67 | import torch
 68 | import torch.nn as nn
 69 | import torch.nn.functional as F
 70 | import numpy as np
 71 | from models.ops.depthavgpooling.functions.depthavgpooling import DepthavgpoolingFunction
 72 | from models.ops.depthavgpooling.modules import Depthavgpooling
 73 | from torch.autograd import Variable
 74 | 
 75 | depth = [[[1,0,1,10000],
 76 |          [0,1,10000,1],
 77 |          [1,0,1,0],
 78 |          [0,1,0,1]],
 79 |          ]
 80 | depth = np.zeros([40,40])
 81 | inputarray = torch.Tensor(np.asarray(range(2*40*40)).reshape([1,2,40,40]))
 82 | depth = torch.Tensor(np.asarray(depth).reshape([1,1,40,40]))
 83 | 
 84 | print inputarray
 85 | N, inC, inH, inW = 4, 512, 50, 65
 86 | input = Variable(
 87 |         inputarray,
 88 |         requires_grad=True).cuda()
 89 | depth = Variable(
 90 |     depth,
 91 |         requires_grad=True).cuda()
 92 | kH, kW = 3, 3
 93 | pad, stride, dilation = 1, 1, 1
 94 | depthpooling = Depthavgpooling(kH,stride,pad)
 95 | pooling = nn.AvgPool2d(kernel_size=kH, stride=stride,padding=pad)
 96 | 
 97 | out1 = depthpooling(input, depth)
 98 | out2 = pooling(input)
 99 | 
100 | grad = Variable(
101 |         torch.ones(N, 2, 40, 40).cuda(),
102 |         requires_grad=True)
103 | out1.backward(grad)
104 | 
105 | print out1-out2
106 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | from tensorboardX import SummaryWriter
  3 | from collections import OrderedDict
  4 | from options.train_options import TrainOptions
  5 | from data.data_loader import CreateDataLoader
  6 | from models.models import create_model
  7 | import utils.util as util
  8 | from utils.visualizer import Visualizer
  9 | import os
 10 | import numpy as np
 11 | import torch
 12 | from torch.autograd import Variable
 13 | import time
 14 | 
 15 | opt = TrainOptions().parse()
 16 | iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt')
 17 | ioupath_path = os.path.join(opt.checkpoints_dir, opt.name, 'MIoU.txt')
 18 | if opt.continue_train:
 19 |     try:
 20 |         start_epoch, epoch_iter = np.loadtxt(iter_path, delimiter=',', dtype=int)
 21 |     except:
 22 |         start_epoch, epoch_iter = 1, 0
 23 |         
 24 |     try:
 25 |         best_iou = np.loadtxt(ioupath_path, dtype=float)
 26 |     except:
 27 |         best_iou = 0.
 28 |     print('Resuming from epoch %d at iteration %d, previous best IoU %f' % (start_epoch, epoch_iter, best_iou))
 29 | else:
 30 |     start_epoch, epoch_iter = 1, 0
 31 |     best_iou = 0.
 32 | 
 33 | data_loader = CreateDataLoader(opt)
 34 | dataset, dataset_val = data_loader.load_data()
 35 | dataset_size = len(dataset)
 36 | print('#training images = %d' % dataset_size)
 37 | 
 38 | model = create_model(opt, dataset.dataset)
 39 | # print (model)
 40 | visualizer = Visualizer(opt)
 41 | total_steps = (start_epoch - 1) * dataset_size + epoch_iter
 42 | for epoch in range(start_epoch, opt.nepochs):
 43 |     epoch_start_time = time.time()
 44 |     if epoch != start_epoch:
 45 |         epoch_iter = epoch_iter % dataset_size
 46 | 
 47 |     model.model.train()
 48 |     for i, data in enumerate(dataset, start=epoch_iter):
 49 |         iter_start_time = time.time()
 50 |         total_steps += opt.batchSize
 51 |         epoch_iter += opt.batchSize
 52 | 
 53 |         ############## Forward and Backward Pass ######################
 54 |         model.forward(data)
 55 |         model.backward(total_steps, opt.nepochs * dataset.__len__() * opt.batchSize + 1)
 56 | 
 57 |         ############## update tensorboard and web images ######################
 58 |         if total_steps % opt.display_freq == 0:
 59 |             visuals = model.get_visuals(total_steps)
 60 |             visualizer.display_current_results(visuals, epoch, total_steps)
 61 | 
 62 |         ############## Save latest Model   ######################
 63 |         if total_steps % opt.save_latest_freq == 0:
 64 |             print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_steps))
 65 |             model.save('latest')
 66 |             np.savetxt(iter_path, (epoch, epoch_iter), delimiter=',', fmt='%d')
 67 |         # print time.time()-iter_start_time
 68 | 
 69 |     # end of epoch
 70 |     model.model.eval()
 71 |     if dataset_val!=None:
 72 |         label_trues, label_preds = [], []
 73 |         for i, data in enumerate(dataset_val):
 74 |             seggt, segpred = model.forward(data,False)
 75 |             seggt = seggt.data.cpu().numpy()
 76 |             segpred = segpred.data.cpu().numpy()
 77 | 
 78 |             label_trues.append(seggt)
 79 |             label_preds.append(segpred)
 80 | 
 81 |         metrics = util.label_accuracy_score(
 82 |             label_trues, label_preds, n_class=opt.label_nc)
 83 |         metrics = np.array(metrics)
 84 |         metrics *= 100
 85 |         print('''\
 86 |                 Validation:
 87 |                 Accuracy: {0}
 88 |                 Accuracy Class: {1}
 89 |                 Mean IU: {2}
 90 |                 FWAV Accuracy: {3}'''.format(*metrics))
 91 |         model.update_tensorboard(metrics,total_steps)
 92 |     iter_end_time = time.time()
 93 | 
 94 |     print('End of epoch %d / %d \t Time Taken: %d sec' %
 95 |           (epoch+1, opt.nepochs, time.time() - epoch_start_time))
 96 |     if metrics[2]>best_iou:
 97 |         best_iou = metrics[2]
 98 |         print('saving the model at the end of epoch %d, iters %d, loss %f' % (epoch, total_steps, model.trainingavgloss))
 99 |         model.save('best')
100 | 
101 |     ### save model for this epoch
102 |     if epoch % opt.save_epoch_freq == 0:
103 |         print('saving the model at the end of epoch %d, iters %d, loss %f' % (epoch, total_steps, model.trainingavgloss))
104 |         model.save('latest')
105 |         model.save(epoch)
106 |         np.savetxt(iter_path, (epoch + 1, 0), delimiter=',', fmt='%d')
107 | 
108 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laughtervv/Deeplab-Pytorch/deb98bd27922241070d04b6ab6fa094981c3b827/utils/__init__.py


--------------------------------------------------------------------------------
/utils/gradcheck.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.autograd import Variable
  3 | from collections import Iterable
  4 | import numpy as np
  5 | 
  6 | 
  7 | def iter_variables(x):
  8 |     if isinstance(x, Variable):
  9 |         if x.requires_grad:
 10 |             yield (x.grad.data, x.data) if x.grad is not None else (None, None)
 11 |     elif isinstance(x, Iterable):
 12 |         for elem in x:
 13 |             for result in iter_variables(elem):
 14 |                 yield result
 15 | 
 16 | 
 17 | def zero_gradients(x):
 18 |     if isinstance(x, Variable):
 19 |         if x.grad is not None:
 20 |             x.grad.detach_()
 21 |             x.grad.data.zero_()
 22 |     elif isinstance(x, Iterable):
 23 |         for elem in x:
 24 |             zero_gradients(elem)
 25 | 
 26 | 
 27 | def make_jacobian(input, num_out):
 28 |     if isinstance(input, Variable) and not input.requires_grad:
 29 |         return None
 30 |     elif torch.is_tensor(input) or isinstance(input, Variable):
 31 |         return torch.zeros(input.nelement(), num_out)
 32 |     elif isinstance(input, Iterable):
 33 |         jacobians = list(filter(
 34 |             lambda x: x is not None, (make_jacobian(elem, num_out) for elem in input)))
 35 |         if not jacobians:
 36 |             return None
 37 |         return type(input)(jacobians)
 38 |     else:
 39 |         return None
 40 | 
 41 | 
 42 | def iter_tensors(x, only_requiring_grad=False):
 43 |     if torch.is_tensor(x):
 44 |         yield x
 45 |     elif isinstance(x, Variable):
 46 |         if x.requires_grad or not only_requiring_grad:
 47 |             yield x.data
 48 |     elif isinstance(x, Iterable):
 49 |         for elem in x:
 50 |             for result in iter_tensors(elem, only_requiring_grad):
 51 |                 yield result
 52 | 
 53 | 
 54 | def contiguous(input):
 55 |     if torch.is_tensor(input):
 56 |         return input.contiguous()
 57 |     elif isinstance(input, Variable):
 58 |         return input.contiguous()
 59 |     elif isinstance(input, Iterable):
 60 |         return type(input)(contiguous(e) for e in input)
 61 |     return input
 62 | 
 63 | 
 64 | def get_numerical_jacobian(fn, input, target, eps=1e-3):
 65 |     # To be able to use .view(-1) input must be contiguous
 66 |     input = contiguous(input)
 67 |     output_size = fn(input).numel()
 68 |     jacobian = make_jacobian(target, output_size)
 69 | 
 70 |     # It's much easier to iterate over flattened lists of tensors.
 71 |     # These are reference to the same objects in jacobian, so any changes
 72 |     # will be reflected in it as well.
 73 |     x_tensors = [t for t in iter_tensors(target, True)]
 74 |     j_tensors = [t for t in iter_tensors(jacobian)]
 75 | 
 76 |     outa = torch.DoubleTensor(output_size)
 77 |     outb = torch.DoubleTensor(output_size)
 78 | 
 79 |     # TODO: compare structure
 80 |     for x_tensor, d_tensor in zip(x_tensors, j_tensors):
 81 |         flat_tensor = x_tensor.view(-1)
 82 |         for i in range(flat_tensor.nelement()):
 83 |             orig = flat_tensor[i]
 84 |             flat_tensor[i] = orig - eps
 85 |             outa.copy_(fn(input), broadcast=False)
 86 |             flat_tensor[i] = orig + eps
 87 |             outb.copy_(fn(input), broadcast=False)
 88 |             flat_tensor[i] = orig
 89 | 
 90 |             outb.add_(-1, outa).div_(2 * eps)
 91 |             d_tensor[i] = outb
 92 | 
 93 |     return jacobian
 94 | 
 95 | 
 96 | def get_analytical_jacobian(input, output):
 97 |     jacobian = make_jacobian(input, output.numel())
 98 |     jacobian_reentrant = make_jacobian(input, output.numel())
 99 |     grad_output = output.data.clone().zero_()
100 |     flat_grad_output = grad_output.view(-1)
101 |     reentrant = True
102 |     correct_grad_sizes = True
103 | 
104 |     for i in range(flat_grad_output.numel()):
105 |         flat_grad_output.zero_()
106 |         flat_grad_output[i] = 1
107 |         for jacobian_c in (jacobian, jacobian_reentrant):
108 |             zero_gradients(input)
109 |             output.backward(grad_output, create_graph=True)
110 |             for jacobian_x, (d_x, x) in zip(jacobian_c, iter_variables(input)):
111 |                 if d_x is None:
112 |                     jacobian_x[:, i].zero_()
113 |                 else:
114 |                     if d_x.size() != x.size():
115 |                         correct_grad_sizes = False
116 |                     jacobian_x[:, i] = d_x.to_dense() if d_x.is_sparse else d_x
117 | 
118 |     for jacobian_x, jacobian_reentrant_x in zip(jacobian, jacobian_reentrant):
119 |         if (jacobian_x - jacobian_reentrant_x).abs().max() != 0:
120 |             reentrant = False
121 | 
122 |     return jacobian, reentrant, correct_grad_sizes
123 | 
124 | 
125 | def _as_tuple(x):
126 |     if isinstance(x, tuple):
127 |         return x
128 |     elif isinstance(x, list):
129 |         return tuple(x)
130 |     else:
131 |         return x,
132 | 
133 | 
134 | def gradcheck(func, inputs, eps=1e-6, atol=1e-5, rtol=1e-3):
135 |     """Check gradients computed via small finite differences
136 |        against analytical gradients
137 | 
138 |     The check between numerical and analytical has the same behaviour as
139 |     numpy.allclose https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html
140 |     meaning it check that
141 |         absolute(a - n) <= (atol + rtol * absolute(n))
142 |     is true for all elements of analytical jacobian a and numerical jacobian n.
143 | 
144 |     Args:
145 |         func: Python function that takes Variable inputs and returns
146 |             a tuple of Variables
147 |         inputs: tuple of Variables
148 |         eps: perturbation for finite differences
149 |         atol: absolute tolerance
150 |         rtol: relative tolerance
151 | 
152 |     Returns:
153 |         True if all differences satisfy allclose condition
154 |     """
155 |     output = func(*inputs)
156 |     output = _as_tuple(output)
157 | 
158 |     for i, o in enumerate(output):
159 |         if not o.requires_grad:
160 |             continue
161 |         print 'i:',i,o
162 | 
163 |         def fn(input):
164 |             return _as_tuple(func(*input))[i].data
165 | 
166 |         analytical, reentrant, correct_grad_sizes = get_analytical_jacobian(_as_tuple(inputs), o)
167 |         numerical = get_numerical_jacobian(fn, inputs, inputs, eps)
168 |         # -------------------
169 |         for a in analytical:
170 |             an = a.numpy()
171 |         for n in numerical:
172 |             nn = n.numpy()
173 | 
174 |         diff = []
175 |         for a, n in zip(analytical, numerical):
176 |             dif = (a - n).abs().numpy()
177 |             diff.append(np.max(dif))
178 |         diff_max = max(diff)
179 |         # print(diff_max)
180 | 
181 |         for a, n in zip(analytical, numerical):
182 |             if not ((a - n).abs() <= (atol + rtol * n.abs())).all():
183 |                 print a.sum(),n.sum()
184 |                 print('1111')
185 |                 return False
186 |         # --------------------------
187 |         # different two times
188 |         if not reentrant:
189 |             print('not same for 2')
190 |             return False
191 | 
192 |         if not correct_grad_sizes:
193 |             print('not same size')
194 |             return False
195 | 
196 |     # check if the backward multiplies by grad_output
197 |     zero_gradients(inputs)
198 |     output = _as_tuple(func(*inputs))
199 |     torch.autograd.backward(output, [o.data.new(o.size()).zero_() for o in output])
200 |     var_inputs = list(filter(lambda i: isinstance(i, Variable), inputs))
201 |     if not var_inputs:
202 |         raise RuntimeError("no Variables found in input")
203 |     for i in var_inputs:
204 |         if i.grad is None:
205 |             continue
206 |         if not i.grad.data.eq(0).all():
207 |             print('not all zero')
208 |             return False
209 | 
210 |     return True
211 | 
212 | 
213 | def gradgradcheck(func, inputs, grad_outputs, eps=1e-6, atol=1e-5, rtol=1e-3):
214 |     """Check gradients of gradients computed via small finite differences
215 |        against analytical gradients
216 |     This function checks that backpropagating through the gradients computed
217 |     to the given grad_outputs are correct.
218 | 
219 |     The check between numerical and analytical has the same behaviour as
220 |     numpy.allclose https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html
221 |     meaning it check that
222 |         absolute(a - n) <= (atol + rtol * absolute(n))
223 |     is true for all elements of analytical gradient a and numerical gradient n.
224 | 
225 |     Args:
226 |         func: Python function that takes Variable inputs and returns
227 |             a tuple of Variables
228 |         inputs: tuple of Variables
229 |         grad_outputs: tuple of Variables
230 |         eps: perturbation for finite differences
231 |         atol: absolute tolerance
232 |         rtol: relative tolerance
233 | 
234 |     Returns:
235 |         True if all differences satisfy allclose condition
236 |     """
237 | 
238 |     def new_func(*input_args):
239 |         input_args = input_args[:-len(grad_outputs)]
240 |         outputs = func(*input_args)
241 |         outputs = _as_tuple(outputs)
242 |         input_args = tuple(x for x in input_args if isinstance(x, Variable) and x.requires_grad)
243 |         grad_inputs = torch.autograd.grad(outputs, input_args, grad_outputs)
244 |         return grad_inputs
245 | 
246 |     return gradcheck(new_func, inputs + grad_outputs, eps, atol, rtol)
247 | 


--------------------------------------------------------------------------------
/utils/html.py:
--------------------------------------------------------------------------------
 1 | import dominate
 2 | from dominate.tags import *
 3 | import os
 4 | 
 5 | 
 6 | class HTML:
 7 |     def __init__(self, web_dir, title, refresh=0):
 8 |         self.title = title
 9 |         self.web_dir = web_dir
10 |         self.img_dir = os.path.join(self.web_dir, 'images')
11 |         if not os.path.exists(self.web_dir):
12 |             os.makedirs(self.web_dir)
13 |         if not os.path.exists(self.img_dir):
14 |             os.makedirs(self.img_dir)
15 | 
16 |         self.doc = dominate.document(title=title)
17 |         if refresh > 0:
18 |             with self.doc.head:
19 |                 meta(http_equiv="refresh", content=str(refresh))
20 | 
21 |     def get_image_dir(self):
22 |         return self.img_dir
23 | 
24 |     def add_header(self, str):
25 |         with self.doc:
26 |             h3(str)
27 | 
28 |     def add_table(self, border=1):
29 |         self.t = table(border=border, style="table-layout: fixed;")
30 |         self.doc.add(self.t)
31 | 
32 |     def add_images(self, ims, txts, links, width=512):
33 |         self.add_table()
34 |         with self.t:
35 |             with tr():
36 |                 for im, txt, link in zip(ims, txts, links):
37 |                     with td(style="word-wrap: break-word;", halign="center", valign="top"):
38 |                         with p():
39 |                             with a(href=os.path.join('images', link)):
40 |                                 img(style="width:%dpx" % (width), src=os.path.join('images', im))
41 |                             br()
42 |                             p(txt)
43 | 
44 |     def save(self):
45 |         html_file = '%s/index.html' % self.web_dir
46 |         f = open(html_file, 'wt')
47 |         f.write(self.doc.render())
48 |         f.close()
49 | 
50 | 
51 | if __name__ == '__main__':
52 |     html = HTML('web/', 'test_html')
53 |     html.add_header('hello world')
54 | 
55 |     ims = []
56 |     txts = []
57 |     links = []
58 |     for n in range(4):
59 |         ims.append('image_%d.jpg' % n)
60 |         txts.append('text_%d' % n)
61 |         links.append('image_%d.jpg' % n)
62 |     html.add_images(ims, txts, links)
63 |     html.save()
64 | 


--------------------------------------------------------------------------------
/utils/util.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import torch
  3 | import numpy as np
  4 | from PIL import Image
  5 | import numpy as np
  6 | import os
  7 | 
  8 | ####evaluation metrics
  9 | 
 10 | def _fast_hist(label_true, label_pred, n_class):
 11 |     mask = (label_true >= 0) & (label_true < n_class)
 12 |     hist = np.bincount(
 13 |         n_class * label_true[mask].astype(int) +
 14 |         label_pred[mask], minlength=n_class ** 2).reshape(n_class, n_class)
 15 |     return hist
 16 | 
 17 | 
 18 | def label_accuracy_score(label_trues, label_preds, n_class, returniu = False):
 19 |     """Returns accuracy score evaluation result.
 20 |       - overall accuracy
 21 |       - mean accuracy
 22 |       - mean IU
 23 |       - fwavacc
 24 |     """
 25 |     hist = np.zeros((n_class, n_class))
 26 |     for lt, lp in zip(label_trues, label_preds):
 27 |         hist += _fast_hist(lt.flatten(), lp.flatten(), n_class)
 28 |     acc = np.diag(hist).sum() / hist.sum()
 29 |     acc_cls = np.diag(hist) / hist.sum(axis=1)
 30 |     acc_cls = np.nanmean(acc_cls)
 31 |     iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
 32 |     mean_iu = np.nanmean(iu)
 33 |     freq = hist.sum(axis=1) / hist.sum()
 34 |     fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
 35 |     if returniu:
 36 |         return acc, acc_cls, mean_iu, fwavacc, iu[freq > 0]
 37 |     else:
 38 |         return acc, acc_cls, mean_iu, fwavacc
 39 | 
 40 | ###############################################################################
 41 | # Code from
 42 | # https://github.com/ycszen/pytorch-seg/blob/master/transform.py
 43 | # Modified so it complies with the Citscape label map colors
 44 | ###############################################################################
 45 | def uint82bin(n, count=8):
 46 |     """returns the binary of integer n, count refers to amount of bits"""
 47 |     return ''.join([str((n >> y) & 1) for y in range(count-1, -1, -1)])
 48 | 
 49 | def labelcolormap(N):
 50 |     if N == 35: # cityscape
 51 |         cmap = np.array([(  0,  0,  0), (  0,  0,  0), (  0,  0,  0), (  0,  0,  0), (  0,  0,  0), (111, 74,  0), ( 81,  0, 81),
 52 |                      (128, 64,128), (244, 35,232), (250,170,160), (230,150,140), ( 70, 70, 70), (102,102,156), (190,153,153),
 53 |                      (180,165,180), (150,100,100), (150,120, 90), (153,153,153), (153,153,153), (250,170, 30), (220,220,  0),
 54 |                      (107,142, 35), (152,251,152), ( 70,130,180), (220, 20, 60), (255,  0,  0), (  0,  0,142), (  0,  0, 70),
 55 |                      (  0, 60,100), (  0,  0, 90), (  0,  0,110), (  0, 80,100), (  0,  0,230), (119, 11, 32), (  0,  0,142)],
 56 |                      dtype=np.uint8)
 57 |     else:
 58 |         cmap = np.zeros((N, 3), dtype=np.uint8)
 59 |         for i in range(N):
 60 |             r, g, b = 0, 0, 0
 61 |             id = i
 62 |             for j in range(7):
 63 |                 str_id = uint82bin(id)
 64 |                 r = r ^ (np.uint8(str_id[-1]) << (7-j))
 65 |                 g = g ^ (np.uint8(str_id[-2]) << (7-j))
 66 |                 b = b ^ (np.uint8(str_id[-3]) << (7-j))
 67 |                 id = id >> 3
 68 |             cmap[i, 0] = r
 69 |             cmap[i, 1] = g
 70 |             cmap[i, 2] = b
 71 |     return cmap
 72 | 
 73 | class Colorize(object):
 74 |     def __init__(self, n=35):
 75 |         n = 256
 76 |         self.cmap = labelcolormap(n)
 77 |         self.cmap = torch.from_numpy(self.cmap[:n])
 78 | 
 79 |     def __call__(self, gray_image):
 80 |         size = gray_image.size()
 81 |         color_image = torch.ByteTensor(3, size[1], size[2]).fill_(0)
 82 | 
 83 |         for label in range(0, len(self.cmap)):
 84 |             mask = (label == gray_image[0]).cpu()
 85 |             color_image[0][mask] = self.cmap[label][0]
 86 |             color_image[1][mask] = self.cmap[label][1]
 87 |             color_image[2][mask] = self.cmap[label][2]
 88 | 
 89 |         return color_image
 90 | # Converts a Tensor into a Numpy array
 91 | # |imtype|: the desired type of the converted numpy array
 92 | def tensor2im(image_tensor, imtype=np.uint8, inputmode=''):
 93 |     if isinstance(image_tensor, list):
 94 |         image_numpy = []
 95 |         for i in range(len(image_tensor)):
 96 |             image_numpy.append(tensor2im(image_tensor[i], imtype, inputmode))
 97 |         return image_numpy
 98 |     image_numpy = image_tensor.cpu().float().numpy()
 99 |     if inputmode=='div255-mean':
100 |         image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
101 |     elif inputmode=='divstd-mean':
102 |         i_max = np.max(image_numpy)
103 |         i_min = np.min(image_numpy)
104 |         image_numpy = np.transpose((image_numpy-i_min)/(i_max-i_min) * 255.0, (1, 2, 0))
105 |     elif inputmode=='bgr-mean':
106 |         image_numpy = np.transpose(image_numpy, (1, 2, 0))[:,:,::-1] + np.asarray([122.675,116.669,104.008])
107 |         # print(image_numpy.max(),image_numpy.min())
108 |     else:
109 |         # print('depth')
110 |         image_numpy = np.transpose(image_numpy, (1, 2, 0))[:,:,::-1] * 255.0
111 |     # image_numpy = np.clip(image_numpy, 0, 255)
112 |     if image_numpy.shape[2] == 1:        
113 |         image_numpy = image_numpy[:,:,0]
114 |     return image_numpy.astype(imtype)
115 | 
116 | # Converts a one-hot tensor into a colorful label map
117 | colormap = Colorize(255)
118 | def tensor2label(label_tensor, n_label, imtype=np.uint8, colorize = True):
119 |     if n_label == 0:
120 |         return tensor2im(label_tensor, imtype)
121 |     label_tensor = label_tensor.cpu().float()    
122 |     if label_tensor.size()[0] > 1:
123 |         label_tensor = label_tensor.max(0, keepdim=True)[1]
124 |     if colorize:
125 |         label_tensor = colormap(label_tensor)
126 |         label_numpy = np.transpose(label_tensor.numpy(), (1, 2, 0))
127 |     else:
128 |         label_numpy = np.squeeze(label_tensor.numpy())
129 |         # print (np.unique(label_numpy.astype(imtype)))
130 |     return label_numpy.astype(imtype)
131 | 
132 | def save_image(image_numpy, image_path, imagesize = None):
133 |     image_pil = Image.fromarray(image_numpy)
134 |     if imagesize is not None:
135 |         img_w, img_h = imagesize
136 |         image_pil = image_pil.resize((img_w, img_h), Image.NEAREST)
137 |     # print(np.unique(np.asarray(image_pil)))
138 |     image_pil.save(image_path)
139 |     # if len(image_numpy.shape)==2:
140 |     #     image_pil = Image.open(image_path)
141 |     #     print(image_path,np.unique(np.asarray(image_pil)))
142 | 
143 | def mkdirs(paths):
144 |     if isinstance(paths, list) and not isinstance(paths, str):
145 |         for path in paths:
146 |             mkdir(path)
147 |     else:
148 |         mkdir(paths)
149 | 
150 | def mkdir(path):
151 |     if not os.path.exists(path):
152 |         os.makedirs(path)
153 | 


--------------------------------------------------------------------------------
/utils/visualizer.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import os
  3 | import ntpath
  4 | import time
  5 | from . import util
  6 | from . import html
  7 | import scipy.misc
  8 | try:
  9 |     from StringIO import StringIO  # Python 2.7
 10 | except ImportError:
 11 |     from io import BytesIO         # Python 3.x
 12 | 
 13 | class Visualizer():
 14 |     def __init__(self, opt):
 15 |         # self.opt = opt
 16 |         self.tf_log = opt.tf_log
 17 |         self.use_html = opt.isTrain and not opt.no_html
 18 |         self.win_size = opt.display_winsize
 19 |         self.name = opt.name
 20 |         if self.tf_log:
 21 |             import tensorflow as tf
 22 |             self.tf = tf
 23 |             self.log_dir = os.path.join(opt.checkpoints_dir, opt.name, 'logs')
 24 |             self.writer = tf.summary.FileWriter(self.log_dir)
 25 | 
 26 |         if self.use_html:
 27 |             self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web')
 28 |             self.img_dir = os.path.join(self.web_dir, 'images')
 29 |             print('create web directory %s...' % self.web_dir)
 30 |             util.mkdirs([self.web_dir, self.img_dir])
 31 | 
 32 |     # |visuals|: dictionary of images to display or save
 33 |     def display_current_results(self, visuals, epoch, step):
 34 |         if self.tf_log: # show images in tensorboard output
 35 |             img_summaries = []
 36 |             for label, image_numpy in visuals.items():
 37 |                 # Write the image to a string
 38 |                 try:
 39 |                     s = StringIO()
 40 |                 except:
 41 |                     s = BytesIO()
 42 |                 scipy.misc.toimage(image_numpy).save(s, format="jpeg")
 43 |                 # Create an Image object
 44 |                 img_sum = self.tf.Summary.Image(encoded_image_string=s.getvalue(), height=image_numpy.shape[0], width=image_numpy.shape[1])
 45 |                 # Create a Summary value
 46 |                 img_summaries.append(self.tf.Summary.Value(tag=label, image=img_sum))
 47 | 
 48 |             # Create and write Summary
 49 |             summary = self.tf.Summary(value=img_summaries)
 50 |             self.writer.add_summary(summary, step)
 51 | 
 52 |         if self.use_html: # save images to a html file
 53 |             for label, image_numpy in visuals.items():
 54 |                 if isinstance(image_numpy, list):
 55 |                     for i in range(len(image_numpy)):
 56 |                         img_path = os.path.join(self.img_dir, 'epoch%.3d_%s_%d.jpg' % (epoch, label, i))
 57 |                         util.save_image(image_numpy[i], img_path)
 58 |                 else:
 59 |                     img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.jpg' % (epoch, label))
 60 |                     util.save_image(image_numpy, img_path)
 61 | 
 62 |             # update website
 63 |             webpage = html.HTML(self.web_dir, self.name, refresh=5)
 64 |             for n in range(epoch, 0, -1):
 65 |                 webpage.add_header('epoch [%d]' % n)
 66 |                 ims = []
 67 |                 txts = []
 68 |                 links = []
 69 | 
 70 |                 for label, image_numpy in visuals.items():
 71 |                     if isinstance(image_numpy, list):
 72 |                         for i in range(len(image_numpy)):
 73 |                             img_path = 'epoch%.3d_%s_%d.jpg' % (n, label, i)
 74 |                             ims.append(img_path)
 75 |                             txts.append(label+str(i))
 76 |                             links.append(img_path)
 77 |                     else:
 78 |                         img_path = 'epoch%.3d_%s.jpg' % (n, label)
 79 |                         ims.append(img_path)
 80 |                         txts.append(label)
 81 |                         links.append(img_path)
 82 |                 if len(ims) < 10:
 83 |                     webpage.add_images(ims, txts, links, width=self.win_size)
 84 |                 else:
 85 |                     num = int(round(len(ims)/2.0))
 86 |                     webpage.add_images(ims[:num], txts[:num], links[:num], width=self.win_size)
 87 |                     webpage.add_images(ims[num:], txts[num:], links[num:], width=self.win_size)
 88 |             webpage.save()
 89 | 
 90 |     # errors: dictionary of error labels and values
 91 |     def plot_current_errors(self, errors, step):
 92 |         if self.tf_log:
 93 |             for tag, value in errors.items():
 94 |                 summary = self.tf.Summary(value=[self.tf.Summary.Value(tag=tag, simple_value=value)])
 95 |                 self.writer.add_summary(summary, step)
 96 | 
 97 |     # errors: same format as |errors| of plotCurrentErrors
 98 |     def print_current_errors(self, epoch, i, errors, t):
 99 |         message = '(epoch: %d, iters: %d, time: %.3f) ' % (epoch, i, t)
100 |         for k, v in errors.items():
101 |             if v != 0:
102 |                 message += '%s: %.3f ' % (k, v)
103 | 
104 |         print(message)
105 |         with open(self.log_name, "a") as log_file:
106 |             log_file.write('%s\n' % message)
107 | 
108 |     # save image to the disk
109 |     def save_images(self, webpage, visuals, image_path):
110 |         image_dir = webpage.get_image_dir()
111 |         short_path = ntpath.basename(image_path[0])
112 |         name = os.path.splitext(short_path)[0]
113 | 
114 |         webpage.add_header(name)
115 |         ims = []
116 |         txts = []
117 |         links = []
118 | 
119 |         for label, image_numpy in visuals.items():
120 |             image_name = '%s_%s.jpg' % (name, label)
121 |             save_path = os.path.join(image_dir, image_name)
122 |             util.save_image(image_numpy, save_path)
123 | 
124 |             ims.append(image_name)
125 |             txts.append(label)
126 |             links.append(image_name)
127 |         webpage.add_images(ims, txts, links, width=self.win_size)
128 | 


--------------------------------------------------------------------------------