├── README.md ├── caffe_converter └── convert_from_caffe.py ├── data ├── VOC_dataset.py ├── __init__.py ├── base_data_loader.py ├── base_dataset.py ├── custom_dataset_data_loader.py └── data_loader.py ├── models ├── Deeplab.py ├── Resnet_Deeplab.py ├── VGG_Deeplab.py ├── __init__.py ├── base_model.py ├── losses.py ├── model_utils.py └── models.py ├── options ├── __init__.py ├── base_options.py ├── test_options.py └── train_options.py ├── scripts ├── test.sh └── train.sh ├── test.py ├── test_ops.py ├── train.py └── utils ├── __init__.py ├── gradcheck.py ├── html.py ├── util.py └── visualizer.py /README.md: -------------------------------------------------------------------------------- 1 | This is a pytorch implementation for DeepLab. 2 | 3 | To convert pretrained model from caffe, please take a look at `caffe_converter/convert_from_caffe.py` 4 | -------------------------------------------------------------------------------- /caffe_converter/convert_from_caffe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os.path as osp 4 | import caffe 5 | import torch 6 | import numpy as np 7 | import torchvision 8 | from models import Deeplab 9 | from collections import OrderedDict 10 | import caffe.proto.caffe_pb2 as caffe_pb2 11 | model = open('downloaded_caffemodel.caffemodel', 'rb') 12 | net_param = caffe_pb2.NetParameter() 13 | net_param.ParseFromString(model.read()) 14 | assert (net_param.layer[42].name =='pool6_1x1_norm') 15 | 16 | caffe_prototxt = 'train.prototxt' # NOQA 17 | caffe_model_path = 'train_iter_20000.caffemodel' 18 | 19 | caffe.set_mode_cpu() 20 | caffe_model = caffe.Net(caffe_prototxt, caffe_model_path, caffe.TEST) 21 | caffe_model.forward() 22 | 23 | torch_model = Deeplab.Deeplab_SS(None, 20, False, vgg=True) # torchvision.models.vgg16() 24 | torch_model_params = torch_model.named_parameters() 25 | W = caffe_model.params['conv1_1'][0].data 26 | print np.mean(W) 27 | newdict = OrderedDict() 28 | for name, p1 in caffe_model.params.iteritems(): 29 | try: 30 | p2 = torch_model_params.next() 31 | print('%s: %s -> %s %s' % (name, p1[0].data.shape, p2[0], p2[1].data.size())) 32 | p2[1].data = torch.from_numpy(p1[0].data).float() 33 | print(np.mean(p1[0].data)) 34 | if len(p1) == 2: 35 | p2 = torch_model_params.next() 36 | print('%s: %s ->%s %s' % (name, p1[1].data.shape, p2[0], p2[1].data.size())) 37 | p2[1].data = torch.from_numpy(p1[1].data) 38 | print(np.mean(p1[1].data),) 39 | except StopIteration: 40 | break 41 | 42 | torch_model_path = 'DeepLab_VGG_caffe.pth' 43 | torch.save(torch_model.state_dict(), torch_model_path) 44 | 45 | -------------------------------------------------------------------------------- /data/VOC_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import numpy as np 3 | import torchvision 4 | import torchvision.transforms as transforms 5 | import torch 6 | import h5py 7 | import time 8 | from data.base_dataset import * 9 | from PIL import Image 10 | import math, random 11 | 12 | 13 | def make_dataset_fromlst(listfilename): 14 | """ 15 | NYUlist format: 16 | imagepath seglabelpath depthpath HHApath 17 | """ 18 | images = [] 19 | segs = [] 20 | 21 | with open(listfilename) as f: 22 | content = f.readlines() 23 | for x in content: 24 | imgname, segname = x.strip().split(' ') 25 | images += [imgname] 26 | segs += [segname] 27 | 28 | return {'images':images, 'segs':segs} 29 | 30 | class VOCDataset(BaseDataset): 31 | def initialize(self, opt): 32 | self.opt = opt 33 | np.random.seed(8964) 34 | self.paths_dict = make_dataset_fromlst(opt.list) 35 | self.len = len(self.paths_dict['images']) 36 | self.datafile = 'VOC_dataset.py' 37 | 38 | def __getitem__(self, index): 39 | #self.paths['images'][index] 40 | # print self.opt.scale,self.opt.flip,self.opt.crop,self.opt.colorjitter 41 | img = np.asarray(Image.open(self.paths_dict['images'][index])) 42 | seg = np.asarray(Image.open(self.paths_dict['segs'][index])).astype(np.uint8) 43 | # print(np.unique(seg)) 44 | 45 | params = get_params(self.opt, seg.shape) 46 | seg_tensor_tranformed = transform(seg, params, normalize=False,method='nearest',istrain=self.opt.isTrain) 47 | if self.opt.inputmode == 'bgr-mean': 48 | img_tensor_tranformed = transform(img, params, normalize=False, istrain=self.opt.isTrain, option=1) 49 | else: 50 | img_tensor_tranformed = transform(img, params, istrain=self.opt.isTrain, option=1) 51 | return {'image':img_tensor_tranformed, 52 | 'seg': seg_tensor_tranformed, 53 | 'imgpath': self.paths_dict['segs'][index]} 54 | 55 | def __len__(self): 56 | return self.len 57 | 58 | def name(self): 59 | return 'VOCDataset' 60 | 61 | class VOCDataset_val(BaseDataset): 62 | def initialize(self, opt): 63 | self.opt = opt 64 | self.paths_dict = make_dataset_fromlst(opt.vallist) 65 | self.len = len(self.paths_dict['images']) 66 | 67 | def __getitem__(self, index): 68 | img = np.asarray(Image.open(self.paths_dict['images'][index]))#.astype(np.uint8) 69 | seg = np.asarray(Image.open(self.paths_dict['segs'][index])).astype(np.uint8) 70 | 71 | params = get_params(self.opt, seg.shape, test=True) 72 | seg_tensor_tranformed = transform(seg, params, normalize=False,method='nearest',istrain=self.opt.isTrain) 73 | if self.opt.inputmode == 'bgr-mean': 74 | img_tensor_tranformed = transform(img, params, normalize=False, istrain=self.opt.isTrain, option=1) 75 | else: 76 | img_tensor_tranformed = transform(img, params, istrain=self.opt.isTrain, option=1) 77 | 78 | return {'image':img_tensor_tranformed, 79 | 'seg': seg_tensor_tranformed, 80 | 'imgpath': self.paths_dict['segs'][index]} 81 | 82 | def __len__(self): 83 | return self.len 84 | 85 | def name(self): 86 | return 'VOCDataset_val' 87 | 88 | 89 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laughtervv/Deeplab-Pytorch/deb98bd27922241070d04b6ab6fa094981c3b827/data/__init__.py -------------------------------------------------------------------------------- /data/base_data_loader.py: -------------------------------------------------------------------------------- 1 | 2 | class BaseDataLoader(): 3 | def __init__(self): 4 | pass 5 | 6 | def initialize(self, opt): 7 | self.opt = opt 8 | pass 9 | 10 | def load_data(): 11 | return None 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /data/base_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import torchvision.transforms as transforms 4 | import numpy as np 5 | import torch 6 | import cv2 7 | import random 8 | 9 | class BaseDataset(data.Dataset): 10 | def __init__(self): 11 | super(BaseDataset, self).__init__() 12 | 13 | def name(self): 14 | return 'BaseDataset' 15 | 16 | def initialize(self, opt): 17 | pass 18 | 19 | def get_params(opt, size, test=False): 20 | h, w = size 21 | if opt.scale and test==False: 22 | scale = random.uniform(0.76, 1.75) 23 | new_h = h * scale 24 | new_w = (new_h * w // h) 25 | 26 | new_h = int(round(new_h / 8) * 8) 27 | new_w = int(round(new_w / 8) * 8) 28 | 29 | else: 30 | new_h = h 31 | new_w = w 32 | # new_h = int(round(h / 8) * 8) 33 | # new_w = int(round(w / 8) * 8) 34 | 35 | if opt.flip and test==False: 36 | flip = random.random() > 0.5 37 | else: 38 | flip = False 39 | 40 | crop = False 41 | x1 = x2 = y1 = y2 = 0 42 | if opt.crop and test==False: 43 | # if new_h > 241 and new_w > 321: #424 44 | if opt.batchSize > 1: 45 | cropsizeh = 321 46 | cropsizew = 421#(cropsizeh * new_w // new_h) 47 | else: 48 | cropscale = random.uniform(0.6,.9) 49 | cropsizeh = int (new_h * cropscale) 50 | cropsizew = int (new_w * cropscale) 51 | # print cropsizeh,cropsizew,new_h,new_w 52 | x1 = random.randint(0, np.maximum(0, new_w - cropsizew)) 53 | y1 = random.randint(0, np.maximum(0, new_h - cropsizeh)) 54 | x2 = x1 + cropsizew -1 55 | y2 = y1 + cropsizeh -1 56 | crop = True 57 | 58 | # if opt.batchSize > 1: 59 | # print cropsizew,cropsizeh 60 | if opt.colorjitter and test==False: 61 | colorjitter = True 62 | else: 63 | colorjitter = False 64 | return {'scale': (new_w, new_h), 65 | 'flip': flip, 66 | 'crop_pos': (x1, x2, y1, y2), 67 | 'crop': crop, 68 | 'colorjitter': colorjitter} 69 | 70 | def get_params_sunrgbd(opt, size, test=False, maxcrop=0.8, maxscale=1.75): 71 | h, w = size 72 | if opt.scale and test==False: 73 | scale = random.uniform(0.76, maxscale) 74 | new_h = h * scale 75 | new_w = (new_h * w // h) 76 | 77 | new_h = int(round(new_h / 8) * 8) 78 | new_w = int(round(new_w / 8) * 8) 79 | 80 | else: 81 | new_h = h 82 | new_w = w 83 | # new_h = int(round(h / 8) * 8) 84 | # new_w = int(round(w / 8) * 8) 85 | 86 | if opt.flip and test==False: 87 | flip = random.random() > 0.5 88 | else: 89 | flip = False 90 | 91 | crop = False 92 | x1 = x2 = y1 = y2 = 0 93 | if opt.crop and test==False: 94 | # if new_h > 241 and new_w > 321: #424 95 | if opt.batchSize > 1: 96 | cropsizeh = 321 97 | cropsizew = 421#(cropsizeh * new_w // new_h) 98 | else: 99 | cropscale = random.uniform(0.6,maxcrop) 100 | cropsizeh = int (new_h * cropscale) 101 | cropsizew = int (new_w * cropscale) 102 | # print cropsizeh,cropsizew,new_h,new_w 103 | x1 = random.randint(0, np.maximum(0, new_w - cropsizew)) 104 | y1 = random.randint(0, np.maximum(0, new_h - cropsizeh)) 105 | x2 = x1 + cropsizew -1 106 | y2 = y1 + cropsizeh -1 107 | crop = True 108 | 109 | # if opt.batchSize > 1: 110 | # print cropsizew,cropsizeh 111 | if opt.colorjitter and test==False: 112 | colorjitter = True 113 | else: 114 | colorjitter = False 115 | return {'scale': (new_w, new_h), 116 | 'flip': flip, 117 | 'crop_pos': (x1, x2, y1, y2), 118 | 'crop': crop, 119 | 'colorjitter': colorjitter} 120 | 121 | def transform(numpyarray, params, normalize=True, method='linear', istrain=True, colorjitter=False, option=0): 122 | # print params['crop'],params['colorjitter'],params['flip'] 123 | if method == 'linear': 124 | numpyarray = cv2.resize(numpyarray, (params['scale'][0], params['scale'][1]), interpolation=cv2.INTER_LINEAR) 125 | else: 126 | numpyarray = cv2.resize(numpyarray, (params['scale'][0], params['scale'][1]), interpolation=cv2.INTER_NEAREST) 127 | 128 | if istrain: 129 | if params['crop']: 130 | # print (numpyarray.shape,params['crop_pos']) 131 | numpyarray = numpyarray[params['crop_pos'][2]:params['crop_pos'][3], 132 | params['crop_pos'][0]:params['crop_pos'][1], 133 | ...] 134 | if params['flip']: 135 | numpyarray = numpyarray[:, 136 | ::-1, 137 | ...] 138 | 139 | if option==1: 140 | if colorjitter and params['colorjitter'] and random.random() > 0.1: 141 | # numpyarray += np.random.rand() * 30 - 15 142 | # numpyarray[numpyarray > 255] = 255 143 | # numpyarray[numpyarray < 0] = 0 144 | hsv = cv2.cvtColor(numpyarray, cv2.COLOR_BGR2HSV) 145 | hsv[:, :, 0] += np.random.rand() * 70 - 35 146 | hsv[:, :, 1] += np.random.rand() * 0.3 - 0.15 147 | hsv[:, :, 2] += np.random.rand() * 50 - 25 148 | hsv[:, :, 0] = np.clip(hsv[:, :, 0], 0, 360.) 149 | hsv[:, :, 1] = np.clip(hsv[:, :, 1], 0, 1.) 150 | hsv[:, :, 2] = np.clip(hsv[:, :, 2], 0, 255.) 151 | numpyarray = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) 152 | # print numpyarray.shape 153 | 154 | if option == 1: 155 | if not normalize: 156 | numpyarray = numpyarray - np.asarray([122.675,116.669,104.008]) 157 | numpyarray = numpyarray.transpose((2, 0, 1))[::-1,:,:].astype(np.float32) 158 | else: 159 | numpyarray = numpyarray.transpose((2, 0, 1)).astype(np.float32)/255. 160 | 161 | if option == 2: 162 | if not normalize: 163 | numpyarray = numpyarray - np.asarray([132.431, 94.076, 118.477]) 164 | numpyarray = numpyarray.transpose((2, 0, 1))[::-1,:,:].astype(np.float32) 165 | else: 166 | numpyarray = numpyarray.transpose((2, 0, 1)).astype(np.float32)/255. 167 | 168 | if len(numpyarray.shape) == 3: 169 | torchtensor = torch.from_numpy(numpyarray.copy()).float()#.div(255) 170 | else: 171 | torchtensor = torch.from_numpy(np.expand_dims(numpyarray,axis=0).copy()) 172 | 173 | if normalize: 174 | # torchtensor = torchtensor.div(255) 175 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 176 | std=[0.229, 0.224, 0.225]) 177 | torchtensor = normalize(torchtensor) 178 | 179 | return torchtensor 180 | 181 | 182 | -------------------------------------------------------------------------------- /data/custom_dataset_data_loader.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from .base_data_loader import BaseDataLoader 3 | 4 | 5 | def CreateDataset(opt): 6 | dataset = None 7 | if opt.dataset_mode == 'nyuv2': 8 | # from data.nyuv2_dataset import NYUDataset 9 | from data.nyuv2_dataset_crop import NYUDataset,NYUDataset_val 10 | dataset = NYUDataset() 11 | if opt.vallist!='': 12 | dataset_val = NYUDataset_val() 13 | else: 14 | dataset_val = None 15 | elif opt.dataset_mode == 'voc': 16 | from data.VOC_dataset import VOCDataset,VOCDataset_val 17 | dataset = VOCDataset() 18 | if opt.vallist!='': 19 | dataset_val = VOCDataset_val() 20 | else: 21 | dataset_val = None 22 | 23 | elif opt.dataset_mode == 'sunrgbd': 24 | from data.sunrgbd_dataset import SUNRGBDDataset,SUNRGBDDataset_val 25 | dataset = SUNRGBDDataset() 26 | if opt.vallist!='': 27 | dataset_val = SUNRGBDDataset_val() 28 | else: 29 | dataset_val = None 30 | 31 | elif opt.dataset_mode == 'stanfordindoor': 32 | from data.stanfordindoor_dataset import StanfordIndoorDataset, StanfordIndoorDataset_val 33 | dataset = StanfordIndoorDataset() 34 | if opt.vallist!='': 35 | dataset_val = StanfordIndoorDataset_val() 36 | else: 37 | dataset_val = None 38 | 39 | print("dataset [%s] was created" % (dataset.name())) 40 | dataset.initialize(opt) 41 | if dataset_val != None: 42 | dataset_val.initialize(opt) 43 | return dataset,dataset_val 44 | 45 | class CustomDatasetDataLoader(BaseDataLoader): 46 | def name(self): 47 | return 'CustomDatasetDataLoader' 48 | 49 | def initialize(self, opt): 50 | BaseDataLoader.initialize(self, opt) 51 | self.dataset, self.dataset_val = CreateDataset(opt) 52 | self.dataloader = torch.utils.data.DataLoader( 53 | self.dataset, 54 | batch_size=opt.batchSize, 55 | shuffle=not opt.serial_batches, 56 | num_workers=int(opt.nThreads)) 57 | if self.dataset_val != None: 58 | self.dataloader_val = torch.utils.data.DataLoader( 59 | self.dataset_val, 60 | batch_size=1, 61 | shuffle=False, 62 | num_workers=int(opt.nThreads)) 63 | else: 64 | self.dataloader_val = None 65 | 66 | 67 | def load_data(self): 68 | return self.dataloader, self.dataloader_val 69 | 70 | def __len__(self): 71 | return min(len(self.dataset), self.opt.max_dataset_size) 72 | -------------------------------------------------------------------------------- /data/data_loader.py: -------------------------------------------------------------------------------- 1 | 2 | def CreateDataLoader(opt): 3 | from data.custom_dataset_data_loader import CustomDatasetDataLoader 4 | data_loader = CustomDatasetDataLoader() 5 | print(data_loader.name()) 6 | data_loader.initialize(opt) 7 | return data_loader 8 | 9 | -------------------------------------------------------------------------------- /models/Deeplab.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | import torch 5 | from .base_model import BaseModel 6 | import numpy as np 7 | from . import losses 8 | import shutil 9 | from utils.util import * 10 | from torch.autograd import Variable 11 | from collections import OrderedDict 12 | from tensorboardX import SummaryWriter 13 | import os 14 | import VGG_Deeplab as VGG_Deeplab 15 | 16 | 17 | class Deeplab_VGG(nn.Module): 18 | def __init__(self, num_classes, depthconv=False): 19 | super(Deeplab_VGG,self).__init__() 20 | self.Scale = VGG_Deeplab.vgg16(num_classes=num_classes,depthconv=depthconv) 21 | 22 | def forward(self,x, depth=None): 23 | output = self.Scale(x,depth) # for original scale 24 | return output 25 | 26 | #------------------------------------------------------# 27 | 28 | class Deeplab_Solver(BaseModel): 29 | def __init__(self, opt, dataset=None, encoder='VGG'): 30 | BaseModel.initialize(self, opt) 31 | self.encoder = encoder 32 | if encoder == 'VGG': 33 | self.model = Deeplab_VGG(self.opt.label_nc, self.opt.depthconv) 34 | 35 | if self.opt.isTrain: 36 | self.criterionSeg = torch.nn.CrossEntropyLoss(ignore_index=255).cuda() 37 | # self.criterionSeg = torch.nn.CrossEntropyLoss(ignore_index=255).cuda() 38 | # self.criterionSeg = nn.NLLLoss2d(ignore_index=255)#.cuda() 39 | 40 | if encoder == 'VGG': 41 | self.optimizer = torch.optim.SGD([{'params': self.model.Scale.get_1x_lr_params_NOscale(), 'lr': self.opt.lr}, 42 | {'params': self.model.Scale.get_10x_lr_params(), 'lr': self.opt.lr}, 43 | {'params': self.model.Scale.get_2x_lr_params_NOscale(), 'lr': self.opt.lr, 'weight_decay': 0.}, 44 | {'params': self.model.Scale.get_20x_lr_params(), 'lr': self.opt.lr, 'weight_decay': 0.} 45 | ], 46 | lr=self.opt.lr, momentum=self.opt.momentum, weight_decay=self.opt.wd) 47 | 48 | # self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.opt.lr, momentum=self.opt.momentum, weight_decay=self.opt.wd) 49 | 50 | self.old_lr = self.opt.lr 51 | self.averageloss = [] 52 | # copy scripts 53 | self.model_path = './models' #os.path.dirname(os.path.realpath(__file__)) 54 | self.data_path = './data' #os.path.dirname(os.path.realpath(__file__)) 55 | shutil.copyfile(os.path.join(self.model_path, 'Deeplab.py'), os.path.join(self.model_dir, 'Deeplab.py')) 56 | 57 | if encoder == 'VGG': 58 | shutil.copyfile(os.path.join(self.model_path, 'VGG_Deeplab.py'), os.path.join(self.model_dir, 'VGG_Deeplab.py')) 59 | shutil.copyfile(os.path.join(self.model_path, 'model_utils.py'), os.path.join(self.model_dir, 'model_utils.py')) 60 | shutil.copyfile(os.path.join(self.data_path, dataset.datafile), os.path.join(self.model_dir, dataset.datafile)) 61 | shutil.copyfile(os.path.join(self.data_path, 'base_dataset.py'), os.path.join(self.model_dir, 'base_dataset.py')) 62 | 63 | self.writer = SummaryWriter(self.tensorborad_dir) 64 | self.counter = 0 65 | 66 | if not self.isTrain or self.opt.continue_train: 67 | if self.opt.pretrained_model!='': 68 | self.load_pretrained_network(self.model, self.opt.pretrained_model, self.opt.which_epoch, strict=False) 69 | print("Successfully loaded from pretrained model with given path!") 70 | else: 71 | self.load() 72 | print("Successfully loaded model, continue training....!") 73 | 74 | self.model.cuda() 75 | self.normweightgrad=0. 76 | # if len(opt.gpu_ids):#opt.isTrain and 77 | # self.model = torch.nn.DataParallel(self.model, device_ids=opt.gpu_ids) 78 | 79 | def forward(self, data, isTrain=True): 80 | self.model.zero_grad() 81 | 82 | self.image = Variable(data['image'], volatile=not isTrain).cuda() 83 | if 'depth' in data.keys(): 84 | self.depth = Variable(data['depth'], volatile=not isTrain).cuda() 85 | else: 86 | self.depth = None 87 | if data['seg'] is not None: 88 | self.seggt = Variable(data['seg'], volatile=not isTrain).cuda() 89 | else: 90 | self.seggt = None 91 | 92 | input_size = self.image.size() 93 | 94 | self.segpred = self.model(self.image,self.depth) 95 | self.segpred = nn.functional.upsample(self.segpred, size=(input_size[2], input_size[3]), mode='bilinear') 96 | # self.segpred = nn.functional.log_softmax(nn.functional.upsample(self.segpred, size=(input_size[2], input_size[3]), mode='bilinear')) 97 | 98 | if self.opt.isTrain: 99 | self.loss = self.criterionSeg(self.segpred, torch.squeeze(self.seggt,1).long()) 100 | self.averageloss += [self.loss.data[0]] 101 | 102 | segpred = self.segpred.max(1, keepdim=True)[1] 103 | return self.seggt, segpred 104 | 105 | 106 | def backward(self, step, total_step): 107 | self.loss.backward() 108 | self.optimizer.step() 109 | # print self.model.Scale.classifier.fc6_2.weight.grad.mean().data.cpu().numpy() 110 | # self.normweightgrad +=self.model.Scale.classifier.norm.scale.grad.mean().data.cpu().numpy() 111 | # print self.normweightgrad#self.model.Scale.classifier.norm.scale.grad.mean().data.cpu().numpy() 112 | if step % self.opt.iterSize == 0: 113 | self.update_learning_rate(step, total_step) 114 | trainingavgloss = np.mean(self.averageloss) 115 | if self.opt.verbose: 116 | print (' Iter: %d, Loss: %f' % (step, trainingavgloss) ) 117 | 118 | def get_visuals(self, step): 119 | ############## Display results and errors ############ 120 | if self.opt.isTrain: 121 | self.trainingavgloss = np.mean(self.averageloss) 122 | if self.opt.verbose: 123 | print (' Iter: %d, Loss: %f' % (step, self.trainingavgloss) ) 124 | self.writer.add_scalar(self.opt.name+'/trainingloss/', self.trainingavgloss, step) 125 | self.averageloss = [] 126 | 127 | if self.depth is not None: 128 | return OrderedDict([('image', tensor2im(self.image.data[0], inputmode=self.opt.inputmode)), 129 | ('depth', tensor2im(self.depth.data[0], inputmode='divstd-mean')), 130 | ('segpred', tensor2label(self.segpred.data[0], self.opt.label_nc)), 131 | ('seggt', tensor2label(self.seggt.data[0], self.opt.label_nc))]) 132 | else: 133 | return OrderedDict([('image', tensor2im(self.image.data[0], inputmode=self.opt.inputmode)), 134 | ('segpred', tensor2label(self.segpred.data[0], self.opt.label_nc)), 135 | ('seggt', tensor2label(self.seggt.data[0], self.opt.label_nc))]) 136 | 137 | def update_tensorboard(self, data, step): 138 | if self.opt.isTrain: 139 | self.writer.add_scalar(self.opt.name+'/Accuracy/', data[0], step) 140 | self.writer.add_scalar(self.opt.name+'/Accuracy_Class/', data[1], step) 141 | self.writer.add_scalar(self.opt.name+'/Mean_IoU/', data[2], step) 142 | self.writer.add_scalar(self.opt.name+'/FWAV_Accuracy/', data[3], step) 143 | 144 | self.trainingavgloss = np.mean(self.averageloss) 145 | self.writer.add_scalars(self.opt.name+'/loss', {"train": self.trainingavgloss, 146 | "val": np.mean(self.averageloss)}, step) 147 | 148 | self.writer.add_scalars('trainingavgloss/', {self.opt.name: self.trainingavgloss}, step) 149 | self.writer.add_scalars('valloss/', {self.opt.name: np.mean(self.averageloss)}, step) 150 | self.writer.add_scalars('val_MeanIoU/', {self.opt.name: data[2]}, step) 151 | 152 | file_name = os.path.join(self.save_dir, 'MIoU.txt') 153 | with open(file_name, 'wt') as opt_file: 154 | opt_file.write('%f\n' % (data[2])) 155 | # self.writer.add_scalars('losses/'+self.opt.name, {"train": self.trainingavgloss, 156 | # "val": np.mean(self.averageloss)}, step) 157 | self.averageloss = [] 158 | 159 | def save(self, which_epoch): 160 | # self.save_network(self.netG, 'G', which_epoch, self.gpu_ids) 161 | self.save_network(self.model, 'net', which_epoch, self.gpu_ids) 162 | 163 | def load(self): 164 | self.load_network(self.model, 'net',self.opt.which_epoch) 165 | 166 | def update_learning_rate(self, step, total_step): 167 | 168 | lr = max(self.opt.lr * ((1 - float(step) / total_step) ** (self.opt.lr_power)), 1e-6) 169 | 170 | # drop_ratio = (1. * float(total_step - step) / (total_step - step + 1)) ** self.opt.lr_power 171 | # lr = self.old_lr * drop_ratio 172 | 173 | self.writer.add_scalar(self.opt.name+'/Learning_Rate/', lr, step) 174 | 175 | self.optimizer.param_groups[0]['lr'] = lr 176 | self.optimizer.param_groups[1]['lr'] = lr 177 | self.optimizer.param_groups[2]['lr'] = lr 178 | self.optimizer.param_groups[3]['lr'] = lr 179 | # self.optimizer.param_groups[0]['lr'] = lr 180 | # self.optimizer.param_groups[1]['lr'] = lr*10 181 | # self.optimizer.param_groups[2]['lr'] = lr*2 #* 100 182 | # self.optimizer.param_groups[3]['lr'] = lr*20 183 | # self.optimizer.param_groups[4]['lr'] = lr*100 184 | 185 | 186 | # torch.nn.utils.clip_grad_norm(self.model.Scale.get_1x_lr_params_NOscale(), 1.) 187 | # torch.nn.utils.clip_grad_norm(self.model.Scale.get_10x_lr_params(), 1.) 188 | 189 | if self.opt.verbose: 190 | print(' update learning rate: %f -> %f' % (self.old_lr, lr)) 191 | self.old_lr = lr 192 | 193 | 194 | -------------------------------------------------------------------------------- /models/Resnet_Deeplab.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | import torch 5 | from .base_model import BaseModel, load_pretrained_model 6 | import numpy as np 7 | import shutil 8 | from utils import util 9 | from collections import OrderedDict 10 | from tensorboardX import SummaryWriter 11 | import os 12 | from torch.autograd import Variable 13 | from .ops.depthconv.modules import DepthConv 14 | from .ops.depthavgpooling.modules import Depthavgpooling 15 | 16 | 17 | affine_par = True 18 | 19 | 20 | def outS(i): 21 | i = int(i) 22 | i = (i+1)/2 23 | i = int(np.ceil((i+1)/2.0)) 24 | i = (i+1)/2 25 | return i 26 | 27 | def conv3x3(in_planes, out_planes, stride=1): 28 | "3x3 convolution with padding" 29 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 30 | padding=1, bias=False) 31 | 32 | 33 | class BasicBlock(nn.Module): 34 | expansion = 1 35 | 36 | def __init__(self, inplanes, planes, stride=1, downsample=None): 37 | super(BasicBlock, self).__init__() 38 | self.conv1 = conv3x3(inplanes, planes, stride) 39 | self.bn1 = nn.BatchNorm2d(planes, affine = affine_par) 40 | self.relu = nn.ReLU(inplace=True) 41 | self.conv2 = conv3x3(planes, planes) 42 | self.bn2 = nn.BatchNorm2d(planes, affine = affine_par) 43 | self.downsample = downsample 44 | self.stride = stride 45 | 46 | def forward(self, x): 47 | residual = x 48 | 49 | out = self.conv1(x) 50 | out = self.bn1(out) 51 | out = self.relu(out) 52 | 53 | out = self.conv2(out) 54 | out = self.bn2(out) 55 | 56 | if self.downsample is not None: 57 | residual = self.downsample(x) 58 | 59 | out += residual 60 | out = self.relu(out) 61 | 62 | return out 63 | 64 | 65 | class Bottleneck(nn.Module): 66 | expansion = 4 67 | 68 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None): 69 | super(Bottleneck, self).__init__() 70 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change 71 | self.bn1 = nn.BatchNorm2d(planes,affine = affine_par) 72 | # for i in self.bn1.parameters(): 73 | # i.requires_grad = False 74 | 75 | padding = dilation 76 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change 77 | padding=padding, bias=False, dilation = dilation) 78 | self.bn2 = nn.BatchNorm2d(planes,affine = affine_par) 79 | # for i in self.bn2.parameters(): 80 | # i.requires_grad = False 81 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 82 | self.bn3 = nn.BatchNorm2d(planes * 4, affine = affine_par) 83 | # for i in self.bn3.parameters(): 84 | # i.requires_grad = False 85 | self.relu = nn.ReLU(inplace=True) 86 | self.downsample = downsample 87 | self.stride = stride 88 | 89 | 90 | def forward(self, x): 91 | residual = x 92 | 93 | out = self.conv1(x) 94 | out = self.bn1(out) 95 | out = self.relu(out) 96 | 97 | out = self.conv2(out) 98 | out = self.bn2(out) 99 | out = self.relu(out) 100 | 101 | out = self.conv3(out) 102 | out = self.bn3(out) 103 | 104 | if self.downsample is not None: 105 | residual = self.downsample(x) 106 | 107 | out += residual 108 | out = self.relu(out) 109 | 110 | return out 111 | 112 | 113 | class DepthConvBottleneck(nn.Module): 114 | expansion = 4 115 | 116 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1): 117 | super(DepthConvBottleneck, self).__init__() 118 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 119 | self.bn1 = nn.BatchNorm2d(planes) 120 | # for i in self.bn1.parameters(): 121 | # i.requires_grad = False 122 | 123 | padding = dilation 124 | self.conv2 = DepthConv(planes,planes,kernel_size=3,stride=stride,padding=padding, dilation = dilation, bias=False) 125 | # nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 126 | # padding=1, bias=False) 127 | self.bn2 = nn.BatchNorm2d(planes) 128 | # for i in self.bn2.parameters(): 129 | # i.requires_grad = False 130 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 131 | self.bn3 = nn.BatchNorm2d(planes * 4) 132 | # for i in self.bn3.parameters(): 133 | # i.requires_grad = False 134 | self.relu = nn.ReLU(inplace=True) 135 | self.downsample = downsample 136 | self.stride = stride 137 | 138 | def forward(self, x_depth): 139 | x, depth = x_depth 140 | residual = x 141 | 142 | out = self.conv1(x) 143 | out = self.bn1(out) 144 | out = self.relu(out) 145 | 146 | # print('d',out.size(), depth.size()) 147 | out = self.conv2(out,depth) 148 | out = self.bn2(out) 149 | out = self.relu(out) 150 | 151 | out = self.conv3(out) 152 | out = self.bn3(out) 153 | 154 | if self.downsample is not None: 155 | residual = self.downsample(x) 156 | 157 | out += residual 158 | out = self.relu(out) 159 | 160 | return out 161 | 162 | class Classifier_Module(nn.Module): 163 | 164 | def __init__(self, dilation_series, padding_series, num_classes, inplanes, depthconv=False): 165 | super(Classifier_Module, self).__init__() 166 | self.conv2d_list = nn.ModuleList() 167 | for dilation, padding in zip(dilation_series, padding_series): 168 | if depthconv: 169 | conv = DepthConv(inplanes, num_classes, kernel_size=3, stride=1, padding=padding, dilation=dilation, bias= True) 170 | else: 171 | conv = nn.Conv2d(inplanes, num_classes, kernel_size=3, stride=1, padding=padding, dilation=dilation, bias = True) 172 | self.conv2d_list.append(conv) 173 | 174 | for m in self.conv2d_list: 175 | m.weight.data.normal_(0, 0.01) 176 | 177 | def forward(self, x): 178 | out = self.conv2d_list[0](x) 179 | for i in range(len(self.conv2d_list)-1): 180 | out += self.conv2d_list[i+1](x) 181 | return out 182 | 183 | class Residual_Covolution(nn.Module): 184 | def __init__(self, icol, ocol, num_classes): 185 | super(Residual_Covolution, self).__init__() 186 | self.conv1 = nn.Conv2d(icol, ocol, kernel_size=3, stride=1, padding=12, dilation=12, bias=True) 187 | self.conv2 = nn.Conv2d(ocol, num_classes, kernel_size=3, stride=1, padding=12, dilation=12, bias=True) 188 | self.conv3 = nn.Conv2d(num_classes, ocol, kernel_size=1, stride=1, padding=0, dilation=1, bias=True) 189 | self.conv4 = nn.Conv2d(ocol, icol, kernel_size=1, stride=1, padding=0, dilation=1, bias=True) 190 | self.relu = nn.ReLU(inplace=True) 191 | 192 | def forward(self, x): 193 | dow1 = self.conv1(x) 194 | dow1 = self.relu(dow1) 195 | seg = self.conv2(dow1) 196 | inc1 = self.conv3(seg) 197 | add1 = dow1 + self.relu(inc1) 198 | inc2 = self.conv4(add1) 199 | out = x + self.relu(inc2) 200 | return out, seg 201 | 202 | class Residual_Refinement_Module(nn.Module): 203 | 204 | def __init__(self, num_classes): 205 | super(Residual_Refinement_Module, self).__init__() 206 | self.RC1 = Residual_Covolution(2048, 512, num_classes) 207 | self.RC2 = Residual_Covolution(2048, 512, num_classes) 208 | 209 | def forward(self, x): 210 | x, seg1 = self.RC1(x) 211 | _, seg2 = self.RC2(x) 212 | return [seg1, seg1+seg2] 213 | 214 | class ResNet_Refine(nn.Module): 215 | def __init__(self, block, layers, num_classes, depthconv=False): 216 | self.inplanes = 64 217 | super(ResNet_Refine, self).__init__() 218 | self.depthconv = depthconv 219 | if depthconv: 220 | self.conv1 = DepthConv(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 221 | else: 222 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 223 | 224 | # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 225 | self.bn1 = nn.BatchNorm2d(64, affine = affine_par) 226 | # for i in self.bn1.parameters(): 227 | # i.requires_grad = False 228 | self.relu = nn.ReLU(inplace=True) 229 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) # change 230 | # self.layer1 = self._make_layer(block, 64, layers[0]) 231 | if depthconv: 232 | self.downsample_depth1 = nn.AvgPool2d(5,padding=1,stride=4) 233 | self.layer1 = self._make_layer_depthconv(64, layers[0]) 234 | else: 235 | self.layer1 = self._make_layer(block, 64, layers[0]) 236 | 237 | if depthconv: 238 | self.downsample_depth2 = nn.AvgPool2d(3,padding=1,stride=2) 239 | self.layer2 = self._make_layer_depthconv(128, layers[1], stride=2) 240 | else: 241 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 242 | 243 | if depthconv: 244 | # self.downsample_depth3 = nn.AvgPool2d(3,padding=1,stride=2) 245 | self.layer3 = self._make_layer_depthconv(256, layers[2], stride=1, dilation=2) 246 | else: 247 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2) 248 | 249 | if depthconv: 250 | # self.downsample_depth4 = nn.AvgPool2d(3,padding=1,stride=2) 251 | self.layer4 = self._make_layer_depthconv(512, layers[3], stride=1, dilation=4) 252 | else: 253 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4) 254 | 255 | self.dropout = nn.Dropout() 256 | self.layer5 = Residual_Refinement_Module(num_classes) 257 | 258 | for m in self.modules(): 259 | if isinstance(m, nn.Conv2d): 260 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 261 | m.weight.data.normal_(0, 0.01) 262 | elif isinstance(m, nn.BatchNorm2d): 263 | m.weight.data.fill_(1) 264 | m.bias.data.zero_() 265 | # for i in m.parameters(): 266 | # i.requires_grad = False 267 | 268 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 269 | downsample = None 270 | if stride != 1 or self.inplanes != planes * block.expansion or dilation == 2 or dilation == 4: 271 | downsample = nn.Sequential( 272 | nn.Conv2d(self.inplanes, planes * block.expansion, 273 | kernel_size=1, stride=stride, bias=False), 274 | nn.BatchNorm2d(planes * block.expansion,affine = affine_par)) 275 | # for i in downsample._modules['1'].parameters(): 276 | # i.requires_grad = False 277 | layers = [] 278 | layers.append(block(self.inplanes, planes, stride,dilation=dilation, downsample=downsample)) 279 | self.inplanes = planes * block.expansion 280 | for i in range(1, blocks): 281 | layers.append(block(self.inplanes, planes, dilation=dilation)) 282 | 283 | return nn.Sequential(*layers) 284 | 285 | def _make_layer_depthconv(self, planes, blocks, stride=1, dilation = 1): 286 | downsample = None 287 | block = DepthConvBottleneck 288 | if stride != 1 or self.inplanes != planes * block.expansion: 289 | downsample = nn.Sequential( 290 | nn.Conv2d(self.inplanes, planes * block.expansion, 291 | kernel_size=1, stride=stride, bias=False), 292 | nn.BatchNorm2d(planes * block.expansion), 293 | ) 294 | layers = [] 295 | layers.append(block(self.inplanes, planes, stride, downsample,dilation=dilation)) 296 | self.inplanes = planes * block.expansion 297 | for i in range(1, blocks): 298 | layers.append(block(self.inplanes, planes)) 299 | 300 | return nn.Sequential(*layers) 301 | 302 | def forward(self, x, depth=None): 303 | if self.depthconv: 304 | x = self.conv1(x,depth) 305 | else: 306 | x = self.conv1(x) 307 | 308 | x = self.bn1(x) 309 | x = self.relu(x) 310 | x = self.maxpool(x) 311 | 312 | if self.depthconv: 313 | depth = self.downsample_depth1(depth) 314 | x,depth = self.layer1((x,depth)) 315 | else: 316 | x = self.layer1(x) 317 | 318 | if self.depthconv: 319 | depth = self.downsample_depth2(depth) 320 | x,depth = self.layer2((x,depth)) 321 | else: 322 | x = self.layer2(x) 323 | if self.depthconv: 324 | depth = self.downsample_depth3(depth) 325 | x,_ = self.layer3((x,depth)) 326 | else: 327 | x = self.layer3(x) 328 | if self.depthconv: 329 | x,_ = self.layer4((x,depth)) 330 | else: 331 | x = self.layer4(x) 332 | x = self.dropout(x) 333 | x = self.layer5(x) 334 | 335 | return x 336 | 337 | class ResNet(nn.Module): 338 | def __init__(self, block, layers, num_classes, depthconv=False, globalpooling=False, pretrain=False): 339 | self.inplanes = 64 340 | super(ResNet, self).__init__() 341 | self.depthconv = depthconv 342 | if depthconv: 343 | self.conv1 = DepthConv(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 344 | else: 345 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 346 | 347 | self.bn1 = nn.BatchNorm2d(64, affine = affine_par) 348 | # for i in self.bn1.parameters(): 349 | # i.requires_grad = False 350 | self.relu = nn.ReLU(inplace=True) 351 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) # change 352 | 353 | if depthconv: 354 | self.layer1 = self._make_layer_depthconv(block, 64, layers[0]) 355 | self.downsample_depth1 = nn.AvgPool2d(5,padding=1,stride=4) 356 | else: 357 | self.layer1 = self._make_layer(block, 64, layers[0]) 358 | 359 | if depthconv: 360 | self.layer2 = self._make_layer_depthconv(block, 128, layers[1], stride=2) 361 | self.downsample_depth2 = nn.AvgPool2d(3,padding=1,stride=2) 362 | else: 363 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 364 | 365 | 366 | if depthconv: 367 | self.layer3 = self._make_layer_depthconv(block, 256, layers[2], stride=1, dilation=2) 368 | self.downsample_depth3 = nn.AvgPool2d(3,padding=1,stride=2) 369 | else: 370 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2) 371 | 372 | if depthconv: 373 | # self.downsample_depth4 = nn.AvgPool2d(3,padding=1,stride=2) 374 | self.layer4 = self._make_layer_depthconv(block, 512, layers[3], stride=1, dilation=4) 375 | else: 376 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4) 377 | # self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4) 378 | self.globalpooling = globalpooling 379 | if globalpooling: 380 | self.globalpooling = nn.AdaptiveMaxPool2d((1,1)) 381 | self.inplanes *= 2 382 | self.dropout = nn.Dropout() 383 | self.layer5 = self._make_pred_layer(Classifier_Module, [12],[12],num_classes,self.inplanes) 384 | # self.layer5 = self._make_pred_layer(Classifier_Module, [6,12,18,24],[6,12,18,24],num_classes,self.inplanes) 385 | 386 | self.pool5a = nn.AvgPool2d(kernel_size=3, stride=1,padding=1) 387 | self.pool5a_d = Depthavgpooling(kernel_size=3, stride=1,padding=1) 388 | 389 | for m in self.modules(): 390 | if isinstance(m, nn.Conv2d): 391 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 392 | # m.weight.data.normal_(0, math.sqrt(2. / n))#(0, 0.01)# 393 | torch.nn.init.xavier_uniform(m.weight) 394 | elif isinstance(m, nn.BatchNorm2d): 395 | if affine_par: 396 | m.weight.data.fill_(1) 397 | m.bias.data.zero_() 398 | for i in m.parameters(): 399 | i.requires_grad = False 400 | if pretrain: 401 | load_pretrained_model(self, 402 | model_zoo.load_url('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth'), 403 | False) 404 | 405 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 406 | downsample = None 407 | if stride != 1 or self.inplanes != planes * block.expansion or dilation == 2 or dilation == 4: 408 | downsample = nn.Sequential( 409 | nn.Conv2d(self.inplanes, planes * block.expansion, 410 | kernel_size=1, stride=stride, bias=False), 411 | nn.BatchNorm2d(planes * block.expansion,affine = affine_par)) 412 | # for i in downsample._modules['1'].parameters(): 413 | # i.requires_grad = False 414 | layers = [] 415 | layers.append(block(self.inplanes, planes, stride,dilation=dilation, downsample=downsample)) 416 | self.inplanes = planes * block.expansion 417 | for i in range(1, blocks): 418 | layers.append(block(self.inplanes, planes, dilation=dilation)) 419 | 420 | return nn.Sequential(*layers) 421 | 422 | def _make_layer_depthconv(self, block, planes, blocks, stride=1, dilation=1): 423 | downsample = None 424 | # block = DepthConvBottleneck 425 | if stride != 1 or self.inplanes != planes * block.expansion: 426 | downsample = nn.Sequential( 427 | nn.Conv2d(self.inplanes, planes * block.expansion, 428 | kernel_size=1, stride=stride, bias=False), 429 | nn.BatchNorm2d(planes * block.expansion), 430 | ) 431 | layers = [] 432 | layers.append(DepthConvBottleneck(self.inplanes, planes, stride, downsample,dilation=dilation)) 433 | self.inplanes = planes * block.expansion 434 | for i in range(1, blocks): 435 | layers.append(block(self.inplanes, planes)) 436 | 437 | return nn.Sequential(*layers) 438 | 439 | def _make_pred_layer(self,block, dilation_series, padding_series,num_classes,inplanes): 440 | return block(dilation_series,padding_series,num_classes,inplanes) 441 | 442 | def forward(self, x, depth=None): 443 | # print self.layer3._modules.values()[13].bn2.running_mean 444 | if self.depthconv: 445 | x = self.conv1(x,depth) 446 | else: 447 | x = self.conv1(x) 448 | x = self.bn1(x) 449 | x = self.relu(x) 450 | x = self.maxpool(x) 451 | 452 | if self.depthconv: 453 | depth = self.downsample_depth1(depth) 454 | x = self.layer1((x,depth)) 455 | else: 456 | x = self.layer1(x) 457 | 458 | if self.depthconv: 459 | # print ('o',x.size(), depth.size()) 460 | # depth = self.downsample_depth2(depth) 461 | x = self.layer2((x,depth)) 462 | else: 463 | x = self.layer2(x) 464 | 465 | if self.depthconv: 466 | depth = self.downsample_depth3(depth) 467 | x = self.layer3((x,depth)) 468 | else: 469 | x = self.layer3(x) 470 | 471 | if self.depthconv: 472 | x = self.layer4((x,depth)) 473 | else: 474 | x = self.layer4(x) 475 | 476 | if self.globalpooling: 477 | x_size = x.size() 478 | globalpool = self.globalpooling(x).repeat(1,1,x_size[2],x_size[3]) 479 | x = torch.cat([x,globalpool], 1) 480 | x = self.dropout(x) 481 | x = self.layer5(x) 482 | if self.depthconv: 483 | x = self.pool5a_d(x,depth) 484 | else: 485 | x = self.pool5a(x) 486 | 487 | 488 | return x 489 | 490 | def get_1x_lr_params_NOscale(self): 491 | """ 492 | This generator returns all the parameters of the net except for 493 | the last classification layer. Note that for each batchnorm layer, 494 | requires_grad is set to False in deeplab_resnet.py, therefore this function does not return 495 | any batchnorm parameter 496 | """ 497 | b = [] 498 | 499 | b.append(self.conv1) 500 | b.append(self.bn1) 501 | b.append(self.layer1) 502 | b.append(self.layer2) 503 | b.append(self.layer3) 504 | b.append(self.layer4) 505 | 506 | for i in range(len(b)): 507 | for j in b[i].modules(): 508 | if isinstance(j, nn.Conv2d): 509 | for k in j.parameters(): 510 | if k.requires_grad: 511 | yield k 512 | 513 | def get_bn_params(self): 514 | """ 515 | This generator returns all the parameters of the net except for 516 | the last classification layer. Note that for each batchnorm layer, 517 | requires_grad is set to False in deeplab_resnet.py, therefore this function does not return 518 | any batchnorm parameter 519 | """ 520 | b = [] 521 | 522 | b.append(self.conv1) 523 | b.append(self.bn1) 524 | b.append(self.layer1) 525 | b.append(self.layer2) 526 | b.append(self.layer3) 527 | b.append(self.layer4) 528 | b.append(self.layer5) 529 | 530 | for i in range(len(b)): 531 | for j in b[i].modules(): 532 | if isinstance(j, nn.BatchNorm2d): 533 | for n, k in j.named_parameters(): 534 | # print n 535 | if k.requires_grad: 536 | yield k 537 | 538 | def get_10x_lr_params(self): 539 | """ 540 | This generator returns all the parameters for the last layer of the net, 541 | which does the classification of pixel into classes 542 | """ 543 | b = [] 544 | b.append(self.layer5) 545 | 546 | # for j in range(len(b)): 547 | # for i in b[j]: 548 | # yield i 549 | 550 | for i in range(len(b)): 551 | for j in b[i].modules(): 552 | if isinstance(j, nn.Conv2d): 553 | if j.weight is not None: 554 | if j.weight.requires_grad: 555 | yield j.weight 556 | # for k in j.parameters(): 557 | # if k.requires_grad: 558 | # yield k 559 | 560 | def get_20x_lr_params(self): 561 | """ 562 | This generator returns all the parameters for the last layer of the net, 563 | which does the classification of pixel into classes 564 | """ 565 | b = [] 566 | b.append(self.layer5) 567 | 568 | # for j in range(len(b)): 569 | # for i in b[j]: 570 | # yield i 571 | 572 | for i in range(len(b)): 573 | for j in b[i].modules(): 574 | if isinstance(j, nn.Conv2d): 575 | if j.bias is not None: 576 | if j.bias.requires_grad: 577 | yield j.bias -------------------------------------------------------------------------------- /models/VGG_Deeplab.py: -------------------------------------------------------------------------------- 1 | from model_utils import * 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | import math 5 | from .ops.depthconv.modules import DepthConv 6 | from .ops.depthavgpooling.modules import Depthavgpooling 7 | import torch 8 | import torchvision 9 | 10 | __all__ = [ 11 | 'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 12 | 'vgg19_bn', 'vgg19', 13 | ] 14 | 15 | 16 | model_urls = { 17 | 'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth', 18 | 'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth', 19 | 'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth', 20 | 'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth', 21 | 'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth', 22 | 'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth', 23 | 'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth', 24 | 'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth', 25 | } 26 | 27 | 28 | cfg = { 29 | # name:c1_1 c1_2 c2_1 c2_2 c3_1 c3_2 c3_3 c4_1 c4_2 c4_3 c5_1 c5_2 c5_3 30 | # dilation: 2 2 2 31 | 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 32 | } 33 | 34 | depth_cfg = { 35 | 'D': [0,3,6,10,14], 36 | } 37 | 38 | 39 | class ConvModule(nn.Module): 40 | 41 | def __init__(self, inplanes, planes, kernel_size=3, stride=1, padding=1, dilation=1, 42 | bn=False, 43 | maxpool=False, pool_kernel=3, pool_stride=2, pool_pad=1): 44 | super(ConvModule, self).__init__() 45 | conv2d = nn.Conv2d(inplanes,planes,kernel_size=kernel_size,stride=stride,padding=padding,dilation=dilation) 46 | layers = [] 47 | if bn: 48 | layers += [nn.BatchNorm2d(planes), nn.ReLU(inplace=True)] 49 | else: 50 | layers += [nn.ReLU(inplace=True)] 51 | if maxpool: 52 | layers += [nn.MaxPool2d(kernel_size=pool_kernel, stride=pool_stride,padding=pool_pad)] 53 | 54 | self.layers = nn.Sequential(*([conv2d]+layers)) 55 | def forward(self, x): 56 | # x = self.conv2d(x) 57 | x = self.layers(x) 58 | return x 59 | 60 | class DepthConvModule(nn.Module): 61 | 62 | def __init__(self, inplanes, planes, kernel_size=3, stride=1, padding=1, dilation=1,bn=False): 63 | super(DepthConvModule, self).__init__() 64 | 65 | conv2d = DepthConv(inplanes,planes,kernel_size=kernel_size,stride=stride,padding=padding,dilation=dilation) 66 | layers = [] 67 | if bn: 68 | layers += [nn.BatchNorm2d(planes), nn.ReLU(inplace=True)] 69 | else: 70 | layers += [nn.ReLU(inplace=True)] 71 | self.layers = nn.Sequential(*([conv2d]+layers))#(*layers) 72 | 73 | def forward(self, x, depth): 74 | 75 | for im,module in enumerate(self.layers._modules.values()): 76 | if im==0: 77 | x = module(x,depth) 78 | else: 79 | x = module(x) 80 | # x = self.conv2d(x, depth) 81 | # x = self.layers(x) 82 | return x 83 | 84 | 85 | class VGG_layer2(nn.Module): 86 | 87 | def __init__(self, batch_norm=False, depthconv=False): 88 | super(VGG_layer2, self).__init__() 89 | in_channels = 3 90 | self.depthconv = depthconv 91 | # if self.depthconv: 92 | # self.conv1_1_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 93 | # self.conv1_1 = DepthConvModule(3, 64, bn=batch_norm) 94 | # else: 95 | self.conv1_1 = ConvModule(3, 64, bn=batch_norm) 96 | self.conv1_2 = ConvModule(64, 64, bn=batch_norm, maxpool=True) 97 | 98 | # if self.depthconv: 99 | # self.conv2_1_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 100 | self.downsample_depth2_1 = nn.AvgPool2d(3,padding=1,stride=2) 101 | # self.conv2_1 = DepthConvModule(64, 128, bn=batch_norm) 102 | # else: 103 | self.conv2_1 = ConvModule(64, 128, bn=batch_norm) 104 | self.conv2_2 = ConvModule(128, 128, bn=batch_norm, maxpool=True) 105 | 106 | # if self.depthconv: 107 | # self.conv3_1_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 108 | self.downsample_depth3_1 = nn.AvgPool2d(3,padding=1,stride=2) 109 | # self.conv3_1 = DepthConvModule(128, 256, bn=batch_norm) 110 | # else: 111 | self.conv3_1 = ConvModule(128, 256, bn=batch_norm) 112 | self.conv3_2 = ConvModule(256, 256, bn=batch_norm) 113 | self.conv3_3 = ConvModule(256, 256, bn=batch_norm, maxpool=True) 114 | 115 | if self.depthconv: 116 | self.conv4_1_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 117 | self.downsample_depth4_1 = nn.AvgPool2d(3,padding=1,stride=2) 118 | self.conv4_1 = DepthConvModule(256, 512, bn=batch_norm) 119 | else: 120 | self.conv4_1 = ConvModule(256, 512, bn=batch_norm) 121 | self.conv4_2 = ConvModule(512, 512, bn=batch_norm) 122 | self.conv4_3 = ConvModule(512, 512, bn=batch_norm, 123 | maxpool=True, pool_kernel=3, pool_stride=1, pool_pad=1) 124 | 125 | if self.depthconv: 126 | self.conv5_1_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 127 | self.conv5_1 = DepthConvModule(512, 512, bn=batch_norm,dilation=2,padding=2) 128 | else: 129 | self.conv5_1 = ConvModule(512, 512, bn=batch_norm, dilation=2, padding=2) 130 | self.conv5_2 = ConvModule(512, 512, bn=batch_norm, dilation=2, padding=2) 131 | self.conv5_3 = ConvModule(512, 512, bn=batch_norm, dilation=2, padding=2, 132 | maxpool=True, pool_kernel=3, pool_stride=1, pool_pad=1) 133 | self.pool5a = nn.AvgPool2d(kernel_size=3, stride=1,padding=1) 134 | # self.pool5a = nn.AvgPool2d(kernel_size=3, stride=1,padding=1) 135 | 136 | def forward(self, x, depth=None): 137 | # print x.size() 138 | # if self.depthconv: 139 | # # print self.conv1_1_depthconvweight 140 | # x = self.conv1_1(x,self.conv1_1_depthconvweight * depth) 141 | # else: 142 | x = self.conv1_1(x) 143 | x = self.conv1_2(x) 144 | # if self.depthconv: 145 | depth = self.downsample_depth2_1(depth) 146 | # x = self.conv2_1(x, self.conv2_1_depthconvweight * depth) 147 | # else: 148 | x = self.conv2_1(x) 149 | x = self.conv2_2(x) 150 | # if self.depthconv: 151 | depth = self.downsample_depth3_1(depth) 152 | # x = self.conv3_1(x, self.conv3_1_depthconvweight * depth) 153 | # else: 154 | x = self.conv3_1(x) 155 | x = self.conv3_2(x) 156 | x = self.conv3_3(x) 157 | if self.depthconv: 158 | depth = self.downsample_depth4_1(depth) 159 | x = self.conv4_1(x, self.conv4_1_depthconvweight * depth) 160 | else: 161 | x = self.conv4_1(x) 162 | x = self.conv4_2(x) 163 | x = self.conv4_3(x) 164 | if self.depthconv: 165 | x = self.conv5_1(x, self.conv5_1_depthconvweight * depth) 166 | else: 167 | x = self.conv5_1(x) 168 | x = self.conv5_2(x) 169 | x = self.conv5_3(x) 170 | x = self.pool5a(x) 171 | return x,depth 172 | 173 | class VGG_layer(nn.Module): 174 | 175 | def __init__(self, batch_norm=False, depthconv=False): 176 | super(VGG_layer, self).__init__() 177 | in_channels = 3 178 | self.depthconv = depthconv 179 | if self.depthconv: 180 | self.conv1_1_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 181 | self.conv1_1 = DepthConvModule(3, 64, bn=batch_norm) 182 | else: 183 | self.conv1_1 = ConvModule(3, 64, bn=batch_norm) 184 | self.conv1_2 = ConvModule(64, 64, bn=batch_norm, maxpool=True) 185 | 186 | if self.depthconv: 187 | self.conv2_1_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 188 | self.downsample_depth2_1 = nn.AvgPool2d(3,padding=1,stride=2) 189 | self.conv2_1 = DepthConvModule(64, 128, bn=batch_norm) 190 | else: 191 | self.conv2_1 = ConvModule(64, 128, bn=batch_norm) 192 | self.conv2_2 = ConvModule(128, 128, bn=batch_norm, maxpool=True) 193 | 194 | if self.depthconv: 195 | self.conv3_1_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 196 | self.downsample_depth3_1 = nn.AvgPool2d(3,padding=1,stride=2) 197 | self.conv3_1 = DepthConvModule(128, 256, bn=batch_norm) 198 | else: 199 | self.conv3_1 = ConvModule(128, 256, bn=batch_norm) 200 | self.conv3_2 = ConvModule(256, 256, bn=batch_norm) 201 | self.conv3_3 = ConvModule(256, 256, bn=batch_norm, maxpool=True) 202 | 203 | if self.depthconv: 204 | self.conv4_1_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 205 | self.downsample_depth4_1 = nn.AvgPool2d(3,padding=1,stride=2) 206 | self.conv4_1 = DepthConvModule(256, 512, bn=batch_norm) 207 | else: 208 | self.conv4_1 = ConvModule(256, 512, bn=batch_norm) 209 | self.conv4_2 = ConvModule(512, 512, bn=batch_norm) 210 | self.conv4_3 = ConvModule(512, 512, bn=batch_norm, 211 | maxpool=True, pool_kernel=3, pool_stride=1, pool_pad=1) 212 | 213 | if self.depthconv: 214 | self.conv5_1_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 215 | self.conv5_1 = DepthConvModule(512, 512, bn=batch_norm,dilation=2,padding=2) 216 | else: 217 | self.conv5_1 = ConvModule(512, 512, bn=batch_norm, dilation=2, padding=2) 218 | self.conv5_2 = ConvModule(512, 512, bn=batch_norm, dilation=2, padding=2) 219 | self.conv5_3 = ConvModule(512, 512, bn=batch_norm, dilation=2, padding=2, 220 | maxpool=True, pool_kernel=3, pool_stride=1, pool_pad=1) 221 | self.pool5a = nn.AvgPool2d(kernel_size=3, stride=1,padding=1) 222 | self.pool5a_d = Depthavgpooling(kernel_size=3, stride=1,padding=1) 223 | 224 | def forward(self, x, depth=None): 225 | # print x.size() 226 | if self.depthconv: 227 | # print self.conv1_1_depthconvweight 228 | x = self.conv1_1(x,self.conv1_1_depthconvweight * depth) 229 | else: 230 | x = self.conv1_1(x) 231 | x = self.conv1_2(x) 232 | if self.depthconv: 233 | depth = self.downsample_depth2_1(depth) 234 | x = self.conv2_1(x, self.conv2_1_depthconvweight * depth) 235 | else: 236 | x = self.conv2_1(x) 237 | # print 'xxxxxx',x.size() 238 | x = self.conv2_2(x) 239 | if self.depthconv: 240 | depth = self.downsample_depth3_1(depth) 241 | x = self.conv3_1(x, self.conv3_1_depthconvweight * depth) 242 | else: 243 | x = self.conv3_1(x) 244 | x = self.conv3_2(x) 245 | x = self.conv3_3(x) 246 | if self.depthconv: 247 | depth = self.downsample_depth4_1(depth) 248 | # print (depth.mean(),depth.max(),depth.min()) 249 | # torchvision.utils.save_image(depth.data, 'depth.png') 250 | x = self.conv4_1(x, self.conv4_1_depthconvweight * depth) 251 | else: 252 | x = self.conv4_1(x) 253 | x = self.conv4_2(x) 254 | x = self.conv4_3(x) 255 | if self.depthconv: 256 | x = self.conv5_1(x, self.conv5_1_depthconvweight * depth) 257 | else: 258 | x = self.conv5_1(x) 259 | x = self.conv5_2(x) 260 | x = self.conv5_3(x) 261 | # x = self.pool5a(x,depth) 262 | if self.depthconv: 263 | x = self.pool5a_d(x,depth) 264 | else: 265 | x = self.pool5a(x) 266 | 267 | return x, depth 268 | 269 | def make_layers(cfg, depth_cfg=[], batch_norm=False, depthconv=False): 270 | layers = [] 271 | in_channels = 3 272 | for iv, v in enumerate(cfg): 273 | if v == 'M': 274 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 275 | else: 276 | if depthconv and iv in depth_cfg: 277 | conv2d = DepthConv(in_channels, v, kernel_size=3, padding=1) 278 | else: 279 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 280 | if batch_norm: 281 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 282 | else: 283 | layers += [conv2d, nn.ReLU(inplace=True)] 284 | in_channels = v 285 | return nn.Sequential(*layers) 286 | 287 | class Classifier_Module(nn.Module): 288 | 289 | def __init__(self, num_classes, inplanes, depthconv=False): 290 | super(Classifier_Module, self).__init__() 291 | # [6, 12, 18, 24] 292 | self.depthconv = depthconv 293 | if depthconv: 294 | self.fc6_1_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 295 | self.fc6_1 = DepthConv(inplanes, 1024, kernel_size=3, stride=1, padding=6, dilation=6) # fc6 296 | else: 297 | self.fc6_1 = nn.Conv2d(inplanes, 1024, kernel_size=3, stride=1, padding=6, dilation=6) # fc6 298 | 299 | self.fc7_1 = nn.Sequential( 300 | *[nn.ReLU(True), nn.Dropout(), 301 | nn.Conv2d(1024, 1024, kernel_size=1, stride=1), nn.ReLU(True), nn.Dropout()]) # fc7 302 | self.fc8_1 = nn.Conv2d(1024, num_classes, kernel_size=1, stride=1, bias=True) # fc8 303 | 304 | if depthconv: 305 | self.fc6_2_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 306 | self.fc6_2 = DepthConv(inplanes, 1024, kernel_size=3, stride=1, padding=12, dilation=12) # fc6 307 | else: 308 | self.fc6_2 = nn.Conv2d(inplanes, 1024, kernel_size=3, stride=1, padding=12, dilation=12) # fc6 309 | 310 | self.fc7_2 = nn.Sequential( 311 | *[nn.ReLU(True), nn.Dropout(), 312 | nn.Conv2d(1024, 1024, kernel_size=1, stride=1), nn.ReLU(True), nn.Dropout()]) # fc7 313 | self.fc8_2 = nn.Conv2d(1024, num_classes, kernel_size=1, stride=1, bias=True) # fc8 314 | 315 | if depthconv: 316 | self.fc6_3_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 317 | self.fc6_3 = DepthConv(inplanes, 1024, kernel_size=3, stride=1, padding=18, dilation=18) # fc6 318 | else: 319 | self.fc6_3 = nn.Conv2d(inplanes, 1024, kernel_size=3, stride=1, padding=18, dilation=18) # fc6 320 | 321 | self.fc7_3 = nn.Sequential( 322 | *[nn.ReLU(True), nn.Dropout(), 323 | nn.Conv2d(1024, 1024, kernel_size=1, stride=1), nn.ReLU(True), nn.Dropout()]) # fc7 324 | self.fc8_3 = nn.Conv2d(1024, num_classes, kernel_size=1, stride=1, bias=True) # fc8 325 | 326 | if depthconv: 327 | self.fc6_4_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 328 | self.fc6_4 = DepthConv(inplanes, 1024, kernel_size=3, stride=1, padding=24, dilation=24) # fc6 329 | else: 330 | self.fc6_4 = nn.Conv2d(inplanes, 1024, kernel_size=3, stride=1, padding=24, dilation=24) # fc6 331 | 332 | self.fc7_4 = nn.Sequential( 333 | *[nn.ReLU(True), nn.Dropout(), 334 | nn.Conv2d(1024, 1024, kernel_size=1, stride=1), nn.ReLU(True), nn.Dropout()]) # fc7 335 | self.fc8_4 = nn.Conv2d(1024, num_classes, kernel_size=1, stride=1, bias=True) # fc8 336 | 337 | def forward(self, x, depth=None): 338 | if self.depthconv: 339 | out1 = self.fc6_1(x, self.fc6_1_depthconvweight * depth) 340 | else: 341 | out1 = self.fc6_1(x) 342 | out1 = self.fc7_1(out1) 343 | out1 = self.fc8_1(out1) 344 | 345 | if self.depthconv: 346 | out2 = self.fc6_2(x, self.fc6_2_depthconvweight * depth) 347 | else: 348 | out2 = self.fc6_2(x) 349 | out2 = self.fc7_2(out2) 350 | out2 = self.fc8_2(out2) 351 | 352 | if self.depthconv: 353 | out3 = self.fc6_3(x, self.fc6_3_depthconvweight * depth) 354 | else: 355 | out3 = self.fc6_3(x) 356 | out3 = self.fc7_3(out3) 357 | out3 = self.fc8_3(out3) 358 | 359 | if self.depthconv: 360 | out4 = self.fc6_4(x, self.fc6_4_depthconvweight * depth) 361 | else: 362 | out4 = self.fc6_4(x) 363 | out4 = self.fc7_4(out4) 364 | out4 = self.fc8_4(out4) 365 | 366 | return out1+out2+out3+out4 367 | 368 | class Classifier_Module2(nn.Module): 369 | 370 | def __init__(self, num_classes, inplanes, depthconv=False): 371 | super(Classifier_Module2, self).__init__() 372 | # [6, 12, 18, 24] 373 | self.depthconv = depthconv 374 | if depthconv: 375 | self.fc6_2_depthconvweight = 1.#nn.Parameter(torch.ones(1)) 376 | self.fc6_2 = DepthConv(inplanes, 1024, kernel_size=3, stride=1, padding=12, dilation=12) 377 | self.downsample_depth = None 378 | else: 379 | self.downsample_depth = nn.AvgPool2d(9,padding=1,stride=8) 380 | self.fc6_2 = nn.Conv2d(inplanes, 1024, kernel_size=3, stride=1, padding=12, dilation=12) # fc6 381 | 382 | self.fc7_2 = nn.Sequential( 383 | *[nn.ReLU(True), nn.Dropout(), 384 | nn.Conv2d(1024, 1024, kernel_size=1, stride=1), nn.ReLU(True), nn.Dropout()]) # fc7 385 | 386 | # self.globalpooling = DepthGlobalPool(1024,3)# 387 | # self.fc8_2 = nn.Conv2d(1024+3, num_classes, kernel_size=1, stride=1, bias=True) # fc8 388 | 389 | self.globalpooling = nn.AdaptiveAvgPool2d((1, 1))#nn.AvgPool2d((54,71))# 390 | self.dropout = nn.Dropout(0.3) 391 | # self.norm = CaffeNormalize(1024)#LayerNorm(1024)#nn.InstanceNorm2d(1024).use_running_stats(mode=False) 392 | self.fc8_2 = nn.Conv2d(2048, num_classes, kernel_size=1, stride=1, bias=True) # fc8 393 | 394 | def forward(self, x, depth=None): 395 | if self.depthconv: 396 | out2 = self.fc6_2(x, self.fc6_2_depthconvweight * depth) 397 | else: 398 | out2 = self.fc6_2(x) 399 | out2 = self.fc7_2(out2) 400 | out2_size = out2.size() 401 | 402 | #global pooling 403 | globalpool = self.globalpooling(out2) 404 | # globalpool = self.dropout(self.norm(globalpool)) 405 | globalpool = self.dropout(globalpool)#self.norm(globalpool)) 406 | upsample = nn.Upsample((out2_size[2],out2_size[3]), mode='bilinear')#scale_factor=8) 407 | globalpool = upsample(globalpool) 408 | 409 | #global pooling with depth 410 | # globalpool = self.globalpooling(out2,depth) 411 | 412 | 413 | # print globalpool.mean() 414 | out2 = torch.cat([out2, globalpool], 1) 415 | out2 = self.fc8_2(out2) 416 | # print out2.size() 417 | return out2 418 | 419 | class VGG(nn.Module): 420 | 421 | def __init__(self, num_classes=20, init_weights=True, depthconv=False,bn=False): 422 | super(VGG, self).__init__() 423 | self.features = VGG_layer(batch_norm=bn,depthconv=depthconv) 424 | self.classifier = Classifier_Module2(num_classes,512,depthconv=depthconv) 425 | 426 | if init_weights: 427 | self._initialize_weights() 428 | 429 | def forward(self, x, depth=None): 430 | x,depth = self.features(x,depth) 431 | x = self.classifier(x,depth) 432 | return x 433 | 434 | def _initialize_weights(self): 435 | for m in self.modules(): 436 | if isinstance(m, nn.Conv2d): 437 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 438 | m.weight.data.normal_(0, math.sqrt(2. / n)) 439 | if m.bias is not None: 440 | m.bias.data.zero_() 441 | elif isinstance(m, nn.BatchNorm2d): 442 | m.weight.data.fill_(1) 443 | m.bias.data.zero_() 444 | elif isinstance(m, nn.Linear): 445 | m.weight.data.normal_(0, 0.01) 446 | m.bias.data.zero_() 447 | 448 | def get_normalize_params(self): 449 | b=[] 450 | b.append(self.classifier.norm) 451 | for i in b: 452 | if isinstance(i, CaffeNormalize): 453 | yield i.scale 454 | 455 | def get_1x_lr_params_NOscale(self): 456 | """ 457 | This generator returns all the parameters of the net except for 458 | the last classification layer. Note that for each batchnorm layer, 459 | requires_grad is set to False in deeplab_resnet.py, therefore this function does not return 460 | any batchnorm parameter 461 | """ 462 | b = [] 463 | 464 | b.append(self.features.conv1_1) 465 | b.append(self.features.conv1_2) 466 | b.append(self.features.conv2_1) 467 | b.append(self.features.conv2_2) 468 | b.append(self.features.conv3_1) 469 | b.append(self.features.conv3_2) 470 | b.append(self.features.conv3_3) 471 | b.append(self.features.conv4_1) 472 | b.append(self.features.conv4_2) 473 | b.append(self.features.conv4_3) 474 | b.append(self.features.conv5_1) 475 | b.append(self.features.conv5_2) 476 | b.append(self.features.conv5_3) 477 | # b.append(self.classifier.fc6_1) 478 | b.append(self.classifier.fc6_2) 479 | # b.append(self.classifier.norm) 480 | # b.append(self.classifier.fc6_3) 481 | # b.append(self.classifier.fc6_4) 482 | # b.append(self.classifier.fc7_1) 483 | b.append(self.classifier.fc7_2) 484 | # b.append(self.classifier.fc7_3) 485 | # b.append(self.classifier.fc7_4) 486 | 487 | for i in range(len(b)): 488 | for j in b[i].modules(): 489 | if isinstance(j, nn.Conv2d): 490 | if j.weight.requires_grad: 491 | yield j.weight 492 | elif isinstance(j, DepthConv): 493 | if j.weight.requires_grad: 494 | yield j.weight 495 | 496 | 497 | def get_2x_lr_params_NOscale(self): 498 | """ 499 | This generator returns all the parameters of the net except for 500 | the last classification layer. Note that for each batchnorm layer, 501 | requires_grad is set to False in deeplab_resnet.py, therefore this function does not return 502 | any batchnorm parameter 503 | """ 504 | b = [] 505 | 506 | b.append(self.features.conv1_1) 507 | b.append(self.features.conv1_2) 508 | b.append(self.features.conv2_1) 509 | b.append(self.features.conv2_2) 510 | b.append(self.features.conv3_1) 511 | b.append(self.features.conv3_2) 512 | b.append(self.features.conv3_3) 513 | b.append(self.features.conv4_1) 514 | b.append(self.features.conv4_2) 515 | b.append(self.features.conv4_3) 516 | b.append(self.features.conv5_1) 517 | b.append(self.features.conv5_2) 518 | b.append(self.features.conv5_3) 519 | # b.append(self.classifier.fc6_1) 520 | b.append(self.classifier.fc6_2) 521 | # b.append(self.classifier.fc6_3) 522 | # b.append(self.classifier.fc6_4) 523 | # b.append(self.classifier.fc7_1) 524 | b.append(self.classifier.fc7_2) 525 | # b.append(self.classifier.globalpooling.model) 526 | # b.append(self.classifier.fc7_3) 527 | # b.append(self.classifier.fc7_4) 528 | 529 | for i in range(len(b)): 530 | for j in b[i].modules(): 531 | if isinstance(j, nn.Conv2d): 532 | if j.bias is not None: 533 | if j.bias.requires_grad: 534 | yield j.bias 535 | elif isinstance(j, DepthConv): 536 | if j.bias is not None: 537 | if j.bias.requires_grad: 538 | yield j.bias 539 | 540 | 541 | def get_10x_lr_params(self): 542 | """ 543 | This generator returns all the parameters for the last layer of the net, 544 | which does the classification of pixel into classes 545 | """ 546 | b = [] 547 | # b.append(self.classifier.fc8_1.weight) 548 | b.append(self.classifier.fc8_2.weight) 549 | # b.append(self.classifier.globalpooling.model.weight) 550 | # b.append(self.classifier.fc8_3.weight) 551 | # b.append(self.classifier.fc8_4.weight) 552 | 553 | for i in b: 554 | yield i 555 | # for j in range(len(b)): 556 | # for i in b[j]: 557 | # yield i 558 | 559 | def get_20x_lr_params(self): 560 | """ 561 | This generator returns all the parameters for the last layer of the net, 562 | which does the classification of pixel into classes 563 | """ 564 | b = [] 565 | # b.append(self.classifier.fc8_1.bias) 566 | b.append(self.classifier.fc8_2.bias) 567 | # b.append(self.classifier.globalpooling.model.bias) 568 | # b.append(self.classifier.fc8_3.bias) 569 | # b.append(self.classifier.fc8_4.bias) 570 | 571 | for i in b: 572 | yield i 573 | # for j in range(len(b)): 574 | # for i in b[j]: 575 | # yield i 576 | 577 | def get_100x_lr_params(self): 578 | """ 579 | This generator returns all the parameters for the last layer of the net, 580 | which does the classification of pixel into classes 581 | """ 582 | b = [] 583 | b.append(self.features.conv1_1_depthconvweight) 584 | b.append(self.features.conv2_1_depthconvweight) 585 | b.append(self.features.conv3_1_depthconvweight) 586 | b.append(self.features.conv4_1_depthconvweight) 587 | b.append(self.features.conv5_1_depthconvweight) 588 | b.append(self.classifier.fc6_1_depthconvweight) 589 | b.append(self.classifier.fc6_2_depthconvweight) 590 | b.append(self.classifier.fc6_3_depthconvweight) 591 | b.append(self.classifier.fc6_4_depthconvweight) 592 | 593 | for j in range(len(b)): 594 | yield b[j] 595 | # for i in b[j]: 596 | # yield i 597 | 598 | 599 | 600 | def vgg16(pretrained=False, **kwargs): 601 | """VGG 16-layer model (configuration "D") 602 | Args: 603 | pretrained (bool): If True, returns a model pre-trained on ImageNet 604 | """ 605 | if pretrained: 606 | kwargs['init_weights'] = False 607 | model = VGG(bn=False,**kwargs) 608 | if pretrained: 609 | model.load_state_dict(model_zoo.load_url(model_urls['vgg16'])) 610 | return model 611 | 612 | 613 | def vgg16_bn(pretrained=False, **kwargs): 614 | """VGG 16-layer model (configuration "D") with batch normalization 615 | Args: 616 | pretrained (bool): If True, returns a model pre-trained on ImageNet 617 | """ 618 | if pretrained: 619 | kwargs['init_weights'] = False 620 | model = VGG(bn=True,**kwargs) 621 | if pretrained: 622 | model.load_state_dict(model_zoo.load_url(model_urls['vgg16_bn'])) 623 | return model 624 | 625 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laughtervv/Deeplab-Pytorch/deb98bd27922241070d04b6ab6fa094981c3b827/models/__init__.py -------------------------------------------------------------------------------- /models/base_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from utils import util 4 | import torch 5 | 6 | def load_pretrained_model(net, state_dict, strict=True): 7 | """Copies parameters and buffers from :attr:`state_dict` into 8 | this module and its descendants. If :attr:`strict` is ``True`` then 9 | the keys of :attr:`state_dict` must exactly match the keys returned 10 | by this module's :func:`state_dict()` function. 11 | 12 | Arguments: 13 | state_dict (dict): A dict containing parameters and 14 | persistent buffers. 15 | strict (bool): Strictly enforce that the keys in :attr:`state_dict` 16 | match the keys returned by this module's `:func:`state_dict()` 17 | function. 18 | """ 19 | own_state = net.state_dict() 20 | # print state_dict.keys() 21 | # print own_state.keys() 22 | for name, param in state_dict.items(): 23 | if name in own_state: 24 | # print name, np.mean(param.numpy()) 25 | if isinstance(param, torch.nn.Parameter): 26 | # backwards compatibility for serialized parameters 27 | param = param.data 28 | if strict: 29 | try: 30 | own_state[name].copy_(param) 31 | except Exception: 32 | raise RuntimeError('While copying the parameter named {}, ' 33 | 'whose dimensions in the model are {} and ' 34 | 'whose dimensions in the checkpoint are {}.' 35 | .format(name, own_state[name].size(), param.size())) 36 | else: 37 | try: 38 | own_state[name].copy_(param) 39 | except Exception: 40 | print('Ignoring Error: While copying the parameter named {}, ' 41 | 'whose dimensions in the model are {} and ' 42 | 'whose dimensions in the checkpoint are {}.' 43 | .format(name, own_state[name].size(), param.size())) 44 | 45 | elif strict: 46 | raise KeyError('unexpected key "{}" in state_dict' 47 | .format(name)) 48 | if strict: 49 | missing = set(own_state.keys()) - set(state_dict.keys()) 50 | if len(missing) > 0: 51 | raise KeyError('missing keys in state_dict: "{}"'.format(missing)) 52 | 53 | 54 | class BaseModel(): 55 | 56 | def name(self): 57 | return 'BaseModel' 58 | 59 | def initialize(self, opt): 60 | self.opt = opt 61 | self.training = opt.isTrain 62 | self.gpu_ids = opt.gpu_ids 63 | self.isTrain = opt.isTrain 64 | self.num_classes = opt.label_nc 65 | self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor 66 | self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) 67 | self.tensorborad_dir = os.path.join(self.opt.checkpoints_dir, 'tensorboard', opt.dataset_mode) 68 | self.model_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name, 'model') 69 | util.mkdirs([self.tensorborad_dir, self.model_dir]) 70 | 71 | def set_input(self, input): 72 | self.input = input 73 | 74 | def forward(self): 75 | pass 76 | 77 | # used in test time, no backprop 78 | def test(self): 79 | pass 80 | 81 | def get_image_paths(self): 82 | pass 83 | 84 | def optimize_parameters(self): 85 | pass 86 | 87 | def get_current_visuals(self): 88 | return self.input 89 | 90 | def save(self, label): 91 | pass 92 | 93 | # helper saving function that can be used by subclasses 94 | def save_network(self, network, network_label, epoch_label, gpu_ids): 95 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label) 96 | save_path = os.path.join(self.model_dir, save_filename) 97 | torch.save(network.cpu().state_dict(), save_path) 98 | if len(gpu_ids) and torch.cuda.is_available(): 99 | network.cuda() 100 | 101 | # helper loading function that can be used by subclasses 102 | def load_network(self, network, network_label, epoch_label, save_dir=''): 103 | save_filename = '%s_net_%s.pth' % (epoch_label,network_label) 104 | if not save_dir: 105 | save_dir = self.model_dir 106 | save_path = os.path.join(save_dir, save_filename) 107 | if not os.path.isfile(save_path): 108 | print('%s not exists yet!' % save_path) 109 | else: 110 | #network.load_state_dict(torch.load(save_path)) 111 | try: 112 | # print torch.load(save_path).keys() 113 | # print network.state_dict()['Scale.features.conv2_1_depthconvweight'] 114 | network.load_state_dict(torch.load(save_path)) 115 | except: 116 | pretrained_dict = torch.load(save_path) 117 | model_dict = network.state_dict() 118 | try: 119 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 120 | network.load_state_dict(pretrained_dict) 121 | print('Pretrained network has excessive layers; Only loading layers that are used' ) 122 | except: 123 | print('Pretrained network has fewer layers; The following are not initialized:' ) 124 | # from sets import Set 125 | # not_initialized = Set() 126 | for k, v in pretrained_dict.items(): 127 | if v.size() == model_dict[k].size(): 128 | model_dict[k] = v 129 | not_initialized=[] 130 | # print(pretrained_dict.keys()) 131 | # print(model_dict.keys()) 132 | for k, v in model_dict.items(): 133 | if k not in pretrained_dict or v.size() != pretrained_dict[k].size(): 134 | not_initialized+=[k]#[k.split('.')[0]] 135 | print(sorted(not_initialized)) 136 | network.load_state_dict(model_dict) 137 | 138 | def update_learning_rate(): 139 | pass 140 | 141 | 142 | def load_pretrained_network(self, network, pretraineddir, epoch_label,strict=True): 143 | save_filename = '%s.pth' % (epoch_label) 144 | save_path = os.path.join(pretraineddir, save_filename) 145 | load_dict = torch.load(save_path, map_location=lambda storage, loc: storage) 146 | # print (load_dict.values().size()) 147 | load_pretrained_model(network,load_dict,strict) 148 | -------------------------------------------------------------------------------- /models/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from distutils.version import LooseVersion 6 | 7 | class CrossEntropyLoss2d(nn.Module): 8 | def __init__(self, weight=None, size_average=False, ignore_index=255): 9 | super(CrossEntropyLoss2d, self).__init__() 10 | self.nll_loss = nn.NLLLoss2d(weight, size_average, ignore_index) 11 | 12 | def forward(self, inputs, targets): 13 | return self.nll_loss(F.log_softmax(inputs), targets) 14 | 15 | def cross_entropy2d(input, target, weight=None, size_average=True): 16 | # input: (n, c, h, w), target: (n, h, w) 17 | n, c, h, w = input.size() 18 | # log_p: (n, c, h, w) 19 | if LooseVersion(torch.__version__) < LooseVersion('0.3'): 20 | # ==0.2.X 21 | log_p = F.log_softmax(input).cuda() 22 | else: 23 | # >=0.3 24 | log_p = F.log_softmax(input, dim=1).cuda() 25 | # log_p: (n*h*w, c) 26 | log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous() 27 | log_p = log_p[target.view(n, h, w, 1).repeat(1, 1, 1, c) >= 0] 28 | log_p = log_p.view(-1, c) 29 | # target: (n*h*w,) 30 | # mask = (target != 255) 31 | # target = target[mask] 32 | loss = F.nll_loss(log_p, target, weight=weight, size_average=False, ignore_index=255).cuda() 33 | if size_average: 34 | loss /= (n*h*w) 35 | return loss 36 | 37 | class FocalLoss2d(nn.Module): 38 | def __init__(self, gamma=2., weight=None, size_average=True, ignore_index=255): 39 | super(FocalLoss2d, self).__init__() 40 | self.gamma = gamma 41 | self.nll_loss = nn.NLLLoss2d(weight, size_average, ignore_index) 42 | 43 | def forward(self, inputs, targets): 44 | return self.nll_loss((1 - F.softmax(inputs)) ** self.gamma * F.log_softmax(inputs), targets) 45 | 46 | 47 | class FocalLoss(nn.Module): 48 | """ 49 | This criterion is a implemenation of Focal Loss, which is proposed in 50 | Focal Loss for Dense Object Detection. 51 | 52 | Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class]) 53 | 54 | The losses are averaged across observations for each minibatch. 55 | Args: 56 | alpha(1D Tensor, Variable) : the scalar factor for this criterion 57 | gamma(float, double) : gamma > 0 58 | size_average(bool): size_average(bool): By default, the losses are averaged over observations for each minibatch. 59 | However, if the field size_average is set to False, the losses are 60 | instead summed for each minibatch. 61 | """ 62 | 63 | def __init__(self, class_num, alpha=None, gamma=2, size_average=True): 64 | super(FocalLoss, self).__init__() 65 | if alpha is None: 66 | self.alpha = Variable(torch.ones(class_num+1)) 67 | else: 68 | if isinstance(alpha, Variable): 69 | self.alpha = alpha 70 | else: 71 | self.alpha = Variable(alpha) 72 | self.gamma = gamma 73 | self.class_num = class_num 74 | self.size_average = size_average 75 | 76 | def forward(self, inputs, targets): # variables 77 | P = F.softmax(inputs) 78 | 79 | b,c,h,w = inputs.size() 80 | class_mask = Variable(torch.zeros([b,c+1,h,w]).cuda()) 81 | class_mask.scatter_(1, targets.long(), 1.) 82 | class_mask = class_mask[:,:-1,:,:] 83 | 84 | if inputs.is_cuda and not self.alpha.is_cuda: 85 | self.alpha = self.alpha.cuda() 86 | # print('alpha',self.alpha.size()) 87 | alpha = self.alpha[targets.data.view(-1)].view_as(targets) 88 | # print (alpha.size(),class_mask.size(),P.size()) 89 | probs = (P * class_mask).sum(1) # + 1e-6#.view(-1, 1) 90 | log_p = probs.log() 91 | 92 | batch_loss = -alpha * (torch.pow((1 - probs), self.gamma)) * log_p 93 | 94 | if self.size_average: 95 | loss = batch_loss.mean() 96 | else: 97 | loss = batch_loss.sum() 98 | return loss 99 | -------------------------------------------------------------------------------- /models/model_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | import torchvision 6 | import time 7 | 8 | class LayerNorm(nn.Module): 9 | 10 | def __init__(self, features, eps=1e-6, gamma=1.,beta=0.,learnable=False): 11 | super(LayerNorm,self).__init__() 12 | if learnable: 13 | self.gamma = nn.Parameter(torch.ones(features)) 14 | self.beta = nn.Parameter(torch.zeros(features)) 15 | else: 16 | self.gamma = gamma 17 | self.beta = beta 18 | 19 | self.eps = eps 20 | 21 | def forward(self, x): 22 | x_size = x.size() 23 | mean = x.view(x_size[0],x_size[1],x_size[2]*x_size[3]).mean(2)\ 24 | .view(x_size[0],x_size[1],1,1).repeat(1, 1, x_size[2], x_size[3]) 25 | std = x.view(x_size[0],x_size[1],x_size[2]*x_size[3]).std(2)\ 26 | .view(x_size[0],x_size[1],1,1).repeat(1, 1, x_size[2], x_size[3]) 27 | # print 'mean',mean.size(),'x',x_size 28 | return self.gamma * (x - mean) / (std + self.eps) + self.beta 29 | 30 | class CaffeNormalize(nn.Module): 31 | 32 | def __init__(self, features, eps=1e-7): 33 | super(CaffeNormalize,self).__init__() 34 | self.scale = nn.Parameter(10.*torch.ones(features))#, requires_grad=False) 35 | self.eps = eps 36 | 37 | def forward(self, x): 38 | # print self.scale 39 | x_size = x.size() 40 | norm = x.norm(2,dim=1,keepdim=True)#.detach() 41 | #print norm.data.cpu().numpy(),self.scale.mean().data.cpu().numpy()#,self.scale.grad.mean().data.cpu().numpy() 42 | x = x.div(norm+self.eps) 43 | 44 | return x.mul(self.scale.view(1, x_size[1], 1, 1)) 45 | 46 | 47 | class DepthGlobalPool(nn.Module): 48 | def __init__(self, n_features, n_out): 49 | super(DepthGlobalPool, self).__init__() 50 | self.model = nn.Conv2d(n_features, n_out, kernel_size=1, padding=0) 51 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 52 | 53 | self.norm = CaffeNormalize(n_out) 54 | self.dropout = nn.Dropout(0.3) 55 | 56 | n = self.model.kernel_size[0] * self.model.kernel_size[1] * self.model.out_channels 57 | self.model.weight.data.normal_(0, np.sqrt(2. / n)) 58 | if self.model.bias is not None: 59 | self.model.bias.data.zero_() 60 | 61 | def forward(self, features, depth, depthpool=False): 62 | # features = self.pool(self.model(features)) 63 | out2_size = features.size() 64 | features = self.model(features) 65 | 66 | if isinstance(depth, Variable) and depthpool: 67 | outfeatures = features.clone() 68 | n_c = features.size()[1] 69 | 70 | # depth-wise average pooling 71 | # depthclone = depth.clone() 72 | depth = depth.data.cpu().numpy() 73 | _, depth_bin = np.histogram(depth) 74 | 75 | bin_low = depth_bin[0] 76 | for bin_high in depth_bin[1:]: 77 | indices = ((depth <= bin_high) & (depth >= bin_low)).nonzero() 78 | if indices[0].shape[0] != 0: 79 | for j in range(n_c): 80 | output_ins = features[indices[0], indices[1] + j, indices[2], indices[3]] 81 | mean_feat = torch.mean(output_ins).expand_as(output_ins) 82 | outfeatures[indices[0], indices[1] + j, indices[2], indices[3]] = mean_feat # torch.mean(output_ins) 83 | bin_low = bin_high 84 | 85 | # outfeatures = self.norm(outfeatures) 86 | outfeatures = self.dropout(outfeatures) 87 | 88 | # bin_low = depth_bin[0] 89 | # for bin_high in depth_bin[1:]: 90 | # indices = ((depth <= bin_high) & (depth >= bin_low)).nonzero() 91 | # if indices[0].shape[0] != 0: 92 | # output_ins = features[indices[0], indices[1], indices[2], indices[3]] 93 | # mean_feat = torch.mean(output_ins).expand_as(output_ins) 94 | # depthclone[indices[0], indices[1], indices[2], indices[3]] = mean_feat 95 | # bin_low = bin_high 96 | # 97 | # upsample = nn.UpsamplingBilinear2d(scale_factor=8) 98 | # torchvision.utils.save_image(upsample(depthclone).data, 'depth_feature1.png', normalize=True, range=(0, 1)) 99 | # outfeatures = self.dropout(outfeatures) 100 | else: 101 | features = self.pool(features) 102 | # features = self.norm(features) 103 | outfeatures = self.dropout(features) 104 | self.upsample = nn.UpsamplingBilinear2d((out2_size[2],out2_size[3])) 105 | outfeatures = self.upsample(outfeatures) 106 | 107 | return outfeatures 108 | -------------------------------------------------------------------------------- /models/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def create_model(opt, dataset=None): 4 | 5 | if opt.model == 'DeeplabVGG': 6 | from .Deeplab import Deeplab_Solver 7 | model = Deeplab_Solver(opt, dataset) 8 | elif opt.model == 'DeeplabVGG_HHA': 9 | from .Deeplab_HHA import Deeplab_HHA_Solver 10 | model = Deeplab_HHA_Solver(opt, dataset) 11 | elif opt.model == 'DeeplabResnet': 12 | from .Deeplab import Deeplab_Solver 13 | model = Deeplab_Solver(opt, dataset,'Resnet') 14 | 15 | print("model [%s] was created" % (model.name())) 16 | 17 | return model 18 | -------------------------------------------------------------------------------- /options/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laughtervv/Deeplab-Pytorch/deb98bd27922241070d04b6ab6fa094981c3b827/options/__init__.py -------------------------------------------------------------------------------- /options/base_options.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 3 | import argparse 4 | import os 5 | from utils import util 6 | import torch 7 | 8 | class BaseOptions(): 9 | def __init__(self): 10 | self.parser = argparse.ArgumentParser() 11 | self.initialized = False 12 | 13 | def initialize(self): 14 | # experiment specifics 15 | self.parser.add_argument('--name', type=str, default='label2city', help='name of the experiment. It decides where to store samples and models') 16 | self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') 17 | self.parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here') 18 | self.parser.add_argument('--model', type=str, default='DeeplabVGG', help='model: DeeplabVGG, DeeplabVGG_HHA') 19 | self.parser.add_argument('--encoder', type=str, default='resnet50_dilated8', help='pretrained_model') 20 | self.parser.add_argument('--decoder', type=str, default='psp_bilinear', help='pretrained_model') 21 | self.parser.add_argument('--depthconv', action='store_true', help='if specified, use depthconv') 22 | self.parser.add_argument('--depthglobalpool', action='store_true', help='if specified, use global pooling with depth') 23 | self.parser.add_argument('--pretrained_model', type=str, default='', help='pretrained_model') 24 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') 25 | self.parser.add_argument('--pretrained_model_HHA', type=str, default='', help='pretrained_model') 26 | self.parser.add_argument('--which_epoch_HHA', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') 27 | self.parser.add_argument('--pretrained_model_rgb', type=str, default='', help='pretrained_model') 28 | self.parser.add_argument('--which_epoch_rgb', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') 29 | 30 | # input/output sizes 31 | self.parser.add_argument('--batchSize', type=int, default=1, help='input batch size') 32 | self.parser.add_argument('--fineSize', type=str, default='480,640', help='then crop to this size') 33 | self.parser.add_argument('--label_nc', type=int, default=40, help='# of input image channels') 34 | 35 | # for setting inputs 36 | self.parser.add_argument('--dataroot', type=str, default='', 37 | help='chooses how datasets are loaded. [nyuv2]') 38 | self.parser.add_argument('--dataset_mode', type=str, default='nyuv2', 39 | help='chooses how datasets are loaded. [nyuv2]') 40 | self.parser.add_argument('--list', type=str, default='', help='image and seg mask list file') 41 | self.parser.add_argument('--vallist', type=str, default='', help='image and seg mask list file') 42 | 43 | # for data augmentation 44 | self.parser.add_argument('--flip', action='store_true',help='if specified, flip the images for data argumentation') 45 | self.parser.add_argument('--scale', action='store_true',help='if specified, scale the images for data argumentation') 46 | self.parser.add_argument('--crop', action='store_true',help='if specified, crop the images for data argumentation') 47 | self.parser.add_argument('--colorjitter', action='store_true',help='if specified, crop the images for data argumentation') 48 | self.parser.add_argument('--inputmode', default='bgr-mean', type=str, help='input image normalize option: bgr-mean, divstd-mean') 49 | 50 | self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly') 51 | self.parser.add_argument('--nThreads', default=1, type=int, help='# threads for loading data') 52 | self.parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.') 53 | 54 | # for displays 55 | self.parser.add_argument('--display_winsize', type=int, default=512, help='display window size') 56 | self.parser.add_argument('--tf_log', action='store_true', help='if specified, use tensorboard logging. Requires tensorflow installed') 57 | self.parser.add_argument('--verbose', action='store_true', help='if specified, print loss while training') 58 | 59 | 60 | def parse(self, save=True): 61 | if not self.initialized: 62 | self.initialize() 63 | self.opt = self.parser.parse_args() 64 | self.opt.isTrain = self.isTrain # train or test 65 | 66 | str_ids = self.opt.gpu_ids.split(',') 67 | self.opt.gpu_ids = [] 68 | for str_id in str_ids: 69 | id = int(str_id) 70 | if id >= 0: 71 | self.opt.gpu_ids.append(id) 72 | 73 | str_sizes = self.opt.fineSize.split(',') 74 | self.opt.fineSize = [] 75 | for str_size in str_sizes: 76 | size_ = int(str_size) 77 | if size_ >= 0: 78 | self.opt.fineSize.append(size_) 79 | 80 | # set gpu ids 81 | if len(self.opt.gpu_ids) > 0: 82 | torch.cuda.set_device(self.opt.gpu_ids[0]) 83 | 84 | args = vars(self.opt) 85 | 86 | print('------------ Options -------------') 87 | for k, v in sorted(args.items()): 88 | print('%s: %s' % (str(k), str(v))) 89 | print('-------------- End ----------------') 90 | 91 | # save to the disk 92 | expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name) 93 | util.mkdirs(expr_dir) 94 | if save: 95 | file_name = os.path.join(expr_dir, 'opt.txt') 96 | with open(file_name, 'wt') as opt_file: 97 | opt_file.write('------------ Options -------------\n') 98 | for k, v in sorted(args.items()): 99 | opt_file.write('%s: %s\n' % (str(k), str(v))) 100 | opt_file.write('-------------- End ----------------\n') 101 | return self.opt 102 | -------------------------------------------------------------------------------- /options/test_options.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 3 | from .base_options import BaseOptions 4 | 5 | class TestOptions(BaseOptions): 6 | def initialize(self): 7 | BaseOptions.initialize(self) 8 | self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.') 9 | self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.') 10 | self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images') 11 | self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') 12 | self.parser.add_argument('--how_many', type=int, default=20, help='how many test images to run') 13 | self.parser.add_argument('--cluster_path', type=str, default='features_clustered_010.npy', help='the path for clustered results of encoded features') 14 | self.isTrain = False 15 | -------------------------------------------------------------------------------- /options/train_options.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 3 | from .base_options import BaseOptions 4 | 5 | class TrainOptions(BaseOptions): 6 | def initialize(self): 7 | BaseOptions.initialize(self) 8 | 9 | # for displays 10 | self.parser.add_argument('--display_freq', type=int, default=100, help='frequency of showing training results on screen') 11 | self.parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console') 12 | self.parser.add_argument('--save_latest_freq', type=int, default=1000, help='frequency of saving the latest results') 13 | self.parser.add_argument('--save_epoch_freq', type=int, default=10, help='frequency of saving checkpoints at the end of epochs') 14 | self.parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/') 15 | self.parser.add_argument('--debug', action='store_true', help='only do one epoch and displays at each iteration') 16 | 17 | # for training 18 | self.parser.add_argument('--loadfroms', action='store_true', help='continue training: load from 32s or 16s') 19 | self.parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model') 20 | self.parser.add_argument('--use_softmax', action='store_true', help='if specified use softmax loss, otherwise log-softmax') 21 | self.parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') 22 | self.parser.add_argument('--nepochs', type=int, default=100, help='# of iter at starting learning rate') 23 | self.parser.add_argument('--iterSize', type=int, default=10, help='# of iter at starting learning rate') 24 | self.parser.add_argument('--maxbatchsize', type=int, default=-1, help='# of iter at starting learning rate') 25 | self.parser.add_argument('--warmup_iters', type=int, default=500, help='# of iter at starting learning rate') 26 | self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') 27 | self.parser.add_argument('--lr', type=float, default=0.00025, help='initial learning rate for adam') 28 | self.parser.add_argument('--lr_power', type=float, default=0.9, help='power of learning rate policy') 29 | self.parser.add_argument('--momentum', type=float, default=0.9, help='momentum for sgd') 30 | self.parser.add_argument('--wd', type=float, default=0.0004, help='weight decay for sgd') 31 | 32 | self.isTrain = True 33 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | python test.py \ 2 | --gpu_ids 2 \ 3 | --name nyuv2_VGGdeeplab_depthconv \ 4 | --dataset_mode nyuv2 \ 5 | --depthconv \ 6 | --list dataset/lists/nyuv2/test.lst \ 7 | --how_many 0 8 | -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | python train.py \ 2 | --name nyuv2_VGGdeeplab_depthconv \ 3 | --dataset_mode nyuv2 \ 4 | --flip --scale --crop --colorjitter \ 5 | --depthconv \ 6 | --list ./lists/train.lst \ 7 | --vallist ./lists/val.lst \ 8 | --continue_train 9 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from collections import OrderedDict 4 | from options.test_options import TestOptions 5 | from data.data_loader import CreateDataLoader 6 | from models.models import create_model 7 | import utils.util as util 8 | from utils.visualizer import Visualizer 9 | from utils import html 10 | from torch.autograd import Variable 11 | 12 | opt = TestOptions().parse(save=False) 13 | opt.nThreads = 1 14 | opt.batchSize = 1 15 | opt.serial_batches = True # no shuffle 16 | 17 | data_loader = CreateDataLoader(opt) 18 | dataset, _ = data_loader.load_data() 19 | model = create_model(opt,data_loader.dataset) 20 | visualizer = Visualizer(opt) 21 | # create website 22 | web_dir = os.path.join(opt.results_dir, opt.name, '%s_%s' % (opt.phase, opt.which_epoch)) 23 | webpage = html.HTML(web_dir, '%s: %s' % (opt.name, pt.which_epoch)) 24 | # test 25 | 26 | 27 | label_trues, label_preds = [], [] 28 | 29 | model.model.eval() 30 | tic = time.time() 31 | 32 | accs=[] 33 | for i, data in enumerate(dataset): 34 | if i >= opt.how_many and opt.how_many!=0: 35 | break 36 | seggt, segpred = model.forward(data,False) 37 | print time.time() - tic 38 | tic = time.time() 39 | 40 | seggt = seggt.data.cpu().numpy() 41 | segpred = segpred.data.cpu().numpy() 42 | 43 | label_trues.append(seggt) 44 | label_preds.append(segpred) 45 | 46 | visuals = model.get_visuals(i) 47 | img_path = data['imgpath'] 48 | print('process image... %s' % img_path) 49 | visualizer.save_images(webpage, visuals, img_path) 50 | 51 | metrics0 = util.label_accuracy_score( 52 | label_trues, label_preds, n_class=opt.label_nc, returniu=True) 53 | metrics = np.array(metrics0[:4]) 54 | metrics *= 100 55 | print('''\ 56 | Accuracy: {0} 57 | Accuracy Class: {1} 58 | Mean IU: {2} 59 | FWAV Accuracy: {3}'''.format(*metrics)) 60 | 61 | webpage.save() 62 | -------------------------------------------------------------------------------- /test_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.modules.utils import _single, _pair, _triple 4 | from torch.autograd import Variable 5 | 6 | from utils.gradcheck import gradcheck 7 | from models.ops.depthconv.functions.depthconv import DepthconvFunction 8 | 9 | 10 | N, inC, inH, inW = 4, 2, 8, 8 11 | kH, kW = 3, 3 12 | pad, stride, dilation = 0, 1, 1 13 | 14 | offC = 1 * 2 * kH * kW 15 | 16 | outC = 1 17 | outH = (inH + 2 * pad - (dilation * (kH - 1) + 1)) // stride + 1 18 | outW = (inW + 2 * pad - (dilation * (kW - 1) + 1)) // stride + 1 19 | 20 | conv_offset2d = DepthconvFunction( 21 | padding=(pad, pad), 22 | stride=(stride, stride), 23 | dilation=(dilation, dilation), bias=True) 24 | conv2d = F.ConvNd(_pair(stride), _pair(pad), _pair(dilation), False, 25 | _pair(0), 1, torch.backends.cudnn.benchmark, torch.backends.cudnn.enabled) 26 | offset = Variable( 27 | torch.ones(N, 1, inH, inW).cuda(), 28 | requires_grad=False) 29 | input = Variable( 30 | torch.rand(N, inC, inH, inW).cuda(), 31 | requires_grad=True) 32 | input2 = Variable(input.data.clone(), 33 | requires_grad=True) 34 | weight = Variable( 35 | 10*torch.rand(outC, inC, kH, kW).cuda(), 36 | requires_grad=True) 37 | weight2 = Variable(weight.data.clone(), 38 | requires_grad=True) 39 | bias = Variable(torch.rand(outC).cuda(),requires_grad=True) 40 | bias2 = Variable(bias.data.clone(), 41 | requires_grad=True) 42 | grad = Variable( 43 | torch.rand(N, outC, 6, 6).cuda(), 44 | requires_grad=True) 45 | 46 | print bias 47 | out1 = conv_offset2d(input, offset, weight, bias) 48 | out2 = conv2d(input2, weight2, bias2) 49 | print (out1-out2).sum() 50 | 51 | out1.backward(grad) 52 | out2.backward(grad) 53 | 54 | 55 | print (weight.grad-weight2.grad).sum() 56 | print ('input.grad',input.grad.sum()) 57 | print ('input.grad',input2.grad.sum()) 58 | print (input.grad-input2.grad).sum() 59 | print (bias.grad-bias2.grad).sum() 60 | 61 | 62 | # print bias.data.cpu().numpy().dtype 63 | 64 | # print("pass gradcheck: {}".format(gradcheck(conv_offset2d, (input, offset, weight, bias)))) 65 | # print("pass gradcheck: {}".format(gradcheck(conv2d, (input, weight,None)))) 66 | 67 | import torch 68 | import torch.nn as nn 69 | import torch.nn.functional as F 70 | import numpy as np 71 | from models.ops.depthavgpooling.functions.depthavgpooling import DepthavgpoolingFunction 72 | from models.ops.depthavgpooling.modules import Depthavgpooling 73 | from torch.autograd import Variable 74 | 75 | depth = [[[1,0,1,10000], 76 | [0,1,10000,1], 77 | [1,0,1,0], 78 | [0,1,0,1]], 79 | ] 80 | depth = np.zeros([40,40]) 81 | inputarray = torch.Tensor(np.asarray(range(2*40*40)).reshape([1,2,40,40])) 82 | depth = torch.Tensor(np.asarray(depth).reshape([1,1,40,40])) 83 | 84 | print inputarray 85 | N, inC, inH, inW = 4, 512, 50, 65 86 | input = Variable( 87 | inputarray, 88 | requires_grad=True).cuda() 89 | depth = Variable( 90 | depth, 91 | requires_grad=True).cuda() 92 | kH, kW = 3, 3 93 | pad, stride, dilation = 1, 1, 1 94 | depthpooling = Depthavgpooling(kH,stride,pad) 95 | pooling = nn.AvgPool2d(kernel_size=kH, stride=stride,padding=pad) 96 | 97 | out1 = depthpooling(input, depth) 98 | out2 = pooling(input) 99 | 100 | grad = Variable( 101 | torch.ones(N, 2, 40, 40).cuda(), 102 | requires_grad=True) 103 | out1.backward(grad) 104 | 105 | print out1-out2 106 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import time 2 | from tensorboardX import SummaryWriter 3 | from collections import OrderedDict 4 | from options.train_options import TrainOptions 5 | from data.data_loader import CreateDataLoader 6 | from models.models import create_model 7 | import utils.util as util 8 | from utils.visualizer import Visualizer 9 | import os 10 | import numpy as np 11 | import torch 12 | from torch.autograd import Variable 13 | import time 14 | 15 | opt = TrainOptions().parse() 16 | iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt') 17 | ioupath_path = os.path.join(opt.checkpoints_dir, opt.name, 'MIoU.txt') 18 | if opt.continue_train: 19 | try: 20 | start_epoch, epoch_iter = np.loadtxt(iter_path, delimiter=',', dtype=int) 21 | except: 22 | start_epoch, epoch_iter = 1, 0 23 | 24 | try: 25 | best_iou = np.loadtxt(ioupath_path, dtype=float) 26 | except: 27 | best_iou = 0. 28 | print('Resuming from epoch %d at iteration %d, previous best IoU %f' % (start_epoch, epoch_iter, best_iou)) 29 | else: 30 | start_epoch, epoch_iter = 1, 0 31 | best_iou = 0. 32 | 33 | data_loader = CreateDataLoader(opt) 34 | dataset, dataset_val = data_loader.load_data() 35 | dataset_size = len(dataset) 36 | print('#training images = %d' % dataset_size) 37 | 38 | model = create_model(opt, dataset.dataset) 39 | # print (model) 40 | visualizer = Visualizer(opt) 41 | total_steps = (start_epoch - 1) * dataset_size + epoch_iter 42 | for epoch in range(start_epoch, opt.nepochs): 43 | epoch_start_time = time.time() 44 | if epoch != start_epoch: 45 | epoch_iter = epoch_iter % dataset_size 46 | 47 | model.model.train() 48 | for i, data in enumerate(dataset, start=epoch_iter): 49 | iter_start_time = time.time() 50 | total_steps += opt.batchSize 51 | epoch_iter += opt.batchSize 52 | 53 | ############## Forward and Backward Pass ###################### 54 | model.forward(data) 55 | model.backward(total_steps, opt.nepochs * dataset.__len__() * opt.batchSize + 1) 56 | 57 | ############## update tensorboard and web images ###################### 58 | if total_steps % opt.display_freq == 0: 59 | visuals = model.get_visuals(total_steps) 60 | visualizer.display_current_results(visuals, epoch, total_steps) 61 | 62 | ############## Save latest Model ###################### 63 | if total_steps % opt.save_latest_freq == 0: 64 | print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_steps)) 65 | model.save('latest') 66 | np.savetxt(iter_path, (epoch, epoch_iter), delimiter=',', fmt='%d') 67 | # print time.time()-iter_start_time 68 | 69 | # end of epoch 70 | model.model.eval() 71 | if dataset_val!=None: 72 | label_trues, label_preds = [], [] 73 | for i, data in enumerate(dataset_val): 74 | seggt, segpred = model.forward(data,False) 75 | seggt = seggt.data.cpu().numpy() 76 | segpred = segpred.data.cpu().numpy() 77 | 78 | label_trues.append(seggt) 79 | label_preds.append(segpred) 80 | 81 | metrics = util.label_accuracy_score( 82 | label_trues, label_preds, n_class=opt.label_nc) 83 | metrics = np.array(metrics) 84 | metrics *= 100 85 | print('''\ 86 | Validation: 87 | Accuracy: {0} 88 | Accuracy Class: {1} 89 | Mean IU: {2} 90 | FWAV Accuracy: {3}'''.format(*metrics)) 91 | model.update_tensorboard(metrics,total_steps) 92 | iter_end_time = time.time() 93 | 94 | print('End of epoch %d / %d \t Time Taken: %d sec' % 95 | (epoch+1, opt.nepochs, time.time() - epoch_start_time)) 96 | if metrics[2]>best_iou: 97 | best_iou = metrics[2] 98 | print('saving the model at the end of epoch %d, iters %d, loss %f' % (epoch, total_steps, model.trainingavgloss)) 99 | model.save('best') 100 | 101 | ### save model for this epoch 102 | if epoch % opt.save_epoch_freq == 0: 103 | print('saving the model at the end of epoch %d, iters %d, loss %f' % (epoch, total_steps, model.trainingavgloss)) 104 | model.save('latest') 105 | model.save(epoch) 106 | np.savetxt(iter_path, (epoch + 1, 0), delimiter=',', fmt='%d') 107 | 108 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laughtervv/Deeplab-Pytorch/deb98bd27922241070d04b6ab6fa094981c3b827/utils/__init__.py -------------------------------------------------------------------------------- /utils/gradcheck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from collections import Iterable 4 | import numpy as np 5 | 6 | 7 | def iter_variables(x): 8 | if isinstance(x, Variable): 9 | if x.requires_grad: 10 | yield (x.grad.data, x.data) if x.grad is not None else (None, None) 11 | elif isinstance(x, Iterable): 12 | for elem in x: 13 | for result in iter_variables(elem): 14 | yield result 15 | 16 | 17 | def zero_gradients(x): 18 | if isinstance(x, Variable): 19 | if x.grad is not None: 20 | x.grad.detach_() 21 | x.grad.data.zero_() 22 | elif isinstance(x, Iterable): 23 | for elem in x: 24 | zero_gradients(elem) 25 | 26 | 27 | def make_jacobian(input, num_out): 28 | if isinstance(input, Variable) and not input.requires_grad: 29 | return None 30 | elif torch.is_tensor(input) or isinstance(input, Variable): 31 | return torch.zeros(input.nelement(), num_out) 32 | elif isinstance(input, Iterable): 33 | jacobians = list(filter( 34 | lambda x: x is not None, (make_jacobian(elem, num_out) for elem in input))) 35 | if not jacobians: 36 | return None 37 | return type(input)(jacobians) 38 | else: 39 | return None 40 | 41 | 42 | def iter_tensors(x, only_requiring_grad=False): 43 | if torch.is_tensor(x): 44 | yield x 45 | elif isinstance(x, Variable): 46 | if x.requires_grad or not only_requiring_grad: 47 | yield x.data 48 | elif isinstance(x, Iterable): 49 | for elem in x: 50 | for result in iter_tensors(elem, only_requiring_grad): 51 | yield result 52 | 53 | 54 | def contiguous(input): 55 | if torch.is_tensor(input): 56 | return input.contiguous() 57 | elif isinstance(input, Variable): 58 | return input.contiguous() 59 | elif isinstance(input, Iterable): 60 | return type(input)(contiguous(e) for e in input) 61 | return input 62 | 63 | 64 | def get_numerical_jacobian(fn, input, target, eps=1e-3): 65 | # To be able to use .view(-1) input must be contiguous 66 | input = contiguous(input) 67 | output_size = fn(input).numel() 68 | jacobian = make_jacobian(target, output_size) 69 | 70 | # It's much easier to iterate over flattened lists of tensors. 71 | # These are reference to the same objects in jacobian, so any changes 72 | # will be reflected in it as well. 73 | x_tensors = [t for t in iter_tensors(target, True)] 74 | j_tensors = [t for t in iter_tensors(jacobian)] 75 | 76 | outa = torch.DoubleTensor(output_size) 77 | outb = torch.DoubleTensor(output_size) 78 | 79 | # TODO: compare structure 80 | for x_tensor, d_tensor in zip(x_tensors, j_tensors): 81 | flat_tensor = x_tensor.view(-1) 82 | for i in range(flat_tensor.nelement()): 83 | orig = flat_tensor[i] 84 | flat_tensor[i] = orig - eps 85 | outa.copy_(fn(input), broadcast=False) 86 | flat_tensor[i] = orig + eps 87 | outb.copy_(fn(input), broadcast=False) 88 | flat_tensor[i] = orig 89 | 90 | outb.add_(-1, outa).div_(2 * eps) 91 | d_tensor[i] = outb 92 | 93 | return jacobian 94 | 95 | 96 | def get_analytical_jacobian(input, output): 97 | jacobian = make_jacobian(input, output.numel()) 98 | jacobian_reentrant = make_jacobian(input, output.numel()) 99 | grad_output = output.data.clone().zero_() 100 | flat_grad_output = grad_output.view(-1) 101 | reentrant = True 102 | correct_grad_sizes = True 103 | 104 | for i in range(flat_grad_output.numel()): 105 | flat_grad_output.zero_() 106 | flat_grad_output[i] = 1 107 | for jacobian_c in (jacobian, jacobian_reentrant): 108 | zero_gradients(input) 109 | output.backward(grad_output, create_graph=True) 110 | for jacobian_x, (d_x, x) in zip(jacobian_c, iter_variables(input)): 111 | if d_x is None: 112 | jacobian_x[:, i].zero_() 113 | else: 114 | if d_x.size() != x.size(): 115 | correct_grad_sizes = False 116 | jacobian_x[:, i] = d_x.to_dense() if d_x.is_sparse else d_x 117 | 118 | for jacobian_x, jacobian_reentrant_x in zip(jacobian, jacobian_reentrant): 119 | if (jacobian_x - jacobian_reentrant_x).abs().max() != 0: 120 | reentrant = False 121 | 122 | return jacobian, reentrant, correct_grad_sizes 123 | 124 | 125 | def _as_tuple(x): 126 | if isinstance(x, tuple): 127 | return x 128 | elif isinstance(x, list): 129 | return tuple(x) 130 | else: 131 | return x, 132 | 133 | 134 | def gradcheck(func, inputs, eps=1e-6, atol=1e-5, rtol=1e-3): 135 | """Check gradients computed via small finite differences 136 | against analytical gradients 137 | 138 | The check between numerical and analytical has the same behaviour as 139 | numpy.allclose https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html 140 | meaning it check that 141 | absolute(a - n) <= (atol + rtol * absolute(n)) 142 | is true for all elements of analytical jacobian a and numerical jacobian n. 143 | 144 | Args: 145 | func: Python function that takes Variable inputs and returns 146 | a tuple of Variables 147 | inputs: tuple of Variables 148 | eps: perturbation for finite differences 149 | atol: absolute tolerance 150 | rtol: relative tolerance 151 | 152 | Returns: 153 | True if all differences satisfy allclose condition 154 | """ 155 | output = func(*inputs) 156 | output = _as_tuple(output) 157 | 158 | for i, o in enumerate(output): 159 | if not o.requires_grad: 160 | continue 161 | print 'i:',i,o 162 | 163 | def fn(input): 164 | return _as_tuple(func(*input))[i].data 165 | 166 | analytical, reentrant, correct_grad_sizes = get_analytical_jacobian(_as_tuple(inputs), o) 167 | numerical = get_numerical_jacobian(fn, inputs, inputs, eps) 168 | # ------------------- 169 | for a in analytical: 170 | an = a.numpy() 171 | for n in numerical: 172 | nn = n.numpy() 173 | 174 | diff = [] 175 | for a, n in zip(analytical, numerical): 176 | dif = (a - n).abs().numpy() 177 | diff.append(np.max(dif)) 178 | diff_max = max(diff) 179 | # print(diff_max) 180 | 181 | for a, n in zip(analytical, numerical): 182 | if not ((a - n).abs() <= (atol + rtol * n.abs())).all(): 183 | print a.sum(),n.sum() 184 | print('1111') 185 | return False 186 | # -------------------------- 187 | # different two times 188 | if not reentrant: 189 | print('not same for 2') 190 | return False 191 | 192 | if not correct_grad_sizes: 193 | print('not same size') 194 | return False 195 | 196 | # check if the backward multiplies by grad_output 197 | zero_gradients(inputs) 198 | output = _as_tuple(func(*inputs)) 199 | torch.autograd.backward(output, [o.data.new(o.size()).zero_() for o in output]) 200 | var_inputs = list(filter(lambda i: isinstance(i, Variable), inputs)) 201 | if not var_inputs: 202 | raise RuntimeError("no Variables found in input") 203 | for i in var_inputs: 204 | if i.grad is None: 205 | continue 206 | if not i.grad.data.eq(0).all(): 207 | print('not all zero') 208 | return False 209 | 210 | return True 211 | 212 | 213 | def gradgradcheck(func, inputs, grad_outputs, eps=1e-6, atol=1e-5, rtol=1e-3): 214 | """Check gradients of gradients computed via small finite differences 215 | against analytical gradients 216 | This function checks that backpropagating through the gradients computed 217 | to the given grad_outputs are correct. 218 | 219 | The check between numerical and analytical has the same behaviour as 220 | numpy.allclose https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html 221 | meaning it check that 222 | absolute(a - n) <= (atol + rtol * absolute(n)) 223 | is true for all elements of analytical gradient a and numerical gradient n. 224 | 225 | Args: 226 | func: Python function that takes Variable inputs and returns 227 | a tuple of Variables 228 | inputs: tuple of Variables 229 | grad_outputs: tuple of Variables 230 | eps: perturbation for finite differences 231 | atol: absolute tolerance 232 | rtol: relative tolerance 233 | 234 | Returns: 235 | True if all differences satisfy allclose condition 236 | """ 237 | 238 | def new_func(*input_args): 239 | input_args = input_args[:-len(grad_outputs)] 240 | outputs = func(*input_args) 241 | outputs = _as_tuple(outputs) 242 | input_args = tuple(x for x in input_args if isinstance(x, Variable) and x.requires_grad) 243 | grad_inputs = torch.autograd.grad(outputs, input_args, grad_outputs) 244 | return grad_inputs 245 | 246 | return gradcheck(new_func, inputs + grad_outputs, eps, atol, rtol) 247 | -------------------------------------------------------------------------------- /utils/html.py: -------------------------------------------------------------------------------- 1 | import dominate 2 | from dominate.tags import * 3 | import os 4 | 5 | 6 | class HTML: 7 | def __init__(self, web_dir, title, refresh=0): 8 | self.title = title 9 | self.web_dir = web_dir 10 | self.img_dir = os.path.join(self.web_dir, 'images') 11 | if not os.path.exists(self.web_dir): 12 | os.makedirs(self.web_dir) 13 | if not os.path.exists(self.img_dir): 14 | os.makedirs(self.img_dir) 15 | 16 | self.doc = dominate.document(title=title) 17 | if refresh > 0: 18 | with self.doc.head: 19 | meta(http_equiv="refresh", content=str(refresh)) 20 | 21 | def get_image_dir(self): 22 | return self.img_dir 23 | 24 | def add_header(self, str): 25 | with self.doc: 26 | h3(str) 27 | 28 | def add_table(self, border=1): 29 | self.t = table(border=border, style="table-layout: fixed;") 30 | self.doc.add(self.t) 31 | 32 | def add_images(self, ims, txts, links, width=512): 33 | self.add_table() 34 | with self.t: 35 | with tr(): 36 | for im, txt, link in zip(ims, txts, links): 37 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 38 | with p(): 39 | with a(href=os.path.join('images', link)): 40 | img(style="width:%dpx" % (width), src=os.path.join('images', im)) 41 | br() 42 | p(txt) 43 | 44 | def save(self): 45 | html_file = '%s/index.html' % self.web_dir 46 | f = open(html_file, 'wt') 47 | f.write(self.doc.render()) 48 | f.close() 49 | 50 | 51 | if __name__ == '__main__': 52 | html = HTML('web/', 'test_html') 53 | html.add_header('hello world') 54 | 55 | ims = [] 56 | txts = [] 57 | links = [] 58 | for n in range(4): 59 | ims.append('image_%d.jpg' % n) 60 | txts.append('text_%d' % n) 61 | links.append('image_%d.jpg' % n) 62 | html.add_images(ims, txts, links) 63 | html.save() 64 | -------------------------------------------------------------------------------- /utils/util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | import numpy as np 6 | import os 7 | 8 | ####evaluation metrics 9 | 10 | def _fast_hist(label_true, label_pred, n_class): 11 | mask = (label_true >= 0) & (label_true < n_class) 12 | hist = np.bincount( 13 | n_class * label_true[mask].astype(int) + 14 | label_pred[mask], minlength=n_class ** 2).reshape(n_class, n_class) 15 | return hist 16 | 17 | 18 | def label_accuracy_score(label_trues, label_preds, n_class, returniu = False): 19 | """Returns accuracy score evaluation result. 20 | - overall accuracy 21 | - mean accuracy 22 | - mean IU 23 | - fwavacc 24 | """ 25 | hist = np.zeros((n_class, n_class)) 26 | for lt, lp in zip(label_trues, label_preds): 27 | hist += _fast_hist(lt.flatten(), lp.flatten(), n_class) 28 | acc = np.diag(hist).sum() / hist.sum() 29 | acc_cls = np.diag(hist) / hist.sum(axis=1) 30 | acc_cls = np.nanmean(acc_cls) 31 | iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) 32 | mean_iu = np.nanmean(iu) 33 | freq = hist.sum(axis=1) / hist.sum() 34 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() 35 | if returniu: 36 | return acc, acc_cls, mean_iu, fwavacc, iu[freq > 0] 37 | else: 38 | return acc, acc_cls, mean_iu, fwavacc 39 | 40 | ############################################################################### 41 | # Code from 42 | # https://github.com/ycszen/pytorch-seg/blob/master/transform.py 43 | # Modified so it complies with the Citscape label map colors 44 | ############################################################################### 45 | def uint82bin(n, count=8): 46 | """returns the binary of integer n, count refers to amount of bits""" 47 | return ''.join([str((n >> y) & 1) for y in range(count-1, -1, -1)]) 48 | 49 | def labelcolormap(N): 50 | if N == 35: # cityscape 51 | cmap = np.array([( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), (111, 74, 0), ( 81, 0, 81), 52 | (128, 64,128), (244, 35,232), (250,170,160), (230,150,140), ( 70, 70, 70), (102,102,156), (190,153,153), 53 | (180,165,180), (150,100,100), (150,120, 90), (153,153,153), (153,153,153), (250,170, 30), (220,220, 0), 54 | (107,142, 35), (152,251,152), ( 70,130,180), (220, 20, 60), (255, 0, 0), ( 0, 0,142), ( 0, 0, 70), 55 | ( 0, 60,100), ( 0, 0, 90), ( 0, 0,110), ( 0, 80,100), ( 0, 0,230), (119, 11, 32), ( 0, 0,142)], 56 | dtype=np.uint8) 57 | else: 58 | cmap = np.zeros((N, 3), dtype=np.uint8) 59 | for i in range(N): 60 | r, g, b = 0, 0, 0 61 | id = i 62 | for j in range(7): 63 | str_id = uint82bin(id) 64 | r = r ^ (np.uint8(str_id[-1]) << (7-j)) 65 | g = g ^ (np.uint8(str_id[-2]) << (7-j)) 66 | b = b ^ (np.uint8(str_id[-3]) << (7-j)) 67 | id = id >> 3 68 | cmap[i, 0] = r 69 | cmap[i, 1] = g 70 | cmap[i, 2] = b 71 | return cmap 72 | 73 | class Colorize(object): 74 | def __init__(self, n=35): 75 | n = 256 76 | self.cmap = labelcolormap(n) 77 | self.cmap = torch.from_numpy(self.cmap[:n]) 78 | 79 | def __call__(self, gray_image): 80 | size = gray_image.size() 81 | color_image = torch.ByteTensor(3, size[1], size[2]).fill_(0) 82 | 83 | for label in range(0, len(self.cmap)): 84 | mask = (label == gray_image[0]).cpu() 85 | color_image[0][mask] = self.cmap[label][0] 86 | color_image[1][mask] = self.cmap[label][1] 87 | color_image[2][mask] = self.cmap[label][2] 88 | 89 | return color_image 90 | # Converts a Tensor into a Numpy array 91 | # |imtype|: the desired type of the converted numpy array 92 | def tensor2im(image_tensor, imtype=np.uint8, inputmode=''): 93 | if isinstance(image_tensor, list): 94 | image_numpy = [] 95 | for i in range(len(image_tensor)): 96 | image_numpy.append(tensor2im(image_tensor[i], imtype, inputmode)) 97 | return image_numpy 98 | image_numpy = image_tensor.cpu().float().numpy() 99 | if inputmode=='div255-mean': 100 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 101 | elif inputmode=='divstd-mean': 102 | i_max = np.max(image_numpy) 103 | i_min = np.min(image_numpy) 104 | image_numpy = np.transpose((image_numpy-i_min)/(i_max-i_min) * 255.0, (1, 2, 0)) 105 | elif inputmode=='bgr-mean': 106 | image_numpy = np.transpose(image_numpy, (1, 2, 0))[:,:,::-1] + np.asarray([122.675,116.669,104.008]) 107 | # print(image_numpy.max(),image_numpy.min()) 108 | else: 109 | # print('depth') 110 | image_numpy = np.transpose(image_numpy, (1, 2, 0))[:,:,::-1] * 255.0 111 | # image_numpy = np.clip(image_numpy, 0, 255) 112 | if image_numpy.shape[2] == 1: 113 | image_numpy = image_numpy[:,:,0] 114 | return image_numpy.astype(imtype) 115 | 116 | # Converts a one-hot tensor into a colorful label map 117 | colormap = Colorize(255) 118 | def tensor2label(label_tensor, n_label, imtype=np.uint8, colorize = True): 119 | if n_label == 0: 120 | return tensor2im(label_tensor, imtype) 121 | label_tensor = label_tensor.cpu().float() 122 | if label_tensor.size()[0] > 1: 123 | label_tensor = label_tensor.max(0, keepdim=True)[1] 124 | if colorize: 125 | label_tensor = colormap(label_tensor) 126 | label_numpy = np.transpose(label_tensor.numpy(), (1, 2, 0)) 127 | else: 128 | label_numpy = np.squeeze(label_tensor.numpy()) 129 | # print (np.unique(label_numpy.astype(imtype))) 130 | return label_numpy.astype(imtype) 131 | 132 | def save_image(image_numpy, image_path, imagesize = None): 133 | image_pil = Image.fromarray(image_numpy) 134 | if imagesize is not None: 135 | img_w, img_h = imagesize 136 | image_pil = image_pil.resize((img_w, img_h), Image.NEAREST) 137 | # print(np.unique(np.asarray(image_pil))) 138 | image_pil.save(image_path) 139 | # if len(image_numpy.shape)==2: 140 | # image_pil = Image.open(image_path) 141 | # print(image_path,np.unique(np.asarray(image_pil))) 142 | 143 | def mkdirs(paths): 144 | if isinstance(paths, list) and not isinstance(paths, str): 145 | for path in paths: 146 | mkdir(path) 147 | else: 148 | mkdir(paths) 149 | 150 | def mkdir(path): 151 | if not os.path.exists(path): 152 | os.makedirs(path) 153 | -------------------------------------------------------------------------------- /utils/visualizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import ntpath 4 | import time 5 | from . import util 6 | from . import html 7 | import scipy.misc 8 | try: 9 | from StringIO import StringIO # Python 2.7 10 | except ImportError: 11 | from io import BytesIO # Python 3.x 12 | 13 | class Visualizer(): 14 | def __init__(self, opt): 15 | # self.opt = opt 16 | self.tf_log = opt.tf_log 17 | self.use_html = opt.isTrain and not opt.no_html 18 | self.win_size = opt.display_winsize 19 | self.name = opt.name 20 | if self.tf_log: 21 | import tensorflow as tf 22 | self.tf = tf 23 | self.log_dir = os.path.join(opt.checkpoints_dir, opt.name, 'logs') 24 | self.writer = tf.summary.FileWriter(self.log_dir) 25 | 26 | if self.use_html: 27 | self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web') 28 | self.img_dir = os.path.join(self.web_dir, 'images') 29 | print('create web directory %s...' % self.web_dir) 30 | util.mkdirs([self.web_dir, self.img_dir]) 31 | 32 | # |visuals|: dictionary of images to display or save 33 | def display_current_results(self, visuals, epoch, step): 34 | if self.tf_log: # show images in tensorboard output 35 | img_summaries = [] 36 | for label, image_numpy in visuals.items(): 37 | # Write the image to a string 38 | try: 39 | s = StringIO() 40 | except: 41 | s = BytesIO() 42 | scipy.misc.toimage(image_numpy).save(s, format="jpeg") 43 | # Create an Image object 44 | img_sum = self.tf.Summary.Image(encoded_image_string=s.getvalue(), height=image_numpy.shape[0], width=image_numpy.shape[1]) 45 | # Create a Summary value 46 | img_summaries.append(self.tf.Summary.Value(tag=label, image=img_sum)) 47 | 48 | # Create and write Summary 49 | summary = self.tf.Summary(value=img_summaries) 50 | self.writer.add_summary(summary, step) 51 | 52 | if self.use_html: # save images to a html file 53 | for label, image_numpy in visuals.items(): 54 | if isinstance(image_numpy, list): 55 | for i in range(len(image_numpy)): 56 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s_%d.jpg' % (epoch, label, i)) 57 | util.save_image(image_numpy[i], img_path) 58 | else: 59 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.jpg' % (epoch, label)) 60 | util.save_image(image_numpy, img_path) 61 | 62 | # update website 63 | webpage = html.HTML(self.web_dir, self.name, refresh=5) 64 | for n in range(epoch, 0, -1): 65 | webpage.add_header('epoch [%d]' % n) 66 | ims = [] 67 | txts = [] 68 | links = [] 69 | 70 | for label, image_numpy in visuals.items(): 71 | if isinstance(image_numpy, list): 72 | for i in range(len(image_numpy)): 73 | img_path = 'epoch%.3d_%s_%d.jpg' % (n, label, i) 74 | ims.append(img_path) 75 | txts.append(label+str(i)) 76 | links.append(img_path) 77 | else: 78 | img_path = 'epoch%.3d_%s.jpg' % (n, label) 79 | ims.append(img_path) 80 | txts.append(label) 81 | links.append(img_path) 82 | if len(ims) < 10: 83 | webpage.add_images(ims, txts, links, width=self.win_size) 84 | else: 85 | num = int(round(len(ims)/2.0)) 86 | webpage.add_images(ims[:num], txts[:num], links[:num], width=self.win_size) 87 | webpage.add_images(ims[num:], txts[num:], links[num:], width=self.win_size) 88 | webpage.save() 89 | 90 | # errors: dictionary of error labels and values 91 | def plot_current_errors(self, errors, step): 92 | if self.tf_log: 93 | for tag, value in errors.items(): 94 | summary = self.tf.Summary(value=[self.tf.Summary.Value(tag=tag, simple_value=value)]) 95 | self.writer.add_summary(summary, step) 96 | 97 | # errors: same format as |errors| of plotCurrentErrors 98 | def print_current_errors(self, epoch, i, errors, t): 99 | message = '(epoch: %d, iters: %d, time: %.3f) ' % (epoch, i, t) 100 | for k, v in errors.items(): 101 | if v != 0: 102 | message += '%s: %.3f ' % (k, v) 103 | 104 | print(message) 105 | with open(self.log_name, "a") as log_file: 106 | log_file.write('%s\n' % message) 107 | 108 | # save image to the disk 109 | def save_images(self, webpage, visuals, image_path): 110 | image_dir = webpage.get_image_dir() 111 | short_path = ntpath.basename(image_path[0]) 112 | name = os.path.splitext(short_path)[0] 113 | 114 | webpage.add_header(name) 115 | ims = [] 116 | txts = [] 117 | links = [] 118 | 119 | for label, image_numpy in visuals.items(): 120 | image_name = '%s_%s.jpg' % (name, label) 121 | save_path = os.path.join(image_dir, image_name) 122 | util.save_image(image_numpy, save_path) 123 | 124 | ims.append(image_name) 125 | txts.append(label) 126 | links.append(image_name) 127 | webpage.add_images(ims, txts, links, width=self.win_size) 128 | --------------------------------------------------------------------------------