├── README.md
├── __init__.py
├── caffe_pb2.py
├── data
│   └── download.txt
├── data_utils
│   └── dataset.py
├── demo.py
├── evaluate.py
├── landslide.kml
├── main_withargu.py
├── models
│   ├── deeplab3plus.py
│   ├── deeplabv32.py
│   ├── fcn.py
│   ├── gcn.py
│   ├── padded_unet.py
│   ├── pspnet.py
│   ├── resnet
│   │   ├── __init__.py
│   │   ├── resnet_backbone.py
│   │   └── resnet_models.py
│   ├── tools
│   │   ├── ModuleHelper.py
│   │   └── __init__.py
│   ├── unet.py
│   └── vgg
│       ├── __init__.py
│       ├── vgg_backbone.py
│       └── vgg_models.py
├── plot.py
├── pretrained_models
│   └── download.txt
├── requirements.txt
└── utils.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Semantic-segmentation-methods-for-landslide-detection
2 | Semantic segmentation methods for landslide detection.
3 | 
4 | ## Project Structure
5 | The principal files in this project are described as follows:
6 | * ./landslide.kml : the construction of landslide imagery datasets is of great significance to the monitoring and analysis of landslides. To date, open landslide imagery datasets remain so scarce that research in this field develops slowly. We therefore provide an open labelled remote sensing landslide dataset in which landslides are delineated based on field investigation and remote sensing imagery interpretation, supported by the China Geological Survey (CGS).
7 | * ./data/ : processed data are stored in this folder, and the results of experiments will be generated here. Download instructions for the dataset are given in "download.txt".
8 | * ./data_utils/
9 |   * dataset.py : code for generating the datasets for our models
10 | * ./models/
11 |   * resnet/ : code for ResNet models
12 |   * tools/ : code for some model tools
13 |   * vgg/ : code for VGG models
14 |   * deeplab3plus.py : code for the DeepLab v3+ model
15 |   * deeplabv32.py : code for the DeepLab v3 model
16 |   * fcn.py : code for the FCN models
17 |   * gcn.py : code for the GCN model
18 |   * padded_unet.py : code for the UNet model
19 |   * pspnet.py : code for the PSPNet model
20 | * ./pretrained_models/ : pretrained models (e.g. 3x3resnet50-imagenet.pth). Download instructions for the pretrained models are given in "download.txt".
21 | * ./caffe_pb2.py : code generated by the protocol buffer compiler for building PSPNet. Source: caffe.proto
22 | * ./demo.py : code for the demo
23 | * ./evaluate.py : code for evaluating models
24 | * ./main_withargu.py : the main function for training and testing models
25 | * ./plot.py : code for the visualization of experimental results
26 | * ./utils.py : useful code required across the project (e.g. dataloader, model save, model train, model test, metrics calculation)
27 | 
28 | ## Principal Environmental Dependencies
29 | * PyTorch 1.5.0
30 | * tqdm
31 | * numpy
32 | * pandas
33 | * torchvision
34 | * pillow
35 | * pydensecrf
36 | * albumentations
37 | * opencv
38 | 
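## Usage
For reference, a typical invocation of the pipeline (flag names are taken from the argparse setup in main_withargu.py; the values below are illustrative, not prescribed defaults):

```
python main_withargu.py --mode all --model unet --dataset npz --max_epochs 70 --lr 1e-4 --bs 2 --cuda 0 --class_weights 1.0 2.07 --test all_test
```

With `--mode all`, the chosen model is trained, then evaluated on each test split; metrics are written to data/evals/ and predictions to data/results/.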
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterybye/Semantic-segmentation-methods-for-landslide-detection/462fb04bab105b8dba5b5afb1b7f28395bf0d59f/__init__.py
--------------------------------------------------------------------------------
/data/download.txt:
--------------------------------------------------------------------------------
1 | Data can be downloaded from Google Drive:
2 | https://drive.google.com/drive/folders/1QNMtXV63W29R2dcFq_X6AmpBvYlDb96q?usp=sharing
3 | 
4 | Please put the folder named "npz" under data/
--------------------------------------------------------------------------------
/data_utils/dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import random
4 | import torch
5 | from PIL import Image
6 | from torch.utils.data import Dataset
7 | from torchvision.datasets import ImageFolder
8 | import cv2
9 | 
10 | 
11 | class ImageFolderWithPaths(ImageFolder):
12 | 
13 |     def __getitem__(self, index):
14 |         original_tuple = super(ImageFolderWithPaths, self).__getitem__(index)
15 |         path = self.imgs[index][0]
16 |         tuple_with_path = ((path,) + original_tuple)
17 |         return tuple_with_path
18 | 
19 | 
20 | class SegmentationSet2(Dataset):
21 |     def __init__(self,
22 |                  images,
23 |                  annotations,
24 |                  transform=None):
25 |         self.images = images
26 |         self.annotations = annotations
27 |         self.transform = transform
28 | 
29 |     def __getitem__(self, idx: int):
30 |         x, y = self.images[idx], self.annotations[idx]
31 |         x = np.load(x)['arr_0']
32 |         y = np.load(y)['arr_0'][:,:,0]
33 |         y[y==255] = 1
34 | 
35 |         img = Image.fromarray(y.astype('uint8')).convert('RGB')
36 |         bbox = img.getbbox()  # assumes a non-empty mask; getbbox() returns None for all-background labels
37 |         height, width = y.shape[:2]
38 |         k1 = int(random.random() * bbox[0])
39 |         k2 = int(random.random() * bbox[1])
40 |         k3 = int(random.random() * (width - bbox[2]) + bbox[2])
41 |         k4 = int(random.random() * (height - bbox[3]) + bbox[3])
42 |         y = y[k2:k4, k1:k3]
43 |         x = x[k2:k4, k1:k3]
44 | 
45 |         if self.transform is not None:
46 |             augmented = self.transform(image=x, mask=y)
47 |             x, y = augmented['image'], augmented['mask']
48 | 
49 |         # x = x.filter(ImageFilter.EDGE_ENHANCE_MORE)
50 | 
51 |         return os.path.split(self.images[idx])[-1], \
52 |                torch.tensor(np.array(x).astype(np.float32).transpose([2, 0, 1])), \
53 |                torch.tensor(y.astype(np.int32)).long()
54 | 
55 |     def __len__(self):
56 |         return len(self.images)
57 | 
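# Example wiring (an illustrative sketch, not part of the original code):
# SegmentationSet2 expects parallel lists of image/annotation .npz paths and an
# optional albumentations transform. The glob layout below is an assumption.
#
#   import glob
#   import albumentations as A
#   from torch.utils.data import DataLoader
#
#   images = sorted(glob.glob('data/npz/train/images/*.npz'))
#   annotations = sorted(glob.glob('data/npz/train/annotations/*.npz'))
#   transform = A.Compose([A.Resize(420, 420), A.HorizontalFlip(p=0.5)])
#   train_set = SegmentationSet2(images, annotations, transform=transform)
#   loader = DataLoader(train_set, batch_size=2, shuffle=True)
#   # each batch: (filenames, float images [B, 3, 420, 420], long masks [B, 420, 420])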
58 | class TestSet2(Dataset):
59 |     def __init__(self,
60 |                  images,
61 |                  annotations,
62 |                  output_size=(420,420)):
63 |         self.images = images
64 |         self.annotations = annotations
65 |         self.output_size = output_size
66 | 
67 |     def __getitem__(self, idx: int):
68 |         oriidx = idx
69 |         idx = idx // 5
70 |         x, y = self.images[idx], self.annotations[idx]
71 |         x = np.load(x)['arr_0']
72 |         y = np.load(y)['arr_0'][:,:,0]
73 |         y[y==255] = 1
74 | 
75 |         img = Image.fromarray(y.astype('uint8')).convert('RGB')
76 |         bbox = img.getbbox()
77 |         height, width = y.shape[:2]
78 |         # bbox = [left, up, right, down]
79 |         if oriidx % 5 == 0:
80 |             regions = [[0,0,bbox[0],bbox[1]], [bbox[0],0,bbox[2],bbox[1]], [bbox[2],0,width,bbox[1]],
81 |                        [0,bbox[1],bbox[0],bbox[3]], [bbox[2],bbox[1],width,bbox[3]],
82 |                        [0,bbox[3],bbox[0],height], [bbox[0],bbox[3],bbox[2],height], [bbox[2],bbox[3],width,height],]
83 |             ks = []
84 |             for region in regions:
85 |                 if region[2]-region[0]>self.output_size[1] and region[3]-region[1]>self.output_size[0]:
86 |                     ks.append(self.getbox(region))
87 |             k = ks[int(random.random()*len(ks))]
88 |         elif oriidx % 5 == 1:
89 |             k = [bbox[0],bbox[1],min(width, bbox[0]+height-bbox[1]),min(height, bbox[1]+width-bbox[0])]
90 |         elif oriidx % 5 == 2:
91 |             k = [max(0, bbox[2]-height+bbox[1]),bbox[1],bbox[2],min(height, bbox[1]+bbox[2])]
92 |         elif oriidx % 5 == 3:
93 |             k = [bbox[0], max(0, bbox[3]-width+bbox[0]), min(width, bbox[0]+bbox[3]), bbox[3]]
94 |         elif oriidx % 5 == 4:
95 |             k = [max(0, bbox[2]-bbox[3]),max(0,bbox[3]-bbox[2]),bbox[2],bbox[3]]
96 |         k = [int(t) for t in k]
97 |         y = y[k[1]:k[3], k[0]:k[2]]
98 |         x = x[k[1]:k[3], k[0]:k[2]]
99 |         y = cv2.resize(y, self.output_size)
100 |         x = cv2.resize(x, self.output_size)
101 | 
102 |         return os.path.split(self.images[idx])[-1], \
103 |                torch.tensor(np.array(x).astype(np.float32).transpose([2, 0, 1])), \
104 |                torch.tensor(y.astype(np.int32)).long()
105 | 
106 |     def __len__(self):
107 |         return len(self.images) * 5
108 | 
109 |     def getbox(self, board):
110 |         height, width = self.output_size
111 |         x = random.random()*(board[2]-width-board[0])+board[0]
112 |         y = random.random()*(board[3]-height-board[1])+board[1]
113 |         return [x,y,x+min(board[2]-x,board[3]-y),y+min(board[2]-x,board[3]-y)]
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import numpy as np
3 | import torch
4 | 
5 | from models.deeplabv32 import DeepLabV3
6 | from evaluate import modeleval
7 | from plot import plot
8 | from utils import test_model, get_dataloaders
9 | 
10 | num_classes = 2
11 | model_name = 'deeplabv3_2_0.001_1.0-2.070'
12 | model_ft = DeepLabV3(num_classes)
13 | on_device = torch.device("cpu")
14 | 
15 | # test model
16 | checkpoint = torch.load(f'data/models/{model_name}.pth', map_location=on_device)
17 | model_ft.load_state_dict(checkpoint['model_state_dict'])
18 | test_List = {'jinsha_test', }
19 | 
20 | f = open(f'data/evals/{model_name}.txt', 'x')
21 | f.write(model_name + '\n')
22 | for var in test_List:
23 |     save_test_model = model_name + '-' + var
24 |     data_loaders = get_dataloaders(batch_size=2, test=var)
25 |     inputs, labels, outputs = test_model(model_ft, data_loaders['test'], device=on_device)
26 |     np.savez(f'data/results/{save_test_model}.npz', inputs=inputs, labels=labels, outputs=outputs)
27 | 
28 |     # evaluate the model
29 |     modeleval(save_test_model, var, f)
30 |     # plot test results
31 |     plot(save_test_model)
32 | f.close()
33 | sys.exit(0)
--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def weird_division(x, y):
4 |     return 0 if y == 0 else x / y
5 | 
6 | def modeleval(save_test_model, var, f):
7 |     results = np.load(f'data/results/{save_test_model}.npz')
8 |     inputs, labels, outputs = results['inputs'], results['labels'], results['outputs']
9 |     del results
10 | 
11 |     n_samples, n_classes, w, h = outputs.shape
12 |     predictions = outputs.argmax(axis=1)
13 | 
14 |     r_count = 0
15 |     PA, mPA = 0, 0
16 |     mIOU, background_iou, landslide_iou = 0, 0, 0
17 |     pre, rec, f1 = 0, 0, 0
18 |     for label, pred in zip(labels, predictions):
19 |         PA += (pred == label).sum() / (w * h)  # pixel accuracy
20 |         mPA += weird_division(((pred == 0) & (label == 0)).sum(), ((label == 0).sum())) + weird_division(((pred == 1) & (label == 1)).sum(), ((label == 1).sum()))
21 | 
22 |         background_iou += weird_division(((pred == 0) & (label == 0)).sum(), ((pred == 0) | (label == 0)).sum())
23 |         landslide_iou += weird_division(((pred == 1) & (label == 1)).sum(), ((pred == 1) | (label == 1)).sum())
24 | 
25 |         pre += weird_division(((pred == 1) & (label == 1)).sum(), (pred == 1).sum())
26 |         rec += weird_division(((pred == 1) & (label == 1)).sum(), (label == 1).sum())
27 | 
28 |         if weird_division(((pred == 1) & (label == 1)).sum(), (label == 1).sum()) >= 0.3:
29 |             r_count += 1
30 | 
31 | 
32 |     PA /= n_samples
33 |     mPA /= (n_samples * 2)
34 |     background_iou /= n_samples
35 |     landslide_iou /= n_samples
36 |     mIOU = (background_iou + landslide_iou) / 2
37 |     pre /= n_samples
38 |     rec /= n_samples
39 |     f1 = weird_division(2 * pre * rec, pre + rec)
40 | 
41 |     acc = r_count / n_samples
42 | 
43 |     f.write('**********' + var + '**********' + '\n')
44 |     f.write(f'PA: {PA:.4f}, mPA: {mPA:.4f}' + '\n')
45 |     f.write(f'mIOU: {mIOU:.4f}, background_iou: {background_iou:.4f}, landslide_iou: {landslide_iou:.4f}' + '\n')
46 |     f.write(f'pre: {pre:.4f}, rec: {rec:.4f}, f1: {f1:.4f}' + '\n')
47 |     f.write(f'acc: {acc:.4f}' + '\n')
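# Worked example of the per-image metrics above on a toy 2x2 mask (illustrative):
#
#   import numpy as np
#   label = np.array([[0, 1], [1, 1]])
#   pred  = np.array([[0, 1], [0, 1]])
#   tp = ((pred == 1) & (label == 1)).sum()                              # 2 true positives
#   precision = weird_division(tp, (pred == 1).sum())                    # 2/2 = 1.0
#   recall    = weird_division(tp, (label == 1).sum())                   # 2/3 ~= 0.667
#   iou       = weird_division(tp, ((pred == 1) | (label == 1)).sum())   # 2/3 ~= 0.667
#
# modeleval averages these per-image values over the test set and counts an
# image towards r_count (detection accuracy) when its recall reaches 0.3.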
--------------------------------------------------------------------------------
/main_withargu.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | 
4 | import numpy as np
5 | import torch
6 | from torch import nn
7 | from torch import optim
8 | from torch.optim.lr_scheduler import ReduceLROnPlateau
9 | 
10 | from evaluate import modeleval
11 | from models.deeplab3plus import DeepLabv3_plus
12 | from models.deeplabv32 import DeepLabV3
13 | from models.gcn import GCN
14 | from models.padded_unet import UNet
15 | from models.pspnet import pspnet
16 | from models.fcn import fcn8s
17 | from plot import plot
18 | from utils import train_model, test_model, model_save, get_dataloaders
19 | 
20 | 
21 | def get_model(_args):
22 |     if _args.model == 'unet':
23 |         return UNet(_args.channel_in, _args.channel_out)
24 |     elif _args.model == 'fcn':
25 |         return fcn8s(n_classes=_args.num_classes)
26 |     elif _args.model == 'pspnet':
27 |         return pspnet(version='landslide')
28 |     elif _args.model == 'deeplabv3':
29 |         return DeepLabV3(_args.num_classes)
30 |     elif _args.model == 'deeplabv3plus':
31 |         return DeepLabv3_plus(n_classes=_args.num_classes)
32 |     elif _args.model == 'gcn':
33 |         return GCN(_args.num_classes)
34 | 
35 | def do_train(_model_ft, _model_name, _learning_rate, _class_weights, _data_loaders, _n_epochs, _on_device):
36 |     params_to_update = []
37 |     for name, param in _model_ft.named_parameters():
38 |         if param.requires_grad:
39 |             params_to_update.append(param)
40 | 
41 |     # Observe that all parameters are being optimized
42 |     optimizer_ft = optim.Adam(params_to_update, lr=_learning_rate)
43 |     # optimizer_ft = optim.SGD(params_to_update, lr=learning_rate, momentum=momentum)
44 | 
45 |     scheduler = ReduceLROnPlateau(optimizer_ft, mode='min', factor=.5, patience=5, verbose=True)
46 |     # scheduler = None
47 | 
48 |     criterion = nn.CrossEntropyLoss(_class_weights.to(_on_device))
49 |     # criterion = ECBCrossEntropyLoss(_class_weights.to(_on_device))
50 |     # criterion = JointLoss(_class_weights.to(_on_device))
51 |     # criterion = SoftDiceLoss(_class_weights.to(_on_device))
52 | 
53 |     _model_ft, hist = train_model(_model_ft, _data_loaders, criterion, optimizer_ft, 3, scheduler=scheduler,
54 |                                   num_epochs=_n_epochs, device=_on_device)
55 | 
56 |     model_save(f'data/models/{_model_name}.pth', _n_epochs, hist[-1], _model_ft.state_dict(), optimizer_ft.state_dict())
57 |     return _model_ft
58 | 
59 | 
60 | if __name__ == '__main__':
61 | 
62 |     parser = argparse.ArgumentParser()
63 | 
64 |     parser.add_argument('--mode', default='all', type=str,
65 |                         dest='mode', help='train, test, plot, eval, or all')
66 |     parser.add_argument('--dataset', default='npz', type=str,
67 |                         dest='dataset', help='npz or others')
68 |     parser.add_argument('--model', default='unet', type=str,
69 |                         dest='model', help='which network to use')
70 |     parser.add_argument('--max_epochs', default=70, type=int,
71 |                         dest='max_epochs', help='max training epochs')
72 |     parser.add_argument('--channel_in', default=3, type=int,
73 |                         dest='channel_in', help='number of input channels')
74 |     parser.add_argument('--channel_out', default=2, type=int,
75 |                         dest='channel_out', help='number of output channels')
76 |     parser.add_argument('--num_classes', default=2, type=int,
77 |                         dest='num_classes', help='number of classes')
78 |     parser.add_argument('--lr', default=1e-4, type=float,
79 |                         dest='learning_rate', help='learning rate')
80 | 
81 |     # LOC 29477725, 1352863, 568612 -> 0.04, 1., 2.38
82 |     # RANDOM 27124146, 2130826, 1615028 -> 0.08, 1., 1.32
83 |     # RANDOM-POSITIVE 9901762, 2132390, 313848 -> 0.21, 1., 6.79
84 |     parser.add_argument('--class_weights', default=None, type=float, nargs=2,
85 |                         dest='class_weights', help='class weights')
86 |     parser.add_argument('--cuda', default=1, type=int,
87 |                         dest='cuda', help='GPU number, in [0, 1, 2, 3]')
88 |     parser.add_argument('--bs', default=2, type=int,
89 |                         dest='batch_size', help='batch size')
90 |     parser.add_argument('--set_name', default='random', type=str,
91 |                         dest='set_name', help='data set name')
92 |     parser.add_argument('--test', default='all_test', type=str,
93 |                         dest='test', help='test data')
94 | 
95 |     args = parser.parse_args()
96 |     if args.mode is None or args.mode not in ['train', 'test', 'plot', 'eval', 'all']:
97 |         sys.exit(0)
98 | 
99 |     n_epochs = args.max_epochs
100 |     learning_rate = args.learning_rate
101 |     momentum = .9
102 |     class_weights = torch.tensor(args.class_weights if args.class_weights is not None else [1., 1.], dtype=torch.float32)  # fall back to equal weights when --class_weights is omitted
103 | 
104 |     model_ft = get_model(args)
105 |     model_name = args.model + '_' + str(args.batch_size) + f'_{learning_rate}_' + '-'.join(
106 |         [str(round(float(i) + 0.001, 2)) for i in class_weights]) \
107 |         + str(n_epochs)
108 | 
109 |     '''
110 |     model_name = args.model + f'_{learning_rate}_' + '-'.join(
111 |         [str(round(float(i) + 0.001, 2)) for i in class_weights]) \
112 |         + str(n_epochs)
113 |     '''
114 | 
115 |     on_device = torch.device(f"cuda:{args.cuda}" if torch.cuda.is_available() else "cpu")
116 |     model_ft = model_ft.to(on_device)
117 |     data_loaders = get_dataloaders(batch_size=args.batch_size, test=args.test)
118 | 
119 |     if args.mode == 'train':
120 |         do_train(model_ft, model_name, learning_rate, class_weights, data_loaders, n_epochs, on_device)
121 |         sys.exit(0)
122 |     elif args.mode == 'test':
123 |         checkpoint = torch.load(f'data/models/{model_name}.pth')
124 |         model_ft.load_state_dict(checkpoint['model_state_dict'])
125 | 
126 |         inputs, labels, outputs = test_model(model_ft, data_loaders['test'], device=on_device)
127 | 
128 |         np.savez(f'data/results/{model_name}.npz', inputs=inputs, labels=labels, outputs=outputs)
129 |         sys.exit(0)
130 |     elif args.mode == 'plot':
131 |         print(model_name)
132 |         plot(model_name)
133 |     elif args.mode == 'eval':
134 |         with open(f'data/evals/{model_name}.txt', 'x') as f: modeleval(model_name, args.test, f)  # modeleval expects (save_test_model, var, f)
135 |     elif args.mode == 'all':
136 |         # train model
137 |         model_ft = do_train(model_ft, model_name, learning_rate, class_weights, data_loaders, n_epochs, on_device)
138 | 
139 |         # test model
140 |         checkpoint = torch.load(f'data/models/{model_name}.pth')
141 |         model_ft.load_state_dict(checkpoint['model_state_dict'])
142 |         test_List = {'all_test', 'jinsha_test', 'other_test', 'jinsha_train', 'other_train', 'jinsha_val',
143 |                      'other_val'}
144 | 
145 |         f = open(f'data/evals/{model_name}.txt', 'x')
146 |         f.write(model_name + '\n')
147 |         for var in test_List:
148 |             save_test_model = model_name + '-' + var
149 |             data_loaders = get_dataloaders(batch_size=args.batch_size, test=var)
150 |             inputs, labels, outputs = test_model(model_ft, data_loaders['test'], device=on_device)
151 |             np.savez(f'data/results/{save_test_model}.npz', inputs=inputs, labels=labels, outputs=outputs)
152 | 
153 |             # evaluate the model
154 |             modeleval(save_test_model, var, f)
155 |             # plot test results
156 |             plot(save_test_model)
157 |         f.close()
158 |         sys.exit(0)
159 |     else:
160 |         print("no chosen mode, exit")
161 |         sys.exit(0)
--------------------------------------------------------------------------------
/models/deeplab3plus.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch.nn as nn
3 | import torch.utils.model_zoo as model_zoo
4 | 
5 | import torch
6 | import torch.nn.functional as F
7 | 
8 | from torch.nn.modules.batchnorm import _BatchNorm
9 | from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast
10 | 
11 | import queue
12 | import collections
13 | import threading
14 | 
15 | class FutureResult(object):
16 |     """A thread-safe future implementation. Used only as a one-to-one pipe."""
17 | 
18 |     def __init__(self):
19 |         self._result = None
20 |         self._lock = threading.Lock()
21 |         self._cond = threading.Condition(self._lock)
22 | 
23 |     def put(self, result):
24 |         with self._lock:
25 |             assert self._result is None, 'Previous result hasn\'t been fetched.'
26 |             self._result = result
27 |             self._cond.notify()
28 | 
29 |     def get(self):
30 |         with self._lock:
31 |             if self._result is None:
32 |                 self._cond.wait()
33 | 
34 |             res = self._result
35 |             self._result = None
36 |             return res
37 | 
38 | 
39 | _MasterRegistry = collections.namedtuple('MasterRegistry', ['result'])
40 | _SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result'])
41 | 
42 | class SlavePipe(_SlavePipeBase):
43 |     """Pipe for master-slave communication."""
44 | 
45 |     def run_slave(self, msg):
46 |         self.queue.put((self.identifier, msg))
47 |         ret = self.result.get()
48 |         self.queue.put(True)
49 |         return ret
50 | 
51 | class SyncMaster(object):
52 | 
53 |     def __init__(self, master_callback):
54 |         """
55 |         Args:
56 |             master_callback: a callback to be invoked after having collected messages from slave devices.
57 |         """
58 |         self._master_callback = master_callback
59 |         self._queue = queue.Queue()
60 |         self._registry = collections.OrderedDict()
61 |         self._activated = False
62 | 
63 |     def __getstate__(self):
64 |         return {'master_callback': self._master_callback}
65 | 
66 |     def __setstate__(self, state):
67 |         self.__init__(state['master_callback'])
68 | 
69 |     def register_slave(self, identifier):
70 |         """
71 |         Register a slave device.
72 |         Args:
73 |             identifier: an identifier, usually the device id.
74 |         Returns: a `SlavePipe` object which can be used to communicate with the master device.
75 |         """
76 |         if self._activated:
77 |             assert self._queue.empty(), 'Queue is not clean before next initialization.'
78 |             self._activated = False
79 |             self._registry.clear()
80 |         future = FutureResult()
81 |         self._registry[identifier] = _MasterRegistry(future)
82 |         return SlavePipe(identifier, self._queue, future)
83 | 
84 |     def run_master(self, master_msg):
85 |         """
86 |         Main entry for the master device in each forward pass.
87 |         Messages are first collected from each device (including the master device), and then
88 |         a callback is invoked to compute the message to be sent back to each device
89 |         (including the master device).
90 |         Args:
91 |             master_msg: the message that the master wants to send to itself. This will be placed as the first
92 |             message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example.
93 |         Returns: the message to be sent back to the master device.
94 |         """
95 |         self._activated = True
96 | 
97 |         intermediates = [(0, master_msg)]
98 |         for i in range(self.nr_slaves):
99 |             intermediates.append(self._queue.get())
100 | 
101 |         results = self._master_callback(intermediates)
102 |         assert results[0][0] == 0, 'The first result should belong to the master.'
103 | 
104 |         for i, res in results:
105 |             if i == 0:
106 |                 continue
107 |             self._registry[i].result.put(res)
108 | 
109 |         for i in range(self.nr_slaves):
110 |             assert self._queue.get() is True
111 | 
112 |         return results[0][1]
113 | 
114 |     @property
115 |     def nr_slaves(self):
116 |         return len(self._registry)
117 | 
118 | 
119 | def _sum_ft(tensor):
120 |     """sum over the first and last dimension"""
121 |     return tensor.sum(dim=0).sum(dim=-1)
122 | 
123 | 
124 | def _unsqueeze_ft(tensor):
125 |     """add new dimensions at the front and the tail"""
126 |     return tensor.unsqueeze(0).unsqueeze(-1)
127 | 
128 | 
129 | _ChildMessage = collections.namedtuple('_ChildMessage', ['sum', 'ssum', 'sum_size'])
130 | _MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std'])
131 | 
132 | class _SynchronizedBatchNorm(_BatchNorm):
133 |     def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True):
134 |         super(_SynchronizedBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine)
135 | 
136 |         self._sync_master = SyncMaster(self._data_parallel_master)
137 | 
138 |         self._is_parallel = False
139 |         self._parallel_id = None
140 |         self._slave_pipe = None
141 | 
142 |     def forward(self, input):
143 |         # If it is not parallel computation or is in evaluation mode, use PyTorch's implementation.
144 |         if not (self._is_parallel and self.training):
145 |             return F.batch_norm(
146 |                 input, self.running_mean, self.running_var, self.weight, self.bias,
147 |                 self.training, self.momentum, self.eps)
148 | 
149 |         # Resize the input to (B, C, -1).
150 |         input_shape = input.size()
151 |         input = input.view(input.size(0), self.num_features, -1)
152 | 
153 |         # Compute the sum and square-sum.
154 |         sum_size = input.size(0) * input.size(2)
155 |         input_sum = _sum_ft(input)
156 |         input_ssum = _sum_ft(input ** 2)
157 | 
158 |         # Reduce-and-broadcast the statistics.
159 |         if self._parallel_id == 0:
160 |             mean, inv_std = self._sync_master.run_master(_ChildMessage(input_sum, input_ssum, sum_size))
161 |         else:
162 |             mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(input_sum, input_ssum, sum_size))
163 | 
164 |         # Compute the output.
165 |         if self.affine:
166 |             # MJY:: Fuse the multiplication for speed.
167 | output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std * self.weight) + _unsqueeze_ft(self.bias) 168 | else: 169 | output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std) 170 | 171 | # Reshape it. 172 | return output.view(input_shape) 173 | 174 | def __data_parallel_replicate__(self, ctx, copy_id): 175 | self._is_parallel = True 176 | self._parallel_id = copy_id 177 | 178 | # parallel_id == 0 means master device. 179 | if self._parallel_id == 0: 180 | ctx.sync_master = self._sync_master 181 | else: 182 | self._slave_pipe = ctx.sync_master.register_slave(copy_id) 183 | 184 | def _data_parallel_master(self, intermediates): 185 | """Reduce the sum and square-sum, compute the statistics, and broadcast it.""" 186 | 187 | # Always using same "device order" makes the ReduceAdd operation faster. 188 | # Thanks to:: Tete Xiao (http://tetexiao.com/) 189 | intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device()) 190 | 191 | to_reduce = [i[1][:2] for i in intermediates] 192 | to_reduce = [j for i in to_reduce for j in i] # flatten 193 | target_gpus = [i[1].sum.get_device() for i in intermediates] 194 | 195 | sum_size = sum([i[1].sum_size for i in intermediates]) 196 | sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce) 197 | mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size) 198 | 199 | broadcasted = Broadcast.apply(target_gpus, mean, inv_std) 200 | 201 | outputs = [] 202 | for i, rec in enumerate(intermediates): 203 | outputs.append((rec[0], _MasterMessage(*broadcasted[i * 2:i * 2 + 2]))) 204 | 205 | return outputs 206 | 207 | def _compute_mean_std(self, sum_, ssum, size): 208 | """Compute the mean and standard-deviation with sum and square-sum. This method 209 | also maintains the moving average on the master device.""" 210 | assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.' 
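#         Note on the computation below: with sum_ = sum(x) and ssum = sum(x^2),
#         sumvar = ssum - sum_ * mean equals sum((x - mean)^2), the total squared
#         deviation. Dividing by (size - 1) applies Bessel's correction for the
#         running (unbiased) variance, while the biased variance of the current
#         batch is what the returned inv_std is built from.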
211 |         mean = sum_ / size
212 |         sumvar = ssum - sum_ * mean
213 |         unbias_var = sumvar / (size - 1)
214 |         bias_var = sumvar / size
215 | 
216 |         self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data
217 |         self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data
218 | 
219 |         return mean, bias_var.clamp(self.eps) ** -0.5
220 | 
221 | class SynchronizedBatchNorm2d(_SynchronizedBatchNorm):
222 |     def _check_input_dim(self, input):
223 |         if input.dim() != 4:
224 |             raise ValueError('expected 4D input (got {}D input)'
225 |                              .format(input.dim()))
226 |         super(SynchronizedBatchNorm2d, self)._check_input_dim(input)
227 | 
228 | BatchNorm2d = SynchronizedBatchNorm2d
229 | 
230 | class SeparableConv2d(nn.Module):
231 |     def __init__(self, inplanes, planes, kernel_size=3, stride=1, padding=0, dilation=1, bias=False):
232 |         super(SeparableConv2d, self).__init__()
233 | 
234 |         self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size, stride, padding, dilation,
235 |                                groups=inplanes, bias=bias)
236 |         self.pointwise = nn.Conv2d(inplanes, planes, 1, 1, 0, 1, 1, bias=bias)
237 | 
238 |     def forward(self, x):
239 |         x = self.conv1(x)
240 |         x = self.pointwise(x)
241 |         return x
242 | 
243 | 
244 | def fixed_padding(inputs, kernel_size, dilation):
245 |     kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1)
246 |     pad_total = kernel_size_effective - 1
247 |     pad_beg = pad_total // 2
248 |     pad_end = pad_total - pad_beg
249 |     padded_inputs = F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end))
250 |     return padded_inputs
251 | 
252 | 
253 | class SeparableConv2d_same(nn.Module):
254 |     def __init__(self, inplanes, planes, kernel_size=3, stride=1, dilation=1, bias=False):
255 |         super(SeparableConv2d_same, self).__init__()
256 | 
257 |         self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size, stride, 0, dilation,
258 |                                groups=inplanes, bias=bias)
259 |         self.pointwise = nn.Conv2d(inplanes, planes, 1, 1, 0, 1, 1, bias=bias)
260 | 
261 |     def forward(self, x):
262 |         x = fixed_padding(x, self.conv1.kernel_size[0], dilation=self.conv1.dilation[0])
263 |         x = self.conv1(x)
264 |         x = self.pointwise(x)
265 |         return x
266 | 
267 | 
268 | class Block(nn.Module):
269 |     def __init__(self, inplanes, planes, reps, stride=1, dilation=1, start_with_relu=True, grow_first=True, is_last=False):
270 |         super(Block, self).__init__()
271 | 
272 |         if planes != inplanes or stride != 1:
273 |             self.skip = nn.Conv2d(inplanes, planes, 1, stride=stride, bias=False)
274 |             self.skipbn = BatchNorm2d(planes)
275 |         else:
276 |             self.skip = None
277 | 
278 |         self.relu = nn.ReLU(inplace=True)
279 |         rep = []
280 | 
281 |         filters = inplanes
282 |         if grow_first:
283 |             rep.append(self.relu)
284 |             rep.append(SeparableConv2d_same(inplanes, planes, 3, stride=1, dilation=dilation))
285 |             rep.append(BatchNorm2d(planes))
286 |             filters = planes
287 | 
288 |         for i in range(reps - 1):
289 |             rep.append(self.relu)
290 |             rep.append(SeparableConv2d_same(filters, filters, 3, stride=1, dilation=dilation))
291 |             rep.append(BatchNorm2d(filters))
292 | 
293 |         if not grow_first:
294 |             rep.append(self.relu)
295 |             rep.append(SeparableConv2d_same(inplanes, planes, 3, stride=1, dilation=dilation))
296 |             rep.append(BatchNorm2d(planes))
297 | 
298 |         if not start_with_relu:
299 |             rep = rep[1:]
300 | 
301 |         if stride != 1:
302 |             rep.append(SeparableConv2d_same(planes, planes, 3, stride=2))
303 | 
304 |         if stride == 1 and is_last:
305 |             rep.append(SeparableConv2d_same(planes, planes, 3, stride=1))
306 | 
307 | 
308 |         self.rep = nn.Sequential(*rep)
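#     How the flags above compose: grow_first widens inplanes -> planes in the
#     first separable conv of the stack (otherwise in the last one);
#     start_with_relu=False drops the leading ReLU (used by block1, whose input
#     already passes through a BN + ReLU stem); stride != 1 appends a strided
#     separable conv for downsampling, and is_last appends one extra stride-1
#     conv instead.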
309 | 
310 |     def forward(self, inp):
311 |         x = self.rep(inp)
312 | 
313 |         if self.skip is not None:
314 |             skip = self.skip(inp)
315 |             skip = self.skipbn(skip)
316 |         else:
317 |             skip = inp
318 | 
319 |         x += skip
320 | 
321 |         return x
322 | 
323 | 
324 | class Xception(nn.Module):
325 |     """
326 |     Modified Aligned Xception
327 |     """
328 |     def __init__(self, inplanes=3, os=16, pretrained=True):
329 |         super(Xception, self).__init__()
330 | 
331 |         if os == 16:
332 |             entry_block3_stride = 2
333 |             middle_block_dilation = 1
334 |             exit_block_dilations = (1, 2)
335 |         elif os == 8:
336 |             entry_block3_stride = 1
337 |             middle_block_dilation = 2
338 |             exit_block_dilations = (2, 4)
339 |         else:
340 |             raise NotImplementedError
341 | 
342 | 
343 |         # Entry flow
344 |         self.conv1 = nn.Conv2d(inplanes, 32, 3, stride=2, padding=1, bias=False)
345 |         self.bn1 = BatchNorm2d(32)
346 |         self.relu = nn.ReLU(inplace=True)
347 | 
348 |         self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=1, bias=False)
349 |         self.bn2 = BatchNorm2d(64)
350 | 
351 |         self.block1 = Block(64, 128, reps=2, stride=2, start_with_relu=False)
352 |         self.block2 = Block(128, 256, reps=2, stride=2, start_with_relu=True, grow_first=True)
353 |         self.block3 = Block(256, 728, reps=2, stride=entry_block3_stride, start_with_relu=True, grow_first=True,
354 |                             is_last=True)
355 | 
356 |         # Middle flow
357 |         self.block4 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
358 |         self.block5 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
359 |         self.block6 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
360 |         self.block7 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
361 |         self.block8 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
362 |         self.block9 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
363 |         self.block10 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
364 |         self.block11 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
365 |         self.block12 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
366 |         self.block13 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
367 |         self.block14 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
368 |         self.block15 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
369 |         self.block16 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
370 |         self.block17 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
371 |         self.block18 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
372 |         self.block19 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, start_with_relu=True, grow_first=True)
373 | 
374 |         # Exit flow
375 |         self.block20 = Block(728, 1024, reps=2, stride=1, dilation=exit_block_dilations[0],
376 |                              start_with_relu=True, grow_first=False, is_last=True)
377 | 
378 | 
self.conv3 = SeparableConv2d_same(1024, 1536, 3, stride=1, dilation=exit_block_dilations[1]) 379 | self.bn3 = BatchNorm2d(1536) 380 | 381 | self.conv4 = SeparableConv2d_same(1536, 1536, 3, stride=1, dilation=exit_block_dilations[1]) 382 | self.bn4 = BatchNorm2d(1536) 383 | 384 | self.conv5 = SeparableConv2d_same(1536, 2048, 3, stride=1, dilation=exit_block_dilations[1]) 385 | self.bn5 = BatchNorm2d(2048) 386 | 387 | # Init weights 388 | self._init_weight() 389 | 390 | # Load pretrained model 391 | if pretrained: 392 | self._load_xception_pretrained() 393 | 394 | def forward(self, x): 395 | # Entry flow 396 | x = self.conv1(x) 397 | x = self.bn1(x) 398 | x = self.relu(x) 399 | 400 | x = self.conv2(x) 401 | x = self.bn2(x) 402 | x = self.relu(x) 403 | 404 | x = self.block1(x) 405 | low_level_feat = x 406 | x = self.block2(x) 407 | x = self.block3(x) 408 | 409 | # Middle flow 410 | x = self.block4(x) 411 | x = self.block5(x) 412 | x = self.block6(x) 413 | x = self.block7(x) 414 | x = self.block8(x) 415 | x = self.block9(x) 416 | x = self.block10(x) 417 | x = self.block11(x) 418 | x = self.block12(x) 419 | x = self.block13(x) 420 | x = self.block14(x) 421 | x = self.block15(x) 422 | x = self.block16(x) 423 | x = self.block17(x) 424 | x = self.block18(x) 425 | x = self.block19(x) 426 | 427 | # Exit flow 428 | x = self.block20(x) 429 | x = self.conv3(x) 430 | x = self.bn3(x) 431 | x = self.relu(x) 432 | 433 | x = self.conv4(x) 434 | x = self.bn4(x) 435 | x = self.relu(x) 436 | 437 | x = self.conv5(x) 438 | x = self.bn5(x) 439 | x = self.relu(x) 440 | 441 | return x, low_level_feat 442 | 443 | def _init_weight(self): 444 | for m in self.modules(): 445 | if isinstance(m, nn.Conv2d): 446 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 447 | m.weight.data.normal_(0, math.sqrt(2. 
/ n))
448 |             elif isinstance(m, BatchNorm2d):
449 |                 m.weight.data.fill_(1)
450 |                 m.bias.data.zero_()
451 | 
452 |     def _load_xception_pretrained(self):
453 |         pretrain_dict = torch.load('./pretrained_models/xception.pth')
454 |         #print(pretrain_dict)
455 |         #self.load_state_dict(checkpoint['model_state_dict'])
456 |         #pretrain_dict = model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/xception-b5690688.pth')
457 |         #pretrain_dict = self.load_state_dict(r'/mnt/windows-E/may/github-landslide2/xception/xception.pth')
458 | 
459 |         model_dict = {}
460 |         state_dict = self.state_dict()
461 | 
462 |         for k, v in pretrain_dict.items():
463 |             if k in state_dict:
464 |                 if 'pointwise' in k:
465 |                     v = v.unsqueeze(-1).unsqueeze(-1)
466 |                 if k.startswith('block11'):
467 |                     model_dict[k] = v
468 |                     model_dict[k.replace('block11', 'block12')] = v
469 |                     model_dict[k.replace('block11', 'block13')] = v
470 |                     model_dict[k.replace('block11', 'block14')] = v
471 |                     model_dict[k.replace('block11', 'block15')] = v
472 |                     model_dict[k.replace('block11', 'block16')] = v
473 |                     model_dict[k.replace('block11', 'block17')] = v
474 |                     model_dict[k.replace('block11', 'block18')] = v
475 |                     model_dict[k.replace('block11', 'block19')] = v
476 |                 elif k.startswith('block12'):
477 |                     model_dict[k.replace('block12', 'block20')] = v
478 |                 elif k.startswith('bn3'):
479 |                     model_dict[k] = v
480 |                     model_dict[k.replace('bn3', 'bn4')] = v
481 |                 elif k.startswith('conv4'):
482 |                     model_dict[k.replace('conv4', 'conv5')] = v
483 |                 elif k.startswith('bn4'):
484 |                     model_dict[k.replace('bn4', 'bn5')] = v
485 |                 else:
486 |                     model_dict[k] = v
487 |         state_dict.update(model_dict)
488 |         self.load_state_dict(state_dict)
489 | 
490 | class ASPP_module(nn.Module):
491 |     def __init__(self, inplanes, planes, dilation):
492 |         super(ASPP_module, self).__init__()
493 |         if dilation == 1:
494 |             kernel_size = 1
495 |             padding = 0
496 |         else:
497 |             kernel_size = 3
498 |             padding = dilation
499 |         self.atrous_convolution = nn.Conv2d(inplanes, planes, kernel_size=kernel_size,
500 |                                             stride=1, padding=padding, dilation=dilation, bias=False)
501 |         self.bn = BatchNorm2d(planes)
502 |         self.relu = nn.ReLU()
503 | 
504 |         self._init_weight()
505 | 
506 |     def forward(self, x):
507 |         x = self.atrous_convolution(x)
508 |         x = self.bn(x)
509 | 
510 |         return self.relu(x)
511 | 
512 |     def _init_weight(self):
513 |         for m in self.modules():
514 |             if isinstance(m, nn.Conv2d):
515 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
516 |                 m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 517 | elif isinstance(m, BatchNorm2d): 518 | m.weight.data.fill_(1) 519 | m.bias.data.zero_() 520 | 521 | 522 | class DeepLabv3_plus(nn.Module): 523 | def __init__(self, nInputChannels=3, n_classes=2, os=8, pretrained=True, freeze_bn=False, _print=True): 524 | if _print: 525 | print("Constructing DeepLabv3+ model...") 526 | print("Backbone: Xception") 527 | print("Number of classes: {}".format(n_classes)) 528 | print("Output stride: {}".format(os)) 529 | print("Number of Input Channels: {}".format(nInputChannels)) 530 | super(DeepLabv3_plus, self).__init__() 531 | 532 | # Atrous Conv 533 | self.xception_features = Xception(nInputChannels, os, pretrained) 534 | 535 | # ASPP 536 | if os == 16: 537 | dilations = [1, 6, 12, 18] 538 | elif os == 8: 539 | dilations = [1, 12, 24, 36] 540 | else: 541 | raise NotImplementedError 542 | 543 | self.aspp1 = ASPP_module(2048, 256, dilation=dilations[0]) 544 | self.aspp2 = ASPP_module(2048, 256, dilation=dilations[1]) 545 | self.aspp3 = ASPP_module(2048, 256, dilation=dilations[2]) 546 | self.aspp4 = ASPP_module(2048, 256, dilation=dilations[3]) 547 | 548 | self.relu = nn.ReLU() 549 | 550 | self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), 551 | nn.Conv2d(2048, 256, 1, stride=1, bias=False), 552 | BatchNorm2d(256), 553 | nn.ReLU()) 554 | 555 | self.conv1 = nn.Conv2d(1280, 256, 1, bias=False) 556 | self.bn1 = BatchNorm2d(256) 557 | 558 | # adopt [1x1, 48] for channel reduction. 559 | self.conv2 = nn.Conv2d(128, 48, 1, bias=False) 560 | self.bn2 = BatchNorm2d(48) 561 | 562 | self.last_conv = nn.Sequential(nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1, bias=False), 563 | BatchNorm2d(256), 564 | nn.ReLU(), 565 | nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False), 566 | BatchNorm2d(256), 567 | nn.ReLU(), 568 | nn.Conv2d(256, n_classes, kernel_size=1, stride=1)) 569 | if freeze_bn: 570 | self._freeze_bn() 571 | 572 | def forward(self, input): 573 | x, low_level_features = self.xception_features(input) 574 | x1 = self.aspp1(x) 575 | x2 = self.aspp2(x) 576 | x3 = self.aspp3(x) 577 | x4 = self.aspp4(x) 578 | x5 = self.global_avg_pool(x) 579 | x5 = F.interpolate(x5, size=x4.size()[2:], mode='bilinear', align_corners=True) 580 | 581 | x = torch.cat((x1, x2, x3, x4, x5), dim=1) 582 | 583 | x = self.conv1(x) 584 | x = self.bn1(x) 585 | x = self.relu(x) 586 | x = F.interpolate(x, size=(int(math.ceil(input.size()[-2]/4)), 587 | int(math.ceil(input.size()[-1]/4))), mode='bilinear', align_corners=True) 588 | 589 | low_level_features = self.conv2(low_level_features) 590 | low_level_features = self.bn2(low_level_features) 591 | low_level_features = self.relu(low_level_features) 592 | 593 | 594 | x = torch.cat((x, low_level_features), dim=1) 595 | x = self.last_conv(x) 596 | x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True) 597 | 598 | return x 599 | 600 | def _freeze_bn(self): 601 | for m in self.modules(): 602 | if isinstance(m, BatchNorm2d): 603 | m.eval() 604 | 605 | def _init_weight(self): 606 | for m in self.modules(): 607 | if isinstance(m, nn.Conv2d): 608 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 609 | m.weight.data.normal_(0, math.sqrt(2. / n)) 610 | elif isinstance(m, BatchNorm2d): 611 | m.weight.data.fill_(1) 612 | m.bias.data.zero_() 613 | 614 | def get_1x_lr_params(model): 615 | """ 616 | This generator returns all the parameters of the net except for 617 | the last classification layer. 
Note that for each batchnorm layer,
618 |     requires_grad is set to False in deeplab_resnet.py, therefore this function does not return
619 |     any batchnorm parameters
620 |     """
621 |     b = [model.xception_features]
622 |     for i in range(len(b)):
623 |         for k in b[i].parameters():
624 |             if k.requires_grad:
625 |                 yield k
626 | 
627 | 
628 | def get_10x_lr_params(model):
629 |     """
630 |     This generator returns all the parameters for the last layers of the net,
631 |     which perform the classification of pixels into classes
632 |     """
633 |     b = [model.aspp1, model.aspp2, model.aspp3, model.aspp4, model.conv1, model.conv2, model.last_conv]
634 |     for j in range(len(b)):
635 |         for k in b[j].parameters():
636 |             if k.requires_grad:
637 |                 yield k
638 | 
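# Illustrative use of the two parameter groups above (a sketch, not this repo's
# training loop): backbone parameters at a base LR, decoder/ASPP at 10x.
#
#   model = DeepLabv3_plus(n_classes=2, pretrained=False)
#   optimizer = torch.optim.SGD([
#       {'params': get_1x_lr_params(model), 'lr': 1e-3},
#       {'params': get_10x_lr_params(model), 'lr': 1e-2},
#   ], momentum=0.9)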
*"Rethinking Atrous Convolution for Semantic Image Segmentation."* 63 | """ 64 | 65 | def __init__(self, features, inner_features=512, out_features=512, dilations=(12, 24, 36), norm_type=None): 66 | super(ASPPModule, self).__init__() 67 | 68 | self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), 69 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, 70 | bias=False), 71 | ModuleHelper.BNReLU(inner_features, norm_type=norm_type)) 72 | self.conv2 = nn.Sequential( 73 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), 74 | ModuleHelper.BNReLU(inner_features, norm_type=norm_type)) 75 | self.conv3 = nn.Sequential( 76 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), 77 | ModuleHelper.BNReLU(inner_features, norm_type=norm_type)) 78 | self.conv4 = nn.Sequential( 79 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), 80 | ModuleHelper.BNReLU(inner_features, norm_type=norm_type)) 81 | self.conv5 = nn.Sequential( 82 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), 83 | ModuleHelper.BNReLU(inner_features, norm_type=norm_type)) 84 | 85 | self.bottleneck = nn.Sequential( 86 | nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), 87 | ModuleHelper.BNReLU(out_features, norm_type=norm_type), 88 | nn.Dropout2d(0.1) 89 | ) 90 | 91 | def forward(self, x): 92 | _, _, h, w = x.size() 93 | 94 | feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True) 95 | 96 | feat2 = self.conv2(x) 97 | feat3 = self.conv3(x) 98 | feat4 = self.conv4(x) 99 | feat5 = self.conv5(x) 100 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) 101 | 102 | bottle = self.bottleneck(out) 103 | return bottle 104 | 105 | 106 | class DeepLabV3(nn.Module): 107 | def __init__(self, num_classes, backbone='deepbase_resnet50_dilated8'): # configer): 108 | self.inplanes = 128 109 | super(DeepLabV3, self).__init__() 110 | # self.configer = configer 111 | self.num_classes = num_classes # self.configer.get('data', 'num_classes') 112 | self.backbone = BackboneSelector(backbone).get_backbone() 113 | 114 | self.head = nn.Sequential(ASPPModule(self.backbone.get_num_features(), 115 | norm_type='sync_batchnorm'), # self.configer.get('network', 'norm_type')), 116 | nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True)) 117 | self.dsn = nn.Sequential( 118 | nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1), 119 | ModuleHelper.BNReLU(512, norm_type='sync_batchnorm'), # self.configer.get('network', 'norm_type')), 120 | nn.Dropout2d(0.1), 121 | nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) 122 | ) 123 | self.valid_loss_dict = {'loss': 'seg_auxce_loss', 'loss_weights': {"aux_loss": 0.4, "seg_loss": 1.0}, 124 | 'loss.loss_type': 'dsnce_loss'} # configer.get('loss', 'loss_weights', configer.get('loss.loss_type')) 125 | 126 | def forward(self, data_dict): 127 | # print(data_dict.size()) 128 | data_dict = {'img': data_dict} 129 | x = self.backbone(data_dict['img']) 130 | x_dsn = self.dsn(x[-2]) 131 | x = self.head(x[-1]) 132 | x_dsn = F.interpolate(x_dsn, size=(data_dict['img'].size(2), data_dict['img'].size(3)), 133 | mode="bilinear", align_corners=True) 134 | x = F.interpolate(x, size=(data_dict['img'].size(2), data_dict['img'].size(3)), 135 | mode="bilinear", align_corners=True) 136 
136 |         out_dict = dict(dsn_out=x_dsn, out=x)
137 |         # if self.configer.get('phase') == 'test':
138 |         #     return out_dict
139 | 
140 |         # loss_dict = dict()
141 |         # if 'dsn_ce_loss' in self.valid_loss_dict:
142 |         #     loss_dict['dsn_ce_loss'] = dict(
143 |         #         params=[x, data_dict['labelmap']],
144 |         #         type=torch.cuda.LongTensor([BASE_LOSS_DICT['ce_loss']]),
145 |         #         weight=torch.cuda.FloatTensor([self.valid_loss_dict['dsn_ce_loss']])
146 |         #     )
147 |         #
148 |         # if 'ce_loss' in self.valid_loss_dict:
149 |         #     loss_dict['ce_loss'] = dict(
150 |         #         params=[x, data_dict['labelmap']],
151 |         #         type=torch.cuda.LongTensor([BASE_LOSS_DICT['ce_loss']]),
152 |         #         weight=torch.cuda.FloatTensor([self.valid_loss_dict['ce_loss']])
153 |         #     )
154 |         #
155 |         # if 'ohem_ce_loss' in self.valid_loss_dict:
156 |         #     loss_dict['ohem_ce_loss'] = dict(
157 |         #         params=[x, data_dict['labelmap']],
158 |         #         type=torch.cuda.LongTensor([BASE_LOSS_DICT['ohem_ce_loss']]),
159 |         #         weight=torch.cuda.FloatTensor([self.valid_loss_dict['ohem_ce_loss']])
160 |         #     )
161 |         return x  # out_dict, loss_dict
162 | 
163 | 
164 | if __name__ == '__main__':
165 |     model = DeepLabV3(20)
166 |     model.eval()
167 |     with torch.no_grad():
168 |         image = torch.randn(1, 3, 512, 512)
169 |         # print(type(model.resnet_features))
170 |         # print(model(image).size())
--------------------------------------------------------------------------------
/models/fcn.py:
--------------------------------------------------------------------------------
1 | import functools
2 | 
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | 
7 | import numpy as np
8 | 
9 | 
10 | def get_upsampling_weight(in_channels, out_channels, kernel_size):
11 |     """Make a 2D bilinear kernel suitable for upsampling"""
12 |     factor = (kernel_size + 1) // 2
13 |     if kernel_size % 2 == 1:
14 |         center = factor - 1
15 |     else:
16 |         center = factor - 0.5
17 |     og = np.ogrid[:kernel_size, :kernel_size]
18 |     filt = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
19 |     weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size), dtype=np.float64)
20 |     weight[range(in_channels), range(out_channels), :, :] = filt
21 |     return torch.from_numpy(weight).float()
22 | 
23 | def cross_entropy2d(input, target, weight=None, size_average=True):
24 |     n, c, h, w = input.size()
25 |     nt, ht, wt = target.size()
26 | 
27 |     # Handle inconsistent size between input and target
28 |     if h != ht or w != wt:  # resize predictions to the label size
29 |         input = F.interpolate(input, size=(ht, wt), mode="bilinear", align_corners=True)
30 | 
31 |     input = input.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
32 |     target = target.view(-1)
33 |     loss = F.cross_entropy(
34 |         input, target, weight=weight, size_average=size_average, ignore_index=250
35 |     )
36 |     return loss
37 | 
38 | # FCN32s
39 | class fcn32s(nn.Module):
40 |     def __init__(self, channels_in=3, n_classes=21, learned_billinear=False):
41 |         super(fcn32s, self).__init__()
42 |         self.learned_billinear = learned_billinear
43 |         self.n_classes = n_classes
44 |         self.loss = functools.partial(cross_entropy2d, size_average=False)
45 | 
46 |         self.conv_block1 = nn.Sequential(
47 |             nn.Conv2d(channels_in, 64, 3, padding=100),
48 |             nn.ReLU(inplace=True),
49 |             nn.Conv2d(64, 64, 3, padding=1),
50 |             nn.ReLU(inplace=True),
51 |             nn.MaxPool2d(2, stride=2, ceil_mode=True),
52 |         )
53 | 
54 |         self.conv_block2 = nn.Sequential(
55 |             nn.Conv2d(64, 128, 3, padding=1),
56 |             nn.ReLU(inplace=True),
57 |             nn.Conv2d(128, 128, 3, 
padding=1), 58 | nn.ReLU(inplace=True), 59 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 60 | ) 61 | 62 | self.conv_block3 = nn.Sequential( 63 | nn.Conv2d(128, 256, 3, padding=1), 64 | nn.ReLU(inplace=True), 65 | nn.Conv2d(256, 256, 3, padding=1), 66 | nn.ReLU(inplace=True), 67 | nn.Conv2d(256, 256, 3, padding=1), 68 | nn.ReLU(inplace=True), 69 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 70 | ) 71 | 72 | self.conv_block4 = nn.Sequential( 73 | nn.Conv2d(256, 512, 3, padding=1), 74 | nn.ReLU(inplace=True), 75 | nn.Conv2d(512, 512, 3, padding=1), 76 | nn.ReLU(inplace=True), 77 | nn.Conv2d(512, 512, 3, padding=1), 78 | nn.ReLU(inplace=True), 79 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 80 | ) 81 | 82 | self.conv_block5 = nn.Sequential( 83 | nn.Conv2d(512, 512, 3, padding=1), 84 | nn.ReLU(inplace=True), 85 | nn.Conv2d(512, 512, 3, padding=1), 86 | nn.ReLU(inplace=True), 87 | nn.Conv2d(512, 512, 3, padding=1), 88 | nn.ReLU(inplace=True), 89 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 90 | ) 91 | 92 | self.classifier = nn.Sequential( 93 | nn.Conv2d(512, 4096, 7), 94 | nn.ReLU(inplace=True), 95 | nn.Dropout2d(), 96 | nn.Conv2d(4096, 4096, 1), 97 | nn.ReLU(inplace=True), 98 | nn.Dropout2d(), 99 | nn.Conv2d(4096, self.n_classes, 1), 100 | ) 101 | 102 | if self.learned_billinear: 103 | raise NotImplementedError 104 | 105 | def forward(self, x): 106 | conv1 = self.conv_block1(x) 107 | conv2 = self.conv_block2(conv1) 108 | conv3 = self.conv_block3(conv2) 109 | conv4 = self.conv_block4(conv3) 110 | conv5 = self.conv_block5(conv4) 111 | 112 | score = self.classifier(conv5) 113 | 114 | out = F.upsample(score, x.size()[2:]) 115 | 116 | return out 117 | 118 | def init_vgg16_params(self, vgg16, copy_fc8=True): 119 | blocks = [ 120 | self.conv_block1, 121 | self.conv_block2, 122 | self.conv_block3, 123 | self.conv_block4, 124 | self.conv_block5, 125 | ] 126 | 127 | ranges = [[0, 4], [5, 9], [10, 16], [17, 23], [24, 29]] 128 | features = list(vgg16.features.children()) 129 | 130 | for idx, conv_block in enumerate(blocks): 131 | for l1, l2 in zip(features[ranges[idx][0] : ranges[idx][1]], conv_block): 132 | if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d): 133 | assert l1.weight.size() == l2.weight.size() 134 | assert l1.bias.size() == l2.bias.size() 135 | l2.weight.data = l1.weight.data 136 | l2.bias.data = l1.bias.data 137 | for i1, i2 in zip([0, 3], [0, 3]): 138 | l1 = vgg16.classifier[i1] 139 | l2 = self.classifier[i2] 140 | l2.weight.data = l1.weight.data.view(l2.weight.size()) 141 | l2.bias.data = l1.bias.data.view(l2.bias.size()) 142 | n_class = self.classifier[6].weight.size()[0] 143 | if copy_fc8: 144 | l1 = vgg16.classifier[6] 145 | l2 = self.classifier[6] 146 | l2.weight.data = l1.weight.data[:n_class, :].view(l2.weight.size()) 147 | l2.bias.data = l1.bias.data[:n_class] 148 | 149 | 150 | class fcn16s(nn.Module): 151 | def __init__(self, n_classes=21, learned_billinear=False): 152 | super(fcn16s, self).__init__() 153 | self.learned_billinear = learned_billinear 154 | self.n_classes = n_classes 155 | self.loss = functools.partial(cross_entropy2d, size_average=False) 156 | 157 | self.conv_block1 = nn.Sequential( 158 | nn.Conv2d(3, 64, 3, padding=100), 159 | nn.ReLU(inplace=True), 160 | nn.Conv2d(64, 64, 3, padding=1), 161 | nn.ReLU(inplace=True), 162 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 163 | ) 164 | 165 | self.conv_block2 = nn.Sequential( 166 | nn.Conv2d(64, 128, 3, padding=1), 167 | nn.ReLU(inplace=True), 168 | nn.Conv2d(128, 128, 3, padding=1), 169 | nn.ReLU(inplace=True), 170 
| nn.MaxPool2d(2, stride=2, ceil_mode=True), 171 | ) 172 | 173 | self.conv_block3 = nn.Sequential( 174 | nn.Conv2d(128, 256, 3, padding=1), 175 | nn.ReLU(inplace=True), 176 | nn.Conv2d(256, 256, 3, padding=1), 177 | nn.ReLU(inplace=True), 178 | nn.Conv2d(256, 256, 3, padding=1), 179 | nn.ReLU(inplace=True), 180 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 181 | ) 182 | 183 | self.conv_block4 = nn.Sequential( 184 | nn.Conv2d(256, 512, 3, padding=1), 185 | nn.ReLU(inplace=True), 186 | nn.Conv2d(512, 512, 3, padding=1), 187 | nn.ReLU(inplace=True), 188 | nn.Conv2d(512, 512, 3, padding=1), 189 | nn.ReLU(inplace=True), 190 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 191 | ) 192 | 193 | self.conv_block5 = nn.Sequential( 194 | nn.Conv2d(512, 512, 3, padding=1), 195 | nn.ReLU(inplace=True), 196 | nn.Conv2d(512, 512, 3, padding=1), 197 | nn.ReLU(inplace=True), 198 | nn.Conv2d(512, 512, 3, padding=1), 199 | nn.ReLU(inplace=True), 200 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 201 | ) 202 | 203 | self.classifier = nn.Sequential( 204 | nn.Conv2d(512, 4096, 7), 205 | nn.ReLU(inplace=True), 206 | nn.Dropout2d(), 207 | nn.Conv2d(4096, 4096, 1), 208 | nn.ReLU(inplace=True), 209 | nn.Dropout2d(), 210 | nn.Conv2d(4096, self.n_classes, 1), 211 | ) 212 | 213 | self.score_pool4 = nn.Conv2d(512, self.n_classes, 1) 214 | 215 | # TODO: Add support for learned upsampling 216 | if self.learned_billinear: 217 | raise NotImplementedError 218 | 219 | def forward(self, x): 220 | conv1 = self.conv_block1(x) 221 | conv2 = self.conv_block2(conv1) 222 | conv3 = self.conv_block3(conv2) 223 | conv4 = self.conv_block4(conv3) 224 | conv5 = self.conv_block5(conv4) 225 | 226 | score = self.classifier(conv5) 227 | score_pool4 = self.score_pool4(conv4) 228 | 229 | score = F.upsample(score, score_pool4.size()[2:]) 230 | score += score_pool4 231 | out = F.upsample(score, x.size()[2:]) 232 | 233 | return out 234 | 235 | def init_vgg16_params(self, vgg16, copy_fc8=True): 236 | blocks = [ 237 | self.conv_block1, 238 | self.conv_block2, 239 | self.conv_block3, 240 | self.conv_block4, 241 | self.conv_block5, 242 | ] 243 | 244 | ranges = [[0, 4], [5, 9], [10, 16], [17, 23], [24, 29]] 245 | features = list(vgg16.features.children()) 246 | 247 | for idx, conv_block in enumerate(blocks): 248 | for l1, l2 in zip(features[ranges[idx][0] : ranges[idx][1]], conv_block): 249 | if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d): 250 | # print(idx, l1, l2) 251 | assert l1.weight.size() == l2.weight.size() 252 | assert l1.bias.size() == l2.bias.size() 253 | l2.weight.data = l1.weight.data 254 | l2.bias.data = l1.bias.data 255 | for i1, i2 in zip([0, 3], [0, 3]): 256 | l1 = vgg16.classifier[i1] 257 | l2 = self.classifier[i2] 258 | l2.weight.data = l1.weight.data.view(l2.weight.size()) 259 | l2.bias.data = l1.bias.data.view(l2.bias.size()) 260 | n_class = self.classifier[6].weight.size()[0] 261 | if copy_fc8: 262 | l1 = vgg16.classifier[6] 263 | l2 = self.classifier[6] 264 | l2.weight.data = l1.weight.data[:n_class, :].view(l2.weight.size()) 265 | l2.bias.data = l1.bias.data[:n_class] 266 | 267 | 268 | # FCN 8s 269 | class fcn8s(nn.Module): 270 | def __init__(self, n_classes=21, learned_billinear=True): 271 | super(fcn8s, self).__init__() 272 | self.learned_billinear = learned_billinear 273 | self.n_classes = n_classes 274 | self.loss = functools.partial(cross_entropy2d, size_average=False) 275 | 276 | self.conv_block1 = nn.Sequential( 277 | nn.Conv2d(3, 64, 3, padding=100), 278 | nn.ReLU(inplace=True), 279 | nn.Conv2d(64, 64, 3, 
padding=1), 280 | nn.ReLU(inplace=True), 281 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 282 | ) 283 | 284 | self.conv_block2 = nn.Sequential( 285 | nn.Conv2d(64, 128, 3, padding=1), 286 | nn.ReLU(inplace=True), 287 | nn.Conv2d(128, 128, 3, padding=1), 288 | nn.ReLU(inplace=True), 289 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 290 | ) 291 | 292 | self.conv_block3 = nn.Sequential( 293 | nn.Conv2d(128, 256, 3, padding=1), 294 | nn.ReLU(inplace=True), 295 | nn.Conv2d(256, 256, 3, padding=1), 296 | nn.ReLU(inplace=True), 297 | nn.Conv2d(256, 256, 3, padding=1), 298 | nn.ReLU(inplace=True), 299 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 300 | ) 301 | 302 | self.conv_block4 = nn.Sequential( 303 | nn.Conv2d(256, 512, 3, padding=1), 304 | nn.ReLU(inplace=True), 305 | nn.Conv2d(512, 512, 3, padding=1), 306 | nn.ReLU(inplace=True), 307 | nn.Conv2d(512, 512, 3, padding=1), 308 | nn.ReLU(inplace=True), 309 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 310 | ) 311 | 312 | self.conv_block5 = nn.Sequential( 313 | nn.Conv2d(512, 512, 3, padding=1), 314 | nn.ReLU(inplace=True), 315 | nn.Conv2d(512, 512, 3, padding=1), 316 | nn.ReLU(inplace=True), 317 | nn.Conv2d(512, 512, 3, padding=1), 318 | nn.ReLU(inplace=True), 319 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 320 | ) 321 | 322 | self.classifier = nn.Sequential( 323 | nn.Conv2d(512, 4096, 7), 324 | nn.ReLU(inplace=True), 325 | nn.Dropout2d(), 326 | nn.Conv2d(4096, 4096, 1), 327 | nn.ReLU(inplace=True), 328 | nn.Dropout2d(), 329 | nn.Conv2d(4096, self.n_classes, 1), 330 | ) 331 | 332 | self.score_pool4 = nn.Conv2d(512, self.n_classes, 1) 333 | self.score_pool3 = nn.Conv2d(256, self.n_classes, 1) 334 | 335 | if self.learned_billinear: 336 | self.upscore2 = nn.ConvTranspose2d( 337 | self.n_classes, self.n_classes, 4, stride=2, bias=False 338 | ) 339 | self.upscore4 = nn.ConvTranspose2d( 340 | self.n_classes, self.n_classes, 4, stride=2, bias=False 341 | ) 342 | self.upscore8 = nn.ConvTranspose2d( 343 | self.n_classes, self.n_classes, 16, stride=8, bias=False 344 | ) 345 | 346 | for m in self.modules(): 347 | if isinstance(m, nn.ConvTranspose2d): 348 | m.weight.data.copy_( 349 | get_upsampling_weight(m.in_channels, m.out_channels, m.kernel_size[0]) 350 | ) 351 | 352 | def forward(self, x): 353 | conv1 = self.conv_block1(x) 354 | conv2 = self.conv_block2(conv1) 355 | conv3 = self.conv_block3(conv2) 356 | conv4 = self.conv_block4(conv3) 357 | conv5 = self.conv_block5(conv4) 358 | 359 | score = self.classifier(conv5) 360 | 361 | if self.learned_billinear: 362 | upscore2 = self.upscore2(score) 363 | score_pool4c = self.score_pool4(conv4)[ 364 | :, :, 5 : 5 + upscore2.size()[2], 5 : 5 + upscore2.size()[3] 365 | ] 366 | upscore_pool4 = self.upscore4(upscore2 + score_pool4c) 367 | 368 | score_pool3c = self.score_pool3(conv3)[ 369 | :, :, 9 : 9 + upscore_pool4.size()[2], 9 : 9 + upscore_pool4.size()[3] 370 | ] 371 | 372 | out = self.upscore8(score_pool3c + upscore_pool4)[ 373 | :, :, 31 : 31 + x.size()[2], 31 : 31 + x.size()[3] 374 | ] 375 | return out.contiguous() 376 | 377 | else: 378 | score_pool4 = self.score_pool4(conv4) 379 | score_pool3 = self.score_pool3(conv3) 380 | score = F.upsample(score, score_pool4.size()[2:]) 381 | score += score_pool4 382 | score = F.upsample(score, score_pool3.size()[2:]) 383 | score += score_pool3 384 | out = F.upsample(score, x.size()[2:]) 385 | 386 | return out 387 | 388 | def init_vgg16_params(self, vgg16, copy_fc8=True): 389 | blocks = [ 390 | self.conv_block1, 391 | self.conv_block2, 392 | self.conv_block3, 393 | 
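#
# [editor's sketch] fcn8s initialises its ConvTranspose2d layers with
# get_upsampling_weight, which is defined earlier in models/fcn.py and not
# shown in this excerpt. For readability, the standard bilinear-kernel
# initialiser used by reference FCN implementations looks like this (an
# assumption about this repo's exact version):
#
#     def get_upsampling_weight(in_channels, out_channels, kernel_size):
#         """2D bilinear kernel for seeding ConvTranspose2d weights."""
#         factor = (kernel_size + 1) // 2
#         center = factor - 1 if kernel_size % 2 == 1 else factor - 0.5
#         og = np.ogrid[:kernel_size, :kernel_size]
#         filt = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
#         weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size), dtype=np.float64)
#         weight[range(in_channels), range(out_channels), :, :] = filt
#         return torch.from_numpy(weight).float()
#
# The hard-coded crops (5, 9, 31) in forward() undo the extra border introduced
# by the padding=100 trick in conv_block1.
#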
self.conv_block4, 394 | self.conv_block5, 395 | ] 396 | 397 | ranges = [[0, 4], [5, 9], [10, 16], [17, 23], [24, 29]] 398 | features = list(vgg16.features.children()) 399 | 400 | for idx, conv_block in enumerate(blocks): 401 | for l1, l2 in zip(features[ranges[idx][0] : ranges[idx][1]], conv_block): 402 | if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d): 403 | assert l1.weight.size() == l2.weight.size() 404 | assert l1.bias.size() == l2.bias.size() 405 | l2.weight.data = l1.weight.data 406 | l2.bias.data = l1.bias.data 407 | for i1, i2 in zip([0, 3], [0, 3]): 408 | l1 = vgg16.classifier[i1] 409 | l2 = self.classifier[i2] 410 | l2.weight.data = l1.weight.data.view(l2.weight.size()) 411 | l2.bias.data = l1.bias.data.view(l2.bias.size()) 412 | n_class = self.classifier[6].weight.size()[0] 413 | if copy_fc8: 414 | l1 = vgg16.classifier[6] 415 | l2 = self.classifier[6] 416 | l2.weight.data = l1.weight.data[:n_class, :].view(l2.weight.size()) 417 | l2.bias.data = l1.bias.data[:n_class] 418 | -------------------------------------------------------------------------------- /models/gcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.nn.init as init 5 | import torch.utils.model_zoo as model_zoo 6 | from torchvision import models 7 | 8 | import math 9 | 10 | 11 | class base_GCN(nn.Module): 12 | def __init__(self, inplanes, planes, ks=7): 13 | super(base_GCN, self).__init__() 14 | self.conv_l1 = nn.Conv2d(inplanes, planes, kernel_size=(ks, 1), 15 | padding=((int)(ks/2), 0)) 16 | 17 | self.conv_l2 = nn.Conv2d(planes, planes, kernel_size=(1, ks), 18 | padding=(0, (int)(ks/2))) 19 | self.conv_r1 = nn.Conv2d(inplanes, planes, kernel_size=(1, ks), 20 | padding=(0, (int)(ks/2))) 21 | self.conv_r2 = nn.Conv2d(planes, planes, kernel_size=(ks, 1), 22 | padding=((int)(ks/2), 0)) 23 | 24 | def forward(self, x): 25 | x_l = self.conv_l1(x) 26 | x_l = self.conv_l2(x_l) 27 | 28 | x_r = self.conv_r1(x) 29 | x_r = self.conv_r2(x_r) 30 | 31 | x = x_l + x_r 32 | 33 | return x 34 | 35 | 36 | class Refine(nn.Module): 37 | def __init__(self, planes): 38 | super(Refine, self).__init__() 39 | self.bn = nn.BatchNorm2d(planes) 40 | self.relu = nn.ReLU(inplace=True) 41 | self.conv1 = nn.Conv2d(planes, planes, kernel_size=3, padding=1) 42 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1) 43 | 44 | def forward(self, x): 45 | residual = x 46 | x = self.bn(x) 47 | x = self.relu(x) 48 | x = self.conv1(x) 49 | x = self.bn(x) 50 | x = self.relu(x) 51 | x = self.conv2(x) 52 | 53 | out = residual + x 54 | return out 55 | 56 | 57 | class GCN(nn.Module): 58 | def __init__(self, num_classes): 59 | super(GCN, self).__init__() 60 | 61 | self.num_classes = num_classes 62 | 63 | resnet = models.resnet50(pretrained=True) 64 | 65 | self.conv1 = resnet.conv1 66 | self.bn0 = resnet.bn1 67 | self.relu = resnet.relu 68 | self.maxpool = resnet.maxpool 69 | 70 | self.layer1 = resnet.layer1 71 | self.layer2 = resnet.layer2 72 | self.layer3 = resnet.layer3 73 | self.layer4 = resnet.layer4 74 | 75 | self.gcn1 = base_GCN(2048, self.num_classes) 76 | self.gcn2 = base_GCN(1024, self.num_classes) 77 | self.gcn3 = base_GCN(512, self.num_classes) 78 | self.gcn4 = base_GCN(64, self.num_classes) 79 | self.gcn5 = base_GCN(64, self.num_classes) 80 | 81 | self.refine1 = Refine(self.num_classes) 82 | self.refine2 = Refine(self.num_classes) 83 | self.refine3 = Refine(self.num_classes) 84 | self.refine4 = 
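#
# [editor's note] base_GCN above implements the "global convolution" of Peng et
# al. (Large Kernel Matters) as two separable branches, (k x 1 then 1 x k) and
# (1 x k then k x 1), covering a k x k receptive field at far lower cost than a
# dense k x k kernel. For gcn1 = base_GCN(2048, 2) with ks=7 (the 2-class
# landslide setting):
#
#     dense 7x7 conv:       2048 * 2 * 7 * 7       = 200,704 weights
#     two separable paths:  2 * (2048*2*7 + 2*2*7) =  57,400 weights
#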
Refine(self.num_classes) 85 | self.refine5 = Refine(self.num_classes) 86 | self.refine6 = Refine(self.num_classes) 87 | self.refine7 = Refine(self.num_classes) 88 | self.refine8 = Refine(self.num_classes) 89 | self.refine9 = Refine(self.num_classes) 90 | self.refine10 = Refine(self.num_classes) 91 | 92 | self.out0 = self._classifier(2048) 93 | self.out1 = self._classifier(1024) 94 | self.out2 = self._classifier(512) 95 | self.out_e = self._classifier(256) 96 | self.out3 = self._classifier(64) 97 | self.out4 = self._classifier(64) 98 | self.out5 = self._classifier(32) 99 | 100 | self.transformer = nn.Conv2d(256, 64, kernel_size=1) 101 | 102 | def _classifier(self, inplanes): 103 | return nn.Sequential( 104 | nn.Conv2d(inplanes, (int)(inplanes/2), 3, padding=1, bias=False),  # fixed: was (inplanes, inplanes, ...), which mismatches BatchNorm2d(inplanes/2) below 105 | nn.BatchNorm2d((int)(inplanes/2)), 106 | nn.ReLU(inplace=True), 107 | nn.Dropout(.1), 108 | nn.Conv2d((int)(inplanes/2), self.num_classes, 1), 109 | ) 110 | 111 | def forward(self, x): 112 | input = x 113 | x = self.conv1(x) 114 | x = self.bn0(x) 115 | x = self.relu(x) 116 | conv_x = x 117 | x = self.maxpool(x) 118 | pool_x = x 119 | 120 | fm1 = self.layer1(x) 121 | fm2 = self.layer2(fm1) 122 | fm3 = self.layer3(fm2) 123 | fm4 = self.layer4(fm3) 124 | 125 | gcfm1 = self.refine1(self.gcn1(fm4)) 126 | gcfm2 = self.refine2(self.gcn2(fm3)) 127 | gcfm3 = self.refine3(self.gcn3(fm2)) 128 | gcfm4 = self.refine4(self.gcn4(pool_x)) 129 | gcfm5 = self.refine5(self.gcn5(conv_x)) 130 | 131 | fs1 = self.refine6(F.upsample_bilinear(gcfm1, fm3.size()[2:]) + gcfm2) 132 | fs2 = self.refine7(F.upsample_bilinear(fs1, fm2.size()[2:]) + gcfm3) 133 | fs3 = self.refine8(F.upsample_bilinear(fs2, pool_x.size()[2:]) + gcfm4) 134 | fs4 = self.refine9(F.upsample_bilinear(fs3, conv_x.size()[2:]) + gcfm5) 135 | out = self.refine10(F.upsample_bilinear(fs4, input.size()[2:])) 136 | 137 | return out 138 | -------------------------------------------------------------------------------- /models/padded_unet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | 6 | class UNet(nn.Module): 7 | def __init__(self, in_channel: int, out_channel: int): 8 | super(UNet, self).__init__() 9 | # Encode 10 | self.conv_encode1 = self.contracting_block(in_channels=in_channel, out_channels=64) 11 | self.conv_maxpool1 = nn.MaxPool2d(kernel_size=2) 12 | self.conv_encode2 = self.contracting_block(64, 128) 13 | self.conv_maxpool2 = nn.MaxPool2d(kernel_size=2) 14 | self.conv_encode3 = self.contracting_block(128, 256) 15 | self.conv_maxpool3 = nn.MaxPool2d(kernel_size=2) 16 | self.conv_encode4 = self.contracting_block(256, 512) 17 | self.conv_maxpool4 = nn.MaxPool2d(kernel_size=2) 18 | # Bottleneck 19 | self.bottleneck = nn.Sequential( 20 | nn.Conv2d(kernel_size=3, in_channels=512, out_channels=1024, padding=1), 21 | nn.ReLU(), 22 | nn.BatchNorm2d(1024), 23 | nn.Conv2d(kernel_size=3, in_channels=1024, out_channels=1024, padding=1), 24 | nn.ReLU(), 25 | nn.BatchNorm2d(1024), 26 | nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=3, stride=2) 27 | ) 28 | # Decode 29 | self.conv_decode4 = self.expansive_block(1024, 512, 256) 30 | self.conv_decode3 = self.expansive_block(512, 256, 128) 31 | self.conv_decode2 = self.expansive_block(256, 128, 64) 32 | self.final_layer = self.final_block(128, 64, out_channel) 33 | 34 | def forward(self, x): 35 | # Encode 36 | # (..., width, height) => (..., width - 4, height - 4) 37 | encode_block1 =
self.conv_encode1(x) 38 | # (..., width - 4, height - 4) => (..., (width - 4)/2, (height - 4)/2) 39 | encode_pool1 = self.conv_maxpool1(encode_block1) 40 | # (..., (width - 4) / 2, (height - 4) / 2) => (..., (width - 4)/2 - 4, (height - 4)/2 - 4) 41 | encode_block2 = self.conv_encode2(encode_pool1) 42 | # (..., (width - 4)/2 - 4, (height - 4)/2 - 4) => (..., ((width - 4)/2 - 4)/2, ((height - 4)/2 - 4)/2) 43 | encode_pool2 = self.conv_maxpool2(encode_block2) 44 | # (..., ((width - 4)/2 - 4)/2, ((height - 4)/2 - 4)/2) 45 | # => (..., ((width - 4)/2 - 4)/2 - 4, ((height - 4)/2 - 4)/2 - 4) 46 | encode_block3 = self.conv_encode3(encode_pool2) 47 | # (..., ((width - 4)/2 - 4)/2 - 4, ((height - 4)/2 - 4)/2 - 4) 48 | # => (..., (((width - 4)/2 - 4)/2 - 4) / 2, (((height - 4)/2 - 4)/2 - 4)/2) 49 | encode_pool3 = self.conv_maxpool3(encode_block3) 50 | encode_block4 = self.conv_encode4(encode_pool3) 51 | # (..., ((width - 4)/2 - 4)/2 - 4, ((height - 4)/2 - 4)/2 - 4) 52 | # => (..., (((width - 4)/2 - 4)/2 - 4) / 2, (((height - 4)/2 - 4)/2 - 4)/2) 53 | encode_pool4 = self.conv_maxpool4(encode_block4) 54 | 55 | # Bottleneck 56 | # (..., (((width - 4)/2 - 4)/2 - 4) / 2, (((height - 4)/2 - 4)/2 - 4)/2) 57 | # => (..., (((width - 4)/2 - 4)/2 - 4) / 2, (((height - 4)/2 - 4)/2 - 4)/2) 58 | bottleneck = self.bottleneck(encode_pool4) 59 | # Decode 60 | decode_block4 = self.soft_concat(bottleneck, encode_block4, 1) 61 | cat_layer3 = self.conv_decode4(decode_block4) 62 | decode_block3 = self.soft_concat(cat_layer3, encode_block3, 1) 63 | cat_layer2 = self.conv_decode3(decode_block3) 64 | decode_block2 = self.soft_concat(cat_layer2, encode_block2, 1) 65 | cat_layer1 = self.conv_decode2(decode_block2) 66 | decode_block1 = self.soft_concat(cat_layer1, encode_block1, 1) 67 | final_layer = self.final_layer(decode_block1) 68 | return final_layer 69 | 70 | def contracting_block(self, in_channels, out_channels, kernel_size=3): 71 | # (batch_size, in_channels, width, height) => (batch_size, out_channels, width - 4, height - 4) 72 | return nn.Sequential( 73 | nn.Conv2d(kernel_size=kernel_size, in_channels=in_channels, out_channels=out_channels, padding=1), 74 | nn.ReLU(), 75 | nn.BatchNorm2d(out_channels), 76 | nn.Conv2d(kernel_size=kernel_size, in_channels=out_channels, out_channels=out_channels, padding=1), 77 | nn.ReLU(), 78 | nn.BatchNorm2d(out_channels), 79 | ) 80 | 81 | def expansive_block(self, in_channels, mid_channel, out_channels, kernel_size=3): 82 | # (batch_size, in_channels, width, height) => (batch_size, out_channels, (width - 4) * 2, (height - 4) * 2) 83 | return nn.Sequential( 84 | nn.Conv2d(kernel_size=kernel_size, in_channels=in_channels, out_channels=mid_channel, padding=1), 85 | nn.ReLU(), 86 | nn.BatchNorm2d(mid_channel), 87 | nn.Conv2d(kernel_size=kernel_size, in_channels=mid_channel, out_channels=mid_channel, padding=1), 88 | nn.ReLU(), 89 | nn.BatchNorm2d(mid_channel), 90 | nn.ConvTranspose2d(in_channels=mid_channel, out_channels=out_channels, kernel_size=3, stride=2) 91 | ) 92 | 93 | def final_block(self, in_channels, mid_channel, out_channels, kernel_size=3): 94 | # (batch_size, in_channels, width, height) => (batch_size, out_channels, width - 4, height - 4) 95 | return nn.Sequential( 96 | nn.Conv2d(kernel_size=kernel_size, in_channels=in_channels, out_channels=mid_channel, padding=1), 97 | nn.ReLU(), 98 | nn.BatchNorm2d(mid_channel), 99 | nn.Conv2d(kernel_size=kernel_size, in_channels=mid_channel, out_channels=mid_channel, padding=1), 100 | nn.ReLU(), 101 | nn.BatchNorm2d(mid_channel), 102 | 
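#
# [editor's note] The "width - 4 / height - 4" shape comments in this file were
# inherited from the unpadded models/unet.py further below; here every 3x3 conv
# uses padding=1, so spatial size is preserved and only the MaxPool2d layers
# halve it (e.g. 420 -> 210 -> 105 -> 52 -> 26 through the four pools). The
# stride-2 ConvTranspose2d can then overshoot by a pixel on odd sizes, which is
# exactly the off-by-one that soft_concat reconciles before each skip concat.
#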
nn.Conv2d(kernel_size=kernel_size, in_channels=mid_channel, out_channels=out_channels, padding=1), 103 | nn.ReLU(), 104 | nn.BatchNorm2d(out_channels), 105 | ) 106 | 107 | @staticmethod 108 | def soft_concat(upsampled: torch.Tensor, bypass: torch.Tensor, dim: int = 1): 109 | pad = bypass.shape[-1] - upsampled.shape[-1] 110 | left_pad, right_pad = pad // 2, pad - (pad // 2) 111 | upsampled = F.pad(upsampled, pad=[left_pad, right_pad, left_pad, right_pad], mode='replicate') 112 | return torch.cat((upsampled, bypass), dim) 113 | -------------------------------------------------------------------------------- /models/pspnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from torch.autograd import Variable 7 | 8 | import caffe_pb2 9 | from models.fcn import cross_entropy2d 10 | 11 | pspnet_specs = { 12 | "pascal": {"n_classes": 21, "input_size": (473, 473), "block_config": [3, 4, 23, 3]}, 13 | "cityscapes": {"n_classes": 19, "input_size": (713, 713), "block_config": [3, 4, 23, 3]}, 14 | "ade20k": {"n_classes": 150, "input_size": (473, 473), "block_config": [3, 4, 6, 3]}, 15 | "landslide":{"n_classes":2, "input_size":(420,420),"block_config":[3, 4, 23, 3]} 16 | } 17 | 18 | def multi_scale_cross_entropy2d(input, target, weight=None, size_average=True, scale_weight=None): 19 | if not isinstance(input, tuple): 20 | return cross_entropy2d(input=input, target=target, weight=weight, size_average=size_average) 21 | 22 | # Auxiliary training for PSPNet [1.0, 0.4] and ICNet [1.0, 0.4, 0.16] 23 | if scale_weight is None: # scale_weight: torch tensor type 24 | n_inp = len(input) 25 | scale = 0.4 26 | scale_weight = torch.pow(scale * torch.ones(n_inp), torch.arange(n_inp).float()).to( 27 | target.device 28 | ) 29 | loss = 0.0 30 | for i, inp in enumerate(input): 31 | loss = loss + scale_weight[i] * cross_entropy2d( 32 | input=inp, target=target, weight=weight, size_average=size_average 33 | ) 34 | return loss 35 | 36 | class conv2DBatchNormRelu(nn.Module): 37 | def __init__( 38 | self, 39 | in_channels, 40 | n_filters, 41 | k_size, 42 | stride, 43 | padding, 44 | bias=True, 45 | dilation=1, 46 | is_batchnorm=True, 47 | ): 48 | super(conv2DBatchNormRelu, self).__init__() 49 | 50 | conv_mod = nn.Conv2d( 51 | int(in_channels), 52 | int(n_filters), 53 | kernel_size=k_size, 54 | padding=padding, 55 | stride=stride, 56 | bias=bias, 57 | dilation=dilation, 58 | ) 59 | 60 | if is_batchnorm: 61 | self.cbr_unit = nn.Sequential( 62 | conv_mod, nn.BatchNorm2d(int(n_filters)), nn.ReLU(inplace=True) 63 | ) 64 | else: 65 | self.cbr_unit = nn.Sequential(conv_mod, nn.ReLU(inplace=True)) 66 | 67 | def forward(self, inputs): 68 | # print(inputs.shape) 69 | outputs = self.cbr_unit(inputs) 70 | return outputs 71 | 72 | class conv2DBatchNorm(nn.Module): 73 | def __init__( 74 | self, 75 | in_channels, 76 | n_filters, 77 | k_size, 78 | stride, 79 | padding, 80 | bias=True, 81 | dilation=1, 82 | is_batchnorm=True, 83 | ): 84 | super(conv2DBatchNorm, self).__init__() 85 | 86 | conv_mod = nn.Conv2d( 87 | int(in_channels), 88 | int(n_filters), 89 | kernel_size=k_size, 90 | padding=padding, 91 | stride=stride, 92 | bias=bias, 93 | dilation=dilation, 94 | ) 95 | 96 | if is_batchnorm: 97 | self.cb_unit = nn.Sequential(conv_mod, nn.BatchNorm2d(int(n_filters))) 98 | else: 99 | self.cb_unit = nn.Sequential(conv_mod) 100 | 101 | def forward(self, inputs): 102 | outputs = self.cb_unit(inputs) 103 
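#
# [editor's note] In multi_scale_cross_entropy2d above, the default
# scale_weight evaluates to torch.pow(0.4 * ones(n), arange(n)) =
# [0.4**0, 0.4**1, ...] -- i.e. [1.0, 0.4] for PSPNet's (main, aux) output pair
# and [1.0, 0.4, 0.16] for an ICNet-style triple, matching the weights quoted
# in the inline comment.
#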
| return outputs 104 | 105 | class bottleNeckPSP(nn.Module): 106 | def __init__( 107 | self, in_channels, mid_channels, out_channels, stride, dilation=1, is_batchnorm=True 108 | ): 109 | super(bottleNeckPSP, self).__init__() 110 | 111 | bias = not is_batchnorm 112 | 113 | self.cbr1 = conv2DBatchNormRelu( 114 | in_channels, mid_channels, 1, stride=1, padding=0, bias=bias, is_batchnorm=is_batchnorm 115 | ) 116 | if dilation > 1: 117 | self.cbr2 = conv2DBatchNormRelu( 118 | mid_channels, 119 | mid_channels, 120 | 3, 121 | stride=stride, 122 | padding=dilation, 123 | bias=bias, 124 | dilation=dilation, 125 | is_batchnorm=is_batchnorm, 126 | ) 127 | else: 128 | self.cbr2 = conv2DBatchNormRelu( 129 | mid_channels, 130 | mid_channels, 131 | 3, 132 | stride=stride, 133 | padding=1, 134 | bias=bias, 135 | dilation=1, 136 | is_batchnorm=is_batchnorm, 137 | ) 138 | self.cb3 = conv2DBatchNorm( 139 | mid_channels, out_channels, 1, stride=1, padding=0, bias=bias, is_batchnorm=is_batchnorm 140 | ) 141 | self.cb4 = conv2DBatchNorm( 142 | in_channels, 143 | out_channels, 144 | 1, 145 | stride=stride, 146 | padding=0, 147 | bias=bias, 148 | is_batchnorm=is_batchnorm, 149 | ) 150 | 151 | def forward(self, x): 152 | conv = self.cb3(self.cbr2(self.cbr1(x))) 153 | residual = self.cb4(x) 154 | return F.relu(conv + residual, inplace=True) 155 | 156 | class bottleNeckIdentifyPSP(nn.Module): 157 | def __init__(self, in_channels, mid_channels, stride, dilation=1, is_batchnorm=True): 158 | super(bottleNeckIdentifyPSP, self).__init__() 159 | 160 | bias = not is_batchnorm 161 | 162 | self.cbr1 = conv2DBatchNormRelu( 163 | in_channels, mid_channels, 1, stride=1, padding=0, bias=bias, is_batchnorm=is_batchnorm 164 | ) 165 | if dilation > 1: 166 | self.cbr2 = conv2DBatchNormRelu( 167 | mid_channels, 168 | mid_channels, 169 | 3, 170 | stride=1, 171 | padding=dilation, 172 | bias=bias, 173 | dilation=dilation, 174 | is_batchnorm=is_batchnorm, 175 | ) 176 | else: 177 | self.cbr2 = conv2DBatchNormRelu( 178 | mid_channels, 179 | mid_channels, 180 | 3, 181 | stride=1, 182 | padding=1, 183 | bias=bias, 184 | dilation=1, 185 | is_batchnorm=is_batchnorm, 186 | ) 187 | self.cb3 = conv2DBatchNorm( 188 | mid_channels, in_channels, 1, stride=1, padding=0, bias=bias, is_batchnorm=is_batchnorm 189 | ) 190 | 191 | def forward(self, x): 192 | residual = x 193 | x = self.cb3(self.cbr2(self.cbr1(x))) 194 | return F.relu(x + residual, inplace=True) 195 | 196 | class residualBlockPSP(nn.Module): 197 | def __init__( 198 | self, 199 | n_blocks, 200 | in_channels, 201 | mid_channels, 202 | out_channels, 203 | stride, 204 | dilation=1, 205 | include_range="all", 206 | is_batchnorm=True, 207 | ): 208 | super(residualBlockPSP, self).__init__() 209 | 210 | if dilation > 1: 211 | stride = 1 212 | 213 | # residualBlockPSP = convBlockPSP + identityBlockPSPs 214 | layers = [] 215 | if include_range in ["all", "conv"]: 216 | layers.append( 217 | bottleNeckPSP( 218 | in_channels, 219 | mid_channels, 220 | out_channels, 221 | stride, 222 | dilation, 223 | is_batchnorm=is_batchnorm, 224 | ) 225 | ) 226 | if include_range in ["all", "identity"]: 227 | for i in range(n_blocks - 1): 228 | layers.append( 229 | bottleNeckIdentifyPSP( 230 | out_channels, mid_channels, stride, dilation, is_batchnorm = is_batchnorm 231 | ) 232 | ) 233 | 234 | self.layers = nn.Sequential(*layers) 235 | 236 | def forward(self, x): 237 | return self.layers(x) 238 | 239 | class pyramidPooling(nn.Module): 240 | def __init__( 241 | self, in_channels, pool_sizes, model_name="pspnet", 
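#
# [editor's note] residualBlockPSP stacks one projection bottleneck
# (bottleNeckPSP, which changes channels/stride) followed by n_blocks - 1
# identity bottlenecks. For example, in the encoder below,
#
#     residualBlockPSP(3, 128, 64, 256, 1, 1)
#     # == bottleNeckPSP(128 -> 256) + 2 x bottleNeckIdentifyPSP(256)
#
# and whenever dilation > 1 the stride is forced to 1, so the dilated blocks
# keep the 1/8 feature resolution while enlarging the receptive field.
#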
fusion_mode="cat", is_batchnorm=True 242 | ): 243 | super(pyramidPooling, self).__init__() 244 | 245 | bias = not is_batchnorm 246 | 247 | self.paths = [] 248 | for i in range(len(pool_sizes)): 249 | self.paths.append( 250 | conv2DBatchNormRelu( 251 | in_channels, 252 | int(in_channels / len(pool_sizes)), 253 | 1, 254 | 1, 255 | 0, 256 | bias=bias, 257 | is_batchnorm=is_batchnorm, 258 | ) 259 | ) 260 | 261 | self.path_module_list = nn.ModuleList(self.paths) 262 | self.pool_sizes = pool_sizes 263 | self.model_name = model_name 264 | self.fusion_mode = fusion_mode 265 | 266 | def forward(self, x): 267 | h, w = x.shape[2:] 268 | 269 | if self.training or self.model_name != "icnet": # general settings or pspnet 270 | k_sizes = [] 271 | strides = [] 272 | for pool_size in self.pool_sizes: 273 | k_sizes.append((int(h / pool_size), int(w / pool_size))) 274 | strides.append((int(h / pool_size), int(w / pool_size))) 275 | else: # eval mode and icnet: pre-trained for 1025 x 2049 276 | k_sizes = [(8, 15), (13, 25), (17, 33), (33, 65)] 277 | strides = [(5, 10), (10, 20), (16, 32), (33, 65)] 278 | 279 | if self.fusion_mode == "cat": # pspnet: concat (including x) 280 | output_slices = [x] 281 | 282 | for i, (module, pool_size) in enumerate(zip(self.path_module_list, self.pool_sizes)): 283 | out = F.avg_pool2d(x, k_sizes[i], stride=strides[i], padding=0) 284 | # out = F.adaptive_avg_pool2d(x, output_size=(pool_size, pool_size)) 285 | if self.model_name != "icnet": 286 | out = module(out) 287 | out = F.interpolate(out, size=(h, w), mode="bilinear", align_corners=True) 288 | output_slices.append(out) 289 | 290 | return torch.cat(output_slices, dim=1) 291 | else: # icnet: element-wise sum (including x) 292 | pp_sum = x 293 | 294 | for i, (module, pool_size) in enumerate(zip(self.path_module_list, self.pool_sizes)): 295 | out = F.avg_pool2d(x, k_sizes[i], stride=strides[i], padding=0) 296 | # out = F.adaptive_avg_pool2d(x, output_size=(pool_size, pool_size)) 297 | if self.model_name != "icnet": 298 | out = module(out) 299 | out = F.interpolate(out, size=(h, w), mode="bilinear", align_corners=True) 300 | pp_sum = pp_sum + out 301 | 302 | return pp_sum 303 | 304 | class pspnet(nn.Module): 305 | def __init__( 306 | self, n_classes=21, block_config=[3, 4, 23, 3], input_size=(473, 473), version=None 307 | ): 308 | 309 | super(pspnet, self).__init__() 310 | 311 | self.block_config = ( 312 | pspnet_specs[version]["block_config"] if version is not None else block_config 313 | ) 314 | self.n_classes = pspnet_specs[version]["n_classes"] if version is not None else n_classes 315 | self.input_size = pspnet_specs[version]["input_size"] if version is not None else input_size 316 | 317 | # Encoder 318 | self.convbnrelu1_1 = conv2DBatchNormRelu( 319 | in_channels=3, k_size=3, n_filters=64, padding=1, stride=2, bias=False 320 | ) 321 | self.convbnrelu1_2 = conv2DBatchNormRelu( 322 | in_channels=64, k_size=3, n_filters=64, padding=1, stride=1, bias=False 323 | ) 324 | self.convbnrelu1_3 = conv2DBatchNormRelu( 325 | in_channels=64, k_size=3, n_filters=128, padding=1, stride=1, bias=False 326 | ) 327 | 328 | # Vanilla Residual Blocks 329 | self.res_block2 = residualBlockPSP(self.block_config[0], 128, 64, 256, 1, 1) 330 | self.res_block3 = residualBlockPSP(self.block_config[1], 256, 128, 512, 2, 1) 331 | 332 | # Dilated Residual Blocks 333 | self.res_block4 = residualBlockPSP(self.block_config[2], 512, 256, 1024, 1, 2) 334 | self.res_block5 = residualBlockPSP(self.block_config[3], 1024, 512, 2048, 1, 4) 335 | 336 | # Pyramid 
Pooling Module 337 | self.pyramid_pooling = pyramidPooling(2048, [6, 3, 2, 1]) 338 | 339 | # Final conv layers 340 | self.cbr_final = conv2DBatchNormRelu(4096, 512, 3, 1, 1, False) 341 | self.dropout = nn.Dropout2d(p=0.1, inplace=False) 342 | self.classification = nn.Conv2d(512, self.n_classes, 1, 1, 0) 343 | 344 | # Auxiliary layers for training 345 | self.convbnrelu4_aux = conv2DBatchNormRelu( 346 | in_channels=1024, k_size=3, n_filters=256, padding=1, stride=1, bias=False 347 | ) 348 | self.aux_cls = nn.Conv2d(256, self.n_classes, 1, 1, 0) 349 | 350 | # Define auxiliary loss function 351 | self.loss = multi_scale_cross_entropy2d 352 | 353 | def forward(self, x): 354 | inp_shape = x.shape[2:] 355 | 356 | # H, W -> H/2, W/2 357 | x = self.convbnrelu1_1(x) 358 | x = self.convbnrelu1_2(x) 359 | x = self.convbnrelu1_3(x) 360 | 361 | # H/2, W/2 -> H/4, W/4 362 | x = F.max_pool2d(x, 3, 2, 1) 363 | 364 | # H/4, W/4 -> H/8, W/8 365 | x = self.res_block2(x) 366 | x = self.res_block3(x) 367 | x = self.res_block4(x) 368 | 369 | # Auxiliary layers for training 370 | if self.training: 371 | x_aux = self.convbnrelu4_aux(x) 372 | x_aux = self.dropout(x_aux) 373 | x_aux = self.aux_cls(x_aux) 374 | 375 | x = self.res_block5(x) 376 | 377 | x = self.pyramid_pooling(x) 378 | 379 | x = self.cbr_final(x) 380 | x = self.dropout(x) 381 | 382 | x = self.classification(x) 383 | x = F.interpolate(x, size=inp_shape, mode="bilinear", align_corners=True) 384 | 385 | return x 386 | # if self.training: 387 | # return (x, x_aux) 388 | # else: # eval mode 389 | # return x 390 | 391 | def load_pretrained_model(self, model_path): 392 | """ 393 | Load weights from caffemodel w/o caffe dependency 394 | and plug them in corresponding modules 395 | """ 396 | # Only care about layer_types that have trainable parameters 397 | ltypes = ["BNData", "ConvolutionData", "HoleConvolutionData"] 398 | 399 | def _get_layer_params(layer, ltype): 400 | 401 | if ltype == "BNData": 402 | gamma = np.array(layer.blobs[0].data) 403 | beta = np.array(layer.blobs[1].data) 404 | mean = np.array(layer.blobs[2].data) 405 | var = np.array(layer.blobs[3].data) 406 | return [mean, var, gamma, beta] 407 | 408 | elif ltype in ["ConvolutionData", "HoleConvolutionData"]: 409 | is_bias = layer.convolution_param.bias_term 410 | weights = np.array(layer.blobs[0].data) 411 | bias = [] 412 | if is_bias: 413 | bias = np.array(layer.blobs[1].data) 414 | return [weights, bias] 415 | 416 | elif ltype == "InnerProduct": 417 | raise Exception("Fully connected layers {}, not supported".format(ltype)) 418 | 419 | else: 420 | raise Exception("Unkown layer type {}".format(ltype)) 421 | 422 | net = caffe_pb2.NetParameter() 423 | with open(model_path, "rb") as model_file: 424 | net.MergeFromString(model_file.read()) 425 | 426 | # dict formatted as -> key: :: value: 427 | layer_types = {} 428 | # dict formatted as -> key: :: value:[] 429 | layer_params = {} 430 | 431 | for l in net.layer: 432 | lname = l.name 433 | ltype = l.type 434 | if ltype in ltypes: 435 | print("Processing layer {}".format(lname)) 436 | layer_types[lname] = ltype 437 | layer_params[lname] = _get_layer_params(l, ltype) 438 | 439 | # Set affine=False for all batchnorm modules 440 | def _no_affine_bn(module=None): 441 | if isinstance(module, nn.BatchNorm2d): 442 | module.affine = False 443 | 444 | if len([m for m in module.children()]) > 0: 445 | for child in module.children(): 446 | _no_affine_bn(child) 447 | 448 | # _no_affine_bn(self) 449 | 450 | def _transfer_conv(layer_name, module): 451 | weights, 
bias = layer_params[layer_name] 452 | w_shape = np.array(module.weight.size()) 453 | 454 | print( 455 | "CONV {}: Original {} and trans weights {}".format( 456 | layer_name, w_shape, weights.shape 457 | ) 458 | ) 459 | 460 | module.weight.data.copy_(torch.from_numpy(weights).view_as(module.weight)) 461 | 462 | if len(bias) != 0: 463 | b_shape = np.array(module.bias.size()) 464 | print( 465 | "CONV {}: Original {} and trans bias {}".format(layer_name, b_shape, bias.shape) 466 | ) 467 | module.bias.data.copy_(torch.from_numpy(bias).view_as(module.bias)) 468 | 469 | def _transfer_conv_bn(conv_layer_name, mother_module): 470 | conv_module = mother_module[0] 471 | bn_module = mother_module[1] 472 | 473 | _transfer_conv(conv_layer_name, conv_module) 474 | 475 | mean, var, gamma, beta = layer_params[conv_layer_name + "/bn"] 476 | print( 477 | "BN {}: Original {} and trans weights {}".format( 478 | conv_layer_name, bn_module.running_mean.size(), mean.shape 479 | ) 480 | ) 481 | bn_module.running_mean.copy_(torch.from_numpy(mean).view_as(bn_module.running_mean)) 482 | bn_module.running_var.copy_(torch.from_numpy(var).view_as(bn_module.running_var)) 483 | bn_module.weight.data.copy_(torch.from_numpy(gamma).view_as(bn_module.weight)) 484 | bn_module.bias.data.copy_(torch.from_numpy(beta).view_as(bn_module.bias)) 485 | 486 | def _transfer_residual(prefix, block): 487 | block_module, n_layers = block[0], block[1] 488 | 489 | bottleneck = block_module.layers[0] 490 | bottleneck_conv_bn_dic = { 491 | prefix + "_1_1x1_reduce": bottleneck.cbr1.cbr_unit, 492 | prefix + "_1_3x3": bottleneck.cbr2.cbr_unit, 493 | prefix + "_1_1x1_proj": bottleneck.cb4.cb_unit, 494 | prefix + "_1_1x1_increase": bottleneck.cb3.cb_unit, 495 | } 496 | 497 | for k, v in bottleneck_conv_bn_dic.items(): 498 | _transfer_conv_bn(k, v) 499 | 500 | for layer_idx in range(2, n_layers + 1): 501 | residual_layer = block_module.layers[layer_idx - 1] 502 | residual_conv_bn_dic = { 503 | "_".join( 504 | map(str, [prefix, layer_idx, "1x1_reduce"]) 505 | ): residual_layer.cbr1.cbr_unit, 506 | "_".join(map(str, [prefix, layer_idx, "3x3"])): residual_layer.cbr2.cbr_unit, 507 | "_".join( 508 | map(str, [prefix, layer_idx, "1x1_increase"]) 509 | ): residual_layer.cb3.cb_unit, 510 | } 511 | 512 | for k, v in residual_conv_bn_dic.items(): 513 | _transfer_conv_bn(k, v) 514 | 515 | convbn_layer_mapping = { 516 | "conv1_1_3x3_s2": self.convbnrelu1_1.cbr_unit, 517 | "conv1_2_3x3": self.convbnrelu1_2.cbr_unit, 518 | "conv1_3_3x3": self.convbnrelu1_3.cbr_unit, 519 | "conv5_3_pool6_conv": self.pyramid_pooling.paths[0].cbr_unit, 520 | "conv5_3_pool3_conv": self.pyramid_pooling.paths[1].cbr_unit, 521 | "conv5_3_pool2_conv": self.pyramid_pooling.paths[2].cbr_unit, 522 | "conv5_3_pool1_conv": self.pyramid_pooling.paths[3].cbr_unit, 523 | "conv5_4": self.cbr_final.cbr_unit, 524 | "conv4_" + str(self.block_config[2] + 1): self.convbnrelu4_aux.cbr_unit, 525 | } # Auxiliary layers for training 526 | 527 | residual_layers = { 528 | "conv2": [self.res_block2, self.block_config[0]], 529 | "conv3": [self.res_block3, self.block_config[1]], 530 | "conv4": [self.res_block4, self.block_config[2]], 531 | "conv5": [self.res_block5, self.block_config[3]], 532 | } 533 | 534 | # Transfer weights for all non-residual conv+bn layers 535 | for k, v in convbn_layer_mapping.items(): 536 | _transfer_conv_bn(k, v) 537 | 538 | # Transfer weights for final non-bn conv layer 539 | _transfer_conv("conv6", self.classification) 540 | _transfer_conv("conv6_1", self.aux_cls) 541 | 542 | # 
Transfer weights for all residual layers 543 | for k, v in residual_layers.items(): 544 | _transfer_residual(k, v) 545 | 546 | def tile_predict(self, imgs, include_flip_mode=True): 547 | """ 548 | Predict by takin overlapping tiles from the image. 549 | 550 | Strides are adaptively computed from the imgs shape 551 | and input size 552 | 553 | :param imgs: torch.Tensor with shape [N, C, H, W] in BGR format 554 | :param side: int with side length of model input 555 | :param n_classes: int with number of classes in seg output. 556 | """ 557 | 558 | side_x, side_y = self.input_size 559 | n_classes = self.n_classes 560 | n_samples, c, h, w = imgs.shape 561 | # n = int(max(h,w) / float(side) + 1) 562 | n_x = int(h / float(side_x) + 1) 563 | n_y = int(w / float(side_y) + 1) 564 | stride_x = (h - side_x) / float(n_x) 565 | stride_y = (w - side_y) / float(n_y) 566 | 567 | x_ends = [[int(i * stride_x), int(i * stride_x) + side_x] for i in range(n_x + 1)] 568 | y_ends = [[int(i * stride_y), int(i * stride_y) + side_y] for i in range(n_y + 1)] 569 | 570 | pred = np.zeros([n_samples, n_classes, h, w]) 571 | count = np.zeros([h, w]) 572 | 573 | slice_count = 0 574 | for sx, ex in x_ends: 575 | for sy, ey in y_ends: 576 | slice_count += 1 577 | 578 | imgs_slice = imgs[:, :, sx:ex, sy:ey] 579 | if include_flip_mode: 580 | imgs_slice_flip = torch.from_numpy( 581 | np.copy(imgs_slice.cpu().numpy()[:, :, :, ::-1]) 582 | ).float() 583 | 584 | is_model_on_cuda = next(self.parameters()).is_cuda 585 | 586 | inp = Variable(imgs_slice, volatile=True) 587 | if include_flip_mode: 588 | flp = Variable(imgs_slice_flip, volatile=True) 589 | 590 | if is_model_on_cuda: 591 | inp = inp.cuda() 592 | if include_flip_mode: 593 | flp = flp.cuda() 594 | 595 | psub1 = F.softmax(self.forward(inp), dim=1).data.cpu().numpy() 596 | if include_flip_mode: 597 | psub2 = F.softmax(self.forward(flp), dim=1).data.cpu().numpy() 598 | psub = (psub1 + psub2[:, :, :, ::-1]) / 2.0 599 | else: 600 | psub = psub1 601 | 602 | pred[:, :, sx:ex, sy:ey] = psub 603 | count[sx:ex, sy:ey] += 1.0 604 | 605 | score = (pred / count[None, None, ...]).astype(np.float32) 606 | return score / np.expand_dims(score.sum(axis=1), axis=1) -------------------------------------------------------------------------------- /models/resnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterybye/Semantic-segmentation-methods-for-landslide-detection/462fb04bab105b8dba5b5afb1b7f28395bf0d59f/models/resnet/__init__.py -------------------------------------------------------------------------------- /models/resnet/resnet_backbone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Donny You(youansheng@gmail.com) 4 | import torch.nn as nn 5 | from models.resnet.resnet_models import ResNetModels 6 | 7 | 8 | class NormalResnetBackbone(nn.Module): 9 | def __init__(self, orig_resnet): 10 | super(NormalResnetBackbone, self).__init__() 11 | 12 | self.num_features = 2048 13 | # take pretrained resnet, except AvgPool and FC 14 | if hasattr(orig_resnet, 'prefix'): 15 | self.prefix = orig_resnet.prefix 16 | else: 17 | self.prefix = nn.Sequential(orig_resnet.conv1, orig_resnet.bn1, orig_resnet.relu) 18 | self.maxpool = orig_resnet.maxpool 19 | self.layer1 = orig_resnet.layer1 20 | self.layer2 = orig_resnet.layer2 21 | self.layer3 = orig_resnet.layer3 22 | self.layer4 = orig_resnet.layer4 23 | 24 | def 
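#
# [editor's note] tile_predict in models/pspnet.py above still wraps tensors in
# torch.autograd.Variable(..., volatile=True); volatile has been a no-op since
# PyTorch 0.4, so under the pinned PyTorch 1.5.0 gradients are only suppressed
# if the caller does it explicitly. A sketch of the modern idiom:
#
#     with torch.no_grad():
#         psub1 = F.softmax(self.forward(inp), dim=1).cpu().numpy()
#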
get_num_features(self): 25 | return self.num_features 26 | 27 | def forward(self, x): 28 | tuple_features = list() 29 | x = self.prefix(x) 30 | x = self.maxpool(x) 31 | 32 | x = self.layer1(x) 33 | tuple_features.append(x) 34 | x = self.layer2(x) 35 | tuple_features.append(x) 36 | x = self.layer3(x) 37 | tuple_features.append(x) 38 | x = self.layer4(x) 39 | tuple_features.append(x) 40 | 41 | return tuple_features 42 | 43 | 44 | class DilatedResnetBackbone(nn.Module): 45 | def __init__(self, orig_resnet, dilate_scale=8, multi_grid=(1, 2, 4)): 46 | super(DilatedResnetBackbone, self).__init__() 47 | 48 | self.num_features = 2048 49 | from functools import partial 50 | 51 | if dilate_scale == 8: 52 | orig_resnet.layer3.apply(partial(self._nostride_dilate, dilate=2)) 53 | if multi_grid is None: 54 | orig_resnet.layer4.apply(partial(self._nostride_dilate, dilate=4)) 55 | else: 56 | for i, r in enumerate(multi_grid): 57 | orig_resnet.layer4[i].apply(partial(self._nostride_dilate, dilate=int(4 * r))) 58 | 59 | elif dilate_scale == 16: 60 | if multi_grid is None: 61 | orig_resnet.layer4.apply(partial(self._nostride_dilate, dilate=2)) 62 | else: 63 | for i, r in enumerate(multi_grid): 64 | orig_resnet.layer4[i].apply(partial(self._nostride_dilate, dilate=int(2 * r))) 65 | 66 | # Take pretrained resnet, except AvgPool and FC 67 | self.prefix = orig_resnet.prefix 68 | self.maxpool = orig_resnet.maxpool 69 | self.layer1 = orig_resnet.layer1 70 | self.layer2 = orig_resnet.layer2 71 | self.layer3 = orig_resnet.layer3 72 | self.layer4 = orig_resnet.layer4 73 | 74 | def _nostride_dilate(self, m, dilate): 75 | classname = m.__class__.__name__ 76 | if classname.find('Conv') != -1: 77 | # the convolution with stride 78 | if m.stride == (2, 2): 79 | m.stride = (1, 1) 80 | if m.kernel_size == (3, 3): 81 | m.dilation = (dilate // 2, dilate // 2) 82 | m.padding = (dilate // 2, dilate // 2) 83 | # other convoluions 84 | else: 85 | if m.kernel_size == (3, 3): 86 | m.dilation = (dilate, dilate) 87 | m.padding = (dilate, dilate) 88 | 89 | def get_num_features(self): 90 | return self.num_features 91 | 92 | def forward(self, x): 93 | tuple_features = list() 94 | x = self.prefix(x) 95 | x = self.maxpool(x) 96 | 97 | x = self.layer1(x) 98 | tuple_features.append(x) 99 | x = self.layer2(x) 100 | tuple_features.append(x) 101 | x = self.layer3(x) 102 | tuple_features.append(x) 103 | x = self.layer4(x) 104 | tuple_features.append(x) 105 | 106 | return tuple_features 107 | 108 | 109 | class ResNetBackbone(object): 110 | def __init__(self, configer): 111 | self.configer = configer 112 | self.resnet_models = ResNetModels(self.configer) 113 | 114 | def __call__(self): 115 | arch = self.configer 116 | multi_grid = [1, 2, 1] # self.configer.get('network.multi_grid', default=None) 117 | 118 | # if arch == 'resnet34': 119 | # orig_resnet = self.resnet_models.resnet34() 120 | # arch_net = NormalResnetBackbone(orig_resnet) 121 | # arch_net.num_features = 512 122 | # 123 | # elif arch == 'resnet34_dilated8': 124 | # orig_resnet = self.resnet_models.resnet34() 125 | # arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) 126 | # arch_net.num_features = 512 127 | # 128 | # elif arch == 'resnet34_dilated16': 129 | # orig_resnet = self.resnet_models.resnet34() 130 | # arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) 131 | # arch_net.num_features = 512 132 | # elif arch == 'resnet50': 133 | 134 | if arch == 'resnet50': 135 | orig_resnet = self.resnet_models.resnet50() 136 | 
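#
# [editor's note] _nostride_dilate above converts the stride-2 convolutions of
# layer3/layer4 into stride-1 dilated ones, so dilate_scale=8 keeps the output
# at 1/8 of the input resolution while reusing the pretrained weights
# unchanged; the multi_grid rates additionally stagger the dilation across the
# blocks of layer4, as in the DeepLab v3 multi-grid scheme.
#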
arch_net = NormalResnetBackbone(orig_resnet) 137 | 138 | elif arch == 'resnet50_dilated8': 139 | orig_resnet = self.resnet_models.resnet50() 140 | arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) 141 | 142 | elif arch == 'resnet50_dilated16': 143 | orig_resnet = self.resnet_models.resnet50() 144 | arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) 145 | 146 | elif arch == 'deepbase_resnet50': 147 | orig_resnet = self.resnet_models.deepbase_resnet50() 148 | arch_net = NormalResnetBackbone(orig_resnet) 149 | 150 | elif arch == 'deepbase_resnet50_dilated8': 151 | orig_resnet = self.resnet_models.deepbase_resnet50() 152 | arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) 153 | 154 | elif arch == 'deepbase_resnet50_dilated16': 155 | orig_resnet = self.resnet_models.deepbase_resnet50() 156 | arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) 157 | 158 | elif arch == 'resnet101': 159 | orig_resnet = self.resnet_models.resnet101() 160 | arch_net = NormalResnetBackbone(orig_resnet) 161 | 162 | elif arch == 'resnet101_dilated8': 163 | orig_resnet = self.resnet_models.resnet101() 164 | arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) 165 | 166 | elif arch == 'resnet101_dilated16': 167 | orig_resnet = self.resnet_models.resnet101() 168 | arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) 169 | 170 | elif arch == 'deepbase_resnet101': 171 | orig_resnet = self.resnet_models.deepbase_resnet101() 172 | arch_net = NormalResnetBackbone(orig_resnet) 173 | 174 | elif arch == 'deepbase_resnet101_dilated8': 175 | orig_resnet = self.resnet_models.deepbase_resnet101() 176 | arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) 177 | 178 | elif arch == 'deepbase_resnet101_dilated16': 179 | orig_resnet = self.resnet_models.deepbase_resnet101() 180 | arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) 181 | 182 | elif arch == 'self_pretrained_resnet101': 183 | orig_resnet = self.resnet_models.self_pretrained_resnet101() 184 | arch_net = NormalResnetBackbone(orig_resnet) 185 | else: 186 | raise Exception('Architecture undefined!') 187 | 188 | return arch_net 189 | -------------------------------------------------------------------------------- /models/resnet/resnet_models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Donny You(youansheng@gmail.com) 4 | 5 | 6 | import math 7 | 8 | import torch 9 | import torch.nn as nn 10 | from collections import OrderedDict 11 | 12 | import torchvision 13 | 14 | from models.tools.ModuleHelper import ModuleHelper 15 | 16 | model_urls = { 17 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 18 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 19 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 20 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 21 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 22 | } 23 | 24 | 25 | def conv3x3(in_planes, out_planes, stride=1): 26 | "3x3 convolution with padding" 27 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 28 | padding=1, bias=False) 29 | 30 | 31 | class BasicBlock(nn.Module): 32 | expansion = 1 33 | 34 | def 
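#
# [editor's sketch] ResNetBackbone is keyed on the configer string itself
# (arch = self.configer above), so typical use is simply:
#
#     backbone = ResNetBackbone('deepbase_resnet50_dilated8')()
#     feats = backbone(torch.rand(1, 3, 420, 420))
#     # feats is the list of four intermediate feature maps (layer1..layer4)
#
# Loading the ImageNet weights expects ./pretrained_models/3x3resnet50-imagenet.pth
# (see pretrained_models/download.txt); if the file is missing,
# ModuleHelper.load_model just prints a notice and returns the randomly
# initialised model.
#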
__init__(self, inplanes, planes, stride=1, downsample=None, norm_type=None): 35 | super(BasicBlock, self).__init__() 36 | self.conv1 = conv3x3(inplanes, planes, stride) 37 | self.bn1 = ModuleHelper.BatchNorm2d(norm_type=norm_type)(planes) 38 | self.relu = nn.ReLU(inplace=True) 39 | self.conv2 = conv3x3(planes, planes) 40 | self.bn2 = ModuleHelper.BatchNorm2d(norm_type=norm_type)(planes) 41 | self.downsample = downsample 42 | self.stride = stride 43 | 44 | def forward(self, x): 45 | residual = x 46 | 47 | out = self.conv1(x) 48 | out = self.bn1(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv2(out) 52 | out = self.bn2(out) 53 | 54 | if self.downsample is not None: 55 | residual = self.downsample(x) 56 | 57 | out += residual 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class Bottleneck(nn.Module): 64 | expansion = 4 65 | 66 | def __init__(self, inplanes, planes, stride=1, downsample=None, norm_type=None): 67 | super(Bottleneck, self).__init__() 68 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 69 | self.bn1 = ModuleHelper.BatchNorm2d(norm_type=norm_type)(planes) 70 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 71 | padding=1, bias=False) 72 | self.bn2 = ModuleHelper.BatchNorm2d(norm_type=norm_type)(planes) 73 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 74 | self.bn3 = ModuleHelper.BatchNorm2d(norm_type=norm_type)(planes * 4) 75 | self.relu = nn.ReLU(inplace=True) 76 | self.downsample = downsample 77 | self.stride = stride 78 | 79 | def forward(self, x): 80 | residual = x 81 | 82 | out = self.conv1(x) 83 | out = self.bn1(out) 84 | out = self.relu(out) 85 | 86 | out = self.conv2(out) 87 | out = self.bn2(out) 88 | out = self.relu(out) 89 | 90 | out = self.conv3(out) 91 | out = self.bn3(out) 92 | 93 | if self.downsample is not None: 94 | residual = self.downsample(x) 95 | 96 | out += residual 97 | out = self.relu(out) 98 | 99 | return out 100 | 101 | 102 | class ResNet(nn.Module): 103 | 104 | def __init__(self, block, layers, num_classes=1000, deep_base=False, norm_type=None): 105 | super(ResNet, self).__init__() 106 | self.inplanes = 128 if deep_base else 64 107 | if deep_base: 108 | self.prefix = nn.Sequential(OrderedDict([ 109 | ('conv1', nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)), 110 | ('bn1', ModuleHelper.BatchNorm2d(norm_type=norm_type)(64)), 111 | ('relu1', nn.ReLU(inplace=False)), 112 | ('conv2', nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)), 113 | ('bn2', ModuleHelper.BatchNorm2d(norm_type=norm_type)(64)), 114 | ('relu2', nn.ReLU(inplace=False)), 115 | ('conv3', nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False)), 116 | ('bn3', ModuleHelper.BatchNorm2d(norm_type=norm_type)(self.inplanes)), 117 | ('relu3', nn.ReLU(inplace=False))] 118 | )) 119 | else: 120 | self.prefix = nn.Sequential(OrderedDict([ 121 | ('conv1', nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)), 122 | ('bn1', ModuleHelper.BatchNorm2d(norm_type=norm_type)(self.inplanes)), 123 | ('relu', nn.ReLU(inplace=False))] 124 | )) 125 | 126 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) # change. 
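#
# [editor's note] deep_base=True replaces the usual 7x7/stride-2 stem with
# three stacked 3x3 convolutions (3 -> 64 -> 64 -> 128), the "ResNet-C" stem
# popularised by PSPNet-style backbones -- presumably the reason the bundled
# checkpoint is named 3x3resnet50-imagenet.pth.
#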
127 | 128 | self.layer1 = self._make_layer(block, 64, layers[0], norm_type=norm_type) 129 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_type=norm_type) 130 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_type=norm_type) 131 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_type=norm_type) 132 | self.avgpool = nn.AvgPool2d(7, stride=1) 133 | self.fc = nn.Linear(512 * block.expansion, num_classes) 134 | 135 | for m in self.modules(): 136 | if isinstance(m, nn.Conv2d): 137 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 138 | m.weight.data.normal_(0, math.sqrt(2. / n)) 139 | elif isinstance(m, ModuleHelper.BatchNorm2d(norm_type=norm_type, ret_cls=True)): 140 | m.weight.data.fill_(1) 141 | m.bias.data.zero_() 142 | 143 | def _make_layer(self, block, planes, blocks, stride=1, norm_type=None): 144 | downsample = None 145 | if stride != 1 or self.inplanes != planes * block.expansion: 146 | downsample = nn.Sequential( 147 | nn.Conv2d(self.inplanes, planes * block.expansion, 148 | kernel_size=1, stride=stride, bias=False), 149 | ModuleHelper.BatchNorm2d(norm_type=norm_type)(planes * block.expansion), 150 | ) 151 | 152 | layers = [] 153 | layers.append(block(self.inplanes, planes, stride, downsample, norm_type=norm_type)) 154 | self.inplanes = planes * block.expansion 155 | for i in range(1, blocks): 156 | layers.append(block(self.inplanes, planes, norm_type=norm_type)) 157 | 158 | return nn.Sequential(*layers) 159 | 160 | def forward(self, x): 161 | # the stem is wrapped in self.prefix in __init__; conv1/bn1/relu do not exist as attributes here 162 | x = self.prefix(x)  # fixed: was self.conv1/self.bn1/self.relu, which would raise AttributeError 163 | 164 | x = self.maxpool(x) 165 | 166 | x = self.layer1(x) 167 | x = self.layer2(x) 168 | x = self.layer3(x) 169 | x = self.layer4(x) 170 | 171 | x = self.avgpool(x) 172 | x = x.view(x.size(0), -1) 173 | x = self.fc(x) 174 | 175 | return x 176 | 177 | 178 | class ResNetModels(object): 179 | 180 | def __init__(self, configer): 181 | self.configer = configer 182 | self.pretrained = './pretrained_models/3x3resnet50-imagenet.pth' 183 | 184 | def self_pretrained_resnet101(self): 185 | self_pretrained = 'data/models/resnet-kv/resnet101_1e-05_1.0-1.0-1.0_100_030.pth' 186 | model: nn.Module = torchvision.models.resnet101(num_classes=43) 187 | model.load_state_dict(torch.load(self_pretrained)['model_state_dict']) 188 | model.fc = nn.Linear(model.fc.in_features, 3)  # fixed: resnet101's fc takes 2048 features, not 512 189 | return model 190 | 191 | # def resnet18(self, **kwargs): 192 | # """Constructs a ResNet-18 model. 193 | # Args: 194 | # pretrained (bool): If True, returns a model pre-trained on Places 195 | # """ 196 | # model = ResNet(BasicBlock, [2, 2, 2, 2], deep_base=False, 197 | # norm_type=self.configer.get('network', 'norm_type'), **kwargs) 198 | # model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) 199 | # return model 200 | # 201 | # def deepbase_resnet18(self, **kwargs): 202 | # """Constructs a ResNet-18 model. 203 | # Args: 204 | # pretrained (bool): If True, returns a model pre-trained on Places 205 | # """ 206 | # model = ResNet(BasicBlock, [2, 2, 2, 2], deep_base=True, 207 | # norm_type=self.configer.get('network', 'norm_type'), **kwargs) 208 | # model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) 209 | # return model 210 | # 211 | # def resnet34(self, **kwargs): 212 | # """Constructs a ResNet-34 model.
213 | # Args: 214 | # pretrained (bool): If True, returns a model pre-trained on Places 215 | # """ 216 | # model = ResNet(BasicBlock, [3, 4, 6, 3], deep_base=False, 217 | # norm_type=self.configer.get('network', 'norm_type'), **kwargs) 218 | # model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) 219 | # return model 220 | # 221 | # def deepbase_resnet34(self, **kwargs): 222 | # """Constructs a ResNet-34 model. 223 | # Args: 224 | # pretrained (bool): If True, returns a model pre-trained on Places 225 | # """ 226 | # model = ResNet(BasicBlock, [3, 4, 6, 3], deep_base=True, 227 | # norm_type=self.configer.get('network', 'norm_type'), **kwargs) 228 | # model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) 229 | # return model 230 | 231 | def resnet50(self, **kwargs): 232 | """Constructs a ResNet-50 model. 233 | Args: 234 | pretrained (bool): If True, returns a model pre-trained on Places 235 | """ 236 | model = ResNet(Bottleneck, [3, 4, 6, 3], deep_base=False, 237 | norm_type="sync_batchnorm", **kwargs) # self.configer.get('network', 'norm_type'), **kwargs) 238 | model = ModuleHelper.load_model(model, 239 | pretrained=self.pretrained) # self.configer.get('network', 'pretrained')) 240 | return model 241 | 242 | def deepbase_resnet50(self, **kwargs): 243 | """Constructs a ResNet-50 model. 244 | Args: 245 | pretrained (bool): If True, returns a model pre-trained on Places 246 | """ 247 | model = ResNet(Bottleneck, [3, 4, 6, 3], deep_base=True, 248 | norm_type="sync_batchnorm", **kwargs) # self.configer.get('network', 'norm_type'), **kwargs) 249 | model = ModuleHelper.load_model(model, 250 | pretrained=self.pretrained) # self.configer.get('network', 'pretrained')) 251 | return model 252 | 253 | def resnet101(self, **kwargs): 254 | """Constructs a ResNet-101 model. 255 | Args: 256 | pretrained (bool): If True, returns a model pre-trained on Places 257 | """ 258 | model = ResNet(Bottleneck, [3, 4, 23, 3], deep_base=False, 259 | norm_type="sync_batchnorm", **kwargs) # self.configer.get('network', 'norm_type'), **kwargs) 260 | model = ModuleHelper.load_model(model, 261 | pretrained=self.pretrained) # self.configer.get('network', 'pretrained')) 262 | return model 263 | 264 | def deepbase_resnet101(self, **kwargs): 265 | """Constructs a ResNet-101 model. 266 | Args: 267 | pretrained (bool): If True, returns a model pre-trained on Places 268 | """ 269 | model = ResNet(Bottleneck, [3, 4, 23, 3], deep_base=True, 270 | norm_type="sync_batchnorm", **kwargs) # self.configer.get('network', 'norm_type'), **kwargs) 271 | model = ModuleHelper.load_model(model, 272 | pretrained=self.pretrained) # self.configer.get('network', 'pretrained')) 273 | return model 274 | # 275 | # def resnet152(self, **kwargs): 276 | # """Constructs a ResNet-152 model. 277 | # 278 | # Args: 279 | # pretrained (bool): If True, returns a model pre-trained on Places 280 | # """ 281 | # model = ResNet(Bottleneck, [3, 8, 36, 3], deep_base=False, 282 | # norm_type=self.configer.get('network', 'norm_type'), **kwargs) 283 | # model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) 284 | # return model 285 | # 286 | # def deepbase_resnet152(self, **kwargs): 287 | # """Constructs a ResNet-152 model. 
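#
# [editor's note] The builders above request norm_type="sync_batchnorm", but
# ModuleHelper.BatchNorm2d (models/tools/ModuleHelper.py, next file) only
# special-cases 'batchnorm' and 'instancenorm' and falls through to plain
# nn.BatchNorm2d for any other string -- so "sync_batchnorm" currently selects
# ordinary, unsynchronised BatchNorm2d. Note also that the
# print('no available norm') / exit(1) lines there sit after an unconditional
# return in the final else branch and are unreachable.
#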
288 | # 289 | # Args: 290 | # pretrained (bool): If True, returns a model pre-trained on Places 291 | # """ 292 | # model = ResNet(Bottleneck, [3, 8, 36, 3], deep_base=True, 293 | # norm_type=self.configer.get('network', 'norm_type'), **kwargs) 294 | # model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) 295 | # return model 296 | -------------------------------------------------------------------------------- /models/tools/ModuleHelper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch import nn 4 | 5 | class ModuleHelper(object): 6 | 7 | @staticmethod 8 | def BNReLU(num_features, norm_type=None, **kwargs): 9 | if norm_type == 'batchnorm': 10 | return nn.Sequential( 11 | nn.BatchNorm2d(num_features, **kwargs), 12 | nn.ReLU() 13 | ) 14 | # elif norm_type == 'encsync_batchnorm': 15 | # from encoding.nn import BatchNorm2d 16 | # return nn.Sequential( 17 | # BatchNorm2d(num_features, **kwargs), 18 | # nn.ReLU() 19 | # ) 20 | elif norm_type == 'instancenorm': 21 | return nn.Sequential( 22 | nn.InstanceNorm2d(num_features, **kwargs), 23 | nn.ReLU() 24 | ) 25 | else: 26 | return nn.Sequential( 27 | nn.BatchNorm2d(num_features, **kwargs), 28 | nn.ReLU() 29 | ) 30 | print('no available norm') 31 | exit(1) 32 | 33 | @staticmethod 34 | def BatchNorm2d(norm_type=None, ret_cls=False): 35 | if norm_type == 'batchnorm': 36 | return nn.BatchNorm2d 37 | 38 | # elif norm_type == 'encsync_batchnorm': 39 | # from encoding.nn import BatchNorm2d 40 | # return BatchNorm2d 41 | 42 | elif norm_type == 'instancenorm': 43 | return nn.InstanceNorm2d 44 | # elif bn_type == 'inplace_abn': 45 | # from extensions.ops.inplace_abn.bn import InPlaceABNSync 46 | # if ret_cls: 47 | # return InPlaceABNSync 48 | 49 | # return functools.partial(InPlaceABNSync, activation='none') 50 | 51 | else: 52 | return nn.BatchNorm2d 53 | print('no available norm') 54 | exit(1) 55 | 56 | @staticmethod 57 | def load_model(model, pretrained=None, all_match=True, map_location='cpu'): 58 | if pretrained is None: 59 | return model 60 | 61 | if not os.path.exists(pretrained): 62 | print('{} not exists.'.format(pretrained)) 63 | print(os.path.abspath(pretrained)) 64 | return model 65 | 66 | print('Loading pretrained model:{}'.format(pretrained)) 67 | if all_match: 68 | pretrained_dict = torch.load(pretrained, map_location=map_location) 69 | model_dict = model.state_dict() 70 | load_dict = dict() 71 | for k, v in pretrained_dict.items(): 72 | if 'prefix.{}'.format(k) in model_dict: 73 | load_dict['prefix.{}'.format(k)] = v 74 | else: 75 | load_dict[k] = v 76 | 77 | model.load_state_dict(load_dict) 78 | 79 | else: 80 | pretrained_dict = torch.load(pretrained) 81 | model_dict = model.state_dict() 82 | load_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 83 | print('Matched Keys: {}'.format(load_dict.keys())) 84 | model_dict.update(load_dict) 85 | model.load_state_dict(model_dict) 86 | 87 | return model -------------------------------------------------------------------------------- /models/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waterybye/Semantic-segmentation-methods-for-landslide-detection/462fb04bab105b8dba5b5afb1b7f28395bf0d59f/models/tools/__init__.py -------------------------------------------------------------------------------- /models/unet.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | from torch import nn 4 | 5 | 6 | class UNet(nn.Module): 7 | def __init__(self, in_channel: int, out_channel: int): 8 | super(UNet, self).__init__() 9 | # Encode 10 | self.conv_encode1 = self.contracting_block(in_channels=in_channel, out_channels=64) 11 | self.conv_maxpool1 = nn.MaxPool2d(kernel_size=2) 12 | self.conv_encode2 = self.contracting_block(64, 128) 13 | self.conv_maxpool2 = nn.MaxPool2d(kernel_size=2) 14 | self.conv_encode3 = self.contracting_block(128, 256) 15 | self.conv_maxpool3 = nn.MaxPool2d(kernel_size=2) 16 | self.conv_encode4 = self.contracting_block(256, 512) 17 | self.conv_maxpool4 = nn.MaxPool2d(kernel_size=2) 18 | # Bottleneck 19 | self.bottleneck = nn.Sequential( 20 | nn.Conv2d(kernel_size=3, in_channels=512, out_channels=1024), 21 | nn.ReLU(), 22 | nn.BatchNorm2d(1024), 23 | nn.Conv2d(kernel_size=3, in_channels=1024, out_channels=1024), 24 | nn.ReLU(), 25 | nn.BatchNorm2d(1024), 26 | nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=3, stride=2, padding=1, 27 | output_padding=1) 28 | ) 29 | # Decode 30 | self.conv_decode4 = self.expansive_block(1024, 512, 256) 31 | self.conv_decode3 = self.expansive_block(512, 256, 128) 32 | self.conv_decode2 = self.expansive_block(256, 128, 64) 33 | self.final_layer = self.final_block(128, 64, out_channel) 34 | 35 | def forward(self, x): 36 | # Encode 37 | # (..., width, height) => (..., width - 4, height - 4) 38 | encode_block1 = self.conv_encode1(x) 39 | # (..., width - 4, height - 4) => (..., (width - 4)/2, (height - 4)/2) 40 | encode_pool1 = self.conv_maxpool1(encode_block1) 41 | # (..., (width - 4) / 2, (height - 4) / 2) => (..., (width - 4)/2 - 4, (height - 4)/2 - 4) 42 | encode_block2 = self.conv_encode2(encode_pool1) 43 | # (..., (width - 4)/2 - 4, (height - 4)/2 - 4) => (..., ((width - 4)/2 - 4)/2, ((height - 4)/2 - 4)/2) 44 | encode_pool2 = self.conv_maxpool2(encode_block2) 45 | # (..., ((width - 4)/2 - 4)/2, ((height - 4)/2 - 4)/2) 46 | # => (..., ((width - 4)/2 - 4)/2 - 4, ((height - 4)/2 - 4)/2 - 4) 47 | encode_block3 = self.conv_encode3(encode_pool2) 48 | # (..., ((width - 4)/2 - 4)/2 - 4, ((height - 4)/2 - 4)/2 - 4) 49 | # => (..., (((width - 4)/2 - 4)/2 - 4) / 2, (((height - 4)/2 - 4)/2 - 4)/2) 50 | encode_pool3 = self.conv_maxpool3(encode_block3) 51 | encode_block4 = self.conv_encode4(encode_pool3) 52 | # (..., ((width - 4)/2 - 4)/2 - 4, ((height - 4)/2 - 4)/2 - 4) 53 | # => (..., (((width - 4)/2 - 4)/2 - 4) / 2, (((height - 4)/2 - 4)/2 - 4)/2) 54 | encode_pool4 = self.conv_maxpool4(encode_block4) 55 | 56 | # Bottleneck 57 | # (..., (((width - 4)/2 - 4)/2 - 4) / 2, (((height - 4)/2 - 4)/2 - 4)/2) 58 | # => (..., (((width - 4)/2 - 4)/2 - 4) / 2, (((height - 4)/2 - 4)/2 - 4)/2) 59 | bottleneck = self.bottleneck(encode_pool4) 60 | # Decode 61 | decode_block4 = self.crop_and_concat(bottleneck, encode_block4, crop=True) 62 | cat_layer3 = self.conv_decode4(decode_block4) 63 | decode_block3 = self.crop_and_concat(cat_layer3, encode_block3, crop=True) 64 | cat_layer2 = self.conv_decode3(decode_block3) 65 | decode_block2 = self.crop_and_concat(cat_layer2, encode_block2, crop=True) 66 | cat_layer1 = self.conv_decode2(decode_block2) 67 | decode_block1 = self.crop_and_concat(cat_layer1, encode_block1, crop=True) 68 | final_layer = self.final_layer(decode_block1) 69 | return final_layer 70 | 71 | def contracting_block(self, in_channels, out_channels, kernel_size=3): 72 | 
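#
# [editor's sketch] models/unet.py is the valid-convolution U-Net (every 3x3
# conv shrinks the map by 2), while models/padded_unet.py above preserves sizes
# with padding=1. A quick shape walk-through, assuming an illustrative 428x428
# input (valid convs need sizes that stay alignable with the skips):
#
#     net = UNet(in_channel=3, out_channel=2)
#     out = net(torch.rand(1, 3, 428, 428))
#     # encoder blocks: 424 -> 208 -> 100 -> 46; bottleneck upsamples to 38;
#     # decoder: 38 -> 68 -> 128 -> 248; final block -> (1, 2, 244, 244)
#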
        # (batch_size, in_channels, width, height) => (batch_size, out_channels, width - 4, height - 4)
        return nn.Sequential(
            nn.Conv2d(kernel_size=kernel_size, in_channels=in_channels, out_channels=out_channels),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
            nn.Conv2d(kernel_size=kernel_size, in_channels=out_channels, out_channels=out_channels),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
        )

    def expansive_block(self, in_channels, mid_channel, out_channels, kernel_size=3):
        # (batch_size, in_channels, width, height) => (batch_size, out_channels, (width - 4) * 2, (height - 4) * 2)
        return nn.Sequential(
            nn.Conv2d(kernel_size=kernel_size, in_channels=in_channels, out_channels=mid_channel),
            nn.ReLU(),
            nn.BatchNorm2d(mid_channel),
            nn.Conv2d(kernel_size=kernel_size, in_channels=mid_channel, out_channels=mid_channel),
            nn.ReLU(),
            nn.BatchNorm2d(mid_channel),
            nn.ConvTranspose2d(in_channels=mid_channel, out_channels=out_channels, kernel_size=3, stride=2,
                               padding=1, output_padding=1)
        )

    def final_block(self, in_channels, mid_channel, out_channels, kernel_size=3):
        # (batch_size, in_channels, width, height) => (batch_size, out_channels, width - 4, height - 4)
        return nn.Sequential(
            nn.Conv2d(kernel_size=kernel_size, in_channels=in_channels, out_channels=mid_channel),
            nn.ReLU(),
            nn.BatchNorm2d(mid_channel),
            nn.Conv2d(kernel_size=kernel_size, in_channels=mid_channel, out_channels=mid_channel),
            nn.ReLU(),
            nn.BatchNorm2d(mid_channel),
            nn.Conv2d(kernel_size=kernel_size, in_channels=mid_channel, out_channels=out_channels, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
        )

    @staticmethod
    def crop_and_concat(upsampled, bypass, crop=False):
        if crop:
            # negative padding crops the (larger) encoder feature map so it
            # matches the upsampled decoder feature map before concatenation
            c = (bypass.size()[2] - upsampled.size()[2]) // 2
            bypass = F.pad(bypass, [-c, -c, -c, -c])
        return torch.cat((upsampled, bypass), 1)
--------------------------------------------------------------------------------
/models/vgg/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waterybye/Semantic-segmentation-methods-for-landslide-detection/462fb04bab105b8dba5b5afb1b7f28395bf0d59f/models/vgg/__init__.py
--------------------------------------------------------------------------------
/models/vgg/vgg_backbone.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: Donny You(youansheng@gmail.com)
# VGG models.
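# Usage sketch (illustrative, not part of the original file; in this fork
# `configer` is used directly as the backbone architecture string rather
# than a config object):
#   backbone = VGGBackbone('vgg16')   # no 'bn' in the name -> plain VGG
#   features = backbone()             # nn.Module yielding 512-channel feature maps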


from models.vgg.vgg_models import VGGModels


class VGGBackbone(object):
    def __init__(self, configer):
        self.configer = configer
        self.vgg_models = VGGModels(self.configer)

    def __call__(self, vgg_cfg=None):
        arch = self.configer  # .get('network', 'backbone')
        if 'bn' in arch:
            arch_net = self.vgg_models.vgg_bn(vgg_cfg=vgg_cfg)

        else:
            arch_net = self.vgg_models.vgg(vgg_cfg=vgg_cfg)

        return arch_net
--------------------------------------------------------------------------------
/models/vgg/vgg_models.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: Donny You(youansheng@gmail.com)


import torch.nn as nn

from models.tools.ModuleHelper import ModuleHelper


model_urls = {
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}

CONFIG_DICT = {
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
    'vgg13_dilated8': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512],
    'vgg16_dilated8': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512],
    'vgg19_dilated8': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512],
    'vgg13_dilated16': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512],
    'vgg16_dilated16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512],
    'vgg19_dilated16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512],
}


def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]

        elif v == 'C':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]

        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)


class VGG(nn.Module):
    def __init__(self, cfg_name, vgg_cfg=None, bn=False):
        super(VGG, self).__init__()
        self.num_features = 512
        vgg_cfg = vgg_cfg if vgg_cfg is not None else CONFIG_DICT[cfg_name]
        self.features = make_layers(vgg_cfg, bn)

    def get_num_features(self):
        return self.num_features

    def forward(self, x):
        x = self.features(x)
        return x


class VGGModels(object):

    def __init__(self, configer):
        self.configer = configer
        self.pretrained = './pretrained_models/3x3resnet50-imagenet.pth'

    def vgg(self, vgg_cfg=None):
        """Constructs a VGG model.

        Args:
            vgg_cfg (list, optional): layer configuration; defaults to the
                CONFIG_DICT entry selected by the configer string.
        """
        backbone = self.configer  # .get('network', 'backbone')
        model = VGG(cfg_name=backbone, vgg_cfg=vgg_cfg, bn=False)
        model = ModuleHelper.load_model(model, pretrained=self.pretrained, all_match=False)  # configer.get('network', 'pretrained'), all_match=False)
        return model

    def vgg_bn(self, vgg_cfg=None):
        backbone = self.configer  # .get('network', 'backbone')
        model = VGG(cfg_name=backbone, vgg_cfg=vgg_cfg, bn=True)
        model = ModuleHelper.load_model(model, pretrained=self.pretrained, all_match=False)  # configer.get('network', 'pretrained'), all_match=False)
        return model


if __name__ == "__main__":
    pass
--------------------------------------------------------------------------------
/plot.py:
--------------------------------------------------------------------------------
import os

import numpy as np
from PIL import Image, ImageDraw
from tqdm import tqdm
from pydensecrf import densecrf as dcrf


def image_dcrf(unary: np.ndarray, image: np.ndarray, n_classes: int) -> np.ndarray:
    # convert class probabilities into unary potentials (negative log-likelihood)
    unary = -np.log(unary)
    unary = unary.transpose(2, 1, 0)
    w, h, c = unary.shape
    unary = unary.transpose(2, 0, 1).reshape(n_classes, -1)
    unary = np.ascontiguousarray(unary, dtype=np.float32)  # pydensecrf expects float32

    d = dcrf.DenseCRF2D(w, h, n_classes)
    d.setUnaryEnergy(unary)
    d.addPairwiseBilateral(sxy=10, srgb=13, rgbim=np.ascontiguousarray(image), compat=4)

    q = d.inference(5)
    return np.argmax(q, axis=0).reshape(w, h).transpose(1, 0)


def decode_segmap(label_map: np.ndarray) -> Image.Image:
    n_classes = label_map.max() + 1
    w, h = label_map.shape

    colors = [np.array([0, 0, 0]), np.array([255, 0, 0]), np.array([0, 255, 0]), np.array([0, 0, 255])]

    decoded = np.zeros((w, h, 3))
    for label in range(n_classes):
        decoded[label_map == label] = colors[label]
    # uint8 is required for an 8-bit RGB PIL image (int8 would wrap 255 around to -1)
    return Image.fromarray(decoded.astype(np.uint8), 'RGB')


def plot(model_namec):
    plot_save_dir = f'data/plots/results-{model_namec}'
    os.makedirs(plot_save_dir, exist_ok=True)
    results = np.load(f'data/results/{model_namec}.npz')
    inputs, labels, outputs = results['inputs'], results['labels'], results['outputs']
    del results

    for idx, image in tqdm(enumerate(inputs)):
        image, output, target = image.transpose([1, 2, 0]), outputs[idx], labels[idx]
        # softmax over the class axis
        normalized_output = np.exp(output) / np.sum(np.exp(output), axis=0)
        image = Image.fromarray(image.astype(np.uint8), 'RGB')
        prediction, landslide = np.argmax(output, axis=0), normalized_output[1]

        iw, ih, _ = np.array(image).shape
        n_classes, tw, th = output.shape
        left, top, right, bottom = (iw - tw) / 2, (ih - th) / 2, (iw + tw) / 2, (ih + th) / 2

        draw = ImageDraw.Draw(image)
        draw.rectangle([(left, top), (right, bottom)], outline=(255, 0, 0), width=2)
        del draw

        target_map = decode_segmap(target)
        prediction_map = decode_segmap(prediction)

        landslide = np.stack([landslide * 255] * 3, axis=-1)
        landslide = Image.fromarray(landslide.astype(np.uint8), 'RGB')

        image.save(os.path.join(plot_save_dir, f'{idx}-image.bmp'))
        target_map.save(os.path.join(plot_save_dir, f'{idx}-target.bmp'))
        prediction_map.save(os.path.join(plot_save_dir, f'{idx}-prediction.bmp'))
        landslide.save(os.path.join(plot_save_dir,
                                     f'{idx}-landslide.bmp'))

        try:
            mask = image_dcrf(normalized_output, np.array(image)[int(left):int(right), int(top):int(bottom)], n_classes)
            mask = decode_segmap(mask)
            mask.save(os.path.join(plot_save_dir, f'{idx}-crf.bmp'))
        except ModuleNotFoundError:
            pass
--------------------------------------------------------------------------------
/pretrained_models/download.txt:
--------------------------------------------------------------------------------
Pretrained models can be downloaded from Google Drive:
https://drive.google.com/drive/folders/1QNMtXV63W29R2dcFq_X6AmpBvYlDb96q?usp=sharing

Please put the models in the folder "pretrained_models" under the current directory.
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
tqdm
torchvision
torch
numpy
pandas
pillow
pydensecrf
albumentations
opencv-python
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import copy
import os
import time
import numpy as np
import torch
from torch import nn
from albumentations import (
    CLAHE, RandomRotate90, Transpose, Blur, OpticalDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip,
    OneOf, Compose,
    RandomSizedBBoxSafeCrop)
from torch.utils.data import DataLoader
from torchvision import models
from tqdm import tqdm

from data_utils.dataset import SegmentationSet2, TestSet2


def model_save(path: str, epoch: int, loss, model_state_dict, optimizer_state_dict, **kwargs):
    to_save = dict()
    to_save['epoch'], to_save['loss'], to_save['model_state_dict'], to_save[
        'optimizer_state_dict'] = epoch, loss, model_state_dict, optimizer_state_dict
    for key, val in kwargs.items():
        to_save[key] = val
    ndir, _ = os.path.split(path)
    if not os.path.exists(ndir):
        os.makedirs(ndir)
    torch.save(to_save, path)


def initialize_model(model_name: str, num_classes: int, use_pretrained: bool = True) -> nn.Module:
    # Initialize these variables, which will be set in the if statement below.
    # Each of these variables is model specific.
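    # All branches below follow the same feature-extraction recipe: load
    # ImageNet weights, freeze every pretrained parameter, then swap in a fresh
    # classification head sized for `num_classes` (only the new head keeps
    # requires_grad=True). Hypothetical usage:
    #   model = initialize_model("resnet18", num_classes=3)
    #   trainable = [p for p in model.parameters() if p.requires_grad]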
    model_ft = None

    if model_name == "resnet18":
        """ Resnet18
        """
        model_ft = models.resnet18(pretrained=use_pretrained)
        for param in model_ft.parameters():
            param.requires_grad = False
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)

    elif model_name == "alexnet":
        """ Alexnet
        """
        model_ft = models.alexnet(pretrained=use_pretrained)
        for param in model_ft.parameters():
            param.requires_grad = False
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)

    elif model_name == "vgg11_bn":
        """ VGG11_bn
        """
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        for param in model_ft.parameters():
            param.requires_grad = False
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)

    elif model_name == "squeezenet1_0":
        """ Squeezenet
        """
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        for param in model_ft.parameters():
            param.requires_grad = False
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
        model_ft.num_classes = num_classes

    elif model_name == "densenet121":
        """ Densenet
        """
        model_ft = models.densenet121(pretrained=use_pretrained)
        for param in model_ft.parameters():
            param.requires_grad = False
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)

    elif model_name == "inception_v3":
        """ Inception v3
        Be careful, expects (299,299) sized images and has auxiliary output
        """
        model_ft = models.inception_v3(pretrained=use_pretrained)
        for param in model_ft.parameters():
            param.requires_grad = False
        # Handle the auxiliary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)

    else:
        print("Invalid model name, exiting...")
        exit()

    return model_ft


def get_iou(pred, label):
    # landslide IoU
    landslide_union = np.logical_or(pred == 1, label == 1)
    landslide_intersection = np.logical_and(pred == 1, label == 1)

    landslide_iou = landslide_intersection.sum() / landslide_union.sum() if landslide_union.sum() > 0 else 1
    return landslide_iou


def train_model(model: nn.Module, dataloaders, criterion, optimizer, n_class, scheduler=None, num_epochs=25,
                device=torch.device('cpu')):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_iou = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            tp, tn, fp, fn = [[0. for _ in range(n_class)] for _ in range(4)]  # TP, TN, FP, FN for each class

            landslide_union = 0.
            landslide_intersection = 0.

            # Iterate over data.
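            # The loop below accumulates per-class confusion counts (tp/tn/fp/fn)
            # and the running landslide intersection/union so that pixel accuracy,
            # per-class F1 and the landslide IoU can be reported once per epoch:
            #   IoU_landslide = |pred==1 AND label==1| / |pred==1 OR label==1|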
            for names, inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)

                # forward
                # track history only if in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    outputs = model(inputs)

                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs.data, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        optimizer.zero_grad()  # zero the parameter gradients
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

                for i in range(n_class):
                    tp[i] += torch.sum((preds == i) & (labels.data == i)).float()
                    tn[i] += torch.sum((preds != i) & (labels.data != i)).float()
                    fp[i] += torch.sum((preds == i) & (labels.data != i)).float()
                    fn[i] += torch.sum((preds != i) & (labels.data == i)).float()

                landslide_intersection += torch.sum((preds == 1) & (labels.data == 1)).float()
                landslide_union += torch.sum((preds == 1) | (labels == 1)).float()

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            n_samples = len(dataloaders[phase].dataset) * inputs.size(2) * inputs.size(3)
            epoch_acc = running_corrects.float() / n_samples
            epoch_iou = landslide_intersection / landslide_union

            f1_score = [(2 * tp[i] / (2 * tp[i] + fp[i] + fn[i])).item() for i in range(n_class)]
            accs = [((tp[i] + tn[i]) / n_samples).item() for i in range(n_class)]
            weights = [0.04, 1., 2.38]  # per-class weights for the weighted accuracy; assumes n_class == 3
            epoch_weighted_acc = sum((accs[i] * weights[i]) / sum(weights) for i in range(n_class))

            print(f'{phase} Loss: {epoch_loss:.4f}')
            print(f'Acc: {epoch_acc:.4f}')
            print(f'Weighted Acc {epoch_weighted_acc:.4f}')
            print('F1-scores: ' + '/'.join(f'{f:.3f}' for f in f1_score))
            print(f'Landslide IoU: {epoch_iou:.2f}')
            print()

            if phase == 'train' and scheduler:
                scheduler.step(epoch_loss)

            # update weight as Error Corrective Boosting: w_i = (median(a) - min(a) + smooth) / (a_i - min(a) + smooth)
            if phase == 'val' and hasattr(criterion, 'update_weight'):
                mediana, mina = np.median(f1_score), np.min(f1_score)
                new_weights = torch.zeros(n_class, dtype=torch.float32)
                for i in range(new_weights.size()[0]):
                    new_weights[i] = (mediana - mina + .5) / (f1_score[i] - mina + .5)
                criterion.update_weight(new_weights.to(device))

            # deep copy the model
            if phase == 'val' and epoch_weighted_acc > best_acc:
                best_acc = epoch_weighted_acc
            if phase == 'val' and epoch_iou >= best_iou:
                best_iou = epoch_iou
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_loss)  # despite the name, this records the validation loss

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')
    print(f'Best val landslide IoU: {best_iou:.5f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history


def test_model(model, dataloader, device=torch.device('cpu')):
    since = time.time()
    inputs_list, outputs_list, labels_list = list(), list(), list()

    model.eval()  # Set model to evaluate mode

    # Iterate over data.
    for names, inputs, labels in tqdm(dataloader):
        inputs, labels = inputs.to(device), labels.to(device)

        inputs_list.append(inputs.cpu().numpy())
        labels_list.append(labels.cpu().numpy())

        # forward pass with gradient tracking disabled (evaluation only)
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            outputs_list.append(outputs.cpu().numpy())

    time_elapsed = time.time() - since
    print('Test complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    return np.concatenate(inputs_list), np.concatenate(labels_list), np.concatenate(outputs_list)


def pretrain_resnet(model, model_name, learning_rate, dataloaders, num_epochs=25, device=torch.device('cpu'),
                    n_class=43):
    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
    from torch import optim
    # Only parameters with requires_grad=True are optimized
    optimizer = optim.Adam(params_to_update, lr=learning_rate)
    # optimizer_ft = optim.SGD(params_to_update, lr=learning_rate, momentum=momentum)
    from torch.optim.lr_scheduler import ReduceLROnPlateau
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=.5, patience=2, verbose=True)
    # scheduler = None

    criterion = nn.BCEWithLogitsLoss(pos_weight=(torch.tensor([1.9413, 2.1518, 1.8227, 2.4867, 2.6754, 2.9754,
                                                               3.9839, 4.2460, 7.9527, 6.1611, 6.8814, 15.9447,
                                                               30.1597, 37.2683, 42.7707, 45.6151, 40.5372, 45.2004,
                                                               47.2388, 52.0371, 53.5295, 72.9905, 93.1514, 101.3732,
                                                               103.2222, 110.1599, 121.9205, 135.9204, 157.8678,
                                                               178.4537, 313.5603, 337.1113, 431.0624, 386.8559,
                                                               346.3839, 479.4246, 487.1006, 538.8535, 632.3403,
                                                               630.7993, 1145.4437, 1223.8561, 1707.3519],
                                                              device=device)))

    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    try:

        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()  # Set model to training mode
                else:
                    model.eval()  # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0

                tp, tn, fp, fn = [[0. for _ in range(n_class)] for _ in range(4)]  # TP, TN, FP, FN for each class

                # Iterate over data.
                for names, inputs, labels in tqdm(dataloaders[phase]):
                    inputs, labels = inputs.to(device), labels.to(device)

                    # forward
                    # track history only if in train
                    with torch.set_grad_enabled(phase == 'train'):
                        # Get model outputs and calculate loss
                        outputs = model(inputs)

                        loss = criterion(outputs, labels)

                        preds = outputs.data
                        preds[preds >= 0.] = 1.
                        preds[preds < 0.] = 0.
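                        # Thresholding raw logits at 0 matches the BCEWithLogitsLoss
                        # criterion above: sigmoid(x) >= 0.5 exactly when x >= 0,
                        # so no explicit sigmoid is needed before binarizing.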
                        # _, preds = torch.max(outputs.data, 1)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            optimizer.zero_grad()  # zero the parameter gradients
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)

                    for i in range(n_class):
                        tp[i] += torch.sum((preds[:, i] == 1) & (labels.data[:, i] == 1)).float()
                        tn[i] += torch.sum((preds[:, i] == 0) & (labels.data[:, i] == 0)).float()
                        fp[i] += torch.sum((preds[:, i] == 1) & (labels.data[:, i] == 0)).float()
                        fn[i] += torch.sum((preds[:, i] == 0) & (labels.data[:, i] == 1)).float()

                epoch_loss = running_loss / len(dataloaders[phase].dataset)
                n_samples = len(dataloaders[phase].dataset)
                epoch_acc = running_corrects.float() / n_samples / n_class

                f1_score = [(2 * tp[i] / (2 * tp[i] + fp[i] + fn[i])).item() for i in range(n_class)]
                accs = [((tp[i] + tn[i]) / n_samples).item() for i in range(n_class)]

                print(f'{phase} Loss: {epoch_loss:.4f}')
                print(f'Acc: {epoch_acc:.4f}')
                print('Accs: ' + '/'.join([f'{accs[i] * 100:.2f}%' for i in range(n_class)]))
                print('F1-scores: ' + '/'.join([f'{f1_score[i]:.3f}' for i in range(n_class)]))
                print()

                if phase == 'train' and scheduler:
                    scheduler.step(epoch_loss)

                # deep copy the model
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

                if phase == 'val':
                    val_acc_history.append(epoch_loss)
                    model_save(f'data/models/{model_name}_{epoch:03}.pth', num_epochs, val_acc_history[-1],
                               model.state_dict(), optimizer.state_dict())
    finally:
        # note: this block runs on success, interruption and error alike; the
        # exit(0) below terminates the process, so the trailing `return model`
        # is never reached
        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:.4f}')

        # load best model weights
        model.load_state_dict(best_model_wts)
        model_save(f'data/models/{model_name}.pth', num_epochs, val_acc_history[-1], model.state_dict(),
                   optimizer.state_dict())
        exit(0)
    return model


def get_dataloaders(input_size=(420, 420), batch_size=2, test=None):
    pnghigh_img_dir, pnghigh_ann_dir = r'data/npz/pnghigh/image', r'data/npz/pnghigh/mask'  # 56
    pnglow_img_dir, pnglow_ann_dir = r'data/npz/pnglow/image', r'data/npz/pnglow/mask'  # 12
    pngother_img_dir, pngother_ann_dir = r'data/npz/pngother/image', r'data/npz/pngother/mask'  # 22

    pnghigh_img_lists = np.array(list(map(lambda x: os.path.join(pnghigh_img_dir, x), sorted(os.listdir(pnghigh_img_dir)))))
    pnghigh_ann_lists = np.array(list(map(lambda x: os.path.join(pnghigh_ann_dir, x), sorted(os.listdir(pnghigh_ann_dir)))))

    pnglow_img_lists = np.array(list(map(lambda x: os.path.join(pnglow_img_dir, x), sorted(os.listdir(pnglow_img_dir)))))
    pnglow_ann_lists = np.array(list(map(lambda x: os.path.join(pnglow_ann_dir, x), sorted(os.listdir(pnglow_ann_dir)))))

    pngother_img_lists = np.array(list(map(lambda x: os.path.join(pngother_img_dir, x), sorted(os.listdir(pngother_img_dir)))))
    pngother_ann_lists = np.array(list(map(lambda x: os.path.join(pngother_ann_dir, x), sorted(os.listdir(pngother_ann_dir)))))

    indices_high = np.arange(len(pnghigh_img_lists))
    # np.random.shuffle(indices_high)
    # trn1, val1, tst1 = np.split(indices1, [int(0.7 * len(pngs2_image_lists)), int(0.8 * len(pngs2_image_lists))])
    trn_high, val_high, tst_high = np.split(indices_high, [42, 48])

    indices_low = np.arange(len(pnglow_img_lists))
    # np.random.shuffle(indices_low)
    tst_low = indices_low

    indices_other = np.arange(len(pngother_img_lists))
    # np.random.shuffle(indices_other)
    # trn1, val1, tst1 = np.split(indices1, [int(0.7 * len(pngs2_image_lists)), int(0.8 * len(pngs2_image_lists))])
    trn_other, val_other, tst_other = np.split(indices_other, [12, 16])

    transform = Compose([
        RandomRotate90(),
        Flip(),
        Transpose(),
        OneOf([
            IAAAdditiveGaussianNoise(),
            GaussNoise(),
        ], p=0.2),
        OneOf([
            MotionBlur(p=0.2),
            MedianBlur(blur_limit=3, p=0.1),
            Blur(blur_limit=3, p=0.1),
        ], p=0.2),
        # ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=0.2),
        OpticalDistortion(p=0.3),
        OneOf([
            CLAHE(clip_limit=2),
            IAASharpen(),
            IAAEmboss(),
            RandomBrightnessContrast(),
        ], p=0.3),
        HueSaturationValue(p=0.3),
        RandomSizedBBoxSafeCrop(*input_size, erosion_rate=0.3)
    ])

    np_trn_img = np.hstack((pnghigh_img_lists[trn_high], pngother_img_lists[trn_other]))
    np_trn_ann = np.hstack((pnghigh_ann_lists[trn_high], pngother_ann_lists[trn_other]))

    np_val_img = np.hstack((pnghigh_img_lists[val_high], pngother_img_lists[val_other]))
    np_val_ann = np.hstack((pnghigh_ann_lists[val_high], pngother_ann_lists[val_other]))

    if test == 'all_test':
        np_tst_img = np.hstack((pnghigh_img_lists[tst_high], pnglow_img_lists[tst_low], pngother_img_lists[tst_other]))
        np_tst_ann = np.hstack((pnghigh_ann_lists[tst_high], pnglow_ann_lists[tst_low], pngother_ann_lists[tst_other]))
    elif test == 'jinsha_test':
        np_tst_img = np.hstack((pnghigh_img_lists[tst_high], pnglow_img_lists[tst_low]))
        np_tst_ann = np.hstack((pnghigh_ann_lists[tst_high], pnglow_ann_lists[tst_low]))
    elif test == 'other_test':
        np_tst_img = pngother_img_lists[tst_other]
        np_tst_ann = pngother_ann_lists[tst_other]
    elif test == 'jinsha_train':
        np_tst_img = pnghigh_img_lists[trn_high]
        np_tst_ann = pnghigh_ann_lists[trn_high]
    elif test == 'other_train':
        np_tst_img = pngother_img_lists[trn_other]
        np_tst_ann = pngother_ann_lists[trn_other]
    elif test == 'jinsha_val':
        np_tst_img = pnghigh_img_lists[val_high]
        np_tst_ann = pnghigh_ann_lists[val_high]
    else:
        np_tst_img = pngother_img_lists[val_other]
        np_tst_ann = pngother_ann_lists[val_other]

    image_datasets = {
        'train': SegmentationSet2(np_trn_img, np_trn_ann, transform=transform),
        'val': SegmentationSet2(np_val_img, np_val_ann, transform=transform),
        'test': TestSet2(np_tst_img, np_tst_ann)
    }

    data_loaders = {
        phase: DataLoader(image_datasets[phase], batch_size=batch_size, shuffle=(phase == 'train'), drop_last=False)
        for phase in ['train', 'val', 'test']}

    return data_loaders
--------------------------------------------------------------------------------