├── .gitignore
├── CamVid
│   ├── 32_Classes.txt
│   ├── CamVid.py
│   ├── SegNet.py
│   ├── Test_SegNet.py
│   ├── Train_SegNet.py
│   └── classes.npy
├── Pavements
│   ├── 2_Classes.txt
│   ├── Pavements.py
│   ├── SegNet.py
│   ├── SegNet_Pavement.ipynb
│   ├── TensorBoard.ipynb
│   ├── Test_SegNet_Pavements.py
│   ├── Train_SegNet_Pavements.py
│   ├── classes.npy
│   └── model.json
└── README.md
/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | logs/ 3 | weights/ 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /CamVid/32_Classes.txt: -------------------------------------------------------------------------------- 1 | 64 128 64 Animal 2 | 192 0 128 Archway 3 | 0 128 192 Bicyclist 4 | 0 128 64 Bridge 5 | 128 0 0 Building 6 | 64 0 128 Car 7 | 64 0 192 CartLuggagePram 8 | 192 128 64 Child 9 | 192 192 128 Column_Pole 10 | 64 64 128 Fence 11 | 128 0 192 LaneMkgsDriv 12 | 192 0 64 LaneMkgsNonDriv 13 | 128 128 64 Misc_Text 14 | 192 0 192 MotorcycleScooter 15 | 128 64 64 OtherMoving 16 | 64 192 128 ParkingBlock 17 | 64 64 0 Pedestrian 18 | 128 64 128 Road 19 | 128 128 192 RoadShoulder 20 | 0 0 192 Sidewalk 21 | 192 128 128 SignSymbol 22 | 128 128 128 Sky 23 | 64 128 192 SUVPickupTruck 24 | 0 0 64 TrafficCone 25 | 0 64 64 TrafficLight 26 | 192 64 128 Train 27 | 128 128 0 Tree 28 | 192 128 192 Truck_Bus 29 | 64 0 64 Tunnel 30 | 192 192 0 VegetationMisc 31 | 0 0 0 Void 32 | 64 192 0 Wall -------------------------------------------------------------------------------- /CamVid/CamVid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | import os 4 | from skimage import io 5 | import time 6 | import numpy as np 7 | 8 | class CamVid(Dataset): 9 | 10 | @staticmethod 11 | def getLabeled(img_name, lbl_dir): 12 | 13 | #Returns the labeled image's file path, e.g. 'img.png' -> '<lbl_dir>/img_L.png' 14 | index = img_name.find('.png') 15 | img_lbl_dir = os.path.join(lbl_dir, (img_name[:index] + '_L' + img_name[index:])) 16 | 17 | return img_lbl_dir 18 | 19 | def __init__(self, classes, raw_dir, lbl_dir, transform=None): 20 | 21 | #classes: (np ndarray) (K, 3) array of RGB values of K classes 22 | #raw_dir: (directory) Folder directory of raw input image files 23 | #lbl_dir: (directory) Folder directory of labeled image files 24 | 25 | self.classes = classes 26 | self.raw_dir = raw_dir 27 | self.lbl_dir = lbl_dir 28 | self.transform = transform 29 | self.list_img = [f for f in os.listdir(self.raw_dir) if not f.startswith('.')] 30 | 31 | def one_Hot(self, image): 32 | 33 | #Pixel-wise conversion of a labeled image to its respective classes 34 | #Output is a one-hot encoded tensor of (M, N, K) dimensions: MxN resolution, K channels (classes) 35 | 36 | output_shape = (image.shape[0], image.shape[1], self.classes.shape[0]) 37 | output = np.zeros(output_shape) 38 | 39 | for c in range(self.classes.shape[0]): 40 | label = np.all(self.classes[c] == image, axis=2) #a pixel belongs to class c only if all 3 RGB values match 41 | output[:, :, c] = label 42 | 43 | return output 44 | 45 | def __len__(self): 46 | 47 | return len(self.list_img) 48 | 49 | def __getitem__(self, idx): 50 | 51 | img_raw_name = self.list_img[idx] 52 | img_raw_dir = os.path.join(self.raw_dir, img_raw_name) 53 | image_raw = io.imread(img_raw_dir) 54 | img_lbl_dir = self.getLabeled(img_raw_name, self.lbl_dir) 55 | image_label = io.imread(img_lbl_dir) 56 | label = self.one_Hot(image_label) 57 | 58 | if self.transform: 59 | image_raw = self.transform(image_raw) 60 | label = self.transform(label) 61
| 62 | data = (image_raw, label) 63 | 64 | return data 65 | 66 | 67 | -------------------------------------------------------------------------------- /CamVid/SegNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class SegNet(nn.Module): 7 | 8 | def __init__(self, in_chn=3, out_chn=32, BN_momentum=0.5): 9 | super(SegNet, self).__init__() 10 | 11 | #SegNet Architecture 12 | #Takes input of size in_chn = 3 (RGB images have 3 channels) 13 | #Outputs size label_chn (N # of classes) 14 | 15 | #ENCODING consists of 5 stages 16 | #Stage 1, 2 has 2 layers of Convolution + Batch Normalization + Max Pool respectively 17 | #Stage 3, 4, 5 has 3 layers of Convolution + Batch Normalization + Max Pool respectively 18 | 19 | #General Max Pool 2D for ENCODING layers 20 | #Pooling indices are stored for Upsampling in DECODING layers 21 | 22 | self.in_chn = in_chn 23 | self.out_chn = out_chn 24 | 25 | self.MaxEn = nn.MaxPool2d(2, stride=2, return_indices=True) 26 | 27 | self.ConvEn11 = nn.Conv2d(self.in_chn, 64, kernel_size=3, padding=1) 28 | self.BNEn11 = nn.BatchNorm2d(64, momentum=BN_momentum) 29 | self.ConvEn12 = nn.Conv2d(64, 64, kernel_size=3, padding=1) 30 | self.BNEn12 = nn.BatchNorm2d(64, momentum=BN_momentum) 31 | 32 | self.ConvEn21 = nn.Conv2d(64, 128, kernel_size=3, padding=1) 33 | self.BNEn21 = nn.BatchNorm2d(128, momentum=BN_momentum) 34 | self.ConvEn22 = nn.Conv2d(128, 128, kernel_size=3, padding=1) 35 | self.BNEn22 = nn.BatchNorm2d(128, momentum=BN_momentum) 36 | 37 | self.ConvEn31 = nn.Conv2d(128, 256, kernel_size=3, padding=1) 38 | self.BNEn31 = nn.BatchNorm2d(256, momentum=BN_momentum) 39 | self.ConvEn32 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 40 | self.BNEn32 = nn.BatchNorm2d(256, momentum=BN_momentum) 41 | self.ConvEn33 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 42 | self.BNEn33 = nn.BatchNorm2d(256, momentum=BN_momentum) 43 | 44 | self.ConvEn41 = nn.Conv2d(256, 512, kernel_size=3, padding=1) 45 | self.BNEn41 = nn.BatchNorm2d(512, momentum=BN_momentum) 46 | self.ConvEn42 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 47 | self.BNEn42 = nn.BatchNorm2d(512, momentum=BN_momentum) 48 | self.ConvEn43 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 49 | self.BNEn43 = nn.BatchNorm2d(512, momentum=BN_momentum) 50 | 51 | self.ConvEn51 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 52 | self.BNEn51 = nn.BatchNorm2d(512, momentum=BN_momentum) 53 | self.ConvEn52 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 54 | self.BNEn52 = nn.BatchNorm2d(512, momentum=BN_momentum) 55 | self.ConvEn53 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 56 | self.BNEn53 = nn.BatchNorm2d(512, momentum=BN_momentum) 57 | 58 | 59 | #DECODING consists of 5 stages 60 | #Each stage corresponds to their respective counterparts in ENCODING 61 | 62 | #General Max Pool 2D/Upsampling for DECODING layers 63 | self.MaxDe = nn.MaxUnpool2d(2, stride=2) 64 | 65 | self.ConvDe53 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 66 | self.BNDe53 = nn.BatchNorm2d(512, momentum=BN_momentum) 67 | self.ConvDe52 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 68 | self.BNDe52 = nn.BatchNorm2d(512, momentum=BN_momentum) 69 | self.ConvDe51 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 70 | self.BNDe51 = nn.BatchNorm2d(512, momentum=BN_momentum) 71 | 72 | self.ConvDe43 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 73 | self.BNDe43 = nn.BatchNorm2d(512, momentum=BN_momentum) 74 | self.ConvDe42 = 
nn.Conv2d(512, 512, kernel_size=3, padding=1) 75 | self.BNDe42 = nn.BatchNorm2d(512, momentum=BN_momentum) 76 | self.ConvDe41 = nn.Conv2d(512, 256, kernel_size=3, padding=1) 77 | self.BNDe41 = nn.BatchNorm2d(256, momentum=BN_momentum) 78 | 79 | self.ConvDe33 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 80 | self.BNDe33 = nn.BatchNorm2d(256, momentum=BN_momentum) 81 | self.ConvDe32 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 82 | self.BNDe32 = nn.BatchNorm2d(256, momentum=BN_momentum) 83 | self.ConvDe31 = nn.Conv2d(256, 128, kernel_size=3, padding=1) 84 | self.BNDe31 = nn.BatchNorm2d(128, momentum=BN_momentum) 85 | 86 | self.ConvDe22 = nn.Conv2d(128, 128, kernel_size=3, padding=1) 87 | self.BNDe22 = nn.BatchNorm2d(128, momentum=BN_momentum) 88 | self.ConvDe21 = nn.Conv2d(128, 64, kernel_size=3, padding=1) 89 | self.BNDe21 = nn.BatchNorm2d(64, momentum=BN_momentum) 90 | 91 | self.ConvDe12 = nn.Conv2d(64, 64, kernel_size=3, padding=1) 92 | self.BNDe12 = nn.BatchNorm2d(64, momentum=BN_momentum) 93 | self.ConvDe11 = nn.Conv2d(64, self.out_chn, kernel_size=3, padding=1) 94 | self.BNDe11 = nn.BatchNorm2d(self.out_chn, momentum=BN_momentum) 95 | 96 | def forward(self, x): 97 | 98 | #ENCODE LAYERS 99 | #Stage 1 100 | x = F.relu(self.BNEn11(self.ConvEn11(x))) 101 | x = F.relu(self.BNEn12(self.ConvEn12(x))) 102 | x, ind1 = self.MaxEn(x) 103 | size1 = x.size() 104 | 105 | #Stage 2 106 | x = F.relu(self.BNEn21(self.ConvEn21(x))) 107 | x = F.relu(self.BNEn22(self.ConvEn22(x))) 108 | x, ind2 = self.MaxEn(x) 109 | size2 = x.size() 110 | 111 | #Stage 3 112 | x = F.relu(self.BNEn31(self.ConvEn31(x))) 113 | x = F.relu(self.BNEn32(self.ConvEn32(x))) 114 | x = F.relu(self.BNEn33(self.ConvEn33(x))) 115 | x, ind3 = self.MaxEn(x) 116 | size3 = x.size() 117 | 118 | #Stage 4 119 | x = F.relu(self.BNEn41(self.ConvEn41(x))) 120 | x = F.relu(self.BNEn42(self.ConvEn42(x))) 121 | x = F.relu(self.BNEn43(self.ConvEn43(x))) 122 | x, ind4 = self.MaxEn(x) 123 | size4 = x.size() 124 | 125 | #Stage 5 126 | x = F.relu(self.BNEn51(self.ConvEn51(x))) 127 | x = F.relu(self.BNEn52(self.ConvEn52(x))) 128 | x = F.relu(self.BNEn53(self.ConvEn53(x))) 129 | x, ind5 = self.MaxEn(x) 130 | size5 = x.size() 131 | 132 | #DECODE LAYERS 133 | #Stage 5 134 | x = self.MaxDe(x, ind5, output_size=size4) 135 | x = F.relu(self.BNDe53(self.ConvDe53(x))) 136 | x = F.relu(self.BNDe52(self.ConvDe52(x))) 137 | x = F.relu(self.BNDe51(self.ConvDe51(x))) 138 | 139 | #Stage 4 140 | x = self.MaxDe(x, ind4, output_size=size3) 141 | x = F.relu(self.BNDe43(self.ConvDe43(x))) 142 | x = F.relu(self.BNDe42(self.ConvDe42(x))) 143 | x = F.relu(self.BNDe41(self.ConvDe41(x))) 144 | 145 | #Stage 3 146 | x = self.MaxDe(x, ind3, output_size=size2) 147 | x = F.relu(self.BNDe33(self.ConvDe33(x))) 148 | x = F.relu(self.BNDe32(self.ConvDe32(x))) 149 | x = F.relu(self.BNDe31(self.ConvDe31(x))) 150 | 151 | #Stage 2 152 | x = self.MaxDe(x, ind2, output_size=size1) 153 | x = F.relu(self.BNDe22(self.ConvDe22(x))) 154 | x = F.relu(self.BNDe21(self.ConvDe21(x))) 155 | 156 | #Stage 1 157 | x = self.MaxDe(x, ind1) 158 | x = F.relu(self.BNDe12(self.ConvDe12(x))) 159 | x = self.ConvDe11(x) 160 | 161 | x = F.softmax(x, dim=1) 162 | 163 | return x 164 | 165 | 166 | class Train(): 167 | 168 | @staticmethod 169 | def save_checkpoint(state, path): 170 | torch.save(state, path) 171 | print("Checkpoint saved at {}".format(path)) 172 | 173 | @staticmethod 174 | def Train(trainloader, path=None): #epochs is target epoch, path is provided to load saved checkpoint 175 | 176 | model = 
SegNet() 177 | 178 | # The file as committed lacks `import torch.optim as optim` and `import os`, and never 179 | # defines `hyperparam`; the stand-in below makes Train runnable (the values are 180 | # placeholders, not settings from the original repository). 181 | import torch.optim as optim 182 | import os 183 | from types import SimpleNamespace 184 | hyperparam = SimpleNamespace(lr=0.01, momentum=0.9, epochs=10) 185 | 186 | optimizer = optim.SGD(model.parameters(), lr=hyperparam.lr, momentum=hyperparam.momentum) 187 | loss_fn = nn.CrossEntropyLoss() 188 | run_epoch = hyperparam.epochs 189 | 190 | if path is None: 191 | epoch = 0 192 | path = os.path.join(os.getcwd(), 'segnet_weights.pth.tar') 193 | print("Creating new checkpoint '{}'".format(path)) 194 | else: 195 | if os.path.isfile(path): 196 | print("Loading checkpoint '{}'".format(path)) 197 | checkpoint = torch.load(path) 198 | epoch = checkpoint['epoch'] 199 | model.load_state_dict(checkpoint['state_dict']) 200 | optimizer.load_state_dict(checkpoint['optimizer']) 201 | print("Loaded checkpoint '{}' (epoch {})".format(path, checkpoint['epoch'])) 202 | else: 203 | epoch = 0 # previously left undefined on this branch, which crashed the final save 204 | print("No checkpoint found at '{}'".format(path)) 205 | 206 | for i in range(1, run_epoch + 1): 207 | print('Epoch {}:'.format(i)) 208 | sum_loss = 0.0 209 | 210 | for j, data in enumerate(trainloader, 1): 211 | images, labels = data 212 | optimizer.zero_grad() 213 | output = model(images) 214 | loss = loss_fn(output, labels) 215 | loss.backward() 216 | optimizer.step() 217 | 218 | sum_loss += loss.item() 219 | 220 | print('Loss at {} mini-batch: {}'.format(j, loss.item() / trainloader.batch_size)) 221 | 222 | print('Average loss @ epoch: {}'.format(sum_loss / (j * trainloader.batch_size))) # parenthesized; the original divided by j, then multiplied by batch_size 223 | 224 | print("Training complete. Saving checkpoint...") 225 | Train.save_checkpoint({'epoch': epoch + run_epoch, 'state_dict': model.state_dict(), 'optimizer' : optimizer.state_dict()}, path) # record total epochs trained, not the resume point 226 | --------------------------------------------------------------------------------
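A note on the training objective above: `forward()` ends with `F.softmax`, while `nn.CrossEntropyLoss` applies log-softmax internally, so during training the scores are effectively softmaxed twice. The network still trains, but gradients are dampened. A minimal sketch of the more conventional pattern — raw logits into the loss, softmax only at inference (dummy shapes, not the repository's code):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

loss_fn = nn.CrossEntropyLoss()

# stand-ins for ConvDe11 output (no softmax) and pixel-wise class-index labels
logits = torch.randn(4, 32, 64, 64, requires_grad=True)
labels = torch.randint(0, 32, (4, 64, 64))

loss = loss_fn(logits, labels)  # CrossEntropyLoss = log-softmax + NLL, applied once
loss.backward()

probs = F.softmax(logits, dim=1)  # probabilities only needed at inference
preds = probs.argmax(dim=1)       # (4, 64, 64) predicted labels
```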
/CamVid/Test_SegNet.py: -------------------------------------------------------------------------------- 1 | import SegNet 2 | from CamVid import CamVid 3 | 4 | import os 5 | import argparse 6 | import numpy as np 7 | import torch 8 | import torchvision.transforms as transforms 9 | from torchvision.utils import save_image 10 | from itertools import product 11 | 12 | 13 | def build_color_map(): 14 | # assumes no. of classes to be <= 64 15 | color_map = torch.tensor(list(product([63, 127, 191, 255], repeat=3))) 16 | 17 | print() 18 | print("Map of class to color: ") 19 | for class_ind, color in enumerate(color_map): 20 | print("Class: {}, RGB Color: {}".format(class_ind + 1, color)) 21 | 22 | print() 23 | 24 | return color_map 25 | 26 | 27 | def load(model, weight_fn): 28 | 29 | assert os.path.isfile(weight_fn), "{} is not a file.".format(weight_fn) 30 | 31 | checkpoint = torch.load(weight_fn) 32 | epoch = checkpoint['epoch'] 33 | state_dict = checkpoint['state_dict'] 34 | model.load_state_dict(state_dict) 35 | print("Checkpoint is loaded at {} | Epochs: {}".format(weight_fn, epoch)) 36 | 37 | def main(args): 38 | 39 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 40 | 41 | classes_dir = args.classes_dir 42 | camvid_raw_dir = args.camvid_raw_dir 43 | camvid_labelled_dir = args.camvid_labelled_dir 44 | weight_fn = args.weight_fn 45 | res_dir = args.res_dir 46 | 47 | # initialize model in evaluation mode 48 | model = SegNet.SegNet().to(device) 49 | model.eval() 50 | 51 | # load pretrained weights 52 | load(model, weight_fn) 53 | 54 | # create toTensor transform 55 | transform = transforms.Compose([transforms.ToTensor()]) 56 | 57 | # load test dataset 58 | dataset = CamVid(np.load(classes_dir), camvid_raw_dir, camvid_labelled_dir, transform=transform) # CamVid expects the (K, 3) class array itself, not the .npy path 59 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, num_workers=4) 60 | 61 | # build color map 62 | color_map = build_color_map() 63 | 64 | # run evaluation 65 | for i, data in enumerate(dataloader): 66 | images = data[0].to(device) 67 | res = model(images) 68 | res = torch.argmax(res, dim=1) # one-hot squashed to pixel-wise labels 69 | 70 | for n in range(res.shape[0]): # loop over each image 71 | res_image = color_map[res[n]].permute(2, 0, 1).to(torch.float).div(255.0) # transpose back to C, H, W, normalize to (0.0, 1.0) 72 | save_image(res_image, os.path.join(res_dir, "img_{}_{}.png".format(i, n))) 73 | 74 | print("Evaluation complete. {} segmented images saved at {}".format((i + 1) * (n + 1), res_dir)) 75 | 76 | if __name__ == "__main__": 77 | parser = argparse.ArgumentParser() 78 | 79 | #FORMAT DIRECTORIES 80 | parser.add_argument("classes_dir", type=str, help="Path: classes.npy file of (K, 3) class RGB values") 81 | parser.add_argument("camvid_raw_dir", type=str, help="Directory: CamVid raw testing images") 82 | parser.add_argument("camvid_labelled_dir", type=str, help="Directory: CamVid labelled testing images") 83 | parser.add_argument("weight_fn", type=str, help="Path: Trained weights") 84 | parser.add_argument("res_dir", type=str, help="Directory: Model output images") 85 | 86 | args = parser.parse_args() 87 | 88 | main(args) 89 | -------------------------------------------------------------------------------- /CamVid/Train_SegNet.py: -------------------------------------------------------------------------------- 1 | import SegNet 2 | from CamVid import CamVid 3 | import torch 4 | import torchvision.transforms as transforms 5 | import os 6 | import numpy as np 7 | 8 | def main(): 9 | 10 | classes = np.load('classes.npy') 11 | raw_dir = os.path.join(os.getcwd(), 'CamVid_Raw') 12 | lbl_dir = os.path.join(os.getcwd(), 'CamVid_Labeled') 13 | 14 | transform = transforms.Compose([transforms.ToTensor()]) 15 | 16 | trainset = CamVid(classes, raw_dir, lbl_dir, transform=transform) 17 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=12, shuffle=True, num_workers=4) 18 | 19 | resume = input("Resume training?
(Y/N) ") 20 | 21 | if resume == 'Y' or resume == 'y': 22 | SegNet.Train.Train(trainloader, os.path.abspath("checkpoint.pth.tar")) 23 | 24 | elif resume == 'N' or resume == 'n': 25 | SegNet.Train.Train(trainloader) 26 | 27 | else: 28 | print("Invalid input, exiting program.") 29 | 30 | if __name__ == '__main__': 31 | main() -------------------------------------------------------------------------------- /CamVid/classes.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vinceecws/SegNet_PyTorch/d870acccfeb6d99be38301981e1a5a14735d2d90/CamVid/classes.npy -------------------------------------------------------------------------------- /Pavements/2_Classes.txt: -------------------------------------------------------------------------------- 1 | 0 0 0 Pavement Surface 2 | 255 255 255 Crack 3 | -------------------------------------------------------------------------------- /Pavements/Pavements.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset 2 | from skimage import io 3 | from torchmetrics.functional import jaccard_index, precision, recall, stat_scores 4 | import torch 5 | import os 6 | import time 7 | import numpy as np 8 | import torchvision.transforms as transforms 9 | 10 | class Pavements(Dataset): 11 | 12 | def __init__(self, raw_dir, lbl_dir, transform=None): 13 | 14 | # raw_dir: (directory) Folder directory of raw input image files 15 | # lbl_dir: (directory) Folder directory of labeled image files 16 | 17 | self.raw_dir = raw_dir 18 | self.lbl_dir = lbl_dir 19 | self.transform = transform 20 | self.list_img = [f for f in os.listdir(self.raw_dir) if not f.startswith('.')] 21 | self.pixel_value_threshold = 127 # Threshold to determine if a pixel belongs to class 0 or 1 22 | 23 | def one_Hot(self, image): 24 | 25 | # Used for pixel-wise conversion of labeled images to their respective classes 26 | # Output is a one-hot encoded tensor of (M, N, 2) dimensions: MxN resolution, 2 channels (classes) 27 | # Annotated images are assumed to be monochrome: white pixels are cracks, black pixels are everything else 28 | 29 | output_shape = (image.shape[0], image.shape[1], 2) 30 | output = np.zeros(output_shape) 31 | 32 | # Threshold pixels such that (<= threshold is pavement surface) & (> threshold is pavement crack) 33 | output[image <= self.pixel_value_threshold, 0] = 1 34 | output[image > self.pixel_value_threshold, 1] = 1 35 | 36 | return output 37 | 38 | def classify(self, image): 39 | output = np.zeros_like(image, dtype=int) # np.int was deprecated in NumPy 1.20 and removed in 1.24 40 | 41 | # Threshold pixels such that (<= threshold is pavement surface) & (> threshold is pavement crack) 42 | output[image <= self.pixel_value_threshold] = 0 43 | output[image > self.pixel_value_threshold] = 1 44 | 45 | return output 46 | 47 | 48 | def __len__(self): 49 | 50 | return len(self.list_img) 51 | 52 | def __getitem__(self, idx): 53 | 54 | img_name = self.list_img[idx] 55 | img_raw_dir = os.path.join(self.raw_dir, img_name) 56 | img_lbl_dir = os.path.join(self.lbl_dir, img_name) 57 | image_raw = io.imread(img_raw_dir) 58 | image_label = io.imread(img_lbl_dir) 59 | label = self.classify(image_label) 60 | 61 | if self.transform: 62 | image_raw = self.transform(image_raw) 63 | label = self.transform(label) 64 | 65 | # toTensor converts the input from H x W x C (numpy) to C x H x W (PyTorch); the label stays an H x W index array 66 | to_tensor = transforms.ToTensor() 67 | 68 | data = (to_tensor(image_raw), label) 69 |
70 | return data 71 | 72 | def compute_pavement_crack_area(self, pred, as_ratio=False): 73 | crack_pixels = torch.where(pred == 1.0)[0].shape[0] 74 | if as_ratio: 75 | total_pixels = pred.nelement() 76 | return crack_pixels / total_pixels 77 | 78 | return crack_pixels 79 | 80 | def compute_precision(self, pred, target, threshold=0.5): 81 | # Precision: TP / (TP + FP) 82 | 83 | return precision(pred, target, average='none', mdmc_average='samplewise', ignore_index=None, 84 | num_classes=2, threshold=0.5, top_k=None, multiclass=None) 85 | 86 | def compute_recall(self, pred, target, threshold=0.5): 87 | # Recall: TP / (TP + FN) 88 | 89 | return recall(pred, target, average='none', mdmc_average='samplewise', ignore_index=None, 90 | num_classes=2, threshold=0.5, top_k=None, multiclass=None) 91 | 92 | def compute_m_iou(self, pred, target, threshold=0.5): 93 | # Mean Intersection over Union (mIoU) a.k.a. Jaccard Index 94 | 95 | return jaccard_index(pred, target, 2, ignore_index=None, absent_score=0.0, 96 | threshold=threshold, average='none') 97 | 98 | def compute_balanced_class_accuracy(self, pred, target): 99 | """ 100 | Balanced class accuracy = (Sensitivity + Specificity) / 2 101 | = ((TP / (TP + FN)) + TN / (TN + FP)) / 2 102 | """ 103 | scores = stat_scores(pred, target, reduce='macro', num_classes=2, 104 | mdmc_reduce='samplewise') # [[[tp, fp, tn, fn, sup]]] 105 | 106 | tp = scores[:, :, 0] 107 | fp = scores[:, :, 1] 108 | tn = scores[:, :, 2] 109 | fn = scores[:, :, 3] 110 | sensitivity = tp / (tp + fn) 111 | specificity = tn / (tn + fp) 112 | 113 | return torch.mean((sensitivity + specificity) / 2, dim=0)[0] 114 | -------------------------------------------------------------------------------- /Pavements/SegNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class SegNet(nn.Module): 7 | 8 | def __init__(self, in_chn=3, out_chn=32, BN_momentum=0.5): 9 | super(SegNet, self).__init__() 10 | 11 | #SegNet Architecture 12 | #Takes input of size in_chn = 3 (RGB images have 3 channels) 13 | #Outputs size label_chn (N # of classes) 14 | 15 | #ENCODING consists of 5 stages 16 | #Stage 1, 2 has 2 layers of Convolution + Batch Normalization + Max Pool respectively 17 | #Stage 3, 4, 5 has 3 layers of Convolution + Batch Normalization + Max Pool respectively 18 | 19 | #General Max Pool 2D for ENCODING layers 20 | #Pooling indices are stored for Upsampling in DECODING layers 21 | 22 | self.in_chn = in_chn 23 | self.out_chn = out_chn 24 | 25 | self.MaxEn = nn.MaxPool2d(2, stride=2, return_indices=True) 26 | 27 | self.ConvEn11 = nn.Conv2d(self.in_chn, 64, kernel_size=3, padding=1) 28 | self.BNEn11 = nn.BatchNorm2d(64, momentum=BN_momentum) 29 | self.ConvEn12 = nn.Conv2d(64, 64, kernel_size=3, padding=1) 30 | self.BNEn12 = nn.BatchNorm2d(64, momentum=BN_momentum) 31 | 32 | self.ConvEn21 = nn.Conv2d(64, 128, kernel_size=3, padding=1) 33 | self.BNEn21 = nn.BatchNorm2d(128, momentum=BN_momentum) 34 | self.ConvEn22 = nn.Conv2d(128, 128, kernel_size=3, padding=1) 35 | self.BNEn22 = nn.BatchNorm2d(128, momentum=BN_momentum) 36 | 37 | self.ConvEn31 = nn.Conv2d(128, 256, kernel_size=3, padding=1) 38 | self.BNEn31 = nn.BatchNorm2d(256, momentum=BN_momentum) 39 | self.ConvEn32 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 40 | self.BNEn32 = nn.BatchNorm2d(256, momentum=BN_momentum) 41 | self.ConvEn33 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 42 | self.BNEn33 = nn.BatchNorm2d(256, 
momentum=BN_momentum) 43 | 44 | self.ConvEn41 = nn.Conv2d(256, 512, kernel_size=3, padding=1) 45 | self.BNEn41 = nn.BatchNorm2d(512, momentum=BN_momentum) 46 | self.ConvEn42 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 47 | self.BNEn42 = nn.BatchNorm2d(512, momentum=BN_momentum) 48 | self.ConvEn43 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 49 | self.BNEn43 = nn.BatchNorm2d(512, momentum=BN_momentum) 50 | 51 | self.ConvEn51 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 52 | self.BNEn51 = nn.BatchNorm2d(512, momentum=BN_momentum) 53 | self.ConvEn52 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 54 | self.BNEn52 = nn.BatchNorm2d(512, momentum=BN_momentum) 55 | self.ConvEn53 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 56 | self.BNEn53 = nn.BatchNorm2d(512, momentum=BN_momentum) 57 | 58 | 59 | #DECODING consists of 5 stages 60 | #Each stage corresponds to their respective counterparts in ENCODING 61 | 62 | #General Max Pool 2D/Upsampling for DECODING layers 63 | self.MaxDe = nn.MaxUnpool2d(2, stride=2) 64 | 65 | self.ConvDe53 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 66 | self.BNDe53 = nn.BatchNorm2d(512, momentum=BN_momentum) 67 | self.ConvDe52 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 68 | self.BNDe52 = nn.BatchNorm2d(512, momentum=BN_momentum) 69 | self.ConvDe51 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 70 | self.BNDe51 = nn.BatchNorm2d(512, momentum=BN_momentum) 71 | 72 | self.ConvDe43 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 73 | self.BNDe43 = nn.BatchNorm2d(512, momentum=BN_momentum) 74 | self.ConvDe42 = nn.Conv2d(512, 512, kernel_size=3, padding=1) 75 | self.BNDe42 = nn.BatchNorm2d(512, momentum=BN_momentum) 76 | self.ConvDe41 = nn.Conv2d(512, 256, kernel_size=3, padding=1) 77 | self.BNDe41 = nn.BatchNorm2d(256, momentum=BN_momentum) 78 | 79 | self.ConvDe33 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 80 | self.BNDe33 = nn.BatchNorm2d(256, momentum=BN_momentum) 81 | self.ConvDe32 = nn.Conv2d(256, 256, kernel_size=3, padding=1) 82 | self.BNDe32 = nn.BatchNorm2d(256, momentum=BN_momentum) 83 | self.ConvDe31 = nn.Conv2d(256, 128, kernel_size=3, padding=1) 84 | self.BNDe31 = nn.BatchNorm2d(128, momentum=BN_momentum) 85 | 86 | self.ConvDe22 = nn.Conv2d(128, 128, kernel_size=3, padding=1) 87 | self.BNDe22 = nn.BatchNorm2d(128, momentum=BN_momentum) 88 | self.ConvDe21 = nn.Conv2d(128, 64, kernel_size=3, padding=1) 89 | self.BNDe21 = nn.BatchNorm2d(64, momentum=BN_momentum) 90 | 91 | self.ConvDe12 = nn.Conv2d(64, 64, kernel_size=3, padding=1) 92 | self.BNDe12 = nn.BatchNorm2d(64, momentum=BN_momentum) 93 | self.ConvDe11 = nn.Conv2d(64, self.out_chn, kernel_size=3, padding=1) 94 | self.BNDe11 = nn.BatchNorm2d(self.out_chn, momentum=BN_momentum) 95 | 96 | def forward(self, x): 97 | 98 | #ENCODE LAYERS 99 | #Stage 1 100 | x = F.relu(self.BNEn11(self.ConvEn11(x))) 101 | x = F.relu(self.BNEn12(self.ConvEn12(x))) 102 | x, ind1 = self.MaxEn(x) 103 | size1 = x.size() 104 | 105 | #Stage 2 106 | x = F.relu(self.BNEn21(self.ConvEn21(x))) 107 | x = F.relu(self.BNEn22(self.ConvEn22(x))) 108 | x, ind2 = self.MaxEn(x) 109 | size2 = x.size() 110 | 111 | #Stage 3 112 | x = F.relu(self.BNEn31(self.ConvEn31(x))) 113 | x = F.relu(self.BNEn32(self.ConvEn32(x))) 114 | x = F.relu(self.BNEn33(self.ConvEn33(x))) 115 | x, ind3 = self.MaxEn(x) 116 | size3 = x.size() 117 | 118 | #Stage 4 119 | x = F.relu(self.BNEn41(self.ConvEn41(x))) 120 | x = F.relu(self.BNEn42(self.ConvEn42(x))) 121 | x = F.relu(self.BNEn43(self.ConvEn43(x))) 122 | x, ind4 = self.MaxEn(x) 123 | size4 = x.size() 
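# --- Illustrative sketch (not part of the original file): the pooling indices
# captured above are what make SegNet's decoder work. MaxPool2d with
# return_indices=True records where each maximum came from, and MaxUnpool2d
# scatters values back to exactly those positions, zero-filling the rest:
#
#   import torch
#   import torch.nn as nn
#
#   pool = nn.MaxPool2d(2, stride=2, return_indices=True)
#   unpool = nn.MaxUnpool2d(2, stride=2)
#
#   t = torch.arange(16.0).reshape(1, 1, 4, 4)
#   y, ind = pool(t)                                 # (1, 1, 2, 2) maxima and their flat positions
#   restored = unpool(y, ind, output_size=t.size())  # (1, 1, 4, 4): maxima in place, zeros elsewhere
# ---------------------------------------------------------------------------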
124 | 125 | #Stage 5 126 | x = F.relu(self.BNEn51(self.ConvEn51(x))) 127 | x = F.relu(self.BNEn52(self.ConvEn52(x))) 128 | x = F.relu(self.BNEn53(self.ConvEn53(x))) 129 | x, ind5 = self.MaxEn(x) 130 | size5 = x.size() 131 | 132 | #DECODE LAYERS 133 | #Stage 5 134 | x = self.MaxDe(x, ind5, output_size=size4) 135 | x = F.relu(self.BNDe53(self.ConvDe53(x))) 136 | x = F.relu(self.BNDe52(self.ConvDe52(x))) 137 | x = F.relu(self.BNDe51(self.ConvDe51(x))) 138 | 139 | #Stage 4 140 | x = self.MaxDe(x, ind4, output_size=size3) 141 | x = F.relu(self.BNDe43(self.ConvDe43(x))) 142 | x = F.relu(self.BNDe42(self.ConvDe42(x))) 143 | x = F.relu(self.BNDe41(self.ConvDe41(x))) 144 | 145 | #Stage 3 146 | x = self.MaxDe(x, ind3, output_size=size2) 147 | x = F.relu(self.BNDe33(self.ConvDe33(x))) 148 | x = F.relu(self.BNDe32(self.ConvDe32(x))) 149 | x = F.relu(self.BNDe31(self.ConvDe31(x))) 150 | 151 | #Stage 2 152 | x = self.MaxDe(x, ind2, output_size=size1) 153 | x = F.relu(self.BNDe22(self.ConvDe22(x))) 154 | x = F.relu(self.BNDe21(self.ConvDe21(x))) 155 | 156 | #Stage 1 157 | x = self.MaxDe(x, ind1) 158 | x = F.relu(self.BNDe12(self.ConvDe12(x))) 159 | x = self.ConvDe11(x) 160 | 161 | x = F.softmax(x, dim=1) 162 | 163 | return x -------------------------------------------------------------------------------- /Pavements/TensorBoard.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "TensorBoard.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "4mvMV5_3wc99" 23 | }, 24 | "source": [ 25 | "## **TensorBoard**" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "id": "J31GPm8MzGxw" 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "# Load the TensorBoard notebook extension.\n", 37 | "import os\n", 38 | "import tensorflow as tf\n", 39 | "import pandas as pd\n", 40 | "from sklearn.preprocessing import MinMaxScaler\n", 41 | "%load_ext tensorboard\n", 42 | "logs_base_dir = 'the directory of the folder named \"logfiles\"'\n", 43 | "os.makedirs(logs_base_dir, exist_ok=True)\n", 44 | "%tensorboard --logdir {logs_base_dir}\n", 45 | "%tensorboard --logdir=epochs --port 5000" 46 | ] 47 | } 48 | ] 49 | } -------------------------------------------------------------------------------- /Pavements/Test_SegNet_Pavements.py: -------------------------------------------------------------------------------- 1 | import SegNet 2 | from Pavements import Pavements 3 | 4 | import os 5 | import argparse 6 | import json 7 | import numpy as np 8 | import torch 9 | import torchvision.transforms as transforms 10 | from torchvision.utils import save_image 11 | from itertools import product 12 | 13 | 14 | def build_color_map(): 15 | # assumes only classes to be pavement surface (black) & cracks (white) 16 | color_map = torch.tensor([ 17 | [0, 0, 0], 18 | [255, 255, 255] 19 | ]) 20 | 21 | print() 22 | print("Map of class to color: ") 23 | for class_ind, color in enumerate(color_map): 24 | print("Class: {}, RGB Color: {}".format(class_ind + 1, color)) 25 | 26 | print() 27 | 28 | return color_map 29 | 30 | 31 | def load_model_json(): 32 | 33 | # batch_size: Training batch-size 34 | # epochs: No. 
of epochs to run 35 | # lr: Optimizer learning rate 36 | # momentum: SGD momentum 37 | # no_cuda: Disables CUDA training (**To be implemented) 38 | # seed: Random seed 39 | # in-chn: Input image channels (3 for RGB, 4 for RGB-A) 40 | # out-chn: Output channels/semantic classes (2 for Pavements dataset) 41 | 42 | with open('./model.json') as f: 43 | model_json = json.load(f) 44 | 45 | return model_json 46 | 47 | 48 | def load(model, weight_fn): 49 | 50 | assert os.path.isfile(weight_fn), "{} is not a file.".format(weight_fn) 51 | 52 | checkpoint = torch.load(weight_fn) 53 | epoch = checkpoint['epoch'] 54 | state_dict = checkpoint['state_dict'] 55 | model.load_state_dict(state_dict) 56 | print("Checkpoint is loaded at {} | Epochs: {}".format(weight_fn, epoch)) 57 | 58 | 59 | def main(args): 60 | 61 | cuda_available = torch.cuda.is_available() 62 | model_json = load_model_json() 63 | 64 | 65 | # pavements_raw_dir = os.path.join(os.getcwd(), 'test_raw') 66 | # pavements_labelled_dir = os.path.join(os.getcwd(), 'test_annotated') 67 | # res_dir = os.path.join(os.getcwd(), 'model_output') 68 | # weight_fn = os.path.abspath("segnet_weights.pth.tar") 69 | 70 | 71 | 72 | # initialize model in evaluation mode 73 | model = SegNet.SegNet(in_chn=model_json['in_chn'], out_chn=model_json['out_chn'], BN_momentum=model_json['bn_momentum']) 74 | 75 | if cuda_available: 76 | model.cuda() 77 | 78 | model.eval() 79 | 80 | # load pretrained weights 81 | load(model, args.weight_fn) 82 | 83 | # load test dataset 84 | dataset = Pavements(args.pavements_raw_dir, args.pavements_labelled_dir) 85 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, num_workers=4) 86 | 87 | # build color map 88 | color_map = build_color_map() 89 | 90 | # init metrics aggregation 91 | num_images = 0 92 | sum_precision = torch.zeros(2) 93 | sum_recall = torch.zeros(2) 94 | sum_m_iou = torch.zeros(2) 95 | sum_balanced_class_accuracy = 0.0 96 | 97 | # run evaluation 98 | for i, data in enumerate(dataloader): 99 | images = data[0] 100 | 101 | if cuda_available: 102 | images = images.cuda() 103 | 104 | res = model(images) 105 | res = torch.argmax(res, dim=1).type(torch.long) # pixel-wise probs squashed to pixel-wise labels 106 | lbl = data[1].type(torch.long) 107 | 108 | if cuda_available: 109 | lbl = lbl.cuda() 110 | 111 | for n in range(res.shape[0]): # loop over each image 112 | image_name = "img_{}_{}.png".format(i, n) 113 | input_image = images[n] 114 | lbl_image = color_map[lbl[n]].permute(2, 0, 1).to(torch.float).div(255.0) 115 | res_image = color_map[res[n]].permute(2, 0, 1).to(torch.float).div(255.0) # transpose back to C, H, W, normalize to (0.0, 1.0) 116 | if cuda_available: 117 | input_image = input_image.cuda() 118 | lbl_image = lbl_image.cuda() 119 | res_image = res_image.cuda() 120 | 121 | compare_image = torch.cat((input_image, lbl_image, res_image), dim=2) 122 | 123 | if cuda_available: 124 | compare_image = compare_image.cuda() 125 | save_image(compare_image, os.path.join(args.res_dir, image_name)) 126 | 127 | # Compute metrics per image & accumulate 128 | precision = dataset.compute_precision(res, lbl).to('cpu') 129 | recall = dataset.compute_recall(res, lbl).to('cpu') 130 | m_iou = dataset.compute_m_iou(res, lbl).to('cpu') 131 | balanced_class_accuracy = dataset.compute_balanced_class_accuracy(res, lbl).to('cpu') 132 | pavement_crack_area = dataset.compute_pavement_crack_area(res, as_ratio=True) * 100.0 133 | print("{} | Precision: {} | Recall: {} | IoU: {} | Balanced Class Accuracy: {} | Crack Area: {:.6f}%" 
134 | .format(image_name, precision, recall, m_iou, balanced_class_accuracy, pavement_crack_area)) 135 | 136 | num_images += 1 137 | sum_precision += precision 138 | sum_recall += recall 139 | sum_m_iou += m_iou 140 | sum_balanced_class_accuracy += balanced_class_accuracy 141 | 142 | print("\nEvaluation complete. {} segmented images saved at {}\n".format(num_images, args.res_dir)) 143 | 144 | # Compute global metrics & present 145 | print("Averaged metrics | Precision: {} | Recall: {} | IoU: {} | Balanced Class Accuracy: {}" 146 | .format(*[x / num_images for x in [sum_precision, sum_recall, sum_m_iou, sum_balanced_class_accuracy]])) 147 | 148 | 149 | if __name__ == "__main__": 150 | parser = argparse.ArgumentParser() 151 | 152 | # FORMAT DIRECTORIES 153 | parser.add_argument("pavements_raw_dir", type=str, help="Directory: Pavements raw testing images") 154 | parser.add_argument("pavements_labelled_dir", type=str, help="Directory: Pavements annotated testing images") 155 | parser.add_argument("weight_fn", type=str, help="Path: Trained weights") 156 | parser.add_argument("res_dir", type=str, help="Directory: Model output images") 157 | 158 | args = parser.parse_args() 159 | 160 | main(args) -------------------------------------------------------------------------------- /Pavements/Train_SegNet_Pavements.py: -------------------------------------------------------------------------------- 1 | from SegNet import SegNet 2 | from Pavements import Pavements 3 | from datetime import datetime 4 | from torch.utils.tensorboard import SummaryWriter 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | import argparse 9 | import os 10 | import numpy as np 11 | import json 12 | 13 | def save_checkpoint(state, path): 14 | torch.save(state, path) 15 | print("Checkpoint saved at {}".format(path)) 16 | 17 | 18 | def load_model_json(): 19 | 20 | # batch_size: Training batch-size 21 | # epochs: No. of epochs to run 22 | # lr: Optimizer learning rate 23 | # momentum: SGD momentum 24 | # no_cuda: Disables CUDA training (**To be implemented) 25 | # seed: Random seed 26 | # in-chn: Input image channels (3 for RGB, 4 for RGB-A) 27 | # out-chn: Output channels/semantic classes (2 for Pavements dataset) 28 | 29 | with open(os.path.join(os.getcwd(), 'model.json')) as f: 30 | model_json = json.load(f) 31 | 32 | return model_json 33 | 34 | 35 | def main(args): 36 | 37 | cuda_available = torch.cuda.is_available() 38 | writer = SummaryWriter(args.tensorboard_logs_dir) 39 | 40 | weight_fn = args.weight_fn 41 | model_json = load_model_json() 42 | 43 | assert len(model_json['cross_entropy_loss_weights']) == model_json['out_chn'], "CrossEntropyLoss class weights must be same as no. 
of output channels" 44 | 45 | trainset = Pavements(args.pavements_raw_dir, args.pavements_labelled_dir) 46 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=model_json['batch_size'], shuffle=True, num_workers=4) 47 | 48 | model = SegNet(in_chn=model_json['in_chn'], out_chn=model_json['out_chn'], BN_momentum=model_json['bn_momentum']) 49 | optimizer = optim.SGD(model.parameters(), lr=model_json['learning_rate'], momentum=model_json['sgd_momentum']) 50 | loss_fn = nn.CrossEntropyLoss(weight=torch.tensor(model_json['cross_entropy_loss_weights'])) 51 | 52 | if cuda_available: 53 | model.cuda() 54 | loss_fn.cuda() 55 | 56 | run_epoch = model_json['epochs'] 57 | epoch = None 58 | if weight_fn is not None: 59 | if os.path.isfile(weight_fn): 60 | print("Loading checkpoint '{}'".format(weight_fn)) 61 | checkpoint = torch.load(weight_fn) 62 | epoch = checkpoint['epoch'] 63 | model.load_state_dict(checkpoint['state_dict']) 64 | optimizer.load_state_dict(checkpoint['optimizer']) 65 | print("Loaded checkpoint '{}' (epoch {})".format(weight_fn, checkpoint['epoch'])) 66 | else: 67 | print("No checkpoint found at '{}'. Will create new checkpoint.".format(weight_fn)) 68 | else: 69 | print("Starting new checkpoint.".format(weight_fn)) 70 | weight_fn = os.path.join(os.getcwd(), "weights/checkpoint_pavements_{}.pth.tar".format(datetime.now().strftime("%Y%m%d_%H%M%S"))) 71 | 72 | for i in range(epoch + 1 if epoch is not None else 1, run_epoch + 1): 73 | print('Epoch {}:'.format(i)) 74 | sum_loss = 0.0 75 | 76 | for j, data in enumerate(trainloader, 1): 77 | images, labels = data 78 | if cuda_available: 79 | images = images.cuda() 80 | labels = labels.cuda() 81 | optimizer.zero_grad() 82 | output = model(images) 83 | loss = loss_fn(output, labels) 84 | loss.backward() 85 | optimizer.step() 86 | 87 | writer.add_scalar('Loss',loss.item()/trainloader.batch_size, j) 88 | sum_loss += loss.item() 89 | 90 | print('Loss at {} mini-batch: {}'.format(j, loss.item() / trainloader.batch_size)) 91 | 92 | print('Average loss @ epoch: {}'.format((sum_loss / (j * trainloader.batch_size)))) 93 | 94 | print("Training complete. 
Saving checkpoint...") 95 | save_checkpoint({'epoch': run_epoch, 'state_dict': model.state_dict(), 'optimizer' : optimizer.state_dict()}, weight_fn) 96 | 97 | 98 | if __name__ == '__main__': 99 | parser = argparse.ArgumentParser() 100 | 101 | #FORMAT DIRECTORIES 102 | parser.add_argument("pavements_raw_dir", type=str, help="Directory: Pavements raw training images") 103 | parser.add_argument("pavements_labelled_dir", type=str, help="Directory: Pavements annotated training images") 104 | parser.add_argument("tensorboard_logs_dir", type=str, help="Directory: Logs for tensorboard") 105 | parser.add_argument("--weight-fn", type=str, help="Path: Trained weights", default=None) 106 | 107 | args = parser.parse_args() 108 | 109 | main(args) -------------------------------------------------------------------------------- /Pavements/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 4, 3 | "epochs": 10, 4 | "learning_rate": 0.005, 5 | "sgd_momentum": 0.9, 6 | "bn_momentum": 0.5, 7 | "cross_entropy_loss_weights": [1.0, 15.0], 8 | "no_cuda": false, 9 | "seed": 42, 10 | "in_chn": 3, 11 | "out_chn": 2 12 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SegNet_PyTorch 2 | PyTorch implementation of SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation 3 | 4 | Original paper: https://arxiv.org/pdf/1511.00561.pdf 5 | 6 | ### A summary of the original paper is provided in the final section 7 | 8 | ## Pavements Dataset 9 | 10 | This model was employed to examine the feasibility of machine learning-powered monitoring of road infrastructure health. This effort contributes to "Use Of Remote Sensing And Machine Learning Techniques For Resilient Infrastructure Health Monitoring" by Narges Tahaei. The study centers on determining the performance of SegNet in identifying pavement cracks given a top view of expressway roads. SegNet is used here to solve a binary pixel-wise image segmentation task, where positive samples (i.e. pixels assigned a class of 1) represent cracks on the road, and negative samples (i.e. pixels assigned a class of 0) represent normal road surface. 11 | 12 | The Pavements dataset consists of 1900 raw RGB images of interstate expressways in the state of Georgia, USA, taken with a camera mounted on a driving vehicle. The images are cropped to 448 x 448, and corresponding annotated images are produced by setting pixels belonging to pavement cracks to an RGB value of 255, 255, 255 (i.e. white) and all other pixels to an RGB value of 0, 0, 0 (i.e. black). 13 | 14 | ## Training 15 | An 80-20 random split was used to form the training and testing datasets. The model was trained with mini-batch gradient descent at a batch size of 4 for 50 epochs. The criterion is a weighted cross-entropy loss, with class weights calculated using the median-frequency pixel-wise class-balancing method (Predicting Depth, Surface Normals and Semantic Labels with a Common Multi-Scale Convolutional Architecture, https://arxiv.org/pdf/1411.4734.pdf); a sketch of this weighting scheme is given below. 16 | 17 | 18 |
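The 1.0/15.0 class weights shipped in `model.json` above are consistent with this scheme: crack pixels are far rarer than surface pixels, so the crack class is up-weighted. The exact derivation script is not part of this repository; the sketch below shows one way to compute median-frequency weights, assuming labels are stored as (H, W) arrays of class indices (0 = surface, 1 = crack) and that every class occurs somewhere in the dataset:

```python
import numpy as np

def median_frequency_weights(label_images, num_classes=2):
    # freq(c) = pixels of class c / total pixels of the images in which c appears
    # weight(c) = median(freq) / freq(c), so rare classes receive weights > 1
    class_pixels = np.zeros(num_classes)
    image_pixels = np.zeros(num_classes)
    for lbl in label_images:  # lbl: (H, W) array of class indices
        counts = np.bincount(lbl.ravel(), minlength=num_classes)
        class_pixels += counts
        image_pixels[counts > 0] += lbl.size
    freq = class_pixels / image_pixels
    return np.median(freq) / freq
```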
25 | Collages of model input (left), annotated ground-truth (middle), model output (right).
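Each collage is saved as a single image: `Test_SegNet_Pavements.py` maps pixel-wise class indices through a two-color map and concatenates input, ground truth, and prediction along the width. A condensed sketch of that step (the helper name is the editor's, not the repository's):

```python
import torch
from torchvision.utils import save_image

color_map = torch.tensor([[0, 0, 0], [255, 255, 255]])  # class 0: pavement (black), class 1: crack (white)

def save_collage(input_image, lbl, pred, path):
    # input_image: (3, H, W) float in [0, 1]; lbl, pred: (H, W) long tensors of class indices
    lbl_rgb = color_map[lbl].permute(2, 0, 1).float().div(255.0)
    pred_rgb = color_map[pred].permute(2, 0, 1).float().div(255.0)
    collage = torch.cat((input_image, lbl_rgb, pred_rgb), dim=2)  # side by side along the width
    save_image(collage, path)
```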
26 |
38 | The encoder-decoder architecture of SegNet. Image taken from: https://arxiv.org/pdf/1511.00561.pdf.
39 |
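As a quick sanity check of the architecture shown above: input sizes that are multiples of 2^5 = 32 pass through the five pool/unpool stages without any rounding (the final unpooling has no `output_size` hint and simply doubles). A minimal sketch, assuming one of the `SegNet.py` files is importable:

```python
import torch
from SegNet import SegNet

model = SegNet(in_chn=3, out_chn=32)  # the Pavements variant is built with out_chn=2
model.eval()

with torch.no_grad():
    x = torch.randn(1, 3, 224, 224)  # 224 is a multiple of 32
    y = model(x)

print(y.shape)              # torch.Size([1, 32, 224, 224])
print(y.sum(dim=1).mean())  # ~1.0, since forward() ends with a softmax over classes
```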