├── LICENSE
├── README.md
├── backbone.py
├── data
│   └── README.md
├── data_example.png
├── fcn.py
├── loss.py
├── requirements.txt
├── test.py
├── train.py
├── utils.py
└── weights
    └── README.md
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 yuansheng
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Semantic-Segmentation-with-Sparse-Labels
2 | The labels and code for [Semantic Segmentation of Remote Sensing Images with Sparse Annotations](https://arxiv.org/pdf/2101.03492.pdf).
3 | 
4 | ## Data
5 | We provide three types of sparse annotations: polygon, scribble, and point.
6 | ![example](data_example.png)
7 | 
8 | ## Usage
9 | 1) install dependencies in ```requirements.txt```
10 | 2) download and unzip [data](https://drive.google.com/file/d/1E4bhx3H6P8jTdOQG6hS14G_gBBhvwzWU/view?usp=sharing) in the folder ```data```. The directory structure should be as follows:
11 | ```
12 | path/to/data/
13 |     City/           # Vaihingen or Zurich
14 |         img/        # images
15 |         line/       # line/scribble-level sparse annotations
16 |         point/      # point-level sparse annotations
17 |         polygon/    # polygon-level sparse annotations
18 |         gt/         # dense gt
19 |         eroded_gt/  # dense gt without boundaries
20 | 
21 | ```
22 | 3) download and unzip [weights](https://drive.google.com/file/d/10BYt1lvRNBtgx76lMiuWj7J2kF-tSBV1/view?usp=sharing) in the folder ```weights```.
23 | 4) run ```python train.py``` for training and ```python test.py``` for testing (the main training options are listed below)
24 | 
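The annotation type, the annotator, and the weight of the FESTA regularizer are set at the top of ```train.py```, e.g.:
```python
an_type = 'polygon'   # type of sparse annotations: 'polygon', 'line', or 'point'
an_id = 1             # id of the annotator: 1 and 2 are expert, 3 and 4 are non-expert
lambda_festa = 0.1    # weight of the FESTA term (lambda in Eq. 2 of the paper)
```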
25 | ## Citation
26 | If you find them useful, please kindly cite the following:
27 | ```
28 | @article{hua2021sparse,
29 | title={Semantic Segmentation of Remote Sensing Images with Sparse Annotations},
30 | author={Hua, Yuansheng and Marcos, Diego and Mou, Lichao and Zhu, Xiao Xiang and Tuia, Devis},
31 | journal={IEEE Geoscience and Remote Sensing Letters},
32 | year={in press}
33 | }
34 | ```
35 | 
--------------------------------------------------------------------------------
/backbone.py:
--------------------------------------------------------------------------------
1 | from keras.models import Model
2 | from keras.layers import Input, BatchNormalization, Activation, Lambda, Add, Concatenate
3 | from keras.layers.convolutional import Conv2D
4 | from keras.layers.pooling import MaxPooling2D
5 | #import tensorflow.compat.v1 as tf
6 | 
7 | def VGG16(patch_size, bn=False):
8 | 
9 |     img_input = Input(shape=(patch_size, patch_size, 3), name='input1')
10 |     x = conv2d(img_input, 64, 3, 'same', 'block1_conv1', bn)
11 |     x = conv2d(x, 64, 3, 'same', 'block1_conv2', bn)
12 |     x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
13 | 
14 |     # Block 2
15 |     x = conv2d(x, 128, 3, 'same', 'block2_conv1', bn)
16 |     x = conv2d(x, 128, 3, 'same', 'block2_conv2', bn)
17 |     x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
18 | 
19 |     # Block 3
20 |     x = conv2d(x, 256, 3, 'same', 'block3_conv1', bn)
21 |     x = conv2d(x, 256, 3, 'same', 'block3_conv2', bn)
22 |     x = conv2d(x, 256, 3, 'same', 'block3_conv3', bn)
23 |     x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
24 | 
25 |     # Block 4
26 |     x = conv2d(x, 512, 3, 'same', 'block4_conv1', bn)
27 |     x = conv2d(x, 512, 3, 'same', 'block4_conv2', bn)
28 |     x = conv2d(x, 512, 3, 'same', 'block4_conv3', bn)
29 |     x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
30 | 
31 |     # Block 5
32 |     x = conv2d(x, 512, 3, 'same', 'block5_conv1', bn)
33 |     x = conv2d(x, 512, 3, 'same', 'block5_conv2', bn)
34 |     x = conv2d(x, 512, 3, 'same', 'block5_conv3', bn)
35 |     x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
36 | 
37 |     model = Model(img_input, x, name='vgg16')
38 | 
39 |     return model
40 | 
41 | def conv2d(x, nb_filters, filter_size, padding, name, bn=False):
42 |     x = Conv2D(nb_filters, (filter_size, filter_size), padding=padding, name=name)(x)
43 |     if bn:
44 |         x = BatchNormalization()(x)
45 |     x = Activation('relu')(x)
46 |     return x
47 | 
48 | 
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | Sparse annotations can be downloaded from [Google Drive](https://drive.google.com/file/d/1E4bhx3H6P8jTdOQG6hS14G_gBBhvwzWU/view?usp=sharing).
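The colour-coded masks can be converted into per-class one-hot maps with ```bgr2index``` from ```utils.py```. A minimal sketch, assuming the data has been unzipped into ```data/``` as described in the main README (the file name follows the naming pattern used in ```utils.py```):
```python
import cv2
from utils import bgr2index

# line-level sparse annotations by annotator 1 for Vaihingen tile 1
mask = cv2.imread('data/Vaihingen/line/an1/mask_top_mosaic_09cm_area1.png')
onehot = bgr2index(mask)  # H x W x 6 one-hot maps, one channel per colour-coded class
```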
2 | 
--------------------------------------------------------------------------------
/data_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Hua-YS/Semantic-Segmentation-with-Sparse-Labels/b8244dd8aaaab7b4f62c6547bdf5b386c14bfeef/data_example.png
--------------------------------------------------------------------------------
/fcn.py:
--------------------------------------------------------------------------------
1 | from backbone import *
2 | import tensorflow as tf
3 | 
4 | def fcn_festa(patch_size, test=True, noclutter=True):
5 | 
6 |     nb_classes = 5 if noclutter else 6
7 |     base_model = VGG16(patch_size, True)
8 |     x4 = base_model.get_layer('block4_pool').output
9 |     x4 = Lambda(lambda image: tf.image.resize_bilinear(image, [patch_size, patch_size]), name='x4_up')(x4)
10 |     x5 = base_model.get_layer('block5_pool').output
11 |     x5 = Lambda(lambda image: tf.image.resize_bilinear(image, [patch_size, patch_size]), name='x5_up')(x5)
12 | 
13 |     x = Add(name='final_feat')([x4, x5])
14 |     x_out = Conv2D(nb_classes, (1, 1), activation='softmax', padding='same', name='final_out')(x)
15 | 
16 |     if test:
17 |         return Model(base_model.inputs, x_out, name='vgg16')
18 | 
19 |     return Model(base_model.inputs, [x, x_out], name='vgg16')
20 | 
--------------------------------------------------------------------------------
/loss.py:
--------------------------------------------------------------------------------
1 | #from keras.objectives import *
2 | #from keras.metrics import binary_crossentropy
3 | import keras.backend as K
4 | import tensorflow as tf #tensorflow.compat.v1 as tf
5 | 
6 | def L_festa(_, y_pred):
7 | 
8 |     alpha = 0.5 # weight of neighbour in the feature space
9 |     beta = 1.5 # weight of neighbour in the image space
10 |     gamma = 1 # weight of far-away in the feature space
11 | 
12 |     sample_ratio = 0.01 # measure only a sample_ratio fraction of positions for computational efficiency
13 | 
14 |     _, h, w, c = K.int_shape(y_pred)
15 |     batch_size = K.shape(y_pred)[0]
16 |     # randomly sample a sample_ratio fraction of the feature positions
17 |     y_pred_all_reshape = K.reshape(y_pred, (batch_size, -1, c))
18 |     random_idxs = tf.random_shuffle(tf.range((h-2)*(w-2)))[:int(h*w*sample_ratio)]
19 |     random_idxs = random_idxs + 257 # shift away from the patch border so all 8-neighbour indices stay within the flattened patch (257 = w + 1 for 256x256 patches)
20 |     y_pred_reshape = tf.gather(y_pred_all_reshape, random_idxs, axis=1)
21 | 
22 |     # ***************************** cosine similarity ***************************
23 |     # calculating distance in the feature space
24 |     xixj = tf.matmul(y_pred_reshape, tf.transpose(y_pred_all_reshape, [0, 2, 1]))
25 |     similarity = xixj/(tf.expand_dims(tf.norm(y_pred_reshape, axis=-1), axis = -1)*tf.expand_dims(tf.norm(y_pred_all_reshape, axis=-1), axis = 1)+1e-8)
26 |     faraway_feature = tf.reduce_min(similarity, axis = -1) # feature with minimum similarity in the feature space
27 | 
28 |     # ***************************** euclidean distance ***************************
29 |     distance = tf.expand_dims(tf.square(tf.norm(y_pred_reshape, axis=-1)), axis=-1) - 2*xixj + tf.expand_dims(tf.square(tf.norm(y_pred_all_reshape, axis=-1)), axis = 1)
30 | 
31 |     ind_diag = K.cast(tf.stack([tf.range(int(h*w*sample_ratio)), random_idxs], axis=1), 'int64')
32 |     no_diag = tf.sparse_to_dense(ind_diag, [int(h*w*sample_ratio), h*w], K.repeat_elements(tf.constant([1.0]), int(h*w*sample_ratio), 0), validate_indices=False)*(tf.reduce_max(distance)+1)
33 |     no_diag = tf.tile(K.flatten(no_diag), (batch_size, ))
34 |     no_diag = K.reshape(no_diag, (batch_size, int(h*w*sample_ratio), h*w))
35 | 
36 |     neighbour_feature = tf.reduce_min(distance+no_diag, axis = -1) # feature with minimum distance in the feature space
37 | 
38 |     # get indexes of the 8-neighbouring pixels of each center pixel (assumes square patches, h == w)
39 |     random_idxs_L = random_idxs - 1
40 |     random_idxs_R = random_idxs + 1
41 |     random_idxs_TL = random_idxs - h -1
42 |     random_idxs_T = random_idxs - h
43 |     random_idxs_TR = random_idxs - h + 1
44 |     random_idxs_BL = random_idxs + h -1
45 |     random_idxs_B = random_idxs + h
46 |     random_idxs_BR = random_idxs + h + 1
47 | 
48 |     ind_L = K.cast(tf.stack([tf.range(int(h*w*sample_ratio)), random_idxs_L], axis=1), 'int64')
49 |     ind_R = K.cast(tf.stack([tf.range(int(h*w*sample_ratio)), random_idxs_R], axis=1), 'int64')
50 |     ind_TL = K.cast(tf.stack([tf.range(int(h*w*sample_ratio)), random_idxs_TL], axis=1), 'int64')
51 |     ind_T = K.cast(tf.stack([tf.range(int(h*w*sample_ratio)), random_idxs_T], axis=1), 'int64')
52 |     ind_TR = K.cast(tf.stack([tf.range(int(h*w*sample_ratio)), random_idxs_TR], axis=1), 'int64')
53 |     ind_BL = K.cast(tf.stack([tf.range(int(h*w*sample_ratio)), random_idxs_BL], axis=1), 'int64')
54 |     ind_B = K.cast(tf.stack([tf.range(int(h*w*sample_ratio)), random_idxs_B], axis=1), 'int64')
55 |     ind_BR = K.cast(tf.stack([tf.range(int(h*w*sample_ratio)), random_idxs_BR], axis=1), 'int64')
56 |     ind = tf.concat([ind_L, ind_R, ind_TL, ind_T, ind_TR, ind_BL, ind_B, ind_BR], axis=0)
57 |     mask = tf.sparse_to_dense(ind, [int(h*w*sample_ratio), h*w], K.repeat_elements(tf.constant([1.0]), int(h*w*sample_ratio)*8, 0), validate_indices=False)
58 |     distance_mask = tf.multiply(distance+no_diag, mask) # calculate distances between 8-neighbouring pixels and the center pixel
59 |     neighbour_spatial = tf.reduce_min(distance_mask, axis = -1) # feature with minimum distance in the image space
60 | 
61 |     delta = alpha*neighbour_feature + beta*neighbour_spatial + gamma*faraway_feature
62 | 
63 |     loss_reg = tf.reduce_mean(delta)
64 |     return loss_reg
65 | 
66 | 
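# Usage sketch (mirroring train.py): during training, L_festa regularises the dense
# feature map ('final_feat' in fcn.py), while the categorical cross-entropy is applied
# to the softmax output, so the total loss of Eq. 2 is lambda_festa * L_festa + cross-entropy:
#
#   from fcn import fcn_festa
#   model = fcn_festa(patch_size=256, test=False, noclutter=True)  # returns [features, predictions]
#   model.compile(optimizer='nadam',
#                 loss=[L_festa, 'categorical_crossentropy'],
#                 loss_weights=[0.1, 1])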
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow-gpu==1.12.0
2 | keras==2.2.5
3 | opencv-python==3.4.0
4 | scipy==1.1.0
5 | scikit-learn==0.19.1
6 | 
7 | 
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | from fcn import *
2 | from utils import *
3 | 
4 | # ************************* path ****************************
5 | weight_path = 'weights/fcn_line.h5' # weights trained on line labels created by annotator 1
6 | out_folder = 'festa'
7 | 
8 | # ******************* image configuration *******************
9 | patch_size = 256 # size of each patch
10 | stride_size = 128 # stride of sliding window
11 | noclutter = True
12 | 
13 | # ********************* initialize model ********************
14 | model = fcn_festa(patch_size, True, noclutter)
15 | model.load_weights(weight_path, by_name=True)
16 | 
17 | # ********************* evaluate ****************************
18 | TestModel(model, out_folder, patch_size, stride_size, noclutter)
19 | 
20 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | from fcn import *
2 | from utils import *
3 | from loss import *
4 | from keras.optimizers import Nadam
5 | from keras.callbacks import ReduceLROnPlateau
6 | 
7 | # **************************** path ********************************
8 | weight_path = 'weights/fcn.h5'
9 | 
10 | # ******************** image & label config ***********************
11 | patch_size = 256 # size of each patch
12 | stride_size = 64 # stride of sliding window
13 | noclutter = True # if True, the clutter/background class is excluded
14 | an_type = 'polygon' # type of sparse annotations: 'polygon', 'line', or 'point'
15 | an_id = 1 # id of annotators: 1 and 2 are expert, 3 and 4 are non-expert
16 | 
17 | # ************************ training scheme *************************
18 | batch_size = 5 # size of training batch
19 | epochs = 100 # number of training epochs
20 | lr = 2e-4 # initial learning rate
21 | lambda_festa = 0.1 # lambda in Eq. 2, weight of festa
22 | remove_null = True # whether to remove patches that contain no sparse annotations
23 | loss = [L_festa, 'categorical_crossentropy'] # final loss Eq. 2
24 | loss_weights = [lambda_festa, 1] # weight of each loss term in Eq. 2
25 | 
26 | # ********************** loading data *****************************
27 | print('loading training data ...')
28 | X_tra, y_tra, _, _ = dataloader(patch_size, stride_size, an_type, an_id, noclutter, remove_null)
29 | print('training data is loaded.')
30 | # ********************* initialize model ********************
31 | model = fcn_festa(patch_size, False, noclutter)
32 | optimizer = Nadam(lr=lr) # or define your own, e.g. SGD, Adam
33 | model.compile(optimizer=optimizer, loss=loss, loss_weights=loss_weights, metrics=['accuracy'])
34 | print('model is built')
35 | 
36 | # ********************* train ***********************************
37 | lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.1, cooldown=0, patience=0, min_lr=0.5e-10)
38 | model.fit(X_tra, [y_tra, y_tra], batch_size=batch_size, shuffle = True, epochs=epochs, validation_split=0.05, callbacks=[lr_reducer])
39 | model.save_weights(weight_path)
40 | 
41 | 
42 | 
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import scipy.io as sio
5 | from sklearn.metrics import confusion_matrix
6 | 
7 | folder_path = './data/Vaihingen/'
8 | im_header = 'top_mosaic_09cm_area'
9 | trainval_set = [1, 3, 5, 7, 13, 17, 21, 23, 26, 32, 37]
10 | test_set = [11, 15, 28, 30, 34]
11 | im_path = folder_path + 'img/'
12 | gt_path = folder_path + 'eroded_gt/mask_' # for calculating scores
13 | eps = 1e-14
14 | 
15 | def dataloader(patch_size=256, stride_size=64, an_type='polygon', an_id=1, noclutter=True, remove_null=True):
16 | 
17 |     # path of sparse label
18 |     sparse_label_path = folder_path + an_type + '/an' + str(an_id) + '/mask_'
19 | 
20 |     # crop images to patches
21 |     for fid in range(len(trainval_set)):
22 |         print(im_header + str(trainval_set[fid]) + '.png')
23 |         X, y = img2patch(im_header + str(trainval_set[fid]) + '.png', sparse_label_path, patch_size, stride_size, noclutter, remove_null)
24 |         X_tra = np.concatenate([X_tra, X], axis=0) if fid>0 else X
25 |         y_tra = np.concatenate([y_tra, y], axis=0) if fid>0 else y
26 | 
27 |     for fid in range(len(test_set)):
28 |         print(im_header + str(test_set[fid]) + '.tif')
29 |         X, y = img2patch(im_header + str(test_set[fid]) + '.tif', gt_path, patch_size, stride_size, noclutter, remove_null)
30 |         X_test = np.concatenate([X_test, X], axis=0) if fid>0 else X
31 |         y_test = np.concatenate([y_test, y], axis=0) if fid>0 else y
32 | 
33 |     X_tra = np.float32(X_tra)
34 |     y_tra = np.uint8(y_tra)
35 |     X_test = np.float32(X_test)
36 |     y_test = np.uint8(y_test)
37 |     print('the size of training data:', np.shape(X_tra))
38 | 
39 |     return X_tra, y_tra, X_test, y_test
40 | 
41 | 
42 | def img2patch(filename, label_path, patch_size=256, stride_size=256, noclutter=True, remove_null=True):
43 | 
44 |     im = cv2.imread(im_path + filename[:-4] + '.tif')
45 |     gt = bgr2index(cv2.imread(label_path + filename))
46 |     gt = gt[:, :, 0:5] if noclutter else gt
47 | 
48 |     # crop an image/mask to patches
49 |     X, y = [], []
50 |     im_row, im_col, _ = np.shape(im)
51 |     steps_row = int(np.floor((im_row - (patch_size - stride_size)) / stride_size))
52 |     steps_col = int(np.floor((im_col - (patch_size - stride_size)) / stride_size))
53 | 
54 |     for i in range(steps_row+1):
55 |         for j in range(steps_col+1):
56 |             if i == steps_row:
57 |                 if j == steps_col:
58 |                     X_patch = im[-patch_size:im_row, -patch_size:im_col, :]
59 |                     y_patch = gt[-patch_size:im_row, -patch_size:im_col, :]
60 |                 else:
61 |                     X_patch = im[-patch_size:im_row, (j * stride_size):(j * stride_size + patch_size),:]
62 |                     y_patch = gt[-patch_size:im_row, (j * stride_size):(j * stride_size + patch_size),:]
63 |             else:
64 |                 if j == steps_col:
65 |                     X_patch = im[(i * stride_size):(i * stride_size + patch_size), -patch_size:im_col, :]
66 |                     y_patch = gt[(i * stride_size):(i * stride_size + patch_size), -patch_size:im_col, :]
67 |                 else:
68 |                     X_patch = im[(i * stride_size):(i * stride_size + patch_size), (j * stride_size):(j * stride_size + patch_size), :]
69 |                     y_patch = gt[(i * stride_size):(i * stride_size + patch_size), (j * stride_size):(j * stride_size + patch_size), :]
70 | 
71 |             if remove_null and np.sum(y_patch) == 0:
72 |                 continue
73 | 
74 |             X.append(X_patch)
75 |             y.append(y_patch)
76 | 
77 |     X = np.float32(X)
78 |     y = np.uint8(y)
79 |     return X, y
80 | 
81 | def bgr2index(gt_bgr, eroded=False):
82 |     # mapping a BGR W x H x 3 image to W x H x C one-hot class maps
83 |     # opencv reads images in BGR format
84 |     im_col, im_row, _ = np.shape(gt_bgr)
85 |     gt = np.zeros((im_col, im_row, 6)) if not eroded else np.zeros((im_col, im_row, 7))
86 |     gt[(gt_bgr[:, :, 2] == 255) & (gt_bgr[:, :, 1] == 255) & (gt_bgr[:, :, 0] == 255), 0] = 1
87 |     gt[(gt_bgr[:, :, 2] == 0) & (gt_bgr[:, :, 1] == 0) & (gt_bgr[:, :, 0] == 255), 1] = 1
88 |     gt[(gt_bgr[:, :, 2] == 0) & (gt_bgr[:, :, 1] == 255) & (gt_bgr[:, :, 0] == 255), 2] = 1
89 |     gt[(gt_bgr[:, :, 2] == 0) & (gt_bgr[:, :, 1] == 255) & (gt_bgr[:, :, 0] == 0), 3] = 1
90 |     gt[(gt_bgr[:, :, 2] == 255) & (gt_bgr[:, :, 1] == 255) & (gt_bgr[:, :, 0] == 0), 4] = 1
91 |     gt[(gt_bgr[:, :, 2] == 255) & (gt_bgr[:, :, 1] == 0) & (gt_bgr[:, :, 0] == 0), 5] = 1
92 |     if eroded:
93 |         gt[(gt_bgr[:, :, 2] == 0) & (gt_bgr[:, :, 1] == 0) & (gt_bgr[:, :, 0] == 0), 6] = 1
94 | 
95 |     return gt
96 | 
97 | def index2bgr(c_map, bgr=True):
98 | 
99 |     # mapping W x H x 1 class index to W x H x 3 BGR image
100 |     im_col, im_row = np.shape(c_map)
101 |     c_map_r = np.zeros((im_col, im_row), 'uint8')
102 |     c_map_g = np.zeros((im_col, im_row), 'uint8')
103 |     c_map_b = np.zeros((im_col, im_row), 'uint8')
104 |     c_map_r[c_map == 0] = 255
105 |     c_map_r[c_map == 1] = 0
106 |     c_map_r[c_map == 2] = 0
107 |     c_map_r[c_map == 3] = 0
108 |     c_map_r[c_map == 4] = 255
109 |     c_map_r[c_map == 5] = 255
110 |     c_map_g[c_map == 0] = 255
111 |     c_map_g[c_map == 1] = 0
112 |     c_map_g[c_map == 2] = 255
113 |     c_map_g[c_map == 3] = 255
114 |     c_map_g[c_map == 4] = 255
115 |     c_map_g[c_map == 5] = 0
116 |     c_map_b[c_map == 0] = 255
117 |     c_map_b[c_map == 1] = 255
118 |     c_map_b[c_map == 2] = 255
119 |     c_map_b[c_map == 3] = 0
120 |     c_map_b[c_map == 4] = 0
121 |     c_map_b[c_map == 5] = 0
122 |     c_map_rgb = np.zeros((im_col, im_row, 3), 'uint8')
123 |     c_map_rgb[:, :, 0] = c_map_b if bgr else c_map_r
124 |     c_map_rgb[:, :, 1] = c_map_g
125 |     c_map_rgb[:, :, 2] = c_map_r if bgr else c_map_b
126 | 
127 |     return c_map_rgb
128 | 
129 | 
130 | 
131 | def eval_image(gt, pred, acc1, acc2, acc3, acc4, acc5, noclutter=True):
132 | 
133 |     im_row, im_col = np.shape(pred)
134 |     cal_classes = 5 if noclutter else 6 # no. of classes to calculate scores
135 | 
136 |     if noclutter:
137 |         gt[gt == 5] = 6 # pixels in clutter are not considered (regarding them as boundary)
138 | 
139 |     pred[gt == 6] = 6 # pixels on the boundary are not considered for calculating scores
140 |     OA = np.float32(len(np.where((np.float32(pred) - np.float32(gt)) == 0)[0])-len(np.where(gt==6)[0]))/np.float32(im_col*im_row-len(np.where(gt==6)[0]))
141 |     acc1 = acc1 + len(np.where((np.float32(pred) - np.float32(gt)) == 0)[0])-len(np.where(gt==6)[0])
142 |     acc2 = acc2 + im_col*im_row-len(np.where(gt==6)[0])
143 |     pred1 = np.reshape(pred, (-1, 1))
144 |     gt1 = np.reshape(gt, (-1, 1))
145 |     idx = np.where(gt1==6)[0]
146 |     pred1 = np.delete(pred1, idx)
147 |     gt1 = np.delete(gt1, idx)
148 |     CM = confusion_matrix(pred1, gt1)
149 |     for i in range(cal_classes):
150 |         tp = np.float32(CM[i, i])
151 |         acc3[i] = acc3[i] + tp
152 |         fp = np.sum(CM[:, i])-tp
153 |         acc4[i] = acc4[i] + fp
154 |         fn = np.sum(CM[i, :])-tp
155 |         acc5[i] = acc5[i] + fn
156 |         P = tp/(tp+fp+eps)
157 |         R = tp/(tp+fn+eps)
158 |         f1 = 2*(P*R)/(P+R+eps)
159 | 
160 |     return acc1, acc2, acc3, acc4, acc5
161 | 
162 | 
163 | def pred_image(filename, model, patch_size, stride_size):
164 | 
165 |     # cropping an image into patches for prediction
166 |     X, _ = img2patch(filename, gt_path, patch_size, stride_size, True, False)
167 |     pred_patches = model.predict(X)
168 | 
169 |     # rearranging patches into an image
170 |     # For pixels with multiple predictions, we take their averages
171 |     im_row, im_col, _ = np.shape(cv2.imread(im_path + filename))
172 |     steps_col = int(np.floor((im_col - (patch_size - stride_size)) / stride_size))
173 |     steps_row = int(np.floor((im_row - (patch_size - stride_size)) / stride_size))
174 |     im_out = np.zeros((im_row, im_col, np.shape(pred_patches)[-1]))
175 |     im_index = np.zeros((im_row, im_col, np.shape(pred_patches)[-1])) # counting the number of predictions for each pixel
176 | 
177 |     patch_id = 0
178 |     for i in range(steps_row+1):
179 |         for j in range(steps_col+1):
180 |             if i == steps_row:
181 |                 if j == steps_col:
182 |                     im_out[-patch_size:im_row, -patch_size:im_col, :] += pred_patches[patch_id]
183 |                     im_index[-patch_size:im_row, -patch_size:im_col, :] += np.ones((patch_size, patch_size, np.shape(pred_patches)[-1]))
184 |                 else:
185 |                     im_out[-patch_size:im_row, (j * stride_size):(j * stride_size + patch_size), :] += pred_patches[patch_id]
186 |                     im_index[-patch_size:im_row, (j * stride_size):(j * stride_size + patch_size), :] += np.ones((patch_size, patch_size, np.shape(pred_patches)[-1]))
187 |             else:
188 |                 if j == steps_col:
189 |                     im_out[(i * stride_size):(i * stride_size + patch_size), -patch_size:im_col, :] += pred_patches[patch_id]
190 |                     im_index[(i * stride_size):(i * stride_size + patch_size), -patch_size:im_col, :] += np.ones((patch_size, patch_size, np.shape(pred_patches)[-1]))
191 |                 else:
192 |                     im_out[(i * stride_size):(i * stride_size + patch_size), (j * stride_size):(j * stride_size + patch_size), :] += pred_patches[patch_id]
193 |                     im_index[(i * stride_size):(i * stride_size + patch_size), (j * stride_size):(j * stride_size + patch_size), :] += np.ones((patch_size, patch_size, np.shape(pred_patches)[-1]))
194 |             patch_id += 1
195 | 
196 |     return im_out/im_index # average the accumulated predictions of overlapping windows per pixel
197 | 
198 | def TestModel(model, output_folder='model', patch_size=256, stride_size=128, noclutter=True):
199 | 
200 |     # path for saving output
201 |     output_path = folder_path + 'outputs/' + output_folder + '/'
202 |     if not os.path.isdir(output_path):
203 |         print('The target folder is created.')
204 |         os.mkdir(output_path)
205 | 
206 |     nb_classes = 5 if noclutter else 6
207 |     acc1 = 0.0 # accumulator for correctly classified pixels
208 |     acc2 = 0.0 # accumulator for all valid pixels (boundary pixels excluded)
209 |     acc3 = np.zeros((nb_classes, 1)) # accumulator for true positives
210 |     acc4 = np.zeros((nb_classes, 1)) # accumulator for false positives
211 |     acc5 = np.zeros((nb_classes, 1)) # accumulator for false negatives
212 | 
213 |     # predicting and measuring all images
214 |     for im_id in range(len(test_set)):
215 |         filename = im_header + str(test_set[im_id]) + '.tif'
216 |         print(im_id+1, '/', len(test_set), ': predicting ', filename)
217 |         gt = bgr2index(cv2.imread(gt_path + filename), True)
218 | 
219 |         # predict one image
220 |         pred = pred_image(filename, model, patch_size, stride_size)
221 |         pred = np.argmax(pred, -1)
222 |         gt = np.argmax(gt, -1)
223 | 
224 |         # evaluate one image
225 |         acc1, acc2, acc3, acc4, acc5 = eval_image(gt, pred, acc1, acc2, acc3, acc4, acc5, noclutter)
226 |         cv2.imwrite(output_path+filename, index2bgr(pred, True))
227 |         print('Prediction is done. The output is saved in ', output_path)
228 | 
229 |     OA = acc1/acc2
230 | 
231 |     f1 = np.zeros((nb_classes, 1))
232 |     iou = np.zeros((nb_classes, 1))
233 |     #ca = np.zeros((nb_classes, 1));
234 |     for i in range(nb_classes):
235 |         P = acc3[i]/(acc3[i]+acc4[i])
236 |         R = acc3[i]/(acc3[i]+acc5[i])
237 |         f1[i] = 2*(P*R)/(P+R)
238 |         iou[i] = acc3[i]/(acc3[i]+acc4[i]+acc5[i])
239 |         #ca[i] = acc3[i]/(acc3[i]+acc4[i])
240 | 
241 |     f1_mean = np.mean(f1)
242 |     iou_mean = np.mean(iou)
243 |     #ca_mean = np.mean(ca)
244 |     print('mean f1:', f1_mean, '\nmean iou:', iou_mean, '\nOA:', OA)
245 | 
246 |     return 'All predictions are done, and output images are saved.'
247 | 
248 | 
--------------------------------------------------------------------------------
/weights/README.md:
--------------------------------------------------------------------------------
1 | `fcn_line.h5` can be downloaded from [Google Drive](https://drive.google.com/file/d/10BYt1lvRNBtgx76lMiuWj7J2kF-tSBV1/view?usp=sharing).
2 | 
--------------------------------------------------------------------------------