├── FCN_VGG16_Model.py
└── README.md

/FCN_VGG16_Model.py:
--------------------------------------------------------------------------------
import cv2
import glob
import json
import os
import numpy as np
import rasterio
from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.layers import Add, Conv2D, Conv2DTranspose, Input
from keras.optimizers import Adam



def training_mask_generation(input_image_filename, input_geojson_filename):
    """
    This function creates a binary raster mask from the polygons in a given geojson file, so as to label each pixel
    in the image as either background or target.

    Inputs:
    - input_image_filename: File path of the georeferenced image file to be used for model training
    - input_geojson_filename: File path of the georeferenced geojson file which contains the polygons drawn over the targets

    Outputs:
    - mask: Numpy array representing the training mask, with a value of 0 for background pixels and 1 for target pixels

    """

    with rasterio.open(input_image_filename) as f:
        metadata = f.profile
        image = np.transpose(f.read(tuple(np.arange(metadata['count']) + 1)), [1, 2, 0])

    mask = np.zeros((image.shape[0], image.shape[1]))

    # Extract the georeferencing information from the affine transform: upper left corner
    # coordinates and the pixel resolutions along each axis.
    ulx = metadata['transform'][2]
    xres = metadata['transform'][0]
    uly = metadata['transform'][5]
    yres = metadata['transform'][4]

    lrx = ulx + (image.shape[1] * xres)
    lry = uly - (image.shape[0] * abs(yres))

    with open(input_geojson_filename) as g:
        polygons = json.load(g)

    # Scale factors for converting map coordinates into pixel (column, row) coordinates.
    xf = ((image.shape[1]) ** 2 / (image.shape[1] + 1)) / (lrx - ulx)
    yf = ((image.shape[0]) ** 2 / (image.shape[0] + 1)) / (lry - uly)

    for feature in polygons['features']:
        coords = np.array(feature['geometry']['coordinates'][0][0])
        coords[:, 1] = yf * (coords[:, 1] - uly)
        coords[:, 0] = xf * (coords[:, 0] - ulx)
        position = np.round(coords).astype(np.int32)
        # Note that cv2.fillConvexPoly assumes each polygon is convex; cv2.fillPoly would be
        # needed for arbitrary (possibly concave) polygon shapes.
        cv2.fillConvexPoly(mask, position, 1)

    return mask
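

# ---------------------------------------------------------------------------
# Optional alternative (a sketch, not used by the pipeline below): the same
# binary mask can be produced with rasterio's built-in rasterize(), assuming
# the geojson shares the image's coordinate reference system. This also
# handles non-convex polygons. The helper name is illustrative only.
# ---------------------------------------------------------------------------

def training_mask_generation_rasterio_sketch(input_image_filename, input_geojson_filename):
    from rasterio import features
    with rasterio.open(input_image_filename) as f:
        out_shape = (f.height, f.width)
        transform = f.transform
    with open(input_geojson_filename) as g:
        geometries = [feature['geometry'] for feature in json.load(g)['features']]
    # Burn a value of 1 into pixels covered by any polygon; fill the rest with 0.
    return features.rasterize(geometries, out_shape = out_shape, transform = transform,
                              fill = 0, default_value = 1, dtype = 'uint8')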


def image_clip_to_segment_and_convert(image_array, mask_array, image_height_size, image_width_size, mode, percentage_overlap,
                                      buffer):
    """
    This function cuts up images of any input size into segments of a fixed size, with empty clipped areas padded with
    zeros to ensure that all segments have the same fixed size and contain valid data values. It then returns
    4-dimensional arrays containing the entire image and its mask in the form of fixed-size segments.

    Inputs:
    - image_array: Numpy array representing the image to be used for model training (channels-last format)
    - mask_array: Numpy array representing the binary raster mask which marks out background and target pixels
    - image_height_size: Height of the image segments to be used for model training
    - image_width_size: Width of the image segments to be used for model training
    - mode: Integer indicating which image dimensions require zero padding: 0 for height only, 1 for width only,
            2 for both, 3 for neither
    - percentage_overlap: Percentage of overlap between image patches extracted by the sliding window
    - buffer: Percentage allowance for an image patch to be populated by zeros at positions with no valid data values

    Outputs:
    - image_segment_array: 4-dimensional numpy array containing the image patches extracted from the input image array
    - mask_segment_array: 4-dimensional numpy array containing the mask patches extracted from the input binary raster mask

    """

    y_size = ((image_array.shape[0] // image_height_size) + 1) * image_height_size
    x_size = ((image_array.shape[1] // image_width_size) + 1) * image_width_size

    # Zero-pad the image and mask along whichever dimensions are not exact multiples of the patch size.
    if mode == 0:
        img_complete = np.zeros((y_size, image_array.shape[1], image_array.shape[2]))
        mask_complete = np.zeros((y_size, mask_array.shape[1], 1))
        img_complete[0 : image_array.shape[0], 0 : image_array.shape[1], 0 : image_array.shape[2]] = image_array
        mask_complete[0 : mask_array.shape[0], 0 : mask_array.shape[1], 0] = mask_array
    elif mode == 1:
        img_complete = np.zeros((image_array.shape[0], x_size, image_array.shape[2]))
        mask_complete = np.zeros((image_array.shape[0], x_size, 1))
        img_complete[0 : image_array.shape[0], 0 : image_array.shape[1], 0 : image_array.shape[2]] = image_array
        mask_complete[0 : mask_array.shape[0], 0 : mask_array.shape[1], 0] = mask_array
    elif mode == 2:
        img_complete = np.zeros((y_size, x_size, image_array.shape[2]))
        mask_complete = np.zeros((y_size, x_size, 1))
        img_complete[0 : image_array.shape[0], 0 : image_array.shape[1], 0 : image_array.shape[2]] = image_array
        mask_complete[0 : mask_array.shape[0], 0 : mask_array.shape[1], 0] = mask_array
    elif mode == 3:
        img_complete = image_array
        # The 2-D mask is given an explicit channel axis so that it can be indexed consistently
        # with the other modes (guards against an IndexError for images whose dimensions are
        # exact multiples of the patch size).
        mask_complete = np.expand_dims(mask_array, axis = 2)

    img_list = []
    mask_list = []

    # The rotation matrices are identical for every window position, so they are computed once.
    # Note that this rotation-based augmentation implicitly assumes square patches
    # (image_height_size == image_width_size).
    M_90 = cv2.getRotationMatrix2D((image_width_size / 2, image_height_size / 2), 90, 1.0)
    M_180 = cv2.getRotationMatrix2D((image_width_size / 2, image_height_size / 2), 180, 1.0)
    M_270 = cv2.getRotationMatrix2D((image_width_size / 2, image_height_size / 2), 270, 1.0)

    # Slide a window over the padded image, and augment each patch with 3 rotations and 3 flips
    # (cv2.flip codes: 0 flips about the x-axis, 1 about the y-axis, -1 about both).
    for i in range(0, int(img_complete.shape[0] - (2 - buffer) * image_height_size),
                   int((1 - percentage_overlap) * image_height_size)):
        for j in range(0, int(img_complete.shape[1] - (2 - buffer) * image_width_size),
                       int((1 - percentage_overlap) * image_width_size)):
            img_original = img_complete[i : i + image_height_size, j : j + image_width_size, 0 : image_array.shape[2]]
            img_rotate_90 = cv2.warpAffine(img_original, M_90, (image_height_size, image_width_size))
            img_rotate_180 = cv2.warpAffine(img_original, M_180, (image_width_size, image_height_size))
            img_rotate_270 = cv2.warpAffine(img_original, M_270, (image_height_size, image_width_size))
            img_flip_hor = cv2.flip(img_original, 0)
            img_flip_vert = cv2.flip(img_original, 1)
            img_flip_both = cv2.flip(img_original, -1)
            img_list.extend([img_original, img_rotate_90, img_rotate_180, img_rotate_270, img_flip_hor, img_flip_vert,
                             img_flip_both])
            mask_original = mask_complete[i : i + image_height_size, j : j + image_width_size, 0]
            mask_rotate_90 = cv2.warpAffine(mask_original, M_90, (image_height_size, image_width_size))
            mask_rotate_180 = cv2.warpAffine(mask_original, M_180, (image_width_size, image_height_size))
            mask_rotate_270 = cv2.warpAffine(mask_original, M_270, (image_height_size, image_width_size))
            mask_flip_hor = cv2.flip(mask_original, 0)
            mask_flip_vert = cv2.flip(mask_original, 1)
            mask_flip_both = cv2.flip(mask_original, -1)
            mask_list.extend([mask_original, mask_rotate_90, mask_rotate_180, mask_rotate_270, mask_flip_hor,
                              mask_flip_vert, mask_flip_both])

    image_segment_array = np.zeros((len(img_list), image_height_size, image_width_size, image_array.shape[2]))
    mask_segment_array = np.zeros((len(mask_list), image_height_size, image_width_size, 1))

    for index in range(len(img_list)):
        image_segment_array[index] = img_list[index]
        mask_segment_array[index, :, :, 0] = mask_list[index]

    return image_segment_array, mask_segment_array
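

# Usage sketch for image_clip_to_segment_and_convert (values are illustrative): each sliding-window
# position yields 7 patches (the original plus 3 rotations and 3 flips), so the first axis of the
# outputs is 7 times the number of window positions. `img` and `msk` are assumed to be numpy arrays
# loaded elsewhere (e.g. via rasterio and training_mask_generation as above).
#
# img_patches, mask_patches = image_clip_to_segment_and_convert(
#     img, msk, image_height_size = 256, image_width_size = 256, mode = 2,
#     percentage_overlap = 0.5, buffer = 0.25)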


def training_data_generation(DATA_DIR, img_height_size, img_width_size, perc, buff):
    """
    This function converts image files and their respective polygon training masks into numpy arrays, so as to
    facilitate their use for model training.

    Inputs:
    - DATA_DIR: File path of the folder containing the image files, with their respective polygons in a subfolder
    - img_height_size: Height of the image patches to be used for model training
    - img_width_size: Width of the image patches to be used for model training
    - perc: Percentage of overlap between image patches extracted by the sliding window
    - buff: Percentage allowance for an image patch to be populated by zeros at positions with no valid data values

    Outputs:
    - img_full_array: 4-dimensional numpy array containing the image patches extracted from all image files
    - mask_full_array: 4-dimensional numpy array containing the binary raster mask patches extracted from all polygons

    """

    if perc < 0 or perc > 1:
        raise ValueError('Please input a number between 0 and 1 (inclusive) for perc.')

    if buff < 0 or buff > 1:
        raise ValueError('Please input a number between 0 and 1 (inclusive) for buff.')

    # The file lists are sorted so that each image is deterministically paired with its
    # corresponding polygon file, since glob does not guarantee any particular ordering.
    img_files = sorted(glob.glob(os.path.join(DATA_DIR, 'Train_*.tif')))
    polygon_files = sorted(glob.glob(os.path.join(DATA_DIR, 'Training Polygons', 'Train_*.geojson')))

    img_array_list = []
    mask_array_list = []

    for file in range(len(img_files)):
        with rasterio.open(img_files[file]) as f:
            metadata = f.profile
            img = np.transpose(f.read(tuple(np.arange(metadata['count']) + 1)), [1, 2, 0])

        mask = training_mask_generation(img_files[file], polygon_files[file])

        # Select the padding mode based on which image dimensions are not exact multiples of the patch size.
        if (img.shape[0] % img_height_size != 0) and (img.shape[1] % img_width_size == 0):
            img_array, mask_array = image_clip_to_segment_and_convert(img, mask, img_height_size, img_width_size,
                                                                      mode = 0, percentage_overlap = perc, buffer = buff)
        elif (img.shape[0] % img_height_size == 0) and (img.shape[1] % img_width_size != 0):
            img_array, mask_array = image_clip_to_segment_and_convert(img, mask, img_height_size, img_width_size,
                                                                      mode = 1, percentage_overlap = perc, buffer = buff)
        elif (img.shape[0] % img_height_size != 0) and (img.shape[1] % img_width_size != 0):
            img_array, mask_array = image_clip_to_segment_and_convert(img, mask, img_height_size, img_width_size,
                                                                      mode = 2, percentage_overlap = perc, buffer = buff)
        else:
            img_array, mask_array = image_clip_to_segment_and_convert(img, mask, img_height_size, img_width_size,
                                                                      mode = 3, percentage_overlap = perc, buffer = buff)

        img_array_list.append(img_array)
        mask_array_list.append(mask_array)

    img_full_array = np.concatenate(img_array_list, axis = 0)
    mask_full_array = np.concatenate(mask_array_list, axis = 0)

    return img_full_array, mask_full_array
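

# Usage sketch for training_data_generation: the folder layout implied by the glob patterns
# above is (file names are illustrative):
#
#   DATA_DIR/Train_1.tif, DATA_DIR/Train_2.tif, ...
#   DATA_DIR/Training Polygons/Train_1.geojson, DATA_DIR/Training Polygons/Train_2.geojson, ...
#
# x_train, y_train = training_data_generation(DATA_DIR, 256, 256, perc = 0.5, buff = 0.25)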


def FCN_VGG16_model(img_height_size, img_width_size, transfer_learning = True, l_r = 0.0001):
    """
    This function generates the Fully Convolutional Network (FCN) architecture described in the paper 'Water Body
    Extraction from Very High Spatial Resolution Remote Sensing Data Based on Fully Convolutional Networks' by Li L.,
    Yan Z., Shen Q., Cheng G., Gao L., Zhang B. (2019).

    Note that this network only accepts a 3-channel image as input.

    Inputs:
    - img_height_size: Height of the image patches to be used for model training
    - img_width_size: Width of the image patches to be used for model training
    - transfer_learning: Boolean indicating whether to initialise the VGG16 backbone with pretrained ImageNet weights
    - l_r: Learning rate for the Adam optimizer

    Outputs:
    - fcn_vgg16_model: FCN model to be trained using the input parameters and network architecture

    """

    img_input = Input(shape = (img_height_size, img_width_size, 3))

    # With transfer learning, the VGG16 backbone is initialised with ImageNet weights (the Keras
    # default); otherwise it is initialised randomly.
    if transfer_learning:
        vgg16_backbone = VGG16(include_top = False, input_tensor = img_input)
    else:
        vgg16_backbone = VGG16(include_top = False, weights = None, input_tensor = img_input)

    # Tap the outputs of the last three pooling stages of VGG16 for the skip connections.
    vgg16_5 = vgg16_backbone.get_layer('block5_pool').output
    vgg16_4 = vgg16_backbone.get_layer('block4_pool').output
    vgg16_3 = vgg16_backbone.get_layer('block3_pool').output

    # FCN-8s style decoder: reduce each tapped feature map to 2 channels with a 1 x 1 convolution,
    # upsample by 2 with a transposed convolution, and fuse with the next shallower stage by
    # element-wise addition.
    b5_filtered = Conv2D(2, (1, 1), padding = 'same')(vgg16_5)
    b5_upsam = Conv2DTranspose(2, (4, 4), strides = (2, 2), padding = 'same')(b5_filtered)

    b4_filtered = Conv2D(2, (1, 1), padding = 'same')(vgg16_4)
    b4_final = Add()([b5_upsam, b4_filtered])
    b4_upsam = Conv2DTranspose(2, (4, 4), strides = (2, 2), padding = 'same')(b4_final)

    b3_filtered = Conv2D(2, (1, 1), padding = 'same')(vgg16_3)
    b3_final = Add()([b4_upsam, b3_filtered])

    # Upsample by 8 to recover the input resolution.
    final_res_layer = Conv2DTranspose(2, (16, 16), strides = (8, 8), padding = 'same')(b3_final)

    pred_layer = Conv2D(1, (1, 1), padding = 'same', activation = 'sigmoid')(final_res_layer)

    fcn_vgg16_model = Model(inputs = img_input, outputs = pred_layer)
    fcn_vgg16_model.compile(loss = 'binary_crossentropy', optimizer = Adam(lr = l_r), metrics = ['binary_crossentropy'])

    return fcn_vgg16_model
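

# Optional sketch: with transfer_learning = True the pretrained ImageNet weights are loaded but
# remain trainable, so they are fine-tuned along with the decoder. To keep the pretrained encoder
# fixed instead (an option, not something prescribed by the referenced paper), the VGG16 layers
# could be frozen inside FCN_VGG16_model before compile() is called, e.g.:
#
#     for layer in vgg16_backbone.layers:
#         layer.trainable = False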


def image_model_predict(input_image_filename, output_filename, img_height_size, img_width_size, fitted_model, write):
    """
    This function cuts up an image into segments of a fixed size, and feeds each segment to the model for prediction.
    The output masks are then allocated to their corresponding locations in the image, so as to obtain the complete
    mask for the entire image without being constrained by image size.

    Inputs:
    - input_image_filename: File path of the image file for which prediction is to be conducted
    - output_filename: File path of the output predicted binary raster mask file
    - img_height_size: Height of the image patches to be used for model prediction
    - img_width_size: Width of the image patches to be used for model prediction
    - fitted_model: Trained keras model which is to be used for prediction
    - write: Boolean indicating whether to write the predicted binary raster mask to file

    Output:
    - mask_complete: Numpy array of the predicted binary raster mask for the input image

    """

    with rasterio.open(input_image_filename) as f:
        metadata = f.profile
        img = np.transpose(f.read(tuple(np.arange(metadata['count']) + 1)), [1, 2, 0])

    y_size = ((img.shape[0] // img_height_size) + 1) * img_height_size
    x_size = ((img.shape[1] // img_width_size) + 1) * img_width_size

    # Zero-pad the image along whichever dimensions are not exact multiples of the patch size.
    if (img.shape[0] % img_height_size != 0) and (img.shape[1] % img_width_size == 0):
        img_complete = np.zeros((y_size, img.shape[1], img.shape[2]))
        img_complete[0 : img.shape[0], 0 : img.shape[1], 0 : img.shape[2]] = img
    elif (img.shape[0] % img_height_size == 0) and (img.shape[1] % img_width_size != 0):
        img_complete = np.zeros((img.shape[0], x_size, img.shape[2]))
        img_complete[0 : img.shape[0], 0 : img.shape[1], 0 : img.shape[2]] = img
    elif (img.shape[0] % img_height_size != 0) and (img.shape[1] % img_width_size != 0):
        img_complete = np.zeros((y_size, x_size, img.shape[2]))
        img_complete[0 : img.shape[0], 0 : img.shape[1], 0 : img.shape[2]] = img
    else:
        img_complete = img

    mask = np.zeros((img_complete.shape[0], img_complete.shape[1], 1))
    img_holder = np.zeros((1, img_height_size, img_width_size, img.shape[2]))

    # Predict on each non-overlapping patch, and place the result at its corresponding location in the mask.
    for i in range(0, img_complete.shape[0], img_height_size):
        for j in range(0, img_complete.shape[1], img_width_size):
            img_holder[0] = img_complete[i : i + img_height_size, j : j + img_width_size, 0 : img.shape[2]]
            preds = fitted_model.predict(img_holder)
            mask[i : i + img_height_size, j : j + img_width_size, 0] = preds[0, :, :, 0]

    # Crop the mask back to the original image extent, and convert to the channels-first
    # format expected by rasterio.
    mask_complete = np.expand_dims(mask[0 : img.shape[0], 0 : img.shape[1], 0], axis = 2)
    mask_complete = np.transpose(mask_complete, [2, 0, 1]).astype('float32')

    if write:
        metadata['count'] = 1
        metadata['dtype'] = 'float32'

        with rasterio.open(output_filename, 'w', **metadata) as dst:
            dst.write(mask_complete)

    return mask_complete
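

# Usage sketch for image_model_predict (paths are illustrative), where `model` is a trained
# keras model, e.g. one returned by FCN_VGG16_model() and fitted on the training arrays:
#
# pred_mask = image_model_predict('path/to/Test_1.tif', 'path/to/Test_1_mask.tif',
#                                 256, 256, fitted_model = model, write = True)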
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Water-Body-Extraction-from-Very-High-Spatial-Resolution-Remote-Sensing-Data-Based-on-FCNs
Python implementation of the Fully Convolutional Network (FCN) proposed in Li et al. (2019)

This repository includes functions to preprocess the input images and their respective polygons so as to create the input image
patches and mask patches to be used for model training. The CNN used here is the Fully Convolutional Network (FCN) model implemented
in the paper 'Water Body Extraction from Very High Spatial Resolution Remote Sensing Data Based on Fully Convolutional Networks' by
Li L., Yan Z., Shen Q., Cheng G., Gao L., Zhang B. (2019).

Requirements:
- opencv-python (cv2)
- numpy
- rasterio
- keras (TensorFlow backend)

glob and json are part of the Python standard library, so they require no separate installation.
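
## Quickstart

A minimal end-to-end sketch (the paths, patch size and training hyperparameters below are illustrative, not values prescribed by the paper):

```python
from FCN_VGG16_Model import training_data_generation, FCN_VGG16_model, image_model_predict

# Extract augmented 256 x 256 training patches from the images and polygons under DATA_DIR.
x_train, y_train = training_data_generation('path/to/data', 256, 256, perc = 0.5, buff = 0.25)

# Build and train the FCN with a pretrained VGG16 backbone (inputs must be 3-channel).
model = FCN_VGG16_model(256, 256, transfer_learning = True, l_r = 0.0001)
model.fit(x_train, y_train, batch_size = 8, epochs = 50, validation_split = 0.1)

# Predict on a new georeferenced image and write the mask to file.
pred_mask = image_model_predict('path/to/test.tif', 'path/to/test_mask.tif', 256, 256, model, write = True)
```
--------------------------------------------------------------------------------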