├── FCN_VGG16_Model.py
└── README.md

/FCN_VGG16_Model.py:
--------------------------------------------------------------------------------
import cv2
import glob
import json
import os
import numpy as np
import rasterio
from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.layers import Add, Conv2D, Conv2DTranspose, Input
from keras.optimizers import Adam



def training_mask_generation(input_image_filename, input_geojson_filename):
    """
    This function creates a binary raster mask from the polygons in a given geojson file, so as to label each pixel
    in the image as either background or target.

    Inputs:
    - input_image_filename: File path of the georeferenced image file to be used for model training
    - input_geojson_filename: File path of the georeferenced geojson file which contains the polygons drawn over the targets

    Outputs:
    - mask: Numpy array representing the training mask, with a value of 0 for background pixels and 1 for target pixels

    """

    with rasterio.open(input_image_filename) as f:
        metadata = f.profile
        image = np.transpose(f.read(tuple(np.arange(metadata['count']) + 1)), [1, 2, 0])

    mask = np.zeros((image.shape[0], image.shape[1]))

    # Extract the georeferencing information from the affine transform: upper left corner
    # coordinates and the pixel resolutions along each axis.
    ulx = metadata['transform'][2]
    xres = metadata['transform'][0]
    uly = metadata['transform'][5]
    yres = metadata['transform'][4]

    lrx = ulx + (image.shape[1] * xres)
    lry = uly - (image.shape[0] * abs(yres))

    with open(input_geojson_filename) as g:
        polygons = json.load(g)

    # Scale factors for converting map coordinates into pixel (column, row) coordinates.
    xf = ((image.shape[1]) ** 2 / (image.shape[1] + 1)) / (lrx - ulx)
    yf = ((image.shape[0]) ** 2 / (image.shape[0] + 1)) / (lry - uly)

    for feature in polygons['features']:
        coords = np.array(feature['geometry']['coordinates'][0][0])
        coords[:, 1] = yf * (coords[:, 1] - uly)
        coords[:, 0] = xf * (coords[:, 0] - ulx)
        position = np.round(coords).astype(np.int32)
        # Note that cv2.fillConvexPoly assumes each polygon is convex; cv2.fillPoly would be
        # needed for arbitrary (possibly concave) polygon shapes.
        cv2.fillConvexPoly(mask, position, 1)

    return mask
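

# ---------------------------------------------------------------------------
# Optional alternative (a sketch, not used by the pipeline below): the same
# binary mask can be produced with rasterio's built-in rasterize(), assuming
# the geojson shares the image's coordinate reference system. This also
# handles non-convex polygons. The helper name is illustrative only.
# ---------------------------------------------------------------------------

def training_mask_generation_rasterio_sketch(input_image_filename, input_geojson_filename):
    from rasterio import features
    with rasterio.open(input_image_filename) as f:
        out_shape = (f.height, f.width)
        transform = f.transform
    with open(input_geojson_filename) as g:
        geometries = [feature['geometry'] for feature in json.load(g)['features']]
    # Burn a value of 1 into pixels covered by any polygon; fill the rest with 0.
    return features.rasterize(geometries, out_shape = out_shape, transform = transform,
                              fill = 0, default_value = 1, dtype = 'uint8')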


def image_clip_to_segment_and_convert(image_array, mask_array, image_height_size, image_width_size, mode, percentage_overlap,
                                      buffer):
    """
    This function cuts up images of any input size into segments of a fixed size, with empty clipped areas padded with
    zeros to ensure that all segments have the same fixed size and contain valid data values. It then returns
    4-dimensional arrays containing the entire image and its mask in the form of fixed-size segments.

    Inputs:
    - image_array: Numpy array representing the image to be used for model training (channels-last format)
    - mask_array: Numpy array representing the binary raster mask which marks out background and target pixels
    - image_height_size: Height of the image segments to be used for model training
    - image_width_size: Width of the image segments to be used for model training
    - mode: Integer indicating which image dimensions require zero padding: 0 for height only, 1 for width only,
            2 for both, 3 for neither
    - percentage_overlap: Percentage of overlap between image patches extracted by the sliding window
    - buffer: Percentage allowance for an image patch to be populated by zeros at positions with no valid data values

    Outputs:
    - image_segment_array: 4-dimensional numpy array containing the image patches extracted from the input image array
    - mask_segment_array: 4-dimensional numpy array containing the mask patches extracted from the input binary raster mask

    """

    y_size = ((image_array.shape[0] // image_height_size) + 1) * image_height_size
    x_size = ((image_array.shape[1] // image_width_size) + 1) * image_width_size

    # Zero-pad the image and mask along whichever dimensions are not exact multiples of the patch size.
    if mode == 0:
        img_complete = np.zeros((y_size, image_array.shape[1], image_array.shape[2]))
        mask_complete = np.zeros((y_size, mask_array.shape[1], 1))
        img_complete[0 : image_array.shape[0], 0 : image_array.shape[1], 0 : image_array.shape[2]] = image_array
        mask_complete[0 : mask_array.shape[0], 0 : mask_array.shape[1], 0] = mask_array
    elif mode == 1:
        img_complete = np.zeros((image_array.shape[0], x_size, image_array.shape[2]))
        mask_complete = np.zeros((image_array.shape[0], x_size, 1))
        img_complete[0 : image_array.shape[0], 0 : image_array.shape[1], 0 : image_array.shape[2]] = image_array
        mask_complete[0 : mask_array.shape[0], 0 : mask_array.shape[1], 0] = mask_array
    elif mode == 2:
        img_complete = np.zeros((y_size, x_size, image_array.shape[2]))
        mask_complete = np.zeros((y_size, x_size, 1))
        img_complete[0 : image_array.shape[0], 0 : image_array.shape[1], 0 : image_array.shape[2]] = image_array
        mask_complete[0 : mask_array.shape[0], 0 : mask_array.shape[1], 0] = mask_array
    elif mode == 3:
        img_complete = image_array
        # The 2-D mask is given an explicit channel axis so that it can be indexed consistently
        # with the other modes (guards against an IndexError for images whose dimensions are
        # exact multiples of the patch size).
        mask_complete = np.expand_dims(mask_array, axis = 2)

    img_list = []
    mask_list = []

    # The rotation matrices are identical for every window position, so they are computed once.
    # Note that this rotation-based augmentation implicitly assumes square patches
    # (image_height_size == image_width_size).
    M_90 = cv2.getRotationMatrix2D((image_width_size / 2, image_height_size / 2), 90, 1.0)
    M_180 = cv2.getRotationMatrix2D((image_width_size / 2, image_height_size / 2), 180, 1.0)
    M_270 = cv2.getRotationMatrix2D((image_width_size / 2, image_height_size / 2), 270, 1.0)

    # Slide a window over the padded image, and augment each patch with 3 rotations and 3 flips
    # (cv2.flip codes: 0 flips about the x-axis, 1 about the y-axis, -1 about both).
    for i in range(0, int(img_complete.shape[0] - (2 - buffer) * image_height_size),
                   int((1 - percentage_overlap) * image_height_size)):
        for j in range(0, int(img_complete.shape[1] - (2 - buffer) * image_width_size),
                       int((1 - percentage_overlap) * image_width_size)):
            img_original = img_complete[i : i + image_height_size, j : j + image_width_size, 0 : image_array.shape[2]]
            img_rotate_90 = cv2.warpAffine(img_original, M_90, (image_height_size, image_width_size))
            img_rotate_180 = cv2.warpAffine(img_original, M_180, (image_width_size, image_height_size))
            img_rotate_270 = cv2.warpAffine(img_original, M_270, (image_height_size, image_width_size))
            img_flip_hor = cv2.flip(img_original, 0)
            img_flip_vert = cv2.flip(img_original, 1)
            img_flip_both = cv2.flip(img_original, -1)
            img_list.extend([img_original, img_rotate_90, img_rotate_180, img_rotate_270, img_flip_hor, img_flip_vert,
                             img_flip_both])
            mask_original = mask_complete[i : i + image_height_size, j : j + image_width_size, 0]
            mask_rotate_90 = cv2.warpAffine(mask_original, M_90, (image_height_size, image_width_size))
            mask_rotate_180 = cv2.warpAffine(mask_original, M_180, (image_width_size, image_height_size))
            mask_rotate_270 = cv2.warpAffine(mask_original, M_270, (image_height_size, image_width_size))
            mask_flip_hor = cv2.flip(mask_original, 0)
            mask_flip_vert = cv2.flip(mask_original, 1)
            mask_flip_both = cv2.flip(mask_original, -1)
            mask_list.extend([mask_original, mask_rotate_90, mask_rotate_180, mask_rotate_270, mask_flip_hor,
                              mask_flip_vert, mask_flip_both])

    image_segment_array = np.zeros((len(img_list), image_height_size, image_width_size, image_array.shape[2]))
    mask_segment_array = np.zeros((len(mask_list), image_height_size, image_width_size, 1))

    for index in range(len(img_list)):
        image_segment_array[index] = img_list[index]
        mask_segment_array[index, :, :, 0] = mask_list[index]

    return image_segment_array, mask_segment_array
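

# Usage sketch for image_clip_to_segment_and_convert (values are illustrative): each sliding-window
# position yields 7 patches (the original plus 3 rotations and 3 flips), so the first axis of the
# outputs is 7 times the number of window positions. `img` and `msk` are assumed to be numpy arrays
# loaded elsewhere (e.g. via rasterio and training_mask_generation as above).
#
# img_patches, mask_patches = image_clip_to_segment_and_convert(
#     img, msk, image_height_size = 256, image_width_size = 256, mode = 2,
#     percentage_overlap = 0.5, buffer = 0.25)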


def training_data_generation(DATA_DIR, img_height_size, img_width_size, perc, buff):
    """
    This function converts image files and their respective polygon training masks into numpy arrays, so as to
    facilitate their use for model training.

    Inputs:
    - DATA_DIR: File path of the folder containing the image files, with their respective polygons in a subfolder
    - img_height_size: Height of the image patches to be used for model training
    - img_width_size: Width of the image patches to be used for model training
    - perc: Percentage of overlap between image patches extracted by the sliding window
    - buff: Percentage allowance for an image patch to be populated by zeros at positions with no valid data values

    Outputs:
    - img_full_array: 4-dimensional numpy array containing the image patches extracted from all image files
    - mask_full_array: 4-dimensional numpy array containing the binary raster mask patches extracted from all polygons

    """

    if perc < 0 or perc > 1:
        raise ValueError('Please input a number between 0 and 1 (inclusive) for perc.')

    if buff < 0 or buff > 1:
        raise ValueError('Please input a number between 0 and 1 (inclusive) for buff.')

    # The file lists are sorted so that each image is deterministically paired with its
    # corresponding polygon file, since glob does not guarantee any particular ordering.
    img_files = sorted(glob.glob(os.path.join(DATA_DIR, 'Train_*.tif')))
    polygon_files = sorted(glob.glob(os.path.join(DATA_DIR, 'Training Polygons', 'Train_*.geojson')))

    img_array_list = []
    mask_array_list = []

    for file in range(len(img_files)):
        with rasterio.open(img_files[file]) as f:
            metadata = f.profile
            img = np.transpose(f.read(tuple(np.arange(metadata['count']) + 1)), [1, 2, 0])

        mask = training_mask_generation(img_files[file], polygon_files[file])

        # Select the padding mode based on which image dimensions are not exact multiples of the patch size.
        if (img.shape[0] % img_height_size != 0) and (img.shape[1] % img_width_size == 0):
            img_array, mask_array = image_clip_to_segment_and_convert(img, mask, img_height_size, img_width_size,
                                                                      mode = 0, percentage_overlap = perc, buffer = buff)
        elif (img.shape[0] % img_height_size == 0) and (img.shape[1] % img_width_size != 0):
            img_array, mask_array = image_clip_to_segment_and_convert(img, mask, img_height_size, img_width_size,
                                                                      mode = 1, percentage_overlap = perc, buffer = buff)
        elif (img.shape[0] % img_height_size != 0) and (img.shape[1] % img_width_size != 0):
            img_array, mask_array = image_clip_to_segment_and_convert(img, mask, img_height_size, img_width_size,
                                                                      mode = 2, percentage_overlap = perc, buffer = buff)
        else:
            img_array, mask_array = image_clip_to_segment_and_convert(img, mask, img_height_size, img_width_size,
                                                                      mode = 3, percentage_overlap = perc, buffer = buff)

        img_array_list.append(img_array)
        mask_array_list.append(mask_array)

    img_full_array = np.concatenate(img_array_list, axis = 0)
    mask_full_array = np.concatenate(mask_array_list, axis = 0)

    return img_full_array, mask_full_array
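

# Usage sketch for training_data_generation: the folder layout implied by the glob patterns
# above is (file names are illustrative):
#
#   DATA_DIR/Train_1.tif, DATA_DIR/Train_2.tif, ...
#   DATA_DIR/Training Polygons/Train_1.geojson, DATA_DIR/Training Polygons/Train_2.geojson, ...
#
# x_train, y_train = training_data_generation(DATA_DIR, 256, 256, perc = 0.5, buff = 0.25)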


def FCN_VGG16_model(img_height_size, img_width_size, transfer_learning = True, l_r = 0.0001):
    """
    This function generates the Fully Convolutional Network (FCN) architecture described in the paper 'Water Body
    Extraction from Very High Spatial Resolution Remote Sensing Data Based on Fully Convolutional Networks' by Li L.,
    Yan Z., Shen Q., Cheng G., Gao L., Zhang B. (2019).

    Note that this network only accepts a 3-channel image as input.

    Inputs:
    - img_height_size: Height of the image patches to be used for model training
    - img_width_size: Width of the image patches to be used for model training
    - transfer_learning: Boolean indicating whether to initialise the VGG16 backbone with pretrained ImageNet weights
    - l_r: Learning rate for the Adam optimizer

    Outputs:
    - fcn_vgg16_model: FCN model to be trained using the input parameters and network architecture

    """

    img_input = Input(shape = (img_height_size, img_width_size, 3))

    # With transfer learning, the VGG16 backbone is initialised with ImageNet weights (the Keras
    # default); otherwise it is initialised randomly.
    if transfer_learning:
        vgg16_backbone = VGG16(include_top = False, input_tensor = img_input)
    else:
        vgg16_backbone = VGG16(include_top = False, weights = None, input_tensor = img_input)

    # Tap the outputs of the last three pooling stages of VGG16 for the skip connections.
    vgg16_5 = vgg16_backbone.get_layer('block5_pool').output
    vgg16_4 = vgg16_backbone.get_layer('block4_pool').output
    vgg16_3 = vgg16_backbone.get_layer('block3_pool').output

    # FCN-8s style decoder: reduce each tapped feature map to 2 channels with a 1 x 1 convolution,
    # upsample by 2 with a transposed convolution, and fuse with the next shallower stage by
    # element-wise addition.
    b5_filtered = Conv2D(2, (1, 1), padding = 'same')(vgg16_5)
    b5_upsam = Conv2DTranspose(2, (4, 4), strides = (2, 2), padding = 'same')(b5_filtered)

    b4_filtered = Conv2D(2, (1, 1), padding = 'same')(vgg16_4)
    b4_final = Add()([b5_upsam, b4_filtered])
    b4_upsam = Conv2DTranspose(2, (4, 4), strides = (2, 2), padding = 'same')(b4_final)

    b3_filtered = Conv2D(2, (1, 1), padding = 'same')(vgg16_3)
    b3_final = Add()([b4_upsam, b3_filtered])

    # Upsample by 8 to recover the input resolution.
    final_res_layer = Conv2DTranspose(2, (16, 16), strides = (8, 8), padding = 'same')(b3_final)

    pred_layer = Conv2D(1, (1, 1), padding = 'same', activation = 'sigmoid')(final_res_layer)

    fcn_vgg16_model = Model(inputs = img_input, outputs = pred_layer)
    fcn_vgg16_model.compile(loss = 'binary_crossentropy', optimizer = Adam(lr = l_r), metrics = ['binary_crossentropy'])

    return fcn_vgg16_model
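

# Optional sketch: with transfer_learning = True the pretrained ImageNet weights are loaded but
# remain trainable, so they are fine-tuned along with the decoder. To keep the pretrained encoder
# fixed instead (an option, not something prescribed by the referenced paper), the VGG16 layers
# could be frozen inside FCN_VGG16_model before compile() is called, e.g.:
#
#     for layer in vgg16_backbone.layers:
#         layer.trainable = False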


def image_model_predict(input_image_filename, output_filename, img_height_size, img_width_size, fitted_model, write):
    """
    This function cuts up an image into segments of a fixed size, and feeds each segment to the model for prediction.
    The output masks are then allocated to their corresponding locations in the image, so as to obtain the complete
    mask for the entire image without being constrained by image size.

    Inputs:
    - input_image_filename: File path of the image file for which prediction is to be conducted
    - output_filename: File path of the output predicted binary raster mask file
    - img_height_size: Height of the image patches to be used for model prediction
    - img_width_size: Width of the image patches to be used for model prediction
    - fitted_model: Trained keras model which is to be used for prediction
    - write: Boolean indicating whether to write the predicted binary raster mask to file

    Output:
    - mask_complete: Numpy array of the predicted binary raster mask for the input image

    """

    with rasterio.open(input_image_filename) as f:
        metadata = f.profile
        img = np.transpose(f.read(tuple(np.arange(metadata['count']) + 1)), [1, 2, 0])

    y_size = ((img.shape[0] // img_height_size) + 1) * img_height_size
    x_size = ((img.shape[1] // img_width_size) + 1) * img_width_size

    # Zero-pad the image along whichever dimensions are not exact multiples of the patch size.
    if (img.shape[0] % img_height_size != 0) and (img.shape[1] % img_width_size == 0):
        img_complete = np.zeros((y_size, img.shape[1], img.shape[2]))
        img_complete[0 : img.shape[0], 0 : img.shape[1], 0 : img.shape[2]] = img
    elif (img.shape[0] % img_height_size == 0) and (img.shape[1] % img_width_size != 0):
        img_complete = np.zeros((img.shape[0], x_size, img.shape[2]))
        img_complete[0 : img.shape[0], 0 : img.shape[1], 0 : img.shape[2]] = img
    elif (img.shape[0] % img_height_size != 0) and (img.shape[1] % img_width_size != 0):
        img_complete = np.zeros((y_size, x_size, img.shape[2]))
        img_complete[0 : img.shape[0], 0 : img.shape[1], 0 : img.shape[2]] = img
    else:
        img_complete = img

    mask = np.zeros((img_complete.shape[0], img_complete.shape[1], 1))
    img_holder = np.zeros((1, img_height_size, img_width_size, img.shape[2]))

    # Predict on each non-overlapping patch, and place the result at its corresponding location in the mask.
    for i in range(0, img_complete.shape[0], img_height_size):
        for j in range(0, img_complete.shape[1], img_width_size):
            img_holder[0] = img_complete[i : i + img_height_size, j : j + img_width_size, 0 : img.shape[2]]
            preds = fitted_model.predict(img_holder)
            mask[i : i + img_height_size, j : j + img_width_size, 0] = preds[0, :, :, 0]

    # Crop the mask back to the original image extent, and convert to the channels-first
    # format expected by rasterio.
    mask_complete = np.expand_dims(mask[0 : img.shape[0], 0 : img.shape[1], 0], axis = 2)
    mask_complete = np.transpose(mask_complete, [2, 0, 1]).astype('float32')

    if write:
        metadata['count'] = 1
        metadata['dtype'] = 'float32'

        with rasterio.open(output_filename, 'w', **metadata) as dst:
            dst.write(mask_complete)

    return mask_complete
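

# Usage sketch for image_model_predict (paths are illustrative), where `model` is a trained
# keras model, e.g. one returned by FCN_VGG16_model() and fitted on the training arrays:
#
# pred_mask = image_model_predict('path/to/Test_1.tif', 'path/to/Test_1_mask.tif',
#                                 256, 256, fitted_model = model, write = True)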
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Water-Body-Extraction-from-Very-High-Spatial-Resolution-Remote-Sensing-Data-Based-on-FCNs
Python implementation of the Fully Convolutional Network (FCN) proposed in Li et al. (2019)

This repository includes functions to preprocess the input images and their respective polygons so as to create the input image
patches and mask patches to be used for model training. The CNN used here is the Fully Convolutional Network (FCN) model implemented
in the paper 'Water Body Extraction from Very High Spatial Resolution Remote Sensing Data Based on Fully Convolutional Networks' by
Li L., Yan Z., Shen Q., Cheng G., Gao L., Zhang B. (2019).

Requirements:
- opencv-python (cv2)
- numpy
- rasterio
- keras (TensorFlow backend)

glob and json are part of the Python standard library, so they require no separate installation.
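
## Quickstart

A minimal end-to-end sketch (the paths, patch size and training hyperparameters below are illustrative, not values prescribed by the paper):

```python
from FCN_VGG16_Model import training_data_generation, FCN_VGG16_model, image_model_predict

# Extract augmented 256 x 256 training patches from the images and polygons under DATA_DIR.
x_train, y_train = training_data_generation('path/to/data', 256, 256, perc = 0.5, buff = 0.25)

# Build and train the FCN with a pretrained VGG16 backbone (inputs must be 3-channel).
model = FCN_VGG16_model(256, 256, transfer_learning = True, l_r = 0.0001)
model.fit(x_train, y_train, batch_size = 8, epochs = 50, validation_split = 0.1)

# Predict on a new georeferenced image and write the mask to file.
pred_mask = image_model_predict('path/to/test.tif', 'path/to/test_mask.tif', 256, 256, model, write = True)
```
--------------------------------------------------------------------------------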