├── Caffe-Layers-Custom ├── INFO.txt └── layer_data_augmentator.py ├── ISPRS_Caffe_Models ├── Potsdam_Models │ ├── 01_FCN-Model │ │ ├── deploy.prototxt │ │ ├── link_to_weights │ │ └── train_val.prototxt │ ├── 02_VGG-Model │ │ ├── deploy.prototxt │ │ ├── link_to_weights │ │ └── train_val.prototxt │ ├── 03_SegNet-Model │ │ ├── deploy.prototxt │ │ ├── link_to_weights │ │ ├── train_val_sc1_AUGMENTATION.prototxt │ │ └── train_val_sc1_NO_AUGMENTATION.prototxt │ └── INFO_Models └── Vaihingen_Models │ ├── 01_FCN-Model │ ├── deploy.prototxt │ ├── link_to_weight_Vaihingen │ └── train_val.prototxt │ ├── 02_VGG-Model │ ├── deploy.prototxt │ ├── deploy.prototxt~ │ ├── link_to_weight_Vaihingen │ └── train_val.prototxt │ ├── 03_SegNet-Model │ ├── deploy.prototxt │ ├── link_to_weight_Vaihingen │ ├── train_val_three_streams_AUGMENTATION.prototxt │ └── train_val_three_streams_NO_AUGMENTATION.prototxt │ └── INFO_Models ├── Misc ├── INFO.txt ├── INFO.txt~ ├── count_total_num_params.py ├── h5_to_lmdb.py └── patch_extraction_to_HDF5.py ├── README.md ├── master ├── model_inference └── model_inference_single_model.py └── model_training ├── analysis.py ├── solver.prototxt ├── training_class_boundary_net.py └── visualizations.py /Caffe-Layers-Custom/INFO.txt: -------------------------------------------------------------------------------- 1 | 2 | ============================ INFO ============================ 3 | 4 | These are cuctom-made Python-Layers in Caffe. They require you to have compile 5 | PyCaffe with PythonLayer parameter enabled !!! 6 | 7 | Also you have to append the location you store the file into your PYTHONPATH eviroment variable for this to work. 8 | 9 | ---- Methods ---- 10 | 11 | - Realtime stostastic data-augmentation ---> currently works with batch size equal to 1. 
12 | Augmentation hyperpameters are definied inside the the PYTHONLAYER and not in the prototxt 13 | -------------------------------------------------------------------------------- /Caffe-Layers-Custom/layer_data_augmentator.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | #import random 3 | import numpy as np 4 | import skimage 5 | import skimage.transform 6 | 7 | #import pdb 8 | #pdb.set_trace() 9 | 10 | 11 | class RealtimeDataAugmentation(caffe.Layer): 12 | 13 | 14 | """ 15 | This method applies data augmentation to an input image by considering two types of augmentations 16 | 17 | 1. Affine transform {including = scale, translation, rotation, shearing} 18 | 2. Simple flip and/or mirroring 19 | 20 | Input 21 | 22 | image : Multiband Image or arbitary size (HEIGHT, WIDTH, CHANNELS) 23 | 24 | All hyperparameters are defined in the "data_augmentation method" below 25 | 26 | 27 | 28 | TODO 29 | 30 | * Modify layer so that hyperparameters for the augmentation are not explicitly defined here 31 | but are passes directly into the prototxt 32 | 33 | * Currently this version only allows batch-size equal to 1. Larger batches mess-up the alligmed. 34 | Fix this so larger batches are also possible 35 | 36 | """ 37 | 38 | def setup(self, bottom, top): 39 | 40 | assert bottom[0].data.shape[0] == 1, "Currently augmentation works with single-batch input" 41 | assert bottom[0].data.shape[0] == top[0].data.shape[0] 42 | 43 | def reshape(self, bottom, top): 44 | 45 | # Copy shape from bottom 46 | top[0].reshape(*bottom[0].data.shape) 47 | 48 | 49 | def forward(self, bottom, top): 50 | 51 | # # TRANSFORMATIONS # # 52 | 53 | def translation_transformation(img): 54 | center_shift = np.array((img.shape[0], img.shape[1])) / 2. 
- 0.5 55 | tform_center = skimage.transform.SimilarityTransform(translation=-center_shift) 56 | tform_uncenter = skimage.transform.SimilarityTransform(translation=center_shift) 57 | return tform_center, tform_uncenter 58 | 59 | 60 | def build_augmentation_transform(img, zoom=1.0, rotation=0, shear=0, translation=(0, 0)): 61 | 62 | tform_center, tform_uncenter = translation_transformation(img) 63 | tform_augment = skimage.transform.AffineTransform(scale=(1/zoom, 1/zoom), rotation=np.deg2rad(rotation), shear=np.deg2rad(shear), translation=translation) 64 | tform = tform_center + tform_augment + tform_uncenter # shift to center, augment, shift back (for the rotation/shearing) 65 | return tform 66 | 67 | 68 | def random_perturbation_transform(img, zoom_range, rotation_range, shear_range, translation_range, do_flip=False): 69 | # random shift [-4, 4] - shift no longer needs to be integer! 70 | shift_x = np.random.uniform(*translation_range) 71 | shift_y = np.random.uniform(*translation_range) 72 | translation = (shift_x, shift_y) 73 | 74 | # random rotation [0, 360] 75 | rotation = np.random.uniform(*rotation_range) # there is no post-augmentation, so full rotations here! 76 | 77 | # random shear [0, 5] 78 | shear = np.random.uniform(*shear_range) 79 | 80 | # # flip 81 | if do_flip and (np.random.randint(2) > 0): # flip half of the time 82 | shear += 180 83 | rotation += 180 84 | # shear by 180 degrees is equivalent to rotation by 180 degrees + flip. 85 | # So after that we rotate it another 180 degrees to get just the flip. 86 | 87 | # random zoom [0.9, 1.1] 88 | # zoom = np.random.uniform(*zoom_range) 89 | log_zoom_range = [np.log(z) for z in zoom_range] 90 | zoom = np.exp(np.random.uniform(*log_zoom_range)) # for a zoom factor this sampling approach makes more sense. 91 | # the range should be multiplicatively symmetric, so [1/1.1, 1.1] instead of [0.9, 1.1] makes more sense. 
92 | 93 | return build_augmentation_transform(img, zoom, rotation, shear, translation) 94 | 95 | 96 | def random_flip_mirroring(img): 97 | 98 | # random generator for flip and/ or mirorring 99 | rand_val = np.random.randint(3) 100 | 101 | if rand_val == 0: 102 | # apply flip 103 | tr_img = np.rot90(img) 104 | 105 | if rand_val == 1: 106 | # apply mirroring 107 | tr_img = img[:, ::-1, :] 108 | 109 | if rand_val == 2: 110 | # apply both 111 | tr_img = np.rot90(img) 112 | tr_img = tr_img[:, ::-1, :] 113 | 114 | return tr_img 115 | 116 | 117 | def fast_warp(img, tf, mode='reflect', background_value=0.0): 118 | """ 119 | This wrapper function is about five times faster than skimage.transform.warp, for our use case. 120 | """ 121 | m = tf._matrix 122 | img_wf = np.empty((img.shape[0], img.shape[1], img.shape[2]), dtype='float32') 123 | for k in xrange(img.shape[2]): 124 | img_wf[..., k] = skimage.transform._warps_cy._warp_fast(img[..., k], m, output_shape=(img.shape[0], img.shape[1]), mode=mode, cval=background_value) 125 | return img_wf 126 | 127 | # ============================================================================================ # 128 | 129 | 130 | # ============= INPUTS =========== # 131 | 132 | # when random value is larger that this threshold apply simple flip/ mirror operation 133 | flip_threshold = 0.40 134 | 135 | # define initial augmentation parameters 136 | augmentation_params = { 137 | 'zoom_range': (1, 1.01), # 0 138 | 'rotation_range': (0, 15), # 3 139 | 'shear_range': (0, 8), 140 | 'translation_range': (-5, 5), 141 | } 142 | 143 | # ============= PROCESS ========= # 144 | 145 | for ii in range(bottom[0].data.shape[0]): 146 | 147 | # Copy all data 148 | input_im = bottom[0].data[ii, :] 149 | 150 | # roll-axis to build image (H x W x Chan) 151 | input_im = np.rollaxis(input_im, 0, 3) 152 | 153 | # randomly select augmentation mode => simple flip / affine transform 154 | augmentation_mode = np.random.random(1) 155 | 156 | if augmentation_mode > 
flip_threshold:
157 |                 out_im = random_flip_mirroring(input_im)
158 | 
159 |             if augmentation_mode <= flip_threshold:
160 |                 # compute random transformation
161 |                 tform_augment = random_perturbation_transform(img=input_im, **augmentation_params)
162 | 
163 |                 # apply random transformation
164 |                 out_im = fast_warp(input_im, tform_augment).astype('float32')
165 | 
166 |             # convert to caffe tensor (Chan x H x W)
167 |             out_im = np.rollaxis(out_im, 2, 0)
168 | 
169 |             # store to blob-output
170 |             top[0].data[ii, :] = out_im[:]
171 | 
172 |     def backward(self, top, propagate_down, bottom):
173 |         pass
174 | 
--------------------------------------------------------------------------------
/ISPRS_Caffe_Models/Potsdam_Models/01_FCN-Model/deploy.prototxt:
--------------------------------------------------------------------------------
  1 | name: "FCN-ClassBoundary-Annotation-Train"
  2 | 
  3 | input: 'image'
  4 | input_dim: 1
  5 | input_dim: 3
  6 | input_dim: 256   # fixed: was "nput_dim" (typo), which breaks the 4-dim input declaration
  7 | input_dim: 256
  8 | 
  9 | input: 'dsm'
 10 | input_dim: 1
 11 | input_dim: 1
 12 | input_dim: 256
 13 | input_dim: 256
 14 | 
 15 | input: 'ndsm'
 16 | input_dim: 1
 17 | input_dim: 1
 18 | input_dim: 256
 19 | input_dim: 256
 20 | 
 21 | force_backward: true
 22 | 
 23 | 
 24 | ############################# Concatenate nDSM and DSM ##############################
 25 | 
 26 | layer {
 27 |   name: "dem-componets"
 28 |   type: "Concat"
 29 |   bottom: "dsm"
 30 |   bottom: "ndsm"
 31 |   top: "dem"
 32 | }
 33 | 
 34 | 
 35 | ################## Edge Model Inference #######################
 36 | 
 37 | 
 38 | layer {
 39 |   name: "edge_dem_conv1_1_"
 40 |   type: "Convolution"
 41 |   bottom: "dem"
 42 |   top: "ee_dem_conv1_1"
 43 |   param {
 44 |     lr_mult: 0 # 1
 45 |     decay_mult: 1
 46 |   }
 47 |   param {
 48 |     lr_mult: 0 # 2
 49 |     decay_mult: 0
 50 |   }
 51 |   convolution_param {
 52 |     num_output: 64
 53 |     pad: 35
 54 |     kernel_size: 3
 55 |     weight_filler {
 56 |       type: "xavier"
 57 |     }
 58 |     engine: CUDNN
 59 |   }
 60 | }
 61 | 
 62 | layer {
 63 |   name: "edge_conv1_1"
 64 |   type: "Convolution"
 65 |   bottom: "image"
 66 | 
top: "ee_data_conv1_1" 67 | param { 68 | lr_mult: 0 # 1 69 | decay_mult: 1 70 | } 71 | param { 72 | lr_mult: 0 # 2 73 | decay_mult: 0 74 | } 75 | convolution_param { 76 | num_output: 64 77 | pad: 35 78 | kernel_size: 3 79 | engine: CUDNN 80 | } 81 | } 82 | layer { 83 | name: "data_relu1_1" 84 | type: "ReLU" 85 | bottom: "ee_data_conv1_1" 86 | top: "ee_data_conv1_1" 87 | } 88 | layer { 89 | name: "dem_relu1_1" 90 | type: "ReLU" 91 | bottom: "ee_dem_conv1_1" 92 | top: "ee_dem_conv1_1" 93 | } 94 | layer { 95 | name: "edge_conv1_2" 96 | type: "Convolution" 97 | bottom: "ee_data_conv1_1" 98 | top: "ee_data_conv1_2" 99 | param { 100 | lr_mult: 0 # 1 101 | decay_mult: 1 102 | } 103 | param { 104 | lr_mult: 0 # 2 105 | decay_mult: 0 106 | } 107 | convolution_param { 108 | num_output: 64 109 | pad: 1 110 | kernel_size: 3 111 | engine: CUDNN 112 | } 113 | } 114 | layer { 115 | name: "edge_dem_conv1_2_" 116 | type: "Convolution" 117 | bottom: "ee_dem_conv1_1" 118 | top: "ee_dem_conv1_2" 119 | param { 120 | lr_mult: 0 # 1 121 | decay_mult: 1 122 | } 123 | param { 124 | lr_mult: 0 # 2 125 | decay_mult: 0 126 | } 127 | convolution_param { 128 | num_output: 64 129 | pad: 1 130 | kernel_size: 3 131 | weight_filler { 132 | type: "xavier" 133 | } 134 | engine: CUDNN 135 | } 136 | } 137 | layer { 138 | name: "data_relu1_2" 139 | type: "ReLU" 140 | bottom: "ee_data_conv1_2" 141 | top: "ee_data_conv1_2" 142 | } 143 | layer { 144 | name: "dem_relu1_2" 145 | type: "ReLU" 146 | bottom: "ee_dem_conv1_2" 147 | top: "ee_dem_conv1_2" 148 | } 149 | layer { 150 | name: "data_pool1" 151 | type: "Pooling" 152 | bottom: "ee_data_conv1_2" 153 | top: "ee_data_pool1" 154 | pooling_param { 155 | pool: MAX 156 | kernel_size: 2 157 | stride: 2 158 | } 159 | } 160 | layer { 161 | name: "dem_pool1" 162 | type: "Pooling" 163 | bottom: "ee_dem_conv1_2" 164 | top: "ee_dem_pool1" 165 | pooling_param { 166 | pool: MAX 167 | kernel_size: 2 168 | stride: 2 169 | } 170 | } 171 | layer { 172 | name: "edge_conv2_1" 
173 | type: "Convolution" 174 | bottom: "ee_data_pool1" 175 | top: "ee_data_conv2_1" 176 | param { 177 | lr_mult: 0 # 1 178 | decay_mult: 1 179 | } 180 | param { 181 | lr_mult: 0 # 2 182 | decay_mult: 0 183 | } 184 | convolution_param { 185 | num_output: 128 186 | pad: 1 187 | kernel_size: 3 188 | engine: CUDNN 189 | } 190 | } 191 | layer { 192 | name: "edge_dem_conv2_1_" 193 | type: "Convolution" 194 | bottom: "ee_dem_pool1" 195 | top: "ee_dem_conv2_1" 196 | param { 197 | lr_mult: 0 # 1 198 | decay_mult: 1 199 | } 200 | param { 201 | lr_mult: 0 # 2 202 | decay_mult: 0 203 | } 204 | convolution_param { 205 | num_output: 128 206 | pad: 1 207 | kernel_size: 3 208 | weight_filler { 209 | type: "xavier" 210 | } 211 | engine: CUDNN 212 | } 213 | } 214 | layer { 215 | name: "data_relu2_1" 216 | type: "ReLU" 217 | bottom: "ee_data_conv2_1" 218 | top: "ee_data_conv2_1" 219 | } 220 | layer { 221 | name: "dem_relu2_1" 222 | type: "ReLU" 223 | bottom: "ee_dem_conv2_1" 224 | top: "ee_dem_conv2_1" 225 | } 226 | layer { 227 | name: "edge_conv2_2" 228 | type: "Convolution" 229 | bottom: "ee_data_conv2_1" 230 | top: "ee_data_conv2_2" 231 | param { 232 | lr_mult: 0 # 1 233 | decay_mult: 1 234 | } 235 | param { 236 | lr_mult: 0 # 2 237 | decay_mult: 0 238 | } 239 | convolution_param { 240 | num_output: 128 241 | pad: 1 242 | kernel_size: 3 243 | engine: CUDNN 244 | } 245 | } 246 | layer { 247 | name: "edge_dem_conv2_2_" 248 | type: "Convolution" 249 | bottom: "ee_dem_conv2_1" 250 | top: "ee_dem_conv2_2" 251 | param { 252 | lr_mult: 0 # 1 253 | decay_mult: 1 254 | } 255 | param { 256 | lr_mult: 0 # 2 257 | decay_mult: 0 258 | } 259 | convolution_param { 260 | num_output: 128 261 | pad: 1 262 | kernel_size: 3 263 | weight_filler { 264 | type: "xavier" 265 | } 266 | engine: CUDNN 267 | } 268 | } 269 | layer { 270 | name: "data_relu2_2" 271 | type: "ReLU" 272 | bottom: "ee_data_conv2_2" 273 | top: "ee_data_conv2_2" 274 | } 275 | layer { 276 | name: "dem_relu2_2" 277 | type: "ReLU" 278 | 
bottom: "ee_dem_conv2_2" 279 | top: "ee_dem_conv2_2" 280 | } 281 | layer { 282 | name: "data_pool2" 283 | type: "Pooling" 284 | bottom: "ee_data_conv2_2" 285 | top: "ee_data_pool2" 286 | pooling_param { 287 | pool: MAX 288 | kernel_size: 2 289 | stride: 2 290 | } 291 | } 292 | layer { 293 | name: "dem_pool2" 294 | type: "Pooling" 295 | bottom: "ee_dem_conv2_2" 296 | top: "ee_dem_pool2" 297 | pooling_param { 298 | pool: MAX 299 | kernel_size: 2 300 | stride: 2 301 | } 302 | } 303 | layer { 304 | name: "edge_conv3_1" 305 | type: "Convolution" 306 | bottom: "ee_data_pool2" 307 | top: "ee_data_conv3_1" 308 | param { 309 | lr_mult: 0 # 1 310 | decay_mult: 1 311 | } 312 | param { 313 | lr_mult: 0 # 2 314 | decay_mult: 0 315 | } 316 | convolution_param { 317 | num_output: 256 318 | pad: 1 319 | kernel_size: 3 320 | engine: CUDNN 321 | } 322 | } 323 | layer { 324 | name: "edge_dem_conv3_1_" 325 | type: "Convolution" 326 | bottom: "ee_dem_pool2" 327 | top: "ee_dem_conv3_1" 328 | param { 329 | lr_mult: 0 # 1 330 | decay_mult: 1 331 | } 332 | param { 333 | lr_mult: 0 # 2 334 | decay_mult: 0 335 | } 336 | convolution_param { 337 | num_output: 256 338 | pad: 1 339 | kernel_size: 3 340 | weight_filler { 341 | type: "xavier" 342 | } 343 | engine: CUDNN 344 | } 345 | } 346 | layer { 347 | name: "data_relu3_1" 348 | type: "ReLU" 349 | bottom: "ee_data_conv3_1" 350 | top: "ee_data_conv3_1" 351 | } 352 | layer { 353 | name: "dem_relu3_1" 354 | type: "ReLU" 355 | bottom: "ee_dem_conv3_1" 356 | top: "ee_dem_conv3_1" 357 | } 358 | layer { 359 | name: "edge_conv3_2" 360 | type: "Convolution" 361 | bottom: "ee_data_conv3_1" 362 | top: "ee_data_conv3_2" 363 | param { 364 | lr_mult: 0 # 1 365 | decay_mult: 1 366 | } 367 | param { 368 | lr_mult: 0 # 2 369 | decay_mult: 0 370 | } 371 | convolution_param { 372 | num_output: 256 373 | pad: 1 374 | kernel_size: 3 375 | engine: CUDNN 376 | } 377 | } 378 | layer { 379 | name: "edge_dem_conv3_2_" 380 | type: "Convolution" 381 | bottom: 
"ee_dem_conv3_1" 382 | top: "ee_dem_conv3_2" 383 | param { 384 | lr_mult: 0 # 1 385 | decay_mult: 1 386 | } 387 | param { 388 | lr_mult: 0 # 2 389 | decay_mult: 0 390 | } 391 | convolution_param { 392 | num_output: 256 393 | pad: 1 394 | kernel_size: 3 395 | weight_filler { 396 | type: "xavier" 397 | } 398 | engine: CUDNN 399 | } 400 | } 401 | layer { 402 | name: "data_relu3_2" 403 | type: "ReLU" 404 | bottom: "ee_data_conv3_2" 405 | top: "ee_data_conv3_2" 406 | } 407 | layer { 408 | name: "dem_relu3_2" 409 | type: "ReLU" 410 | bottom: "ee_dem_conv3_2" 411 | top: "ee_dem_conv3_2" 412 | } 413 | layer { 414 | name: "edge_conv3_3" 415 | type: "Convolution" 416 | bottom: "ee_data_conv3_2" 417 | top: "ee_data_conv3_3" 418 | param { 419 | lr_mult: 0 # 1 420 | decay_mult: 1 421 | } 422 | param { 423 | lr_mult: 0 # 2 424 | decay_mult: 0 425 | } 426 | convolution_param { 427 | num_output: 256 428 | pad: 1 429 | kernel_size: 3 430 | engine: CUDNN 431 | } 432 | } 433 | layer { 434 | name: "edge_dem_conv3_3_" 435 | type: "Convolution" 436 | bottom: "ee_dem_conv3_2" 437 | top: "ee_dem_conv3_3" 438 | param { 439 | lr_mult: 0 # 1 440 | decay_mult: 1 441 | } 442 | param { 443 | lr_mult: 0 # 2 444 | decay_mult: 0 445 | } 446 | convolution_param { 447 | num_output: 256 448 | pad: 1 449 | kernel_size: 3 450 | weight_filler { 451 | type: "xavier" 452 | } 453 | engine: CUDNN 454 | } 455 | } 456 | layer { 457 | name: "data_relu3_3" 458 | type: "ReLU" 459 | bottom: "ee_data_conv3_3" 460 | top: "ee_data_conv3_3" 461 | } 462 | layer { 463 | name: "dem_relu3_3" 464 | type: "ReLU" 465 | bottom: "ee_dem_conv3_3" 466 | top: "ee_dem_conv3_3" 467 | } 468 | layer { 469 | name: "data_pool3" 470 | type: "Pooling" 471 | bottom: "ee_data_conv3_3" 472 | top: "ee_data_pool3" 473 | pooling_param { 474 | pool: MAX 475 | kernel_size: 2 476 | stride: 2 477 | } 478 | } 479 | layer { 480 | name: "dem_pool3" 481 | type: "Pooling" 482 | bottom: "ee_dem_conv3_3" 483 | top: "ee_dem_pool3" 484 | pooling_param { 
485 | pool: MAX 486 | kernel_size: 2 487 | stride: 2 488 | } 489 | } 490 | layer { 491 | name: "edge_conv4_1" 492 | type: "Convolution" 493 | bottom: "ee_data_pool3" 494 | top: "ee_data_conv4_1" 495 | param { 496 | lr_mult: 0 # 1 497 | decay_mult: 1 498 | } 499 | param { 500 | lr_mult: 0 # 2 501 | decay_mult: 0 502 | } 503 | convolution_param { 504 | num_output: 512 505 | pad: 1 506 | kernel_size: 3 507 | engine: CUDNN 508 | } 509 | } 510 | layer { 511 | name: "edge_dem_conv4_1_" 512 | type: "Convolution" 513 | bottom: "ee_dem_pool3" 514 | top: "ee_dem_conv4_1" 515 | param { 516 | lr_mult: 0 # 1 517 | decay_mult: 1 518 | } 519 | param { 520 | lr_mult: 0 # 2 521 | decay_mult: 0 522 | } 523 | convolution_param { 524 | num_output: 512 525 | pad: 1 526 | kernel_size: 3 527 | weight_filler { 528 | type: "xavier" 529 | } 530 | engine: CUDNN 531 | } 532 | } 533 | layer { 534 | name: "data_relu4_1" 535 | type: "ReLU" 536 | bottom: "ee_data_conv4_1" 537 | top: "ee_data_conv4_1" 538 | } 539 | layer { 540 | name: "dem_relu4_1" 541 | type: "ReLU" 542 | bottom: "ee_dem_conv4_1" 543 | top: "ee_dem_conv4_1" 544 | } 545 | layer { 546 | name: "edge_conv4_2" 547 | type: "Convolution" 548 | bottom: "ee_data_conv4_1" 549 | top: "ee_data_conv4_2" 550 | param { 551 | lr_mult: 0 # 1 552 | decay_mult: 1 553 | } 554 | param { 555 | lr_mult: 0 # 2 556 | decay_mult: 0 557 | } 558 | convolution_param { 559 | num_output: 512 560 | pad: 1 561 | kernel_size: 3 562 | engine: CUDNN 563 | } 564 | } 565 | layer { 566 | name: "edge_dem_conv4_2_" 567 | type: "Convolution" 568 | bottom: "ee_dem_conv4_1" 569 | top: "ee_dem_conv4_2" 570 | param { 571 | lr_mult: 0 # 1 572 | decay_mult: 1 573 | } 574 | param { 575 | lr_mult: 0 # 2 576 | decay_mult: 0 577 | } 578 | convolution_param { 579 | num_output: 512 580 | pad: 1 581 | kernel_size: 3 582 | weight_filler { 583 | type: "xavier" 584 | } 585 | engine: CUDNN 586 | } 587 | } 588 | layer { 589 | name: "data_relu4_2" 590 | type: "ReLU" 591 | bottom: 
"ee_data_conv4_2" 592 | top: "ee_data_conv4_2" 593 | } 594 | layer { 595 | name: "dem_relu4_2" 596 | type: "ReLU" 597 | bottom: "ee_dem_conv4_2" 598 | top: "ee_dem_conv4_2" 599 | } 600 | layer { 601 | name: "edge_conv4_3" 602 | type: "Convolution" 603 | bottom: "ee_data_conv4_2" 604 | top: "ee_data_conv4_3" 605 | param { 606 | lr_mult: 0 # 1 607 | decay_mult: 1 608 | } 609 | param { 610 | lr_mult: 0 # 2 611 | decay_mult: 0 612 | } 613 | convolution_param { 614 | num_output: 512 615 | pad: 1 616 | kernel_size: 3 617 | engine: CUDNN 618 | } 619 | } 620 | layer { 621 | name: "edge_dem_conv4_3_" 622 | type: "Convolution" 623 | bottom: "ee_dem_conv4_2" 624 | top: "ee_dem_conv4_3" 625 | param { 626 | lr_mult: 0 # 1 627 | decay_mult: 1 628 | } 629 | param { 630 | lr_mult: 0 # 2 631 | decay_mult: 0 632 | } 633 | convolution_param { 634 | num_output: 512 635 | pad: 1 636 | kernel_size: 3 637 | weight_filler { 638 | type: "xavier" 639 | } 640 | engine: CUDNN 641 | } 642 | } 643 | layer { 644 | name: "data_relu4_3" 645 | type: "ReLU" 646 | bottom: "ee_data_conv4_3" 647 | top: "ee_data_conv4_3" 648 | } 649 | layer { 650 | name: "dem_relu4_3" 651 | type: "ReLU" 652 | bottom: "ee_dem_conv4_3" 653 | top: "ee_dem_conv4_3" 654 | } 655 | layer { 656 | name: "data_pool4" 657 | type: "Pooling" 658 | bottom: "ee_data_conv4_3" 659 | top: "ee_data_pool4" 660 | pooling_param { 661 | pool: MAX 662 | kernel_size: 2 663 | stride: 2 664 | } 665 | } 666 | layer { 667 | name: "dem_pool4" 668 | type: "Pooling" 669 | bottom: "ee_dem_conv4_3" 670 | top: "ee_dem_pool4" 671 | pooling_param { 672 | pool: MAX 673 | kernel_size: 2 674 | stride: 2 675 | } 676 | } 677 | layer { 678 | name: "edge_conv5_1" 679 | type: "Convolution" 680 | bottom: "ee_data_pool4" 681 | top: "ee_data_conv5_1" 682 | param { 683 | lr_mult: 0 # 1 684 | decay_mult: 1 685 | } 686 | param { 687 | lr_mult: 0 # 2 688 | decay_mult: 0 689 | } 690 | convolution_param { 691 | num_output: 512 692 | pad: 1 693 | kernel_size: 3 694 | 
engine: CUDNN 695 | } 696 | } 697 | layer { 698 | name: "edge_dem_conv5_1_" 699 | type: "Convolution" 700 | bottom: "ee_dem_pool4" 701 | top: "ee_dem_conv5_1" 702 | param { 703 | lr_mult: 0 # 1 704 | decay_mult: 1 705 | } 706 | param { 707 | lr_mult: 0 # 2 708 | decay_mult: 0 709 | } 710 | convolution_param { 711 | num_output: 512 712 | pad: 1 713 | kernel_size: 3 714 | weight_filler { 715 | type: "xavier" 716 | } 717 | engine: CUDNN 718 | } 719 | } 720 | layer { 721 | name: "data_relu5_1" 722 | type: "ReLU" 723 | bottom: "ee_data_conv5_1" 724 | top: "ee_data_conv5_1" 725 | } 726 | layer { 727 | name: "dem_relu5_1" 728 | type: "ReLU" 729 | bottom: "ee_dem_conv5_1" 730 | top: "ee_dem_conv5_1" 731 | } 732 | layer { 733 | name: "edge_conv5_2" 734 | type: "Convolution" 735 | bottom: "ee_data_conv5_1" 736 | top: "ee_data_conv5_2" 737 | param { 738 | lr_mult: 0 # 1 739 | decay_mult: 1 740 | } 741 | param { 742 | lr_mult: 0 # 2 743 | decay_mult: 0 744 | } 745 | convolution_param { 746 | num_output: 512 747 | pad: 1 748 | kernel_size: 3 749 | engine: CUDNN 750 | } 751 | } 752 | layer { 753 | name: "edge_dem_conv5_2_" 754 | type: "Convolution" 755 | bottom: "ee_dem_conv5_1" 756 | top: "ee_dem_conv5_2" 757 | param { 758 | lr_mult: 0 # 1 759 | decay_mult: 1 760 | } 761 | param { 762 | lr_mult: 0 # 2 763 | decay_mult: 0 764 | } 765 | convolution_param { 766 | num_output: 512 767 | pad: 1 768 | kernel_size: 3 769 | weight_filler { 770 | type: "xavier" 771 | } 772 | engine: CUDNN 773 | } 774 | } 775 | layer { 776 | name: "data_relu5_2" 777 | type: "ReLU" 778 | bottom: "ee_data_conv5_2" 779 | top: "ee_data_conv5_2" 780 | } 781 | layer { 782 | name: "dem_relu5_2" 783 | type: "ReLU" 784 | bottom: "ee_dem_conv5_2" 785 | top: "ee_dem_conv5_2" 786 | } 787 | layer { 788 | name: "edge_conv5_3" 789 | type: "Convolution" 790 | bottom: "ee_data_conv5_2" 791 | top: "ee_data_conv5_3" 792 | param { 793 | lr_mult: 0 # 1 794 | decay_mult: 1 795 | } 796 | param { 797 | lr_mult: 0 # 2 798 | 
decay_mult: 0 799 | } 800 | convolution_param { 801 | num_output: 512 802 | pad: 1 803 | kernel_size: 3 804 | engine: CUDNN 805 | } 806 | } 807 | layer { 808 | name: "edge_dem_conv5_3_" 809 | type: "Convolution" 810 | bottom: "ee_dem_conv5_2" 811 | top: "ee_dem_conv5_3" 812 | param { 813 | lr_mult: 0 # 1 814 | decay_mult: 1 815 | } 816 | param { 817 | lr_mult: 0 # 2 818 | decay_mult: 0 819 | } 820 | convolution_param { 821 | num_output: 512 822 | pad: 1 823 | kernel_size: 3 824 | weight_filler { 825 | type: "xavier" 826 | } 827 | engine: CUDNN 828 | } 829 | } 830 | layer { 831 | name: "data_relu5_3" 832 | type: "ReLU" 833 | bottom: "ee_data_conv5_3" 834 | top: "ee_data_conv5_3" 835 | } 836 | layer { 837 | name: "dem_relu5_3" 838 | type: "ReLU" 839 | bottom: "ee_dem_conv5_3" 840 | top: "ee_dem_conv5_3" 841 | } 842 | 843 | #################### DSN-1 #################### 844 | 845 | layer { 846 | name: "dsn-1-fuse" 847 | type: "Concat" 848 | bottom: "ee_data_conv1_2" 849 | bottom: "ee_dem_conv1_2" 850 | top: "score-dsn1-merge" 851 | concat_param { 852 | concat_dim: 1 853 | } 854 | } 855 | layer { 856 | name: "edge_score-dsn1" 857 | type: "Convolution" 858 | bottom: "score-dsn1-merge" 859 | top: "score-dsn1-up" 860 | param { 861 | lr_mult: 0 # 1 862 | decay_mult: 1 863 | } 864 | param { 865 | lr_mult: 0 # 2 866 | decay_mult: 0 867 | } 868 | convolution_param { 869 | num_output: 1 870 | kernel_size: 1 871 | engine: CUDNN 872 | } 873 | } 874 | layer { 875 | name: "crop" 876 | type: "Crop" 877 | bottom: "score-dsn1-up" 878 | bottom: "image" 879 | top: "score-dsn1" 880 | crop_param { 881 | axis: 2 882 | #offset: 0 883 | offset: 34 884 | offset: 34 885 | } 886 | } 887 | 888 | layer { name: "loss-dns1" type: "EuclideanLoss" bottom: "score-dsn1" bottom: "edge-label" top: "dsn1_loss" loss_weight: 1 } 889 | 890 | #################### DSN-2 #################### 891 | 892 | layer { 893 | name: "dsn-2-fuse" 894 | type: "Concat" 895 | bottom: "ee_data_conv2_2" 896 | bottom: 
"ee_dem_conv2_2" 897 | top: "score-dsn2-merge" 898 | concat_param { 899 | concat_dim: 1 900 | } 901 | } 902 | layer { 903 | name: "edge_score-dsn2" 904 | type: "Convolution" 905 | bottom: "score-dsn2-merge" 906 | top: "score-dsn2_" 907 | param { 908 | lr_mult: 0 # 1 909 | decay_mult: 1 910 | } 911 | param { 912 | lr_mult: 0 # 2 913 | decay_mult: 0 914 | } 915 | convolution_param { 916 | num_output: 1 917 | kernel_size: 1 918 | engine: CUDNN 919 | } 920 | } 921 | layer { 922 | name: "edge_upsample_2" 923 | type: "Deconvolution" 924 | bottom: "score-dsn2_" 925 | top: "score-dsn2-up" 926 | param { 927 | lr_mult: 0 # 1 928 | decay_mult: 1 929 | } 930 | param { 931 | lr_mult: 0 # 2 932 | decay_mult: 0 933 | } 934 | convolution_param { 935 | num_output: 1 936 | kernel_size: 4 937 | stride: 2 938 | } 939 | } 940 | layer { 941 | name: "crop" 942 | type: "Crop" 943 | bottom: "score-dsn2-up" 944 | bottom: "image" 945 | top: "score-dsn2" 946 | crop_param { 947 | axis: 2 948 | #offset: 0 949 | offset: 35 950 | offset: 35 951 | } 952 | } 953 | 954 | layer { name: "loss-dns2" type: "EuclideanLoss" bottom: "score-dsn2" bottom: "edge-label" top: "dsn2_loss" loss_weight: 1} 955 | 956 | #################### DSN-3 #################### 957 | 958 | layer { 959 | name: "dsn-3-fuse" 960 | type: "Concat" 961 | bottom: "ee_data_conv3_3" 962 | bottom: "ee_dem_conv3_3" 963 | top: "score-dsn3-merge" 964 | concat_param { 965 | concat_dim: 1 966 | } 967 | } 968 | layer { 969 | name: "edge_score-dsn3" 970 | type: "Convolution" 971 | bottom: "score-dsn3-merge" 972 | top: "score-dsn3_" 973 | param { 974 | lr_mult: 0 # 1 975 | decay_mult: 1 976 | } 977 | param { 978 | lr_mult: 0 # 2 979 | decay_mult: 0 980 | } 981 | convolution_param { 982 | num_output: 1 983 | kernel_size: 1 984 | engine: CUDNN 985 | } 986 | } 987 | layer { 988 | name: "edge_upsample_4" 989 | type: "Deconvolution" 990 | bottom: "score-dsn3_" 991 | top: "score-dsn3-up" 992 | param { 993 | lr_mult: 0 # 1 994 | decay_mult: 1 995 | } 
996 | param { 997 | lr_mult: 0 # 2 998 | decay_mult: 0 999 | } 1000 | convolution_param { 1001 | num_output: 1 1002 | kernel_size: 8 1003 | stride: 4 1004 | } 1005 | } 1006 | layer { 1007 | name: "crop" 1008 | type: "Crop" 1009 | bottom: "score-dsn3-up" 1010 | bottom: "image" 1011 | top: "score-dsn3" 1012 | crop_param { 1013 | axis: 2 1014 | #offset: 0 1015 | offset: 36 1016 | offset: 36 1017 | } 1018 | } 1019 | 1020 | layer { name: "loss-dns3" type: "EuclideanLoss" bottom: "score-dsn3" bottom: "edge-label" top: "dsn3_loss" loss_weight: 1} 1021 | 1022 | 1023 | #################### DSN-4 #################### 1024 | 1025 | layer { 1026 | name: "dsn-4-fuse" 1027 | type: "Concat" 1028 | bottom: "ee_data_conv4_3" 1029 | bottom: "ee_dem_conv4_3" 1030 | top: "score-dsn4-merge" 1031 | concat_param { 1032 | concat_dim: 1 1033 | } 1034 | } 1035 | layer { 1036 | name: "edge_score-dsn4" 1037 | type: "Convolution" 1038 | bottom: "score-dsn4-merge" 1039 | top: "score-dsn4_" 1040 | param { 1041 | lr_mult: 0 # 1 1042 | decay_mult: 1 1043 | } 1044 | param { 1045 | lr_mult: 0 # 2 1046 | decay_mult: 0 1047 | } 1048 | convolution_param { 1049 | num_output: 1 1050 | kernel_size: 1 1051 | engine: CUDNN 1052 | } 1053 | } 1054 | layer { 1055 | name: "edge_upsample_8" 1056 | type: "Deconvolution" 1057 | bottom: "score-dsn4_" 1058 | top: "score-dsn4-up" 1059 | param { 1060 | lr_mult: 0 # 1 1061 | decay_mult: 1 1062 | } 1063 | param { 1064 | lr_mult: 0 # 2 1065 | decay_mult: 0 1066 | } 1067 | convolution_param { 1068 | num_output: 1 1069 | kernel_size: 16 1070 | stride: 8 1071 | } 1072 | } 1073 | layer { 1074 | name: "crop" 1075 | type: "Crop" 1076 | bottom: "score-dsn4-up" 1077 | bottom: "image" 1078 | top: "score-dsn4" 1079 | crop_param { 1080 | axis: 2 1081 | #offset: 0 1082 | offset: 40 1083 | offset: 40 1084 | } 1085 | } 1086 | 1087 | layer { name: "loss-dns4" type: "EuclideanLoss" bottom: "score-dsn4" bottom: "edge-label" top: "dsn4_loss" loss_weight: 1} 1088 | 1089 | 1090 | 
#################### DSN-5 #################### 1091 | 1092 | layer { 1093 | name: "dsn-5-fuse" 1094 | type: "Concat" 1095 | bottom: "ee_data_conv5_3" 1096 | bottom: "ee_dem_conv5_3" 1097 | top: "score-dsn5-merge" 1098 | concat_param { 1099 | concat_dim: 1 1100 | } 1101 | } 1102 | layer { 1103 | name: "edge_score-dsn5" 1104 | type: "Convolution" 1105 | bottom: "score-dsn5-merge" 1106 | top: "score-dsn5_" 1107 | param { 1108 | lr_mult: 0 # 1 1109 | decay_mult: 1 1110 | } 1111 | param { 1112 | lr_mult: 0 # 2 1113 | decay_mult: 0 1114 | } 1115 | convolution_param { 1116 | num_output: 1 1117 | kernel_size: 1 1118 | engine: CUDNN 1119 | } 1120 | } 1121 | layer { 1122 | name: "edge_upsample_16" 1123 | type: "Deconvolution" 1124 | bottom: "score-dsn5_" 1125 | top: "score-dsn5-up" 1126 | param { 1127 | lr_mult: 0 # 1 1128 | decay_mult: 1 1129 | } 1130 | param { 1131 | lr_mult: 0 # 2 1132 | decay_mult: 0 1133 | } 1134 | convolution_param { 1135 | num_output: 1 1136 | kernel_size: 32 1137 | stride: 16 1138 | } 1139 | } 1140 | layer { 1141 | name: "crop" 1142 | type: "Crop" 1143 | bottom: "score-dsn5-up" 1144 | bottom: "image" 1145 | top: "score-dsn5" 1146 | crop_param { 1147 | axis: 2 1148 | #offset: 0 1149 | offset: 48 1150 | offset: 48 1151 | } 1152 | } 1153 | 1154 | layer { name: "loss-dns5" type: "EuclideanLoss" bottom: "score-dsn5" bottom: "edge-label" top: "dsn5_loss" loss_weight: 1 } 1155 | 1156 | 1157 | #################### FUSE DSN #################### 1158 | 1159 | layer { 1160 | name: "concat" 1161 | type: "Concat" 1162 | bottom: "score-dsn1" 1163 | bottom: "score-dsn2" 1164 | bottom: "score-dsn3" 1165 | bottom: "score-dsn4" 1166 | bottom: "score-dsn5" 1167 | top: "concat-score" 1168 | concat_param { 1169 | concat_dim: 1 1170 | } 1171 | } 1172 | layer { 1173 | name: "edge_new-score-weighting" 1174 | type: "Convolution" 1175 | bottom: "concat-score" 1176 | top: "edge" 1177 | param { 1178 | lr_mult: 0 # 1 1179 | decay_mult: 1 1180 | } 1181 | param { 1182 | lr_mult: 
0 # 2 1183 | decay_mult: 0 1184 | } 1185 | convolution_param { 1186 | num_output: 1 1187 | kernel_size: 1 1188 | engine: CUDNN 1189 | } 1190 | } 1191 | 1192 | layer { name: "edge-loss" type: "EuclideanLoss" bottom: "edge" bottom: "edge-label" top: "edge-loss" loss_weight: 1 } 1193 | 1194 | 1195 | ################################################################################################### 1196 | ############################## Concatenate Edges to Train Data ################################### 1197 | ################################################################################################### 1198 | 1199 | layer { 1200 | name: "annotation-image-data" 1201 | type: "Concat" 1202 | bottom: "image" 1203 | bottom: "edge" 1204 | top: "data-and-edges" 1205 | } 1206 | 1207 | layer { 1208 | name: "annotation-dem-data" 1209 | type: "Concat" 1210 | bottom: "dem" 1211 | bottom: "edge" 1212 | top: "dem-and-edges" 1213 | } 1214 | 1215 | 1216 | ################################################################################################### 1217 | ############################# Trainable Annotation Network #################################### 1218 | ################################################################################################### 1219 | 1220 | 1221 | layer { 1222 | name: "conv1_1" 1223 | type: "Convolution" 1224 | bottom: "data-and-edges" 1225 | top: "conv1_1" 1226 | param { 1227 | lr_mult: 1 1228 | decay_mult: 1 1229 | } 1230 | param { 1231 | lr_mult: 2 1232 | decay_mult: 0 1233 | } 1234 | convolution_param { 1235 | num_output: 64 1236 | pad: 100 1237 | kernel_size: 3 1238 | engine: CUDNN 1239 | weight_filler {type: "xavier"} 1240 | bias_filler { type: "constant"} 1241 | } 1242 | } 1243 | 1244 | layer { 1245 | name: "dem_conv1_1" 1246 | type: "Convolution" 1247 | bottom: "dem-and-edges" 1248 | top: "dem_conv1_1" 1249 | param { 1250 | lr_mult: 1 1251 | decay_mult: 1 1252 | } 1253 | param { 1254 | lr_mult: 2 1255 | decay_mult: 0 1256 | } 1257 | 
convolution_param { 1258 | num_output: 64 1259 | pad: 100 1260 | kernel_size: 3 1261 | engine: CUDNN 1262 | weight_filler { 1263 | type: "xavier" 1264 | } 1265 | bias_filler { 1266 | type: "constant" 1267 | } 1268 | } 1269 | } 1270 | 1271 | ######################################### 1272 | 1273 | layer { 1274 | name: "relu1_1" 1275 | type: "ReLU" 1276 | bottom: "conv1_1" 1277 | top: "conv1_1" 1278 | } 1279 | 1280 | layer { 1281 | name: "dem_relu1_1" 1282 | type: "ReLU" 1283 | bottom: "dem_conv1_1" 1284 | top: "dem_conv1_1" 1285 | } 1286 | 1287 | ######################################### 1288 | 1289 | layer { 1290 | name: "conv1_2" 1291 | type: "Convolution" 1292 | bottom: "conv1_1" 1293 | top: "conv1_2" 1294 | param { 1295 | lr_mult: 1 1296 | decay_mult: 1 1297 | } 1298 | param { 1299 | lr_mult: 2 1300 | decay_mult: 0 1301 | } 1302 | convolution_param { 1303 | num_output: 64 1304 | pad: 1 1305 | kernel_size: 3 1306 | engine: CUDNN 1307 | } 1308 | } 1309 | 1310 | layer { 1311 | name: "dem_conv1_2_" 1312 | type: "Convolution" 1313 | bottom: "dem_conv1_1" 1314 | top: "dem_conv1_2" 1315 | param { 1316 | lr_mult: 1 1317 | decay_mult: 1 1318 | } 1319 | param { 1320 | lr_mult: 2 1321 | decay_mult: 0 1322 | } 1323 | convolution_param { 1324 | num_output: 64 1325 | pad: 1 1326 | kernel_size: 3 1327 | engine: CUDNN 1328 | weight_filler { 1329 | type: "xavier" 1330 | } 1331 | bias_filler { 1332 | type: "constant" 1333 | } 1334 | } 1335 | } 1336 | 1337 | ######################################### 1338 | 1339 | layer { 1340 | name: "relu1_2" 1341 | type: "ReLU" 1342 | bottom: "conv1_2" 1343 | top: "conv1_2" 1344 | } 1345 | 1346 | layer { 1347 | name: "dem_relu1_2" 1348 | type: "ReLU" 1349 | bottom: "dem_conv1_2" 1350 | top: "dem_conv1_2" 1351 | } 1352 | 1353 | ######################################### 1354 | 1355 | layer { 1356 | name: "pool1" 1357 | type: "Pooling" 1358 | bottom: "conv1_2" 1359 | top: "pool1" 1360 | pooling_param { 1361 | pool: MAX 1362 | kernel_size: 2 1363 | 
stride: 2 1364 | } 1365 | } 1366 | 1367 | layer { 1368 | name: "dem_pool1" 1369 | type: "Pooling" 1370 | bottom: "dem_conv1_2" 1371 | top: "dem_pool1" 1372 | pooling_param { 1373 | pool: MAX 1374 | kernel_size: 2 1375 | stride: 2 1376 | } 1377 | } 1378 | 1379 | 1380 | ################# Deep Supervision 1 ################# 1381 | 1382 | # layer { name: "ds_conv1" type: "Convolution" bottom: "conv1_2" top: "ds_conv1" param { lr_mult: 2 decay_mult: 1} 1383 | # param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 1 pad: 0 kernel_size: 1 engine: CUDNN }} 1384 | 1385 | # layer { type: "Crop" name: "crop1" bottom: "ds_conv1" bottom: "image" top: "ds_conv1c" } 1386 | 1387 | # layer { name: "ds_loss1" type: "EuclideanLoss" bottom: "ds_conv1c" bottom: "label" top: "ds_loss1" loss_weight: 1} 1388 | 1389 | 1390 | ############################ CONV 2 ############################ 1391 | 1392 | layer { 1393 | name: "conv2_1" 1394 | type: "Convolution" 1395 | bottom: "pool1" 1396 | top: "conv2_1" 1397 | param { 1398 | lr_mult: 1 1399 | decay_mult: 1 1400 | } 1401 | param { 1402 | lr_mult: 2 1403 | decay_mult: 0 1404 | } 1405 | convolution_param { 1406 | num_output: 128 1407 | pad: 1 1408 | kernel_size: 3 1409 | engine: CUDNN 1410 | } 1411 | } 1412 | 1413 | layer { 1414 | name: "dem_conv2_1_" 1415 | type: "Convolution" 1416 | bottom: "dem_pool1" 1417 | top: "dem_conv2_1" 1418 | param { 1419 | lr_mult: 1 1420 | decay_mult: 1 1421 | } 1422 | param { 1423 | lr_mult: 2 1424 | decay_mult: 0 1425 | } 1426 | 1427 | convolution_param { 1428 | num_output: 128 1429 | pad: 1 1430 | kernel_size: 3 1431 | engine: CUDNN 1432 | weight_filler { 1433 | type: "xavier" 1434 | } 1435 | bias_filler { 1436 | type: "constant" 1437 | } 1438 | } 1439 | } 1440 | 1441 | ######################################### 1442 | 1443 | layer { 1444 | name: "relu2_1" 1445 | type: "ReLU" 1446 | bottom: "conv2_1" 1447 | top: "conv2_1" 1448 | } 1449 | 1450 | layer { 1451 | name: "dem_relu2_1" 1452 | type: 
"ReLU" 1453 | bottom: "dem_conv2_1" 1454 | top: "dem_conv2_1" 1455 | } 1456 | 1457 | ######################################### 1458 | 1459 | layer { 1460 | name: "conv2_2" 1461 | type: "Convolution" 1462 | bottom: "conv2_1" 1463 | top: "conv2_2" 1464 | param { 1465 | lr_mult: 1 1466 | decay_mult: 1 1467 | } 1468 | param { 1469 | lr_mult: 2 1470 | decay_mult: 0 1471 | } 1472 | convolution_param { 1473 | num_output: 128 1474 | pad: 1 1475 | kernel_size: 3 1476 | engine: CUDNN 1477 | } 1478 | } 1479 | 1480 | layer { 1481 | name: "dem_conv2_2_" 1482 | type: "Convolution" 1483 | bottom: "dem_conv2_1" 1484 | top: "dem_conv2_2" 1485 | param { 1486 | lr_mult: 1 1487 | decay_mult: 1 1488 | } 1489 | param { 1490 | lr_mult: 2 1491 | decay_mult: 0 1492 | } 1493 | convolution_param { 1494 | num_output: 128 1495 | pad: 1 1496 | kernel_size: 3 1497 | engine: CUDNN 1498 | weight_filler { 1499 | type: "xavier" 1500 | } 1501 | bias_filler { 1502 | type: "constant" 1503 | } 1504 | } 1505 | } 1506 | 1507 | ######################################### 1508 | 1509 | layer { 1510 | name: "relu2_2" 1511 | type: "ReLU" 1512 | bottom: "conv2_2" 1513 | top: "conv2_2" 1514 | } 1515 | 1516 | layer { 1517 | name: "dem_relu2_2" 1518 | type: "ReLU" 1519 | bottom: "dem_conv2_2" 1520 | top: "dem_conv2_2" 1521 | } 1522 | 1523 | 1524 | ######################################### 1525 | 1526 | layer { 1527 | name: "pool2" 1528 | type: "Pooling" 1529 | bottom: "conv2_2" 1530 | top: "pool2" 1531 | pooling_param { 1532 | pool: MAX 1533 | kernel_size: 2 1534 | stride: 2 1535 | } 1536 | } 1537 | 1538 | layer { 1539 | name: "dem_pool2" 1540 | type: "Pooling" 1541 | bottom: "dem_conv2_2" 1542 | top: "dem_pool2" 1543 | pooling_param { 1544 | pool: MAX 1545 | kernel_size: 2 1546 | stride: 2 1547 | } 1548 | } 1549 | 1550 | # ################# Deep Supervision 2 ################# 1551 | 1552 | layer { 1553 | name: "ds_conv2" 1554 | type: "Convolution" 1555 | bottom: "conv2_2" 1556 | top: "ds_conv2" 1557 | param { 
1558 | lr_mult: 1 1559 | decay_mult: 1 1560 | } 1561 | param { 1562 | lr_mult: 2 1563 | decay_mult: 0 1564 | } 1565 | convolution_param { 1566 | num_output: 5 1567 | pad: 0 1568 | kernel_size: 1 1569 | engine: CUDNN 1570 | weight_filler { 1571 | type: "xavier" 1572 | } 1573 | bias_filler { 1574 | type: "constant" 1575 | } 1576 | } 1577 | } 1578 | 1579 | layer { 1580 | type: "Deconvolution" 1581 | name: 'ds_deconv2' 1582 | bottom: 'ds_conv2' 1583 | top: 'ds_deconv2' 1584 | param { 1585 | lr_mult: 1 1586 | decay_mult: 1 1587 | } 1588 | param { 1589 | lr_mult: 2 1590 | decay_mult: 0 1591 | } 1592 | convolution_param { 1593 | kernel_size: 4 1594 | stride: 2 1595 | num_output: 5 1596 | weight_filler: { 1597 | type: "bilinear" 1598 | } 1599 | } 1600 | } 1601 | 1602 | layer { 1603 | type: "Crop" 1604 | name: "crop2" 1605 | bottom: "ds_deconv2" 1606 | bottom: "image" 1607 | top: "ds_deconv2c" 1608 | crop_param { 1609 | axis: 2 1610 | offset: 102 1611 | offset: 102 1612 | } 1613 | } 1614 | 1615 | layer { 1616 | name: "ds_prob2" 1617 | type: "Softmax" 1618 | bottom: "ds_deconv2c" 1619 | top: "ds_prob2" 1620 | } 1621 | 1622 | ############################# CONV3 ############################ 1623 | 1624 | layer { 1625 | name: "conv3_1" 1626 | type: "Convolution" 1627 | bottom: "pool2" 1628 | top: "conv3_1" 1629 | param { 1630 | lr_mult: 1 1631 | decay_mult: 1 1632 | } 1633 | param { 1634 | lr_mult: 2 1635 | decay_mult: 0 1636 | } 1637 | convolution_param { 1638 | num_output: 256 1639 | pad: 1 1640 | kernel_size: 3 1641 | engine: CUDNN 1642 | } 1643 | } 1644 | 1645 | layer { 1646 | name: "dem_conv3_1_" 1647 | type: "Convolution" 1648 | bottom: "dem_pool2" 1649 | top: "dem_conv3_1" 1650 | param { 1651 | lr_mult: 1 1652 | decay_mult: 1 1653 | } 1654 | param { 1655 | lr_mult: 2 1656 | decay_mult: 0 1657 | } 1658 | convolution_param { 1659 | num_output: 256 1660 | pad: 1 1661 | kernel_size: 3 1662 | engine: CUDNN 1663 | weight_filler { 1664 | type: "xavier" 1665 | } 1666 | 
bias_filler { 1667 | type: "constant" 1668 | } 1669 | } 1670 | } 1671 | 1672 | ######################################### 1673 | 1674 | layer { 1675 | name: "relu3_1" 1676 | type: "ReLU" 1677 | bottom: "conv3_1" 1678 | top: "conv3_1" 1679 | } 1680 | 1681 | layer { 1682 | name: "dem_relu3_1" 1683 | type: "ReLU" 1684 | bottom: "dem_conv3_1" 1685 | top: "dem_conv3_1" 1686 | } 1687 | 1688 | 1689 | ######################################### 1690 | 1691 | layer { 1692 | name: "conv3_2" 1693 | type: "Convolution" 1694 | bottom: "conv3_1" 1695 | top: "conv3_2" 1696 | param { 1697 | lr_mult: 1 1698 | decay_mult: 1 1699 | } 1700 | param { 1701 | lr_mult: 2 1702 | decay_mult: 0 1703 | } 1704 | convolution_param { 1705 | num_output: 256 1706 | pad: 1 1707 | kernel_size: 3 1708 | engine: CUDNN 1709 | } 1710 | } 1711 | 1712 | layer { 1713 | name: "dem_conv3_2_" 1714 | type: "Convolution" 1715 | bottom: "dem_conv3_1" 1716 | top: "dem_conv3_2" 1717 | param { 1718 | lr_mult: 1 1719 | decay_mult: 1 1720 | } 1721 | param { 1722 | lr_mult: 2 1723 | decay_mult: 0 1724 | } 1725 | convolution_param { 1726 | num_output: 256 1727 | pad: 1 1728 | kernel_size: 3 1729 | engine: CUDNN 1730 | weight_filler { 1731 | type: "xavier" 1732 | } 1733 | bias_filler { 1734 | type: "constant" 1735 | } 1736 | } 1737 | } 1738 | 1739 | ######################################### 1740 | 1741 | layer { 1742 | name: "relu3_2" 1743 | type: "ReLU" 1744 | bottom: "conv3_2" 1745 | top: "conv3_2" 1746 | } 1747 | 1748 | layer { 1749 | name: "dem_relu3_2" 1750 | type: "ReLU" 1751 | bottom: "dem_conv3_2" 1752 | top: "dem_conv3_2" 1753 | } 1754 | 1755 | ######################################### 1756 | 1757 | layer { 1758 | name: "conv3_3" 1759 | type: "Convolution" 1760 | bottom: "conv3_2" 1761 | top: "conv3_3" 1762 | param { 1763 | lr_mult: 1 1764 | decay_mult: 1 1765 | } 1766 | param { 1767 | lr_mult: 2 1768 | decay_mult: 0 1769 | } 1770 | convolution_param { 1771 | num_output: 256 1772 | pad: 1 1773 | kernel_size: 3 
1774 | engine: CUDNN 1775 | } 1776 | } 1777 | 1778 | layer { 1779 | name: "dem_conv3_3_" 1780 | type: "Convolution" 1781 | bottom: "dem_conv3_2" 1782 | top: "dem_conv3_3" 1783 | param { 1784 | lr_mult: 1 1785 | decay_mult: 1 1786 | } 1787 | param { 1788 | lr_mult: 2 1789 | decay_mult: 0 1790 | } 1791 | convolution_param { 1792 | num_output: 256 1793 | pad: 1 1794 | kernel_size: 3 1795 | engine: CUDNN 1796 | weight_filler { 1797 | type: "xavier" 1798 | } 1799 | bias_filler { 1800 | type: "constant" 1801 | } 1802 | } 1803 | } 1804 | 1805 | ######################################### 1806 | 1807 | layer { 1808 | name: "relu3_3" 1809 | type: "ReLU" 1810 | bottom: "conv3_3" 1811 | top: "conv3_3" 1812 | } 1813 | 1814 | layer { 1815 | name: "dem_relu3_3" 1816 | type: "ReLU" 1817 | bottom: "dem_conv3_3" 1818 | top: "dem_conv3_3" 1819 | } 1820 | 1821 | ######################################### 1822 | 1823 | layer { 1824 | name: "pool3" 1825 | type: "Pooling" 1826 | bottom: "conv3_3" 1827 | top: "pool3" 1828 | pooling_param { 1829 | pool: MAX 1830 | kernel_size: 2 1831 | stride: 2 1832 | } 1833 | } 1834 | 1835 | layer { 1836 | name: "dem_pool3" 1837 | type: "Pooling" 1838 | bottom: "dem_conv3_3" 1839 | top: "dem_pool3" 1840 | pooling_param { 1841 | pool: MAX 1842 | kernel_size: 2 1843 | stride: 2 1844 | } 1845 | } 1846 | 1847 | 1848 | ################# Deep Supervision 3 ################# 1849 | 1850 | layer { 1851 | name: "ds_conv3_" 1852 | type: "Convolution" 1853 | bottom: "conv3_3" 1854 | top: "ds_conv3" 1855 | param { 1856 | lr_mult: 1 1857 | decay_mult: 1 1858 | } 1859 | param { 1860 | lr_mult: 2 1861 | decay_mult: 0 1862 | } 1863 | convolution_param { 1864 | num_output: 5 1865 | pad: 0 1866 | kernel_size: 1 1867 | weight_filler { 1868 | type: "xavier" 1869 | } 1870 | bias_filler { 1871 | type: "constant" 1872 | } 1873 | engine: CUDNN 1874 | } 1875 | } 1876 | 1877 | layer { 1878 | name: 'ds_deconv3_' 1879 | type: "Deconvolution" 1880 | bottom: 'ds_conv3' 1881 | top: 
'ds_deconv3' 1882 | param { 1883 | lr_mult: 5 1884 | decay_mult: 1 1885 | } 1886 | param { 1887 | lr_mult: 5 1888 | decay_mult: 0 1889 | } 1890 | convolution_param { 1891 | kernel_size: 8 1892 | stride: 4 1893 | num_output: 5 1894 | weight_filler: { 1895 | type: "bilinear" 1896 | } 1897 | } 1898 | } 1899 | 1900 | layer { 1901 | type: "Crop" 1902 | name: "crop3" 1903 | bottom: "ds_deconv3" 1904 | bottom: "image" 1905 | top: "ds_deconv3c" 1906 | crop_param { 1907 | axis: 2 1908 | offset: 102 1909 | offset: 102 1910 | } 1911 | } 1912 | 1913 | layer { 1914 | name: "ds_prob3" 1915 | type: "Softmax" 1916 | bottom: "ds_deconv3c" 1917 | top: "ds_prob3" 1918 | } 1919 | 1920 | 1921 | ######################### CONV4 ############################# 1922 | 1923 | layer { 1924 | name: "conv4_1" 1925 | type: "Convolution" 1926 | bottom: "pool3" 1927 | top: "conv4_1" 1928 | param { 1929 | lr_mult: 1 1930 | decay_mult: 1 1931 | } 1932 | param { 1933 | lr_mult: 2 1934 | decay_mult: 0 1935 | } 1936 | convolution_param { 1937 | num_output: 512 1938 | pad: 1 1939 | kernel_size: 3 1940 | engine: CUDNN 1941 | } 1942 | } 1943 | 1944 | layer { 1945 | name: "dem_conv4_1_" 1946 | type: "Convolution" 1947 | bottom: "dem_pool3" 1948 | top: "dem_conv4_1" 1949 | param { 1950 | lr_mult: 1 1951 | decay_mult: 1 1952 | } 1953 | param { 1954 | lr_mult: 2 1955 | decay_mult: 0 1956 | } 1957 | convolution_param { 1958 | num_output: 512 1959 | pad: 1 1960 | kernel_size: 3 1961 | engine: CUDNN 1962 | weight_filler { 1963 | type: "xavier" 1964 | } 1965 | bias_filler { 1966 | type: "constant" 1967 | } 1968 | } 1969 | } 1970 | 1971 | 1972 | ######################################### 1973 | 1974 | layer { 1975 | name: "relu4_1" 1976 | type: "ReLU" 1977 | bottom: "conv4_1" 1978 | top: "conv4_1" 1979 | } 1980 | 1981 | layer { 1982 | name: "dem_relu4_1" 1983 | type: "ReLU" 1984 | bottom: "dem_conv4_1" 1985 | top: "dem_conv4_1" 1986 | } 1987 | 1988 | 1989 | ######################################### 1990 | 1991 | 
layer { 1992 | name: "conv4_2" 1993 | type: "Convolution" 1994 | bottom: "conv4_1" 1995 | top: "conv4_2" 1996 | param { 1997 | lr_mult: 1 1998 | decay_mult: 1 1999 | } 2000 | param { 2001 | lr_mult: 2 2002 | decay_mult: 0 2003 | } 2004 | convolution_param { 2005 | num_output: 512 2006 | pad: 1 2007 | kernel_size: 3 2008 | engine: CUDNN 2009 | } 2010 | } 2011 | 2012 | layer { 2013 | name: "dem_conv4_2_" 2014 | type: "Convolution" 2015 | bottom: "dem_conv4_1" 2016 | top: "dem_conv4_2" 2017 | param { 2018 | lr_mult: 1 2019 | decay_mult: 1 2020 | } 2021 | param { 2022 | lr_mult: 2 2023 | decay_mult: 0 2024 | } 2025 | convolution_param { 2026 | num_output: 512 2027 | pad: 1 2028 | kernel_size: 3 2029 | engine: CUDNN 2030 | weight_filler { 2031 | type: "xavier" 2032 | } 2033 | bias_filler { 2034 | type: "constant" 2035 | } 2036 | } 2037 | } 2038 | 2039 | 2040 | ######################################### 2041 | 2042 | layer { 2043 | name: "relu4_2" 2044 | type: "ReLU" 2045 | bottom: "conv4_2" 2046 | top: "conv4_2" 2047 | } 2048 | 2049 | layer { 2050 | name: "dem_relu4_2" 2051 | type: "ReLU" 2052 | bottom: "dem_conv4_2" 2053 | top: "dem_conv4_2" 2054 | } 2055 | 2056 | 2057 | ######################################### 2058 | 2059 | layer { 2060 | name: "conv4_3" 2061 | type: "Convolution" 2062 | bottom: "conv4_2" 2063 | top: "conv4_3" 2064 | param { 2065 | lr_mult: 1 2066 | decay_mult: 1 2067 | } 2068 | param { 2069 | lr_mult: 2 2070 | decay_mult: 0 2071 | } 2072 | convolution_param { 2073 | num_output: 512 2074 | pad: 1 2075 | kernel_size: 3 2076 | engine: CUDNN 2077 | } 2078 | } 2079 | 2080 | layer { 2081 | name: "dem_conv4_3_" 2082 | type: "Convolution" 2083 | bottom: "dem_conv4_2" 2084 | top: "dem_conv4_3" 2085 | param { 2086 | lr_mult: 1 2087 | decay_mult: 1 2088 | } 2089 | param { 2090 | lr_mult: 2 2091 | decay_mult: 0 2092 | } 2093 | convolution_param { 2094 | num_output: 512 2095 | pad: 1 2096 | kernel_size: 3 2097 | engine: CUDNN 2098 | weight_filler { 2099 | type: 
"xavier" 2100 | } 2101 | bias_filler { 2102 | type: "constant" 2103 | } 2104 | } 2105 | } 2106 | 2107 | ######################################### 2108 | 2109 | layer { 2110 | name: "relu4_3" 2111 | type: "ReLU" 2112 | bottom: "conv4_3" 2113 | top: "conv4_3" 2114 | } 2115 | 2116 | layer { 2117 | name: "dem_relu4_3" 2118 | type: "ReLU" 2119 | bottom: "dem_conv4_3" 2120 | top: "dem_conv4_3" 2121 | } 2122 | 2123 | 2124 | ######################################### 2125 | 2126 | layer { 2127 | name: "pool4" 2128 | type: "Pooling" 2129 | bottom: "conv4_3" 2130 | top: "pool4" 2131 | pooling_param { 2132 | pool: MAX 2133 | kernel_size: 2 2134 | stride: 2 2135 | } 2136 | } 2137 | 2138 | layer { 2139 | name: "dem_pool4" 2140 | type: "Pooling" 2141 | bottom: "dem_conv4_3" 2142 | top: "dem_pool4" 2143 | pooling_param { 2144 | pool: MAX 2145 | kernel_size: 2 2146 | stride: 2 2147 | } 2148 | } 2149 | 2150 | ################# Deep Supervision 4 ################# 2151 | 2152 | layer { 2153 | name: "ds_conv4_" 2154 | type: "Convolution" 2155 | bottom: "conv4_3" 2156 | top: "ds_conv4" 2157 | param { 2158 | lr_mult: 1 2159 | decay_mult: 1 2160 | } 2161 | param { 2162 | lr_mult: 2 2163 | decay_mult: 0 2164 | } 2165 | convolution_param { 2166 | num_output: 5 2167 | pad: 0 2168 | kernel_size: 1 2169 | weight_filler { 2170 | type: "xavier" 2171 | } 2172 | bias_filler { 2173 | type: "constant" 2174 | } 2175 | engine: CUDNN 2176 | } 2177 | } 2178 | 2179 | layer { 2180 | name: 'ds_deconv4_' 2181 | type: "Deconvolution" 2182 | bottom: 'ds_conv4' 2183 | top: 'ds_deconv4' 2184 | param { 2185 | lr_mult: 5 2186 | decay_mult: 1 2187 | } 2188 | param { 2189 | lr_mult: 5 2190 | decay_mult: 0 2191 | } 2192 | convolution_param { 2193 | kernel_size: 16 2194 | stride: 8 2195 | num_output: 5 2196 | weight_filler: { 2197 | type: "bilinear" 2198 | } 2199 | } 2200 | } 2201 | 2202 | layer { 2203 | type: "Crop" 2204 | name: "crop4" 2205 | bottom: "ds_deconv4" 2206 | bottom: "image" 2207 | top: "ds_deconv4c" 
2208 | crop_param { 2209 | axis: 2 2210 | offset: 104 2211 | offset: 104 2212 | } 2213 | } 2214 | 2215 | layer { 2216 | name: "ds_prob4" 2217 | type: "Softmax" 2218 | bottom: "ds_deconv4c" 2219 | top: "ds_prob4" 2220 | } 2221 | 2222 | 2223 | ########################################## CONV 5 ########################################## 2224 | 2225 | layer { 2226 | name: "conv5_1" 2227 | type: "Convolution" 2228 | bottom: "pool4" 2229 | top: "conv5_1" 2230 | param { 2231 | lr_mult: 1 2232 | decay_mult: 1 2233 | } 2234 | param { 2235 | lr_mult: 2 2236 | decay_mult: 0 2237 | } 2238 | convolution_param { 2239 | num_output: 512 2240 | pad: 1 2241 | kernel_size: 3 2242 | engine: CUDNN 2243 | } 2244 | } 2245 | 2246 | layer { 2247 | name: "dem_conv5_1_" 2248 | type: "Convolution" 2249 | bottom: "dem_pool4" 2250 | top: "dem_conv5_1" 2251 | param { 2252 | lr_mult: 1 2253 | decay_mult: 1 2254 | } 2255 | param { 2256 | lr_mult: 2 2257 | decay_mult: 0 2258 | } 2259 | convolution_param { 2260 | num_output: 512 2261 | pad: 1 2262 | kernel_size: 3 2263 | engine: CUDNN 2264 | weight_filler { 2265 | type: "xavier" 2266 | } 2267 | bias_filler { 2268 | type: "constant" 2269 | } 2270 | } 2271 | } 2272 | 2273 | 2274 | ######################################### 2275 | 2276 | layer { 2277 | name: "relu5_1" 2278 | type: "ReLU" 2279 | bottom: "conv5_1" 2280 | top: "conv5_1" 2281 | } 2282 | 2283 | layer { 2284 | name: "dem_relu5_1" 2285 | type: "ReLU" 2286 | bottom: "dem_conv5_1" 2287 | top: "dem_conv5_1" 2288 | } 2289 | 2290 | ######################################### 2291 | 2292 | layer { 2293 | name: "conv5_2" 2294 | type: "Convolution" 2295 | bottom: "conv5_1" 2296 | top: "conv5_2" 2297 | param { 2298 | lr_mult: 1 2299 | decay_mult: 1 2300 | } 2301 | param { 2302 | lr_mult: 2 2303 | decay_mult: 0 2304 | } 2305 | convolution_param { 2306 | num_output: 512 2307 | pad: 1 2308 | kernel_size: 3 2309 | engine: CUDNN 2310 | } 2311 | } 2312 | 2313 | layer { 2314 | name: "dem_conv5_2_" 2315 | type: 
"Convolution" 2316 | bottom: "dem_conv5_1" 2317 | top: "dem_conv5_2" 2318 | param { 2319 | lr_mult: 1 2320 | decay_mult: 1 2321 | } 2322 | param { 2323 | lr_mult: 2 2324 | decay_mult: 0 2325 | } 2326 | convolution_param { 2327 | num_output: 512 2328 | pad: 1 2329 | kernel_size: 3 2330 | engine: CUDNN 2331 | weight_filler { 2332 | type: "xavier" 2333 | } 2334 | bias_filler { 2335 | type: "constant" 2336 | } 2337 | } 2338 | } 2339 | 2340 | ######################################### 2341 | 2342 | layer { 2343 | name: "relu5_2" 2344 | type: "ReLU" 2345 | bottom: "conv5_2" 2346 | top: "conv5_2" 2347 | } 2348 | 2349 | layer { 2350 | name: "dem_relu5_2" 2351 | type: "ReLU" 2352 | bottom: "dem_conv5_2" 2353 | top: "dem_conv5_2" 2354 | } 2355 | 2356 | ######################################### 2357 | 2358 | layer { 2359 | name: "conv5_3" 2360 | type: "Convolution" 2361 | bottom: "conv5_2" 2362 | top: "conv5_3" 2363 | param { 2364 | lr_mult: 1 2365 | decay_mult: 1 2366 | } 2367 | param { 2368 | lr_mult: 2 2369 | decay_mult: 0 2370 | } 2371 | convolution_param { 2372 | num_output: 512 2373 | pad: 1 2374 | kernel_size: 3 2375 | engine: CUDNN 2376 | } 2377 | } 2378 | 2379 | layer { 2380 | name: "dem_conv5_3_" 2381 | type: "Convolution" 2382 | bottom: "dem_conv5_2" 2383 | top: "dem_conv5_3" 2384 | param { 2385 | lr_mult: 1 2386 | decay_mult: 1 2387 | } 2388 | param { 2389 | lr_mult: 2 2390 | decay_mult: 0 2391 | } 2392 | convolution_param { 2393 | num_output: 512 2394 | pad: 1 2395 | kernel_size: 3 2396 | engine: CUDNN 2397 | weight_filler { 2398 | type: "xavier" 2399 | } 2400 | bias_filler { 2401 | type: "constant" 2402 | } 2403 | } 2404 | } 2405 | 2406 | ######################################### 2407 | 2408 | layer { 2409 | name: "relu5_3" 2410 | type: "ReLU" 2411 | bottom: "conv5_3" 2412 | top: "conv5_3" 2413 | } 2414 | 2415 | layer { 2416 | name: "dem_relu5_3" 2417 | type: "ReLU" 2418 | bottom: "dem_conv5_3" 2419 | top: "dem_conv5_3" 2420 | } 2421 | 2422 | 2423 | 
######################################### 2424 | 2425 | layer { 2426 | name: "pool5" 2427 | type: "Pooling" 2428 | bottom: "conv5_3" 2429 | top: "pool5" 2430 | pooling_param { 2431 | pool: MAX 2432 | kernel_size: 2 2433 | stride: 2 2434 | } 2435 | } 2436 | 2437 | layer { 2438 | name: "dem_pool5" 2439 | type: "Pooling" 2440 | bottom: "dem_conv5_3" 2441 | top: "dem_pool5" 2442 | pooling_param { 2443 | pool: MAX 2444 | kernel_size: 2 2445 | stride: 2 2446 | } 2447 | } 2448 | 2449 | ################# Deep Supervision 5 ################# 2450 | 2451 | layer { 2452 | name: "ds_conv5_" 2453 | type: "Convolution" 2454 | bottom: "conv5_3" 2455 | top: "ds_conv5" 2456 | param { 2457 | lr_mult: 1 2458 | decay_mult: 1 2459 | } 2460 | param { 2461 | lr_mult: 2 2462 | decay_mult: 0 2463 | } 2464 | convolution_param { 2465 | num_output: 5 2466 | pad: 0 2467 | kernel_size: 1 2468 | weight_filler { 2469 | type: "xavier" 2470 | } 2471 | bias_filler { 2472 | type: "constant" 2473 | } 2474 | } 2475 | } 2476 | 2477 | layer { 2478 | name: 'ds_deconv5_' 2479 | type: "Deconvolution" 2480 | bottom: 'ds_conv5' 2481 | top: 'ds_deconv5' 2482 | param { 2483 | lr_mult: 5 2484 | decay_mult: 1 2485 | } 2486 | param { 2487 | lr_mult: 5 2488 | decay_mult: 0 2489 | } 2490 | convolution_param { 2491 | kernel_size: 32 2492 | stride: 16 2493 | num_output: 5 2494 | weight_filler: { 2495 | type: "bilinear" 2496 | } 2497 | } 2498 | } 2499 | 2500 | layer { 2501 | type: "Crop" 2502 | name: "crop5" 2503 | bottom: "ds_deconv5" 2504 | bottom: "image" 2505 | top: "ds_deconv5c" 2506 | crop_param { 2507 | axis: 2 2508 | offset: 112 2509 | offset: 112 2510 | } 2511 | } 2512 | 2513 | layer { 2514 | name: "ds_prob5" 2515 | type: "Softmax" 2516 | bottom: "ds_deconv5c" 2517 | top: "ds_prob5" 2518 | } 2519 | 2520 | ######################################### FCN ######################################### 2521 | 2522 | layer { 2523 | name: "fc6" 2524 | type: "Convolution" 2525 | bottom: "pool5" 2526 | top: "fc6" 2527 | 
param { 2528 | lr_mult: 1 2529 | decay_mult: 1 2530 | } 2531 | param { 2532 | lr_mult: 2 2533 | decay_mult: 0 2534 | } 2535 | convolution_param { 2536 | num_output: 4096 2537 | kernel_size: 7 2538 | engine: CUDNN 2539 | } 2540 | } 2541 | 2542 | layer { 2543 | name: "dem_fc6" 2544 | type: "Convolution" 2545 | bottom: "dem_pool5" 2546 | top: "dem_fc6" 2547 | param { 2548 | lr_mult: 1 2549 | decay_mult: 1 2550 | } 2551 | param { 2552 | lr_mult: 2 2553 | decay_mult: 0 2554 | } 2555 | convolution_param { 2556 | num_output: 4096 2557 | kernel_size: 7 2558 | engine: CUDNN 2559 | } 2560 | } 2561 | 2562 | ######################################### 2563 | 2564 | layer { 2565 | name: "relu6" 2566 | type: "ReLU" 2567 | bottom: "fc6" 2568 | top: "fc6" 2569 | } 2570 | 2571 | layer { 2572 | name: "dem_relu6" 2573 | type: "ReLU" 2574 | bottom: "dem_fc6" 2575 | top: "dem_fc6" 2576 | } 2577 | 2578 | ######################################### 2579 | 2580 | layer { 2581 | name: "fc7" 2582 | type: "Convolution" 2583 | bottom: "fc6" 2584 | top: "fc7" 2585 | param { 2586 | lr_mult: 1 2587 | decay_mult: 1 2588 | } 2589 | param { 2590 | lr_mult: 2 2591 | decay_mult: 0 2592 | } 2593 | convolution_param { 2594 | num_output: 4096 2595 | kernel_size: 1 2596 | engine: CUDNN 2597 | } 2598 | } 2599 | 2600 | layer { 2601 | name: "dem_fc7" 2602 | type: "Convolution" 2603 | bottom: "dem_fc6" 2604 | top: "dem_fc7" 2605 | param { 2606 | lr_mult: 1 2607 | decay_mult: 1 2608 | } 2609 | param { 2610 | lr_mult: 2 2611 | decay_mult: 0 2612 | } 2613 | convolution_param { 2614 | num_output: 4096 2615 | kernel_size: 1 2616 | engine: CUDNN 2617 | } 2618 | } 2619 | 2620 | 2621 | ######################################### 2622 | 2623 | layer { 2624 | name: "relu7" 2625 | type: "ReLU" 2626 | bottom: "fc7" 2627 | top: "fc7" 2628 | } 2629 | 2630 | layer { 2631 | name: "dem_relu7" 2632 | type: "ReLU" 2633 | bottom: "dem_fc7" 2634 | top: "dem_fc7" 2635 | } 2636 | 2637 | 2638 | ######################################### 
FCN-16 ################################################ 2639 | 2640 | layer { 2641 | name: "score59" 2642 | type: "Convolution" 2643 | bottom: "fc7" 2644 | top: "score59" 2645 | param { 2646 | lr_mult: 1 2647 | decay_mult: 1 2648 | } 2649 | param { 2650 | lr_mult: 2 2651 | decay_mult: 0 2652 | } 2653 | convolution_param { 2654 | num_output: 60 2655 | kernel_size: 1 2656 | engine: CUDNN 2657 | } 2658 | } 2659 | 2660 | layer { 2661 | name: "dem-score59" 2662 | type: "Convolution" 2663 | bottom: "dem_fc7" 2664 | top: "dem-score59" 2665 | param { 2666 | lr_mult: 1 2667 | decay_mult: 1 2668 | } 2669 | param { 2670 | lr_mult: 2 2671 | decay_mult: 0 2672 | } 2673 | convolution_param { 2674 | num_output: 60 2675 | kernel_size: 1 2676 | engine: CUDNN 2677 | } 2678 | } 2679 | 2680 | layer { 2681 | name: "upscore16_" 2682 | type: "Deconvolution" 2683 | bottom: "score59" 2684 | top: "upscore16" 2685 | param { 2686 | lr_mult: 1 2687 | decay_mult: 1 2688 | } 2689 | convolution_param { 2690 | num_output: 5 2691 | kernel_size: 2 2692 | stride: 2 2693 | weight_filler: { type: "bilinear" } 2694 | bias_filler { type: "gaussian" std: 0.2 } 2695 | } 2696 | } 2697 | 2698 | layer { 2699 | name: "dem-upscore16_" 2700 | type: "Deconvolution" 2701 | bottom: "dem-score59" 2702 | top: "dem-upscore16" 2703 | param { 2704 | lr_mult: 1 2705 | decay_mult: 1 2706 | } 2707 | convolution_param { 2708 | num_output: 5 2709 | kernel_size: 2 2710 | stride: 2 2711 | weight_filler: { type: "bilinear" } 2712 | bias_filler { type: "gaussian" std: 0.2 } 2713 | } 2714 | } 2715 | 2716 | layer { 2717 | name: "score-pool4_" 2718 | type: "Convolution" 2719 | bottom: "pool4" 2720 | top: "score-pool4" 2721 | param { 2722 | lr_mult: 1 2723 | decay_mult: 1 2724 | } 2725 | param { 2726 | lr_mult: 2 2727 | decay_mult: 0 2728 | } 2729 | convolution_param { 2730 | num_output: 5 2731 | kernel_size: 1 2732 | engine: CUDNN 2733 | } 2734 | } 2735 | 2736 | layer { 2737 | name: "dem-score-pool4_" 2738 | type: "Convolution" 
2739 | bottom: "dem_pool4" 2740 | top: "dem-score-pool4" 2741 | param { 2742 | lr_mult: 1 2743 | decay_mult: 1 2744 | } 2745 | param { 2746 | lr_mult: 2 2747 | decay_mult: 0 2748 | } 2749 | convolution_param { 2750 | num_output: 5 2751 | kernel_size: 1 2752 | engine: CUDNN 2753 | } 2754 | } 2755 | 2756 | layer { 2757 | type: 'Crop' 2758 | name: 'crop-upscore16' 2759 | bottom: 'score-pool4' 2760 | bottom: 'upscore16' 2761 | top: 'score-pool4c' 2762 | crop_param { 2763 | axis: 1 2764 | offset: 0 2765 | offset: 5 2766 | offset: 5 2767 | } 2768 | } 2769 | 2770 | layer { 2771 | type: 'Crop' 2772 | name: 'dem-crop-upscore16' 2773 | bottom: 'dem-score-pool4' 2774 | bottom: 'dem-upscore16' 2775 | top: 'dem-score-pool4c' 2776 | crop_param { 2777 | axis: 1 2778 | offset: 0 2779 | offset: 5 2780 | offset: 5 2781 | } 2782 | } 2783 | ########################################## FCN-8 ####################################### 2784 | 2785 | layer { 2786 | name: "fuse-pool4" 2787 | type: "Eltwise" 2788 | bottom: "upscore16" 2789 | bottom: "score-pool4c" 2790 | top: "score-fused-pool4" 2791 | eltwise_param { 2792 | operation: SUM 2793 | } 2794 | } 2795 | 2796 | layer { 2797 | name: "dem-fuse-pool4" 2798 | type: "Eltwise" 2799 | bottom: "dem-upscore16" 2800 | bottom: "dem-score-pool4c" 2801 | top: "dem-score-fused-pool4" 2802 | eltwise_param { 2803 | operation: SUM 2804 | } 2805 | } 2806 | 2807 | layer { 2808 | name: "upscore8_" 2809 | type: "Deconvolution" 2810 | bottom: "score-fused-pool4" 2811 | top: "upscore8" 2812 | param { 2813 | lr_mult: 1 2814 | decay_mult: 1 2815 | } 2816 | convolution_param { 2817 | num_output: 5 2818 | bias_term: false 2819 | kernel_size: 1 # 4 2820 | stride: 2 # 2 2821 | weight_filler: { type: "bilinear" } 2822 | bias_filler { type: "gaussian" std: 0.2 } 2823 | } 2824 | } 2825 | 2826 | layer { 2827 | name: "dem-upscore8_" 2828 | type: "Deconvolution" 2829 | bottom: "dem-score-fused-pool4" 2830 | top: "dem-upscore8" 2831 | param { 2832 | lr_mult: 1 2833 | 
decay_mult: 1 2834 | } 2835 | convolution_param { 2836 | num_output: 5 2837 | bias_term: false 2838 | kernel_size: 1 2839 | stride: 2 2840 | weight_filler: { type: "bilinear" } 2841 | bias_filler { type: "gaussian" std: 0.2 } 2842 | } 2843 | } 2844 | 2845 | layer { 2846 | name: "score-pool3_" 2847 | type: "Convolution" 2848 | bottom: "pool3" 2849 | top: "score-pool3" 2850 | param { 2851 | lr_mult: 1 2852 | decay_mult: 1 2853 | } 2854 | param { 2855 | lr_mult: 2 2856 | decay_mult: 0 2857 | } 2858 | convolution_param { 2859 | num_output: 5 2860 | kernel_size: 1 2861 | engine: CUDNN 2862 | } 2863 | } 2864 | 2865 | layer { 2866 | name: "dem-score-pool3_" 2867 | type: "Convolution" 2868 | bottom: "dem_pool3" 2869 | top: "dem-score-pool3" 2870 | param { 2871 | lr_mult: 1 2872 | decay_mult: 1 2873 | } 2874 | param { 2875 | lr_mult: 2 2876 | decay_mult: 0 2877 | } 2878 | convolution_param { 2879 | num_output: 5 2880 | kernel_size: 1 2881 | engine: CUDNN 2882 | } 2883 | } 2884 | 2885 | layer { 2886 | type: 'Crop' 2887 | name: 'crop-upscore8' 2888 | bottom: 'score-pool3' 2889 | bottom: 'upscore8' 2890 | top: 'score-pool3c' 2891 | crop_param { 2892 | axis: 2 2893 | offset: 11 2894 | offset: 11 2895 | } 2896 | } 2897 | 2898 | layer { 2899 | type: 'Crop' 2900 | name: 'dem-crop-upscore8' 2901 | bottom: 'dem-score-pool3' 2902 | bottom: 'dem-upscore8' 2903 | top: 'dem-score-pool3c' 2904 | crop_param { 2905 | axis: 2 2906 | offset: 11 2907 | offset: 11 2908 | } 2909 | } 2910 | 2911 | ######################################################################################################## 2912 | 2913 | layer { 2914 | name: "fuse-pool3" 2915 | type: "Eltwise" 2916 | bottom: "upscore8" 2917 | bottom: "score-pool3c" 2918 | top: "score-fused-pool3" 2919 | eltwise_param { 2920 | operation: SUM 2921 | } 2922 | } 2923 | 2924 | layer { 2925 | name: "dem-fuse-pool3" 2926 | type: "Eltwise" 2927 | bottom: "dem-upscore8" 2928 | bottom: "dem-score-pool3c" 2929 | top: "dem-score-fused-pool3" 2930 
| eltwise_param { 2931 | operation: SUM 2932 | } 2933 | } 2934 | 2935 | ########################################## FCN-4 ####################################### 2936 | 2937 | layer { 2938 | name: "upscore4_" 2939 | type: "Deconvolution" 2940 | bottom: "score-fused-pool3" 2941 | top: "upscore4" 2942 | param { 2943 | lr_mult: 1 2944 | decay_mult: 1 2945 | } 2946 | convolution_param { 2947 | num_output: 5 2948 | bias_term: false 2949 | kernel_size: 2 2950 | stride: 2 2951 | pad: 2 2952 | weight_filler: { type: "bilinear" } 2953 | bias_filler { type: "gaussian" std: 0.2 } 2954 | } 2955 | } 2956 | 2957 | layer { 2958 | name: "dem-upscore4_" 2959 | type: "Deconvolution" 2960 | bottom: "dem-score-fused-pool3" 2961 | top: "dem-upscore4" 2962 | param { 2963 | lr_mult: 1 2964 | decay_mult: 1 2965 | } 2966 | convolution_param { 2967 | num_output: 5 2968 | bias_term: false 2969 | kernel_size: 2 2970 | stride: 2 2971 | pad: 2 2972 | weight_filler: { type: "bilinear" } 2973 | bias_filler { type: "gaussian" std: 0.2 } 2974 | } 2975 | } 2976 | 2977 | 2978 | layer { 2979 | name: "score-pool2_" 2980 | type: "Convolution" 2981 | bottom: "pool2" 2982 | top: "score-pool2" 2983 | param { 2984 | lr_mult: 1 2985 | decay_mult: 1 2986 | } 2987 | param { 2988 | lr_mult: 2 2989 | decay_mult: 0 2990 | } 2991 | convolution_param { 2992 | num_output: 5 2993 | pad: 1 2994 | kernel_size: 3 2995 | engine: CUDNN 2996 | } 2997 | } 2998 | 2999 | layer { 3000 | name: "dem-score-pool2_" 3001 | type: "Convolution" 3002 | bottom: "dem_pool2" 3003 | top: "dem-score-pool2" 3004 | param { 3005 | lr_mult: 1 3006 | decay_mult: 1 3007 | } 3008 | param { 3009 | lr_mult: 1 3010 | decay_mult: 0 3011 | } 3012 | convolution_param { 3013 | num_output: 5 3014 | pad: 1 3015 | kernel_size: 3 3016 | engine: CUDNN 3017 | } 3018 | } 3019 | 3020 | layer { 3021 | type: 'Crop' 3022 | name: 'crop-upscore4' 3023 | bottom: 'score-pool2' 3024 | bottom: 'upscore4' 3025 | top: 'score-pool2c' 3026 | crop_param { 3027 | axis: 2 3028 
| offset: 24 3029 | offset: 24 3030 | } 3031 | } 3032 | 3033 | 3034 | layer { 3035 | type: 'Crop' 3036 | name: 'dem-crop-upscore4' 3037 | bottom: 'dem-score-pool2' 3038 | bottom: 'dem-upscore4' 3039 | top: 'dem-score-pool2c' 3040 | crop_param { 3041 | axis: 2 3042 | offset: 24 3043 | offset: 24 3044 | } 3045 | } 3046 | 3047 | ######################################################################################################### 3048 | 3049 | layer { 3050 | name: "fuse-pool2" 3051 | type: "Eltwise" 3052 | bottom: "upscore4" 3053 | bottom: "score-pool2c" 3054 | top: "final" 3055 | eltwise_param { 3056 | operation: SUM 3057 | } 3058 | } 3059 | 3060 | layer { 3061 | name: "dem-fuse-pool2" 3062 | type: "Eltwise" 3063 | bottom: "dem-upscore4" 3064 | bottom: "dem-score-pool2c" 3065 | top: "dem-final" 3066 | eltwise_param { 3067 | operation: SUM 3068 | } 3069 | } 3070 | 3071 | # ================================= # 3072 | 3073 | layer { 3074 | name: "final" 3075 | type: "Concat" 3076 | bottom: "final" 3077 | bottom: "dem-final" 3078 | top: "final-merged" 3079 | } 3080 | 3081 | ########################################## FINAL ####################################### 3082 | 3083 | layer { 3084 | name: "conv-fcn" 3085 | type: "Deconvolution" 3086 | bottom: "final-merged" 3087 | top: "bigscore" 3088 | param { 3089 | lr_mult: 1 3090 | decay_mult: 1 3091 | } 3092 | param { 3093 | lr_mult: 2 3094 | decay_mult: 1 3095 | } 3096 | convolution_param { 3097 | num_output: 5 3098 | bias_term: true 3099 | kernel_size: 16 3100 | stride: 4 3101 | weight_filler: { type: "bilinear" } 3102 | bias_filler { type: "gaussian" std: 0.2 } 3103 | } 3104 | } 3105 | 3106 | layer { 3107 | name: "relu_fcn" 3108 | type: "ReLU" 3109 | bottom: "bigscore" 3110 | top: "score-fcn_" 3111 | } 3112 | 3113 | layer { 3114 | type: 'Crop' 3115 | name: 'crop' 3116 | bottom: 'score-fcn_' 3117 | bottom: 'image' 3118 | top: 'score-fcn' 3119 | crop_param { 3120 | axis: 2 3121 | offset: 20 3122 | offset: 20 3123 | } 3124 
| } 3125 | 3126 | # ============== LOSS FCN ================= # 3127 | 3128 | layer { 3129 | name: "prob-fcn" 3130 | type: "Softmax" 3131 | bottom: "score-fcn" 3132 | top: "prob-fcn" 3133 | } 3134 | 3135 | # ====================================== DS SCORE FUSS ================================== # 3136 | 3137 | 3138 | layer { 3139 | bottom: "edge" 3140 | top: "sedge" 3141 | name: "scale_edge" 3142 | type: "Scale" 3143 | scale_param { 3144 | bias_term: true 3145 | } 3146 | } 3147 | 3148 | layer { 3149 | name: "ds-concat" 3150 | type: "Concat" 3151 | bottom: "score-fcn" 3152 | bottom: "ds_deconv5c" 3153 | bottom: "ds_deconv4c" 3154 | bottom: "ds_deconv3c" 3155 | bottom: "ds_deconv2c" 3156 | bottom: "sedge" 3157 | top: "fuse-score" 3158 | concat_param { 3159 | axis: 1 3160 | } 3161 | } 3162 | 3163 | layer { 3164 | name: "fuse-conv-scores" 3165 | type: "Convolution" 3166 | bottom: "fuse-score" 3167 | top: "score-3x3" 3168 | param { 3169 | lr_mult: 1 3170 | decay_mult: 1 3171 | } 3172 | param { 3173 | lr_mult: 2 3174 | decay_mult: 0 3175 | } 3176 | convolution_param { 3177 | num_output: 10 3178 | pad: 1 3179 | kernel_size: 3 3180 | weight_filler { 3181 | type: "xavier" 3182 | } 3183 | engine: CUDNN 3184 | } 3185 | } 3186 | layer { 3187 | name: "relu-fuse-conv-scores" 3188 | type: "ReLU" 3189 | bottom: "score-3x3" 3190 | top: "score-3x3" 3191 | } 3192 | 3193 | 3194 | layer { 3195 | name: "ds-scores_" 3196 | type: "Convolution" 3197 | bottom: "score-3x3" 3198 | top: "score" 3199 | param { 3200 | lr_mult: 1 3201 | decay_mult: 1 3202 | } 3203 | #param { 3204 | # lr_mult: 2 3205 | #} 3206 | convolution_param { 3207 | num_output: 5 3208 | bias_term: false 3209 | kernel_size: 1 3210 | stride: 1 3211 | weight_filler { 3212 | type: "xavier" 3213 | } 3214 | bias_filler { 3215 | type: "constant" 3216 | value: 0 3217 | } 3218 | } 3219 | } 3220 | 3221 | # ====================================== CLASSIFICATION ===================================== # 3222 | 3223 | layer { 3224 | name: 
"prob" 3225 | type: "Softmax" 3226 | bottom: "score" 3227 | top: "prob" 3228 | } 3229 | -------------------------------------------------------------------------------- /ISPRS_Caffe_Models/Potsdam_Models/01_FCN-Model/link_to_weights: -------------------------------------------------------------------------------- 1 | 2 | https://drive.google.com/open?id=0ByVXVsnJKrUzM1dXUk9mUG85RFU -------------------------------------------------------------------------------- /ISPRS_Caffe_Models/Potsdam_Models/02_VGG-Model/link_to_weights: -------------------------------------------------------------------------------- 1 | 2 | https://drive.google.com/open?id=0ByVXVsnJKrUzM1dXUk9mUG85RFU -------------------------------------------------------------------------------- /ISPRS_Caffe_Models/Potsdam_Models/03_SegNet-Model/link_to_weights: -------------------------------------------------------------------------------- 1 | 2 | https://drive.google.com/open?id=0ByVXVsnJKrUzM1dXUk9mUG85RFU -------------------------------------------------------------------------------- /ISPRS_Caffe_Models/Potsdam_Models/INFO_Models: -------------------------------------------------------------------------------- 1 | 2 | 3 | =============================================== 4 | FCN-Vaihingen-Model 5 | =============================================== 6 | 7 | - Input size of data 256x256 pixels 8 | 9 | Data for training : 10 | 11 | image-lmdb 12 | dsm-lmdb 13 | dsm-lmdb 14 | labels-lmdb 15 | class-boundaries-lmdb 16 | 17 | 18 | - Inference size 256x256 pixels 19 | 20 | Data for inference : 21 | 22 | image-lmdb 23 | dsm-lmdb 24 | dsm-lmdb 25 | 26 | =============================================== 27 | VGG-Vaihingen-Model 28 | =============================================== 29 | 30 | - Input size of data 256x256 pixels 31 | 32 | Data for training : 33 | 34 | image-lmdb 35 | dsm-lmdb 36 | dsm-lmdb 37 | labels-lmdb 38 | class-boundaries-lmdb 39 | 40 | 41 | - Inference size 256x256 pixels 42 | 43 | Data for 
inference : 44 | 45 | image-lmdb 46 | dsm-lmdb 47 | dsm-lmdb 48 | 49 | 50 | =============================================== 51 | SegNet-Vaihingen-Model 52 | =============================================== 53 | 54 | - Input size of data 256x256 pixels 55 | 56 | Data for training : 57 | 58 | image-lmdb 59 | dsm-lmdb 60 | dsm-lmdb 61 | labels-lmdb 62 | class-boundaries-lmdb 63 | 64 | 65 | - Inference size 256x256 pixels 66 | 67 | Data for inference : 68 | 69 | image-lmdb 70 | dsm-lmdb 71 | dsm-lmdb 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /ISPRS_Caffe_Models/Vaihingen_Models/01_FCN-Model/link_to_weight_Vaihingen: -------------------------------------------------------------------------------- 1 | https://drive.google.com/open?id=0ByVXVsnJKrUzVkV1dXVTNFFzdG8 2 | -------------------------------------------------------------------------------- /ISPRS_Caffe_Models/Vaihingen_Models/02_VGG-Model/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG-ClassBoundary-Annotation-Deploy" 2 | 3 | input: 'image' 4 | input_dim: 1 5 | input_dim: 3 6 | input_dim: 259 7 | input_dim: 259 8 | 9 | input: 'dsm' 10 | input_dim: 1 11 | input_dim: 1 12 | input_dim: 259 13 | input_dim: 259 14 | 15 | input: 'ndsm' 16 | input_dim: 1 17 | input_dim: 1 18 | input_dim: 259 19 | input_dim: 259 20 | force_backward: true 21 | 22 | 23 | ############################# Concatenate nDSM and DSM ############################## 24 | 25 | layer { 26 | name: "dem-componets" 27 | type: "Concat" 28 | bottom: "dsm" 29 | bottom: "ndsm" 30 | top: "dem" 31 | } 32 | 33 | 34 | ################## Edge Model Inference ####################### 35 | 36 | 37 | layer { 38 | name: "edge_dem_conv1_1_" 39 | type: "Convolution" 40 | bottom: "dem" 41 | top: "ee_dem_conv1_1" 42 | param { 43 | lr_mult: 0 # 1 44 | decay_mult: 1 45 | } 46 | param { 47 | lr_mult: 0 # 2 
48 | decay_mult: 0 49 | } 50 | convolution_param { 51 | num_output: 64 52 | pad: 35 53 | kernel_size: 3 54 | weight_filler { 55 | type: "xavier" 56 | } 57 | engine: CUDNN 58 | } 59 | } 60 | 61 | layer { 62 | name: "edge_conv1_1" 63 | type: "Convolution" 64 | bottom: "data" 65 | top: "ee_data_conv1_1" 66 | param { 67 | lr_mult: 0 # 1 68 | decay_mult: 1 69 | } 70 | param { 71 | lr_mult: 0 # 2 72 | decay_mult: 0 73 | } 74 | convolution_param { 75 | num_output: 64 76 | pad: 35 77 | kernel_size: 3 78 | engine: CUDNN 79 | } 80 | } 81 | layer { 82 | name: "data_relu1_1" 83 | type: "ReLU" 84 | bottom: "ee_data_conv1_1" 85 | top: "ee_data_conv1_1" 86 | } 87 | layer { 88 | name: "dem_relu1_1" 89 | type: "ReLU" 90 | bottom: "ee_dem_conv1_1" 91 | top: "ee_dem_conv1_1" 92 | } 93 | layer { 94 | name: "edge_conv1_2" 95 | type: "Convolution" 96 | bottom: "ee_data_conv1_1" 97 | top: "ee_data_conv1_2" 98 | param { 99 | lr_mult: 0 # 1 100 | decay_mult: 1 101 | } 102 | param { 103 | lr_mult: 0 # 2 104 | decay_mult: 0 105 | } 106 | convolution_param { 107 | num_output: 64 108 | pad: 1 109 | kernel_size: 3 110 | engine: CUDNN 111 | } 112 | } 113 | layer { 114 | name: "edge_dem_conv1_2_" 115 | type: "Convolution" 116 | bottom: "ee_dem_conv1_1" 117 | top: "ee_dem_conv1_2" 118 | param { 119 | lr_mult: 0 # 1 120 | decay_mult: 1 121 | } 122 | param { 123 | lr_mult: 0 # 2 124 | decay_mult: 0 125 | } 126 | convolution_param { 127 | num_output: 64 128 | pad: 1 129 | kernel_size: 3 130 | weight_filler { 131 | type: "xavier" 132 | } 133 | engine: CUDNN 134 | } 135 | } 136 | layer { 137 | name: "data_relu1_2" 138 | type: "ReLU" 139 | bottom: "ee_data_conv1_2" 140 | top: "ee_data_conv1_2" 141 | } 142 | layer { 143 | name: "dem_relu1_2" 144 | type: "ReLU" 145 | bottom: "ee_dem_conv1_2" 146 | top: "ee_dem_conv1_2" 147 | } 148 | layer { 149 | name: "data_pool1" 150 | type: "Pooling" 151 | bottom: "ee_data_conv1_2" 152 | top: "ee_data_pool1" 153 | pooling_param { 154 | pool: MAX 155 | kernel_size: 2 
156 | stride: 2 157 | } 158 | } 159 | layer { 160 | name: "dem_pool1" 161 | type: "Pooling" 162 | bottom: "ee_dem_conv1_2" 163 | top: "ee_dem_pool1" 164 | pooling_param { 165 | pool: MAX 166 | kernel_size: 2 167 | stride: 2 168 | } 169 | } 170 | layer { 171 | name: "edge_conv2_1" 172 | type: "Convolution" 173 | bottom: "ee_data_pool1" 174 | top: "ee_data_conv2_1" 175 | param { 176 | lr_mult: 0 # 1 177 | decay_mult: 1 178 | } 179 | param { 180 | lr_mult: 0 # 2 181 | decay_mult: 0 182 | } 183 | convolution_param { 184 | num_output: 128 185 | pad: 1 186 | kernel_size: 3 187 | engine: CUDNN 188 | } 189 | } 190 | layer { 191 | name: "edge_dem_conv2_1_" 192 | type: "Convolution" 193 | bottom: "ee_dem_pool1" 194 | top: "ee_dem_conv2_1" 195 | param { 196 | lr_mult: 0 # 1 197 | decay_mult: 1 198 | } 199 | param { 200 | lr_mult: 0 # 2 201 | decay_mult: 0 202 | } 203 | convolution_param { 204 | num_output: 128 205 | pad: 1 206 | kernel_size: 3 207 | weight_filler { 208 | type: "xavier" 209 | } 210 | engine: CUDNN 211 | } 212 | } 213 | layer { 214 | name: "data_relu2_1" 215 | type: "ReLU" 216 | bottom: "ee_data_conv2_1" 217 | top: "ee_data_conv2_1" 218 | } 219 | layer { 220 | name: "dem_relu2_1" 221 | type: "ReLU" 222 | bottom: "ee_dem_conv2_1" 223 | top: "ee_dem_conv2_1" 224 | } 225 | layer { 226 | name: "edge_conv2_2" 227 | type: "Convolution" 228 | bottom: "ee_data_conv2_1" 229 | top: "ee_data_conv2_2" 230 | param { 231 | lr_mult: 0 # 1 232 | decay_mult: 1 233 | } 234 | param { 235 | lr_mult: 0 # 2 236 | decay_mult: 0 237 | } 238 | convolution_param { 239 | num_output: 128 240 | pad: 1 241 | kernel_size: 3 242 | engine: CUDNN 243 | } 244 | } 245 | layer { 246 | name: "edge_dem_conv2_2_" 247 | type: "Convolution" 248 | bottom: "ee_dem_conv2_1" 249 | top: "ee_dem_conv2_2" 250 | param { 251 | lr_mult: 0 # 1 252 | decay_mult: 1 253 | } 254 | param { 255 | lr_mult: 0 # 2 256 | decay_mult: 0 257 | } 258 | convolution_param { 259 | num_output: 128 260 | pad: 1 261 | kernel_size: 3 
262 | weight_filler { 263 | type: "xavier" 264 | } 265 | engine: CUDNN 266 | } 267 | } 268 | layer { 269 | name: "data_relu2_2" 270 | type: "ReLU" 271 | bottom: "ee_data_conv2_2" 272 | top: "ee_data_conv2_2" 273 | } 274 | layer { 275 | name: "dem_relu2_2" 276 | type: "ReLU" 277 | bottom: "ee_dem_conv2_2" 278 | top: "ee_dem_conv2_2" 279 | } 280 | layer { 281 | name: "data_pool2" 282 | type: "Pooling" 283 | bottom: "ee_data_conv2_2" 284 | top: "ee_data_pool2" 285 | pooling_param { 286 | pool: MAX 287 | kernel_size: 2 288 | stride: 2 289 | } 290 | } 291 | layer { 292 | name: "dem_pool2" 293 | type: "Pooling" 294 | bottom: "ee_dem_conv2_2" 295 | top: "ee_dem_pool2" 296 | pooling_param { 297 | pool: MAX 298 | kernel_size: 2 299 | stride: 2 300 | } 301 | } 302 | layer { 303 | name: "edge_conv3_1" 304 | type: "Convolution" 305 | bottom: "ee_data_pool2" 306 | top: "ee_data_conv3_1" 307 | param { 308 | lr_mult: 0 # 1 309 | decay_mult: 1 310 | } 311 | param { 312 | lr_mult: 0 # 2 313 | decay_mult: 0 314 | } 315 | convolution_param { 316 | num_output: 256 317 | pad: 1 318 | kernel_size: 3 319 | engine: CUDNN 320 | } 321 | } 322 | layer { 323 | name: "edge_dem_conv3_1_" 324 | type: "Convolution" 325 | bottom: "ee_dem_pool2" 326 | top: "ee_dem_conv3_1" 327 | param { 328 | lr_mult: 0 # 1 329 | decay_mult: 1 330 | } 331 | param { 332 | lr_mult: 0 # 2 333 | decay_mult: 0 334 | } 335 | convolution_param { 336 | num_output: 256 337 | pad: 1 338 | kernel_size: 3 339 | weight_filler { 340 | type: "xavier" 341 | } 342 | engine: CUDNN 343 | } 344 | } 345 | layer { 346 | name: "data_relu3_1" 347 | type: "ReLU" 348 | bottom: "ee_data_conv3_1" 349 | top: "ee_data_conv3_1" 350 | } 351 | layer { 352 | name: "dem_relu3_1" 353 | type: "ReLU" 354 | bottom: "ee_dem_conv3_1" 355 | top: "ee_dem_conv3_1" 356 | } 357 | layer { 358 | name: "edge_conv3_2" 359 | type: "Convolution" 360 | bottom: "ee_data_conv3_1" 361 | top: "ee_data_conv3_2" 362 | param { 363 | lr_mult: 0 # 1 364 | decay_mult: 1 365 | 
} 366 | param { 367 | lr_mult: 0 # 2 368 | decay_mult: 0 369 | } 370 | convolution_param { 371 | num_output: 256 372 | pad: 1 373 | kernel_size: 3 374 | engine: CUDNN 375 | } 376 | } 377 | layer { 378 | name: "edge_dem_conv3_2_" 379 | type: "Convolution" 380 | bottom: "ee_dem_conv3_1" 381 | top: "ee_dem_conv3_2" 382 | param { 383 | lr_mult: 0 # 1 384 | decay_mult: 1 385 | } 386 | param { 387 | lr_mult: 0 # 2 388 | decay_mult: 0 389 | } 390 | convolution_param { 391 | num_output: 256 392 | pad: 1 393 | kernel_size: 3 394 | weight_filler { 395 | type: "xavier" 396 | } 397 | engine: CUDNN 398 | } 399 | } 400 | layer { 401 | name: "data_relu3_2" 402 | type: "ReLU" 403 | bottom: "ee_data_conv3_2" 404 | top: "ee_data_conv3_2" 405 | } 406 | layer { 407 | name: "dem_relu3_2" 408 | type: "ReLU" 409 | bottom: "ee_dem_conv3_2" 410 | top: "ee_dem_conv3_2" 411 | } 412 | layer { 413 | name: "edge_conv3_3" 414 | type: "Convolution" 415 | bottom: "ee_data_conv3_2" 416 | top: "ee_data_conv3_3" 417 | param { 418 | lr_mult: 0 # 1 419 | decay_mult: 1 420 | } 421 | param { 422 | lr_mult: 0 # 2 423 | decay_mult: 0 424 | } 425 | convolution_param { 426 | num_output: 256 427 | pad: 1 428 | kernel_size: 3 429 | engine: CUDNN 430 | } 431 | } 432 | layer { 433 | name: "edge_dem_conv3_3_" 434 | type: "Convolution" 435 | bottom: "ee_dem_conv3_2" 436 | top: "ee_dem_conv3_3" 437 | param { 438 | lr_mult: 0 # 1 439 | decay_mult: 1 440 | } 441 | param { 442 | lr_mult: 0 # 2 443 | decay_mult: 0 444 | } 445 | convolution_param { 446 | num_output: 256 447 | pad: 1 448 | kernel_size: 3 449 | weight_filler { 450 | type: "xavier" 451 | } 452 | engine: CUDNN 453 | } 454 | } 455 | layer { 456 | name: "data_relu3_3" 457 | type: "ReLU" 458 | bottom: "ee_data_conv3_3" 459 | top: "ee_data_conv3_3" 460 | } 461 | layer { 462 | name: "dem_relu3_3" 463 | type: "ReLU" 464 | bottom: "ee_dem_conv3_3" 465 | top: "ee_dem_conv3_3" 466 | } 467 | layer { 468 | name: "data_pool3" 469 | type: "Pooling" 470 | bottom: 
"ee_data_conv3_3" 471 | top: "ee_data_pool3" 472 | pooling_param { 473 | pool: MAX 474 | kernel_size: 2 475 | stride: 2 476 | } 477 | } 478 | layer { 479 | name: "dem_pool3" 480 | type: "Pooling" 481 | bottom: "ee_dem_conv3_3" 482 | top: "ee_dem_pool3" 483 | pooling_param { 484 | pool: MAX 485 | kernel_size: 2 486 | stride: 2 487 | } 488 | } 489 | layer { 490 | name: "edge_conv4_1" 491 | type: "Convolution" 492 | bottom: "ee_data_pool3" 493 | top: "ee_data_conv4_1" 494 | param { 495 | lr_mult: 0 # 1 496 | decay_mult: 1 497 | } 498 | param { 499 | lr_mult: 0 # 2 500 | decay_mult: 0 501 | } 502 | convolution_param { 503 | num_output: 512 504 | pad: 1 505 | kernel_size: 3 506 | engine: CUDNN 507 | } 508 | } 509 | layer { 510 | name: "edge_dem_conv4_1_" 511 | type: "Convolution" 512 | bottom: "ee_dem_pool3" 513 | top: "ee_dem_conv4_1" 514 | param { 515 | lr_mult: 0 # 1 516 | decay_mult: 1 517 | } 518 | param { 519 | lr_mult: 0 # 2 520 | decay_mult: 0 521 | } 522 | convolution_param { 523 | num_output: 512 524 | pad: 1 525 | kernel_size: 3 526 | weight_filler { 527 | type: "xavier" 528 | } 529 | engine: CUDNN 530 | } 531 | } 532 | layer { 533 | name: "data_relu4_1" 534 | type: "ReLU" 535 | bottom: "ee_data_conv4_1" 536 | top: "ee_data_conv4_1" 537 | } 538 | layer { 539 | name: "dem_relu4_1" 540 | type: "ReLU" 541 | bottom: "ee_dem_conv4_1" 542 | top: "ee_dem_conv4_1" 543 | } 544 | layer { 545 | name: "edge_conv4_2" 546 | type: "Convolution" 547 | bottom: "ee_data_conv4_1" 548 | top: "ee_data_conv4_2" 549 | param { 550 | lr_mult: 0 # 1 551 | decay_mult: 1 552 | } 553 | param { 554 | lr_mult: 0 # 2 555 | decay_mult: 0 556 | } 557 | convolution_param { 558 | num_output: 512 559 | pad: 1 560 | kernel_size: 3 561 | engine: CUDNN 562 | } 563 | } 564 | layer { 565 | name: "edge_dem_conv4_2_" 566 | type: "Convolution" 567 | bottom: "ee_dem_conv4_1" 568 | top: "ee_dem_conv4_2" 569 | param { 570 | lr_mult: 0 # 1 571 | decay_mult: 1 572 | } 573 | param { 574 | lr_mult: 0 # 2 575 | 
decay_mult: 0 576 | } 577 | convolution_param { 578 | num_output: 512 579 | pad: 1 580 | kernel_size: 3 581 | weight_filler { 582 | type: "xavier" 583 | } 584 | engine: CUDNN 585 | } 586 | } 587 | layer { 588 | name: "data_relu4_2" 589 | type: "ReLU" 590 | bottom: "ee_data_conv4_2" 591 | top: "ee_data_conv4_2" 592 | } 593 | layer { 594 | name: "dem_relu4_2" 595 | type: "ReLU" 596 | bottom: "ee_dem_conv4_2" 597 | top: "ee_dem_conv4_2" 598 | } 599 | layer { 600 | name: "edge_conv4_3" 601 | type: "Convolution" 602 | bottom: "ee_data_conv4_2" 603 | top: "ee_data_conv4_3" 604 | param { 605 | lr_mult: 0 # 1 606 | decay_mult: 1 607 | } 608 | param { 609 | lr_mult: 0 # 2 610 | decay_mult: 0 611 | } 612 | convolution_param { 613 | num_output: 512 614 | pad: 1 615 | kernel_size: 3 616 | engine: CUDNN 617 | } 618 | } 619 | layer { 620 | name: "edge_dem_conv4_3_" 621 | type: "Convolution" 622 | bottom: "ee_dem_conv4_2" 623 | top: "ee_dem_conv4_3" 624 | param { 625 | lr_mult: 0 # 1 626 | decay_mult: 1 627 | } 628 | param { 629 | lr_mult: 0 # 2 630 | decay_mult: 0 631 | } 632 | convolution_param { 633 | num_output: 512 634 | pad: 1 635 | kernel_size: 3 636 | weight_filler { 637 | type: "xavier" 638 | } 639 | engine: CUDNN 640 | } 641 | } 642 | layer { 643 | name: "data_relu4_3" 644 | type: "ReLU" 645 | bottom: "ee_data_conv4_3" 646 | top: "ee_data_conv4_3" 647 | } 648 | layer { 649 | name: "dem_relu4_3" 650 | type: "ReLU" 651 | bottom: "ee_dem_conv4_3" 652 | top: "ee_dem_conv4_3" 653 | } 654 | layer { 655 | name: "data_pool4" 656 | type: "Pooling" 657 | bottom: "ee_data_conv4_3" 658 | top: "ee_data_pool4" 659 | pooling_param { 660 | pool: MAX 661 | kernel_size: 2 662 | stride: 2 663 | } 664 | } 665 | layer { 666 | name: "dem_pool4" 667 | type: "Pooling" 668 | bottom: "ee_dem_conv4_3" 669 | top: "ee_dem_pool4" 670 | pooling_param { 671 | pool: MAX 672 | kernel_size: 2 673 | stride: 2 674 | } 675 | } 676 | layer { 677 | name: "edge_conv5_1" 678 | type: "Convolution" 679 | bottom: 
"ee_data_pool4" 680 | top: "ee_data_conv5_1" 681 | param { 682 | lr_mult: 0 # 1 683 | decay_mult: 1 684 | } 685 | param { 686 | lr_mult: 0 # 2 687 | decay_mult: 0 688 | } 689 | convolution_param { 690 | num_output: 512 691 | pad: 1 692 | kernel_size: 3 693 | engine: CUDNN 694 | } 695 | } 696 | layer { 697 | name: "edge_dem_conv5_1_" 698 | type: "Convolution" 699 | bottom: "ee_dem_pool4" 700 | top: "ee_dem_conv5_1" 701 | param { 702 | lr_mult: 0 # 1 703 | decay_mult: 1 704 | } 705 | param { 706 | lr_mult: 0 # 2 707 | decay_mult: 0 708 | } 709 | convolution_param { 710 | num_output: 512 711 | pad: 1 712 | kernel_size: 3 713 | weight_filler { 714 | type: "xavier" 715 | } 716 | engine: CUDNN 717 | } 718 | } 719 | layer { 720 | name: "data_relu5_1" 721 | type: "ReLU" 722 | bottom: "ee_data_conv5_1" 723 | top: "ee_data_conv5_1" 724 | } 725 | layer { 726 | name: "dem_relu5_1" 727 | type: "ReLU" 728 | bottom: "ee_dem_conv5_1" 729 | top: "ee_dem_conv5_1" 730 | } 731 | layer { 732 | name: "edge_conv5_2" 733 | type: "Convolution" 734 | bottom: "ee_data_conv5_1" 735 | top: "ee_data_conv5_2" 736 | param { 737 | lr_mult: 0 # 1 738 | decay_mult: 1 739 | } 740 | param { 741 | lr_mult: 0 # 2 742 | decay_mult: 0 743 | } 744 | convolution_param { 745 | num_output: 512 746 | pad: 1 747 | kernel_size: 3 748 | engine: CUDNN 749 | } 750 | } 751 | layer { 752 | name: "edge_dem_conv5_2_" 753 | type: "Convolution" 754 | bottom: "ee_dem_conv5_1" 755 | top: "ee_dem_conv5_2" 756 | param { 757 | lr_mult: 0 # 1 758 | decay_mult: 1 759 | } 760 | param { 761 | lr_mult: 0 # 2 762 | decay_mult: 0 763 | } 764 | convolution_param { 765 | num_output: 512 766 | pad: 1 767 | kernel_size: 3 768 | weight_filler { 769 | type: "xavier" 770 | } 771 | engine: CUDNN 772 | } 773 | } 774 | layer { 775 | name: "data_relu5_2" 776 | type: "ReLU" 777 | bottom: "ee_data_conv5_2" 778 | top: "ee_data_conv5_2" 779 | } 780 | layer { 781 | name: "dem_relu5_2" 782 | type: "ReLU" 783 | bottom: "ee_dem_conv5_2" 784 | top: 
"ee_dem_conv5_2" 785 | } 786 | layer { 787 | name: "edge_conv5_3" 788 | type: "Convolution" 789 | bottom: "ee_data_conv5_2" 790 | top: "ee_data_conv5_3" 791 | param { 792 | lr_mult: 0 # 1 793 | decay_mult: 1 794 | } 795 | param { 796 | lr_mult: 0 # 2 797 | decay_mult: 0 798 | } 799 | convolution_param { 800 | num_output: 512 801 | pad: 1 802 | kernel_size: 3 803 | engine: CUDNN 804 | } 805 | } 806 | layer { 807 | name: "edge_dem_conv5_3_" 808 | type: "Convolution" 809 | bottom: "ee_dem_conv5_2" 810 | top: "ee_dem_conv5_3" 811 | param { 812 | lr_mult: 0 # 1 813 | decay_mult: 1 814 | } 815 | param { 816 | lr_mult: 0 # 2 817 | decay_mult: 0 818 | } 819 | convolution_param { 820 | num_output: 512 821 | pad: 1 822 | kernel_size: 3 823 | weight_filler { 824 | type: "xavier" 825 | } 826 | engine: CUDNN 827 | } 828 | } 829 | layer { 830 | name: "data_relu5_3" 831 | type: "ReLU" 832 | bottom: "ee_data_conv5_3" 833 | top: "ee_data_conv5_3" 834 | } 835 | layer { 836 | name: "dem_relu5_3" 837 | type: "ReLU" 838 | bottom: "ee_dem_conv5_3" 839 | top: "ee_dem_conv5_3" 840 | } 841 | 842 | #################### DSN-1 #################### 843 | 844 | layer { 845 | name: "dsn-1-fuse" 846 | type: "Concat" 847 | bottom: "ee_data_conv1_2" 848 | bottom: "ee_dem_conv1_2" 849 | top: "score-dsn1-merge" 850 | concat_param { 851 | concat_dim: 1 852 | } 853 | } 854 | layer { 855 | name: "edge_score-dsn1" 856 | type: "Convolution" 857 | bottom: "score-dsn1-merge" 858 | top: "score-dsn1-up" 859 | param { 860 | lr_mult: 0 # 1 861 | decay_mult: 1 862 | } 863 | param { 864 | lr_mult: 0 # 2 865 | decay_mult: 0 866 | } 867 | convolution_param { 868 | num_output: 1 869 | kernel_size: 1 870 | engine: CUDNN 871 | } 872 | } 873 | layer { 874 | name: "crop" 875 | type: "Crop" 876 | bottom: "score-dsn1-up" 877 | bottom: "data" 878 | top: "score-dsn1" 879 | crop_param { 880 | axis: 2 881 | offset: 34 882 | offset: 34 883 | } 884 | } 885 | 886 | layer { name: "loss-dns1" type: "EuclideanLoss" bottom: 
"score-dsn1" bottom: "edge-label" top: "dsn1_loss" loss_weight: 1 } 887 | 888 | #################### DSN-2 #################### 889 | 890 | layer { 891 | name: "dsn-2-fuse" 892 | type: "Concat" 893 | bottom: "ee_data_conv2_2" 894 | bottom: "ee_dem_conv2_2" 895 | top: "score-dsn2-merge" 896 | concat_param { 897 | concat_dim: 1 898 | } 899 | } 900 | layer { 901 | name: "edge_score-dsn2" 902 | type: "Convolution" 903 | bottom: "score-dsn2-merge" 904 | top: "score-dsn2_" 905 | param { 906 | lr_mult: 0 # 1 907 | decay_mult: 1 908 | } 909 | param { 910 | lr_mult: 0 # 2 911 | decay_mult: 0 912 | } 913 | convolution_param { 914 | num_output: 1 915 | kernel_size: 1 916 | engine: CUDNN 917 | } 918 | } 919 | layer { 920 | name: "edge_upsample_2" 921 | type: "Deconvolution" 922 | bottom: "score-dsn2_" 923 | top: "score-dsn2-up" 924 | param { 925 | lr_mult: 0 # 1 926 | decay_mult: 1 927 | } 928 | param { 929 | lr_mult: 0 # 2 930 | decay_mult: 0 931 | } 932 | convolution_param { 933 | num_output: 1 934 | kernel_size: 4 935 | stride: 2 936 | } 937 | } 938 | layer { 939 | name: "crop" 940 | type: "Crop" 941 | bottom: "score-dsn2-up" 942 | bottom: "data" 943 | top: "score-dsn2" 944 | crop_param { 945 | axis: 2 946 | offset: 35 947 | offset: 35 948 | } 949 | } 950 | 951 | layer { name: "loss-dns2" type: "EuclideanLoss" bottom: "score-dsn2" bottom: "edge-label" top: "dsn2_loss" loss_weight: 1} 952 | 953 | #################### DSN-3 #################### 954 | 955 | layer { 956 | name: "dsn-3-fuse" 957 | type: "Concat" 958 | bottom: "ee_data_conv3_3" 959 | bottom: "ee_dem_conv3_3" 960 | top: "score-dsn3-merge" 961 | concat_param { 962 | concat_dim: 1 963 | } 964 | } 965 | layer { 966 | name: "edge_score-dsn3" 967 | type: "Convolution" 968 | bottom: "score-dsn3-merge" 969 | top: "score-dsn3_" 970 | param { 971 | lr_mult: 0 # 1 972 | decay_mult: 1 973 | } 974 | param { 975 | lr_mult: 0 # 2 976 | decay_mult: 0 977 | } 978 | convolution_param { 979 | num_output: 1 980 | kernel_size: 1 981 
| engine: CUDNN 982 | } 983 | } 984 | layer { 985 | name: "edge_upsample_4" 986 | type: "Deconvolution" 987 | bottom: "score-dsn3_" 988 | top: "score-dsn3-up" 989 | param { 990 | lr_mult: 0 # 1 991 | decay_mult: 1 992 | } 993 | param { 994 | lr_mult: 0 # 2 995 | decay_mult: 0 996 | } 997 | convolution_param { 998 | num_output: 1 999 | kernel_size: 8 1000 | stride: 4 1001 | } 1002 | } 1003 | layer { 1004 | name: "crop" 1005 | type: "Crop" 1006 | bottom: "score-dsn3-up" 1007 | bottom: "data" 1008 | top: "score-dsn3" 1009 | crop_param { 1010 | axis: 2 1011 | offset: 36 1012 | offset: 36 1013 | } 1014 | } 1015 | 1016 | layer { name: "loss-dns3" type: "EuclideanLoss" bottom: "score-dsn3" bottom: "edge-label" top: "dsn3_loss" loss_weight: 1} 1017 | 1018 | 1019 | #################### DSN-4 #################### 1020 | 1021 | layer { 1022 | name: "dsn-4-fuse" 1023 | type: "Concat" 1024 | bottom: "ee_data_conv4_3" 1025 | bottom: "ee_dem_conv4_3" 1026 | top: "score-dsn4-merge" 1027 | concat_param { 1028 | concat_dim: 1 1029 | } 1030 | } 1031 | layer { 1032 | name: "edge_score-dsn4" 1033 | type: "Convolution" 1034 | bottom: "score-dsn4-merge" 1035 | top: "score-dsn4_" 1036 | param { 1037 | lr_mult: 0 # 1 1038 | decay_mult: 1 1039 | } 1040 | param { 1041 | lr_mult: 0 # 2 1042 | decay_mult: 0 1043 | } 1044 | convolution_param { 1045 | num_output: 1 1046 | kernel_size: 1 1047 | engine: CUDNN 1048 | } 1049 | } 1050 | layer { 1051 | name: "edge_upsample_8" 1052 | type: "Deconvolution" 1053 | bottom: "score-dsn4_" 1054 | top: "score-dsn4-up" 1055 | param { 1056 | lr_mult: 0 # 1 1057 | decay_mult: 1 1058 | } 1059 | param { 1060 | lr_mult: 0 # 2 1061 | decay_mult: 0 1062 | } 1063 | convolution_param { 1064 | num_output: 1 1065 | kernel_size: 16 1066 | stride: 8 1067 | } 1068 | } 1069 | layer { 1070 | name: "crop" 1071 | type: "Crop" 1072 | bottom: "score-dsn4-up" 1073 | bottom: "data" 1074 | top: "score-dsn4" 1075 | crop_param { 1076 | axis: 2 1077 | offset: 40 1078 | offset: 40 1079 
| } 1080 | } 1081 | 1082 | layer { name: "loss-dns4" type: "EuclideanLoss" bottom: "score-dsn4" bottom: "edge-label" top: "dsn4_loss" loss_weight: 1} 1083 | 1084 | 1085 | #################### DSN-5 #################### 1086 | 1087 | layer { 1088 | name: "dsn-5-fuse" 1089 | type: "Concat" 1090 | bottom: "ee_data_conv5_3" 1091 | bottom: "ee_dem_conv5_3" 1092 | top: "score-dsn5-merge" 1093 | concat_param { 1094 | concat_dim: 1 1095 | } 1096 | } 1097 | layer { 1098 | name: "edge_score-dsn5" 1099 | type: "Convolution" 1100 | bottom: "score-dsn5-merge" 1101 | top: "score-dsn5_" 1102 | param { 1103 | lr_mult: 0 # 1 1104 | decay_mult: 1 1105 | } 1106 | param { 1107 | lr_mult: 0 # 2 1108 | decay_mult: 0 1109 | } 1110 | convolution_param { 1111 | num_output: 1 1112 | kernel_size: 1 1113 | engine: CUDNN 1114 | } 1115 | } 1116 | layer { 1117 | name: "edge_upsample_16" 1118 | type: "Deconvolution" 1119 | bottom: "score-dsn5_" 1120 | top: "score-dsn5-up" 1121 | param { 1122 | lr_mult: 0 # 1 1123 | decay_mult: 1 1124 | } 1125 | param { 1126 | lr_mult: 0 # 2 1127 | decay_mult: 0 1128 | } 1129 | convolution_param { 1130 | num_output: 1 1131 | kernel_size: 32 1132 | stride: 16 1133 | } 1134 | } 1135 | layer { 1136 | name: "crop" 1137 | type: "Crop" 1138 | bottom: "score-dsn5-up" 1139 | bottom: "data" 1140 | top: "score-dsn5" 1141 | crop_param { 1142 | axis: 2 1143 | offset: 48 1144 | offset: 48 1145 | } 1146 | } 1147 | 1148 | layer { name: "loss-dns5" type: "EuclideanLoss" bottom: "score-dsn5" bottom: "edge-label" top: "dsn5_loss" loss_weight: 1 } 1149 | 1150 | 1151 | #################### FUSE DSN #################### 1152 | 1153 | layer { 1154 | name: "concat" 1155 | type: "Concat" 1156 | bottom: "score-dsn1" 1157 | bottom: "score-dsn2" 1158 | bottom: "score-dsn3" 1159 | bottom: "score-dsn4" 1160 | bottom: "score-dsn5" 1161 | top: "concat-score" 1162 | concat_param { 1163 | concat_dim: 1 1164 | } 1165 | } 1166 | layer { 1167 | name: "edge_new-score-weighting" 1168 | type: 
"Convolution" 1169 | bottom: "concat-score" 1170 | top: "edges" 1171 | param { 1172 | lr_mult: 0 # 10 1173 | decay_mult: 1 1174 | } 1175 | param { 1176 | lr_mult: 0 # 20 1177 | decay_mult: 0 1178 | } 1179 | convolution_param { 1180 | num_output: 1 1181 | kernel_size: 1 1182 | engine: CUDNN 1183 | } 1184 | } 1185 | 1186 | layer { name: "edge-loss" type: "EuclideanLoss" bottom: "edges" bottom: "edge-label" top: "edge-loss" loss_weight: 1 } 1187 | 1188 | 1189 | ################################################################################################### 1190 | ############################## Concatenate Edges to Train Data ################################### 1191 | ################################################################################################### 1192 | 1193 | layer { 1194 | name: "annotation-image-data" 1195 | type: "Concat" 1196 | bottom: "data" 1197 | bottom: "edges" 1198 | top: "data-and-edges" 1199 | } 1200 | 1201 | layer { 1202 | name: "annotation-dem-data" 1203 | type: "Concat" 1204 | bottom: "dem" 1205 | bottom: "edges" 1206 | top: "dem-and-edges" 1207 | } 1208 | 1209 | 1210 | ################################################################################################### 1211 | ############################# Trainable Annotation Network #################################### 1212 | ################################################################################################### 1213 | 1214 | layer { 1215 | name: "conv1_1__" 1216 | type: "Convolution" 1217 | bottom: "data-and-edges" 1218 | top: "conv1_1" 1219 | param { 1220 | lr_mult: 1 1221 | decay_mult: 1 1222 | } 1223 | param { 1224 | lr_mult: 2 1225 | decay_mult: 0 1226 | } 1227 | convolution_param { 1228 | num_output: 64 1229 | pad: 100 1230 | kernel_size: 3 1231 | weight_filler {type: "xavier"} 1232 | engine: CUDNN 1233 | } 1234 | } 1235 | 1236 | layer { 1237 | name: "dem_conv1_1__" 1238 | type: "Convolution" 1239 | bottom: "dem-and-edges" 1240 | top: "dem_conv1_1" 1241 | param { 
1242 | lr_mult: 1 1243 | decay_mult: 1 1244 | } 1245 | param { 1246 | lr_mult: 2 1247 | decay_mult: 0 1248 | } 1249 | convolution_param { 1250 | num_output: 64 1251 | pad: 100 1252 | kernel_size: 3 1253 | engine: CUDNN 1254 | weight_filler {type: "xavier"} 1255 | bias_filler { 1256 | type: "constant" 1257 | } 1258 | } 1259 | } 1260 | 1261 | ######################################### 1262 | 1263 | layer { 1264 | name: "relu1_1" 1265 | type: "ReLU" 1266 | bottom: "conv1_1" 1267 | top: "conv1_1" 1268 | } 1269 | 1270 | layer { 1271 | name: "dem_relu1_1" 1272 | type: "ReLU" 1273 | bottom: "dem_conv1_1" 1274 | top: "dem_conv1_1" 1275 | } 1276 | 1277 | ######################################### 1278 | 1279 | layer { 1280 | name: "conv1_2" 1281 | type: "Convolution" 1282 | bottom: "conv1_1" 1283 | top: "conv1_2" 1284 | param { 1285 | lr_mult: 1 1286 | decay_mult: 1 1287 | } 1288 | param { 1289 | lr_mult: 2 1290 | decay_mult: 0 1291 | } 1292 | convolution_param { 1293 | num_output: 64 1294 | pad: 1 1295 | kernel_size: 3 1296 | engine: CUDNN 1297 | } 1298 | } 1299 | 1300 | layer { 1301 | name: "dem_conv1_2" 1302 | type: "Convolution" 1303 | bottom: "dem_conv1_1" 1304 | top: "dem_conv1_2" 1305 | param { 1306 | lr_mult: 1 1307 | decay_mult: 1 1308 | } 1309 | param { 1310 | lr_mult: 2 1311 | decay_mult: 0 1312 | } 1313 | convolution_param { 1314 | num_output: 64 1315 | pad: 1 1316 | kernel_size: 3 1317 | engine: CUDNN 1318 | weight_filler {type: "xavier"} 1319 | bias_filler { type: "constant"} 1320 | } 1321 | } 1322 | 1323 | ######################################### 1324 | 1325 | layer { 1326 | name: "relu1_2" 1327 | type: "ReLU" 1328 | bottom: "conv1_2" 1329 | top: "conv1_2" 1330 | } 1331 | 1332 | layer { 1333 | name: "dem_relu1_2" 1334 | type: "ReLU" 1335 | bottom: "dem_conv1_2" 1336 | top: "dem_conv1_2" 1337 | } 1338 | 1339 | ######################################### 1340 | 1341 | layer { 1342 | name: "pool1" 1343 | type: "Pooling" 1344 | bottom: "conv1_2" 1345 | top: "pool1" 
1346 | pooling_param { 1347 | pool: MAX 1348 | kernel_size: 2 1349 | stride: 2 1350 | } 1351 | } 1352 | 1353 | layer { 1354 | name: "dem_pool1" 1355 | type: "Pooling" 1356 | bottom: "dem_conv1_2" 1357 | top: "dem_pool1" 1358 | pooling_param { 1359 | pool: MAX 1360 | kernel_size: 2 1361 | stride: 2 1362 | } 1363 | } 1364 | 1365 | ######################################### 1366 | 1367 | layer { 1368 | name: "conv2_1" 1369 | type: "Convolution" 1370 | bottom: "pool1" 1371 | top: "conv2_1" 1372 | param { 1373 | lr_mult: 1 1374 | decay_mult: 1 1375 | } 1376 | param { 1377 | lr_mult: 2 1378 | decay_mult: 0 1379 | } 1380 | convolution_param { 1381 | num_output: 128 1382 | pad: 1 1383 | kernel_size: 3 1384 | engine: CUDNN 1385 | } 1386 | } 1387 | 1388 | layer { 1389 | name: "dem_conv2_1" 1390 | type: "Convolution" 1391 | bottom: "dem_pool1" 1392 | top: "dem_conv2_1" 1393 | param { 1394 | lr_mult: 1 1395 | decay_mult: 1 1396 | } 1397 | param { 1398 | lr_mult: 2 1399 | decay_mult: 0 1400 | } 1401 | 1402 | convolution_param { 1403 | num_output: 128 1404 | pad: 1 1405 | kernel_size: 3 1406 | engine: CUDNN 1407 | weight_filler {type: "xavier"} 1408 | bias_filler { type: "constant"} 1409 | } 1410 | } 1411 | 1412 | ######################################### 1413 | 1414 | layer { 1415 | name: "relu2_1" 1416 | type: "ReLU" 1417 | bottom: "conv2_1" 1418 | top: "conv2_1" 1419 | } 1420 | 1421 | layer { 1422 | name: "dem_relu2_1" 1423 | type: "ReLU" 1424 | bottom: "dem_conv2_1" 1425 | top: "dem_conv2_1" 1426 | } 1427 | 1428 | ######################################### 1429 | 1430 | layer { 1431 | name: "conv2_2" 1432 | type: "Convolution" 1433 | bottom: "conv2_1" 1434 | top: "conv2_2" 1435 | param { 1436 | lr_mult: 1 1437 | decay_mult: 1 1438 | } 1439 | param { 1440 | lr_mult: 2 1441 | decay_mult: 0 1442 | } 1443 | convolution_param { 1444 | num_output: 128 1445 | pad: 1 1446 | kernel_size: 3 1447 | engine: CUDNN 1448 | } 1449 | } 1450 | 1451 | layer { 1452 | name: "dem_conv2_2" 1453 | 
type: "Convolution" 1454 | bottom: "dem_conv2_1" 1455 | top: "dem_conv2_2" 1456 | param { 1457 | lr_mult: 1 1458 | decay_mult: 1 1459 | } 1460 | param { 1461 | lr_mult: 2 1462 | decay_mult: 0 1463 | } 1464 | convolution_param { 1465 | num_output: 128 1466 | pad: 1 1467 | kernel_size: 3 1468 | engine: CUDNN 1469 | weight_filler {type: "xavier"} 1470 | bias_filler { type: "constant"} 1471 | } 1472 | } 1473 | 1474 | ######################################### 1475 | 1476 | layer { 1477 | name: "relu2_2" 1478 | type: "ReLU" 1479 | bottom: "conv2_2" 1480 | top: "conv2_2" 1481 | } 1482 | 1483 | layer { 1484 | name: "dem_relu2_2" 1485 | type: "ReLU" 1486 | bottom: "dem_conv2_2" 1487 | top: "dem_conv2_2" 1488 | } 1489 | 1490 | 1491 | ######################################### 1492 | 1493 | layer { 1494 | name: "pool2" 1495 | type: "Pooling" 1496 | bottom: "conv2_2" 1497 | top: "pool2" 1498 | pooling_param { 1499 | pool: MAX 1500 | kernel_size: 2 1501 | stride: 2 1502 | } 1503 | } 1504 | 1505 | layer { 1506 | name: "dem_pool2" 1507 | type: "Pooling" 1508 | bottom: "dem_conv2_2" 1509 | top: "dem_pool2" 1510 | pooling_param { 1511 | pool: MAX 1512 | kernel_size: 2 1513 | stride: 2 1514 | } 1515 | } 1516 | 1517 | layer { 1518 | name: "conv3_1" 1519 | type: "Convolution" 1520 | bottom: "pool2" 1521 | top: "conv3_1" 1522 | param { 1523 | lr_mult: 1 1524 | decay_mult: 1 1525 | } 1526 | param { 1527 | lr_mult: 2 1528 | decay_mult: 0 1529 | } 1530 | convolution_param { 1531 | num_output: 256 1532 | pad: 1 1533 | kernel_size: 3 1534 | engine: CUDNN 1535 | } 1536 | } 1537 | 1538 | layer { 1539 | name: "dem_conv3_1" 1540 | type: "Convolution" 1541 | bottom: "dem_pool2" 1542 | top: "dem_conv3_1" 1543 | param { 1544 | lr_mult: 1 1545 | decay_mult: 1 1546 | } 1547 | param { 1548 | lr_mult: 2 1549 | decay_mult: 0 1550 | } 1551 | convolution_param { 1552 | num_output: 256 1553 | pad: 1 1554 | kernel_size: 3 1555 | engine: CUDNN 1556 | weight_filler {type: "xavier"} 1557 | bias_filler { type: 
"constant"} 1558 | } 1559 | } 1560 | 1561 | ######################################### 1562 | 1563 | layer { 1564 | name: "relu3_1" 1565 | type: "ReLU" 1566 | bottom: "conv3_1" 1567 | top: "conv3_1" 1568 | } 1569 | 1570 | layer { 1571 | name: "dem_relu3_1" 1572 | type: "ReLU" 1573 | bottom: "dem_conv3_1" 1574 | top: "dem_conv3_1" 1575 | } 1576 | 1577 | 1578 | ######################################### 1579 | 1580 | layer { 1581 | name: "conv3_2" 1582 | type: "Convolution" 1583 | bottom: "conv3_1" 1584 | top: "conv3_2" 1585 | param { 1586 | lr_mult: 1 1587 | decay_mult: 1 1588 | } 1589 | param { 1590 | lr_mult: 2 1591 | decay_mult: 0 1592 | } 1593 | convolution_param { 1594 | num_output: 256 1595 | pad: 1 1596 | kernel_size: 3 1597 | engine: CUDNN 1598 | } 1599 | } 1600 | 1601 | layer { 1602 | name: "dem_conv3_2" 1603 | type: "Convolution" 1604 | bottom: "dem_conv3_1" 1605 | top: "dem_conv3_2" 1606 | param { 1607 | lr_mult: 1 1608 | decay_mult: 1 1609 | } 1610 | param { 1611 | lr_mult: 2 1612 | decay_mult: 0 1613 | } 1614 | convolution_param { 1615 | num_output: 256 1616 | pad: 1 1617 | kernel_size: 3 1618 | engine: CUDNN 1619 | weight_filler {type: "xavier"} 1620 | bias_filler { type: "constant"} 1621 | } 1622 | } 1623 | 1624 | ######################################### 1625 | 1626 | layer { 1627 | name: "relu3_2" 1628 | type: "ReLU" 1629 | bottom: "conv3_2" 1630 | top: "conv3_2" 1631 | } 1632 | 1633 | layer { 1634 | name: "dem_relu3_2" 1635 | type: "ReLU" 1636 | bottom: "dem_conv3_2" 1637 | top: "dem_conv3_2" 1638 | } 1639 | 1640 | ######################################### 1641 | 1642 | layer { 1643 | name: "conv3_3" 1644 | type: "Convolution" 1645 | bottom: "conv3_2" 1646 | top: "conv3_3" 1647 | param { 1648 | lr_mult: 1 1649 | decay_mult: 1 1650 | } 1651 | param { 1652 | lr_mult: 2 1653 | decay_mult: 0 1654 | } 1655 | convolution_param { 1656 | num_output: 256 1657 | pad: 1 1658 | kernel_size: 3 1659 | engine: CUDNN 1660 | } 1661 | } 1662 | 1663 | layer { 1664 | 
name: "dem_conv3_3" 1665 | type: "Convolution" 1666 | bottom: "dem_conv3_2" 1667 | top: "dem_conv3_3" 1668 | param { 1669 | lr_mult: 1 1670 | decay_mult: 1 1671 | } 1672 | param { 1673 | lr_mult: 2 1674 | decay_mult: 0 1675 | } 1676 | convolution_param { 1677 | num_output: 256 1678 | pad: 1 1679 | kernel_size: 3 1680 | engine: CUDNN 1681 | weight_filler {type: "xavier"} 1682 | bias_filler { type: "constant"} 1683 | } 1684 | } 1685 | 1686 | ######################################### 1687 | 1688 | layer { 1689 | name: "relu3_3" 1690 | type: "ReLU" 1691 | bottom: "conv3_3" 1692 | top: "conv3_3" 1693 | } 1694 | 1695 | layer { 1696 | name: "dem_relu3_3" 1697 | type: "ReLU" 1698 | bottom: "dem_conv3_3" 1699 | top: "dem_conv3_3" 1700 | } 1701 | 1702 | ################# Deep Supervision 3 ################# 1703 | 1704 | layer { 1705 | name: "ds_conv3" 1706 | type: "Convolution" 1707 | bottom: "conv3_3" 1708 | top: "ds_conv3" 1709 | param { 1710 | lr_mult: 1 1711 | decay_mult: 1 1712 | } 1713 | param { 1714 | lr_mult: 2 1715 | decay_mult: 0 1716 | } 1717 | convolution_param { 1718 | num_output: 5 1719 | pad: 0 1720 | kernel_size: 1 1721 | weight_filler { 1722 | type: "xavier" 1723 | } 1724 | bias_filler { 1725 | type: "constant" 1726 | } 1727 | engine: CUDNN 1728 | } 1729 | } 1730 | 1731 | layer { 1732 | name: 'ds_deconv3_' 1733 | type: "Deconvolution" 1734 | bottom: 'ds_conv3' 1735 | top: 'ds_deconv3' 1736 | param { 1737 | lr_mult: 1 1738 | decay_mult: 1 1739 | } 1740 | param { 1741 | lr_mult: 2 1742 | decay_mult: 0 1743 | } 1744 | convolution_param { 1745 | kernel_size: 8 1746 | stride: 4 1747 | num_output: 5 1748 | group: 5 1749 | weight_filler: { 1750 | type: "bilinear" 1751 | } 1752 | } 1753 | } 1754 | 1755 | layer { 1756 | type: "Crop" 1757 | name: "crop3" 1758 | bottom: "ds_deconv3" 1759 | bottom: "data" 1760 | top: "ds_deconv3c" 1761 | crop_param { 1762 | axis: 2 1763 | offset: 102 1764 | offset: 102 1765 | } 1766 | } 1767 | 1768 | layer { name: "ds_loss3" type: 
"SoftmaxWithLoss" bottom: "ds_deconv3c" bottom: "label" top: "ds_loss3" loss_weight: 1 loss_param {normalize: false} } 1769 | 1770 | 1771 | 1772 | 1773 | ######################################### 1774 | layer { 1775 | name: "pool3" 1776 | type: "Pooling" 1777 | bottom: "conv3_3" 1778 | top: "pool3" 1779 | pooling_param { 1780 | pool: MAX 1781 | kernel_size: 2 1782 | stride: 2 1783 | } 1784 | } 1785 | 1786 | layer { 1787 | name: "dem_pool3" 1788 | type: "Pooling" 1789 | bottom: "dem_conv3_3" 1790 | top: "dem_pool3" 1791 | pooling_param { 1792 | pool: MAX 1793 | kernel_size: 2 1794 | stride: 2 1795 | } 1796 | } 1797 | 1798 | ######################################### 1799 | 1800 | layer { 1801 | name: "conv4_1" 1802 | type: "Convolution" 1803 | bottom: "pool3" 1804 | top: "conv4_1" 1805 | param { 1806 | lr_mult: 1 1807 | decay_mult: 1 1808 | } 1809 | param { 1810 | lr_mult: 2 1811 | decay_mult: 0 1812 | } 1813 | convolution_param { 1814 | num_output: 512 1815 | pad: 1 1816 | kernel_size: 3 1817 | engine: CUDNN 1818 | } 1819 | } 1820 | 1821 | layer { 1822 | name: "dem_conv4_1" 1823 | type: "Convolution" 1824 | bottom: "dem_pool3" 1825 | top: "dem_conv4_1" 1826 | param { 1827 | lr_mult: 1 1828 | decay_mult: 1 1829 | } 1830 | param { 1831 | lr_mult: 2 1832 | decay_mult: 0 1833 | } 1834 | convolution_param { 1835 | num_output: 512 1836 | pad: 1 1837 | kernel_size: 3 1838 | engine: CUDNN 1839 | weight_filler {type: "xavier"} 1840 | bias_filler { type: "constant"} 1841 | } 1842 | } 1843 | 1844 | 1845 | ######################################### 1846 | 1847 | layer { 1848 | name: "relu4_1" 1849 | type: "ReLU" 1850 | bottom: "conv4_1" 1851 | top: "conv4_1" 1852 | } 1853 | 1854 | layer { 1855 | name: "dem_relu4_1" 1856 | type: "ReLU" 1857 | bottom: "dem_conv4_1" 1858 | top: "dem_conv4_1" 1859 | } 1860 | 1861 | 1862 | ######################################### 1863 | 1864 | layer { 1865 | name: "conv4_2" 1866 | type: "Convolution" 1867 | bottom: "conv4_1" 1868 | top: "conv4_2" 
1869 | param { 1870 | lr_mult: 1 1871 | decay_mult: 1 1872 | } 1873 | param { 1874 | lr_mult: 2 1875 | decay_mult: 0 1876 | } 1877 | convolution_param { 1878 | num_output: 512 1879 | pad: 1 1880 | kernel_size: 3 1881 | engine: CUDNN 1882 | } 1883 | } 1884 | 1885 | layer { 1886 | name: "dem_conv4_2" 1887 | type: "Convolution" 1888 | bottom: "dem_conv4_1" 1889 | top: "dem_conv4_2" 1890 | param { 1891 | lr_mult: 1 1892 | decay_mult: 1 1893 | } 1894 | param { 1895 | lr_mult: 2 1896 | decay_mult: 0 1897 | } 1898 | convolution_param { 1899 | num_output: 512 1900 | pad: 1 1901 | kernel_size: 3 1902 | engine: CUDNN 1903 | weight_filler {type: "xavier"} 1904 | bias_filler { type: "constant"} 1905 | } 1906 | } 1907 | 1908 | 1909 | ######################################### 1910 | 1911 | layer { 1912 | name: "relu4_2" 1913 | type: "ReLU" 1914 | bottom: "conv4_2" 1915 | top: "conv4_2" 1916 | } 1917 | 1918 | layer { 1919 | name: "dem_relu4_2" 1920 | type: "ReLU" 1921 | bottom: "dem_conv4_2" 1922 | top: "dem_conv4_2" 1923 | } 1924 | 1925 | 1926 | ######################################### 1927 | 1928 | layer { 1929 | name: "conv4_3" 1930 | type: "Convolution" 1931 | bottom: "conv4_2" 1932 | top: "conv4_3" 1933 | param { 1934 | lr_mult: 1 1935 | decay_mult: 1 1936 | } 1937 | param { 1938 | lr_mult: 2 1939 | decay_mult: 0 1940 | } 1941 | convolution_param { 1942 | num_output: 512 1943 | pad: 1 1944 | kernel_size: 3 1945 | engine: CUDNN 1946 | } 1947 | } 1948 | 1949 | layer { 1950 | name: "dem_conv4_3" 1951 | type: "Convolution" 1952 | bottom: "dem_conv4_2" 1953 | top: "dem_conv4_3" 1954 | param { 1955 | lr_mult: 1 1956 | decay_mult: 1 1957 | } 1958 | param { 1959 | lr_mult: 2 1960 | decay_mult: 0 1961 | } 1962 | convolution_param { 1963 | num_output: 512 1964 | pad: 1 1965 | kernel_size: 3 1966 | engine: CUDNN 1967 | weight_filler {type: "xavier"} 1968 | bias_filler { type: "constant"} 1969 | } 1970 | } 1971 | 1972 | ######################################### 1973 | 1974 | layer { 
1975 | name: "relu4_3" 1976 | type: "ReLU" 1977 | bottom: "conv4_3" 1978 | top: "conv4_3" 1979 | } 1980 | 1981 | layer { 1982 | name: "dem_relu4_3" 1983 | type: "ReLU" 1984 | bottom: "dem_conv4_3" 1985 | top: "dem_conv4_3" 1986 | } 1987 | 1988 | ################# Deep Supervision 4 ################# 1989 | 1990 | layer { 1991 | name: "ds_conv4_" 1992 | type: "Convolution" 1993 | bottom: "conv4_3" 1994 | top: "ds_conv4" 1995 | param { 1996 | lr_mult: 1 1997 | decay_mult: 1 1998 | } 1999 | param { 2000 | lr_mult: 2 2001 | decay_mult: 0 2002 | } 2003 | convolution_param { 2004 | num_output: 5 2005 | pad: 0 2006 | kernel_size: 1 2007 | weight_filler { 2008 | type: "xavier" 2009 | } 2010 | bias_filler { 2011 | type: "constant" 2012 | } 2013 | engine: CUDNN 2014 | } 2015 | } 2016 | 2017 | layer { 2018 | name: 'ds_deconv4_' 2019 | type: "Deconvolution" 2020 | bottom: 'ds_conv4' 2021 | top: 'ds_deconv4' 2022 | param { 2023 | lr_mult: 1 2024 | decay_mult: 1 2025 | } 2026 | param { 2027 | lr_mult: 2 2028 | decay_mult: 0 2029 | } 2030 | convolution_param { 2031 | kernel_size: 16 2032 | stride: 8 2033 | num_output: 5 2034 | group: 5 2035 | weight_filler: { 2036 | type: "bilinear" 2037 | } 2038 | } 2039 | } 2040 | 2041 | layer { 2042 | type: "Crop" 2043 | name: "crop4" 2044 | bottom: "ds_deconv4" 2045 | bottom: "data" 2046 | top: "ds_deconv4c" 2047 | crop_param { 2048 | axis: 2 2049 | offset: 104 2050 | offset: 104 2051 | } 2052 | } 2053 | 2054 | layer { name: "ds_loss4" type: "SoftmaxWithLoss" bottom: "ds_deconv4c" bottom: "label" top: "ds_loss4" loss_weight: 1 loss_param {normalize: false} } 2055 | 2056 | 2057 | ######################################### 2058 | 2059 | layer { 2060 | name: "pool4" 2061 | type: "Pooling" 2062 | bottom: "conv4_3" 2063 | top: "pool4" 2064 | pooling_param { 2065 | pool: MAX 2066 | kernel_size: 2 2067 | stride: 2 2068 | } 2069 | } 2070 | 2071 | layer { 2072 | name: "dem_pool4" 2073 | type: "Pooling" 2074 | bottom: "dem_conv4_3" 2075 | top: 
"dem_pool4" 2076 | pooling_param { 2077 | pool: MAX 2078 | kernel_size: 2 2079 | stride: 2 2080 | } 2081 | } 2082 | 2083 | ######################################### 2084 | 2085 | layer { 2086 | name: "conv5_1" 2087 | type: "Convolution" 2088 | bottom: "pool4" 2089 | top: "conv5_1" 2090 | param { 2091 | lr_mult: 1 2092 | decay_mult: 1 2093 | } 2094 | param { 2095 | lr_mult: 2 2096 | decay_mult: 0 2097 | } 2098 | convolution_param { 2099 | num_output: 512 2100 | pad: 1 2101 | kernel_size: 3 2102 | engine: CUDNN 2103 | } 2104 | } 2105 | 2106 | layer { 2107 | name: "dem_conv5_1" 2108 | type: "Convolution" 2109 | bottom: "dem_pool4" 2110 | top: "dem_conv5_1" 2111 | param { 2112 | lr_mult: 1 2113 | decay_mult: 1 2114 | } 2115 | param { 2116 | lr_mult: 2 2117 | decay_mult: 0 2118 | } 2119 | convolution_param { 2120 | num_output: 512 2121 | pad: 1 2122 | kernel_size: 3 2123 | engine: CUDNN 2124 | weight_filler {type: "xavier"} 2125 | bias_filler { type: "constant"} 2126 | } 2127 | } 2128 | 2129 | 2130 | ######################################### 2131 | 2132 | layer { 2133 | name: "relu5_1" 2134 | type: "ReLU" 2135 | bottom: "conv5_1" 2136 | top: "conv5_1" 2137 | } 2138 | 2139 | layer { 2140 | name: "dem_relu5_1" 2141 | type: "ReLU" 2142 | bottom: "dem_conv5_1" 2143 | top: "dem_conv5_1" 2144 | } 2145 | 2146 | ######################################### 2147 | 2148 | layer { 2149 | name: "conv5_2" 2150 | type: "Convolution" 2151 | bottom: "conv5_1" 2152 | top: "conv5_2" 2153 | param { 2154 | lr_mult: 1 2155 | decay_mult: 1 2156 | } 2157 | param { 2158 | lr_mult: 2 2159 | decay_mult: 0 2160 | } 2161 | convolution_param { 2162 | num_output: 512 2163 | pad: 1 2164 | kernel_size: 3 2165 | engine: CUDNN 2166 | } 2167 | } 2168 | 2169 | layer { 2170 | name: "dem_conv5_2" 2171 | type: "Convolution" 2172 | bottom: "dem_conv5_1" 2173 | top: "dem_conv5_2" 2174 | param { 2175 | lr_mult: 1 2176 | decay_mult: 1 2177 | } 2178 | param { 2179 | lr_mult: 2 2180 | decay_mult: 0 2181 | } 2182 | 
convolution_param { 2183 | num_output: 512 2184 | pad: 1 2185 | kernel_size: 3 2186 | engine: CUDNN 2187 | weight_filler {type: "xavier"} 2188 | bias_filler { type: "constant"} 2189 | } 2190 | } 2191 | 2192 | ######################################### 2193 | 2194 | layer { 2195 | name: "relu5_2" 2196 | type: "ReLU" 2197 | bottom: "conv5_2" 2198 | top: "conv5_2" 2199 | } 2200 | 2201 | layer { 2202 | name: "dem_relu5_2" 2203 | type: "ReLU" 2204 | bottom: "dem_conv5_2" 2205 | top: "dem_conv5_2" 2206 | } 2207 | 2208 | ######################################### 2209 | 2210 | layer { 2211 | name: "conv5_3" 2212 | type: "Convolution" 2213 | bottom: "conv5_2" 2214 | top: "conv5_3" 2215 | param { 2216 | lr_mult: 1 2217 | decay_mult: 1 2218 | } 2219 | param { 2220 | lr_mult: 2 2221 | decay_mult: 0 2222 | } 2223 | convolution_param { 2224 | num_output: 512 2225 | pad: 1 2226 | kernel_size: 3 2227 | engine: CUDNN 2228 | } 2229 | } 2230 | 2231 | layer { 2232 | name: "dem_conv5_3" 2233 | type: "Convolution" 2234 | bottom: "dem_conv5_2" 2235 | top: "dem_conv5_3" 2236 | param { 2237 | lr_mult: 1 2238 | decay_mult: 1 2239 | } 2240 | param { 2241 | lr_mult: 2 2242 | decay_mult: 0 2243 | } 2244 | convolution_param { 2245 | num_output: 512 2246 | pad: 1 2247 | kernel_size: 3 2248 | engine: CUDNN 2249 | weight_filler {type: "xavier"} 2250 | bias_filler { type: "constant"} 2251 | } 2252 | } 2253 | 2254 | ######################################### 2255 | 2256 | layer { 2257 | name: "relu5_3" 2258 | type: "ReLU" 2259 | bottom: "conv5_3" 2260 | top: "conv5_3" 2261 | } 2262 | 2263 | layer { 2264 | name: "dem_relu5_3" 2265 | type: "ReLU" 2266 | bottom: "dem_conv5_3" 2267 | top: "dem_conv5_3" 2268 | } 2269 | 2270 | ################# Deep Supervision 5 ################# 2271 | 2272 | layer { 2273 | name: "ds_conv5_" 2274 | type: "Convolution" 2275 | bottom: "conv5_3" 2276 | top: "ds_conv5" 2277 | param { 2278 | lr_mult: 1 2279 | decay_mult: 1 2280 | } 2281 | param { 2282 | lr_mult: 2 2283 | 
decay_mult: 0 2284 | } 2285 | convolution_param { 2286 | num_output: 5 2287 | pad: 0 2288 | kernel_size: 1 2289 | weight_filler { 2290 | type: "xavier" 2291 | } 2292 | bias_filler { 2293 | type: "constant" 2294 | } 2295 | } 2296 | } 2297 | 2298 | layer { 2299 | name: 'ds_deconv5_' 2300 | type: "Deconvolution" 2301 | bottom: 'ds_conv5' 2302 | top: 'ds_deconv5' 2303 | param { 2304 | lr_mult: 1 2305 | decay_mult: 1 2306 | } 2307 | param { 2308 | lr_mult: 2 2309 | decay_mult: 0 2310 | } 2311 | convolution_param { 2312 | kernel_size: 32 2313 | stride: 16 2314 | num_output: 5 2315 | group: 5 2316 | weight_filler: { 2317 | type: "bilinear" 2318 | } 2319 | } 2320 | } 2321 | 2322 | layer { 2323 | type: "Crop" 2324 | name: "crop5" 2325 | bottom: "ds_deconv5" 2326 | bottom: "data" 2327 | top: "ds_deconv5c" 2328 | crop_param { 2329 | axis: 2 2330 | offset: 112 2331 | offset: 112 2332 | } 2333 | } 2334 | 2335 | layer { name: "ds_loss5" type: "SoftmaxWithLoss" bottom: "ds_deconv5c" bottom: "label" top: "ds_loss5" loss_weight: 1 loss_param {normalize: false} } 2336 | 2337 | 2338 | ######################################### 2339 | 2340 | layer { 2341 | name: "pool5" 2342 | type: "Pooling" 2343 | bottom: "conv5_3" 2344 | top: "pool5" 2345 | pooling_param { 2346 | pool: MAX 2347 | kernel_size: 2 2348 | stride: 2 2349 | } 2350 | } 2351 | 2352 | layer { 2353 | name: "dem_pool5" 2354 | type: "Pooling" 2355 | bottom: "dem_conv5_3" 2356 | top: "dem_pool5" 2357 | pooling_param { 2358 | pool: MAX 2359 | kernel_size: 2 2360 | stride: 2 2361 | } 2362 | } 2363 | 2364 | ######################################### 2365 | 2366 | layer { 2367 | name: "fc6" 2368 | type: "Convolution" 2369 | bottom: "pool5" 2370 | top: "fc6" 2371 | param { 2372 | lr_mult: 1 2373 | decay_mult: 1 2374 | } 2375 | param { 2376 | lr_mult: 2 2377 | decay_mult: 0 2378 | } 2379 | convolution_param { 2380 | num_output: 4096 2381 | kernel_size: 7 2382 | engine: CUDNN 2383 | } 2384 | } 2385 | 2386 | layer { 2387 | name: 
"dem_fc6" 2388 | type: "Convolution" 2389 | bottom: "dem_pool5" 2390 | top: "dem_fc6" 2391 | param { 2392 | lr_mult: 1 2393 | decay_mult: 1 2394 | } 2395 | param { 2396 | lr_mult: 2 2397 | decay_mult: 0 2398 | } 2399 | convolution_param { 2400 | num_output: 4096 2401 | kernel_size: 7 2402 | engine: CUDNN 2403 | } 2404 | } 2405 | 2406 | ######################################### 2407 | 2408 | layer { 2409 | name: "relu6" 2410 | type: "ReLU" 2411 | bottom: "fc6" 2412 | top: "fc6" 2413 | } 2414 | 2415 | layer { 2416 | name: "dem_relu6" 2417 | type: "ReLU" 2418 | bottom: "dem_fc6" 2419 | top: "dem_fc6" 2420 | } 2421 | 2422 | ######################################### 2423 | 2424 | layer { 2425 | name: "drop6" 2426 | type: "Dropout" 2427 | bottom: "fc6" 2428 | top: "fc6" 2429 | dropout_param { 2430 | dropout_ratio: 0.4 2431 | } 2432 | } 2433 | 2434 | layer { 2435 | name: "dem_drop6" 2436 | type: "Dropout" 2437 | bottom: "dem_fc6" 2438 | top: "dem_fc6" 2439 | dropout_param { 2440 | dropout_ratio: 0.4 2441 | } 2442 | } 2443 | 2444 | 2445 | ######################################### 2446 | 2447 | layer { 2448 | name: "fc7" 2449 | type: "Convolution" 2450 | bottom: "fc6" 2451 | top: "fc7" 2452 | param { 2453 | lr_mult: 1 2454 | decay_mult: 1 2455 | } 2456 | param { 2457 | lr_mult: 2 2458 | decay_mult: 0 2459 | } 2460 | convolution_param { 2461 | num_output: 4096 2462 | kernel_size: 1 2463 | engine: CUDNN 2464 | } 2465 | } 2466 | 2467 | layer { 2468 | name: "dem_fc7" 2469 | type: "Convolution" 2470 | bottom: "dem_fc6" 2471 | top: "dem_fc7" 2472 | param { 2473 | lr_mult: 1 2474 | decay_mult: 1 2475 | } 2476 | param { 2477 | lr_mult: 2 2478 | decay_mult: 0 2479 | } 2480 | convolution_param { 2481 | num_output: 4096 2482 | kernel_size: 1 2483 | engine: CUDNN 2484 | } 2485 | } 2486 | 2487 | 2488 | ######################################### 2489 | 2490 | layer { 2491 | name: "relu7" 2492 | type: "ReLU" 2493 | bottom: "fc7" 2494 | top: "fc7" 2495 | } 2496 | 2497 | layer { 2498 | name: 
"dem_relu7" 2499 | type: "ReLU" 2500 | bottom: "dem_fc7" 2501 | top: "dem_fc7" 2502 | } 2503 | 2504 | ######################################### 2505 | 2506 | layer { 2507 | name: "drop7" 2508 | type: "Dropout" 2509 | bottom: "fc7" 2510 | top: "fc7" 2511 | dropout_param { 2512 | dropout_ratio: 0.4 2513 | } 2514 | } 2515 | 2516 | layer { 2517 | name: "dem_drop7" 2518 | type: "Dropout" 2519 | bottom: "dem_fc7" 2520 | top: "dem_fc7" 2521 | dropout_param { 2522 | dropout_ratio: 0.4 2523 | } 2524 | } 2525 | 2526 | ######################################### SIGNAL BOOSTER ################################################ 2527 | 2528 | #layer { 2529 | # name: "pre-fuse-image" 2530 | # type: "LRN" 2531 | # bottom: "final_1" 2532 | # top: "final" 2533 | # lrn_param { 2534 | # local_size: 5 2535 | # alpha: 0.000001 2536 | # beta: 0.050 2537 | # } 2538 | #} 2539 | 2540 | #layer { 2541 | # name: "pre-fuse-dem" 2542 | # type: "LRN" 2543 | # bottom: "dem-final_1" 2544 | # top: "dem-final" 2545 | # lrn_param { 2546 | # local_size: 5 2547 | # alpha: 0.000001 2548 | # beta: 0.005 2549 | # } 2550 | #} 2551 | 2552 | 2553 | 2554 | 2555 | ######################################### FCN-16 ################################################ 2556 | 2557 | layer { 2558 | name: "score59" 2559 | type: "Convolution" 2560 | bottom: "fc7" 2561 | top: "score59" 2562 | param { 2563 | lr_mult: 1 2564 | decay_mult: 1 2565 | } 2566 | param { 2567 | lr_mult: 2 2568 | decay_mult: 0 2569 | } 2570 | convolution_param { 2571 | num_output: 60 2572 | kernel_size: 1 2573 | engine: CUDNN 2574 | weight_filler { 2575 | type: "xavier" 2576 | } 2577 | bias_filler { 2578 | type: "constant" 2579 | } 2580 | } 2581 | } 2582 | 2583 | layer { 2584 | name: "dem-score59" 2585 | type: "Convolution" 2586 | bottom: "dem_fc7" 2587 | top: "dem-score59" 2588 | param { 2589 | lr_mult: 1 2590 | decay_mult: 1 2591 | } 2592 | param { 2593 | lr_mult: 2 2594 | decay_mult: 0 2595 | } 2596 | convolution_param { 2597 | num_output: 60 2598 | 
kernel_size: 1 2599 | engine: CUDNN 2600 | weight_filler { 2601 | type: "xavier" 2602 | } 2603 | bias_filler { 2604 | type: "constant" 2605 | } 2606 | } 2607 | } 2608 | 2609 | layer { 2610 | name: "upscore16" 2611 | type: "Deconvolution" 2612 | bottom: "score59" 2613 | top: "upscore16" 2614 | param { 2615 | lr_mult: 1 2616 | decay_mult: 1 2617 | } 2618 | convolution_param { 2619 | num_output: 60 2620 | bias_term: false 2621 | kernel_size: 4 2622 | stride: 2 2623 | } 2624 | } 2625 | 2626 | layer { 2627 | name: "dem-upscore16" 2628 | type: "Deconvolution" 2629 | bottom: "dem-score59" 2630 | top: "dem-upscore16" 2631 | param { 2632 | lr_mult: 1 2633 | decay_mult: 1 2634 | } 2635 | convolution_param { 2636 | num_output: 60 2637 | bias_term: false 2638 | kernel_size: 4 2639 | stride: 2 2640 | } 2641 | } 2642 | 2643 | layer { 2644 | name: "score-pool4" 2645 | type: "Convolution" 2646 | bottom: "pool4" 2647 | top: "score-pool4" 2648 | param { 2649 | lr_mult: 1 2650 | decay_mult: 1 2651 | } 2652 | param { 2653 | lr_mult: 2 2654 | decay_mult: 0 2655 | } 2656 | convolution_param { 2657 | num_output: 60 2658 | kernel_size: 1 2659 | engine: CUDNN 2660 | weight_filler { 2661 | type: "xavier" 2662 | } 2663 | bias_filler { 2664 | type: "constant" 2665 | } 2666 | } 2667 | } 2668 | 2669 | # decrease strong peaks 2670 | #layer { 2671 | # name: "pre-fuse-image" 2672 | # type: "LRN" 2673 | # bottom: "score-pool4" 2674 | # top: "score-pool4_" 2675 | # lrn_param { 2676 | # local_size: 5 2677 | # alpha: 0.01 2678 | # beta: 0.95 2679 | # } 2680 | #} 2681 | 2682 | 2683 | layer { 2684 | name: "dem-score-pool4" 2685 | type: "Convolution" 2686 | bottom: "dem_pool4" 2687 | top: "dem-score-pool4" 2688 | param { 2689 | lr_mult: 1 2690 | decay_mult: 1 2691 | } 2692 | param { 2693 | lr_mult: 2 2694 | decay_mult: 0 2695 | } 2696 | convolution_param { 2697 | num_output: 60 2698 | kernel_size: 1 2699 | engine: CUDNN 2700 | weight_filler { 2701 | type: "xavier" 2702 | } 2703 | bias_filler { 2704 | 
type: "constant" 2705 | } 2706 | } 2707 | } 2708 | 2709 | layer { 2710 | type: 'Crop' 2711 | name: 'crop' 2712 | bottom: 'score-pool4' 2713 | bottom: 'upscore16' 2714 | top: 'score-pool4c' 2715 | crop_param { 2716 | axis: 1 2717 | offset: 0 2718 | offset: 5 2719 | offset: 5 2720 | } 2721 | } 2722 | 2723 | layer { 2724 | type: 'Crop' 2725 | name: 'crop' 2726 | bottom: 'dem-score-pool4' 2727 | bottom: 'dem-upscore16' 2728 | top: 'dem-score-pool4c' 2729 | crop_param { 2730 | axis: 1 2731 | offset: 0 2732 | offset: 5 2733 | offset: 5 2734 | } 2735 | } 2736 | 2737 | ########################################## FCN-8 ####################################### 2738 | 2739 | layer { 2740 | name: "fuse-pool4" 2741 | type: "Eltwise" 2742 | bottom: "upscore16" 2743 | bottom: "score-pool4c" 2744 | top: "score-fused-pool4" 2745 | eltwise_param { 2746 | operation: SUM 2747 | } 2748 | } 2749 | 2750 | layer { 2751 | name: "dem-fuse-pool4" 2752 | type: "Eltwise" 2753 | bottom: "dem-upscore16" 2754 | bottom: "dem-score-pool4c" 2755 | top: "dem-score-fused-pool4" 2756 | eltwise_param { 2757 | operation: SUM 2758 | } 2759 | } 2760 | 2761 | layer { 2762 | name: "upscore8" 2763 | type: "Deconvolution" 2764 | bottom: "score-fused-pool4" 2765 | top: "upscore8" 2766 | param { 2767 | lr_mult: 1 2768 | decay_mult: 1 2769 | } 2770 | convolution_param { 2771 | num_output: 60 2772 | bias_term: false 2773 | kernel_size: 4 2774 | stride: 2 2775 | } 2776 | } 2777 | 2778 | layer { 2779 | name: "dem-upscore8" 2780 | type: "Deconvolution" 2781 | bottom: "dem-score-fused-pool4" 2782 | top: "dem-upscore8" 2783 | param { 2784 | lr_mult: 1 2785 | decay_mult: 1 2786 | } 2787 | convolution_param { 2788 | num_output: 60 2789 | bias_term: false 2790 | kernel_size: 4 2791 | stride: 2 2792 | } 2793 | } 2794 | 2795 | layer { 2796 | name: "score-pool3" 2797 | type: "Convolution" 2798 | bottom: "pool3" 2799 | top: "score-pool3" 2800 | param { 2801 | lr_mult: 1 2802 | decay_mult: 1 2803 | } 2804 | param { 2805 | 
lr_mult: 2 2806 | decay_mult: 0 2807 | } 2808 | convolution_param { 2809 | num_output: 60 2810 | kernel_size: 1 2811 | engine: CUDNN 2812 | weight_filler { 2813 | type: "xavier" 2814 | } 2815 | bias_filler { 2816 | type: "constant" 2817 | } 2818 | } 2819 | } 2820 | 2821 | # decrease strong peaks 2822 | #layer { 2823 | # name: "peak-cutter" 2824 | # type: "LRN" 2825 | # bottom: "score-pool3" 2826 | # top: "score-pool3_" 2827 | # lrn_param { 2828 | # local_size: 5 2829 | # alpha: 0.01 2830 | # beta: 0.95 2831 | # } 2832 | #} 2833 | 2834 | layer { 2835 | name: "dem-score-pool3" 2836 | type: "Convolution" 2837 | bottom: "dem_pool3" 2838 | top: "dem-score-pool3" 2839 | param { 2840 | lr_mult: 1 2841 | decay_mult: 1 2842 | } 2843 | param { 2844 | lr_mult: 2 2845 | decay_mult: 0 2846 | } 2847 | convolution_param { 2848 | num_output: 60 2849 | kernel_size: 1 2850 | engine: CUDNN 2851 | weight_filler { 2852 | type: "xavier" 2853 | } 2854 | bias_filler { 2855 | type: "constant" 2856 | } 2857 | } 2858 | } 2859 | 2860 | layer { 2861 | type: 'Crop' 2862 | name: 'crop' 2863 | bottom: 'score-pool3' 2864 | bottom: 'upscore8' 2865 | top: 'score-pool3c' 2866 | crop_param { 2867 | axis: 2 2868 | offset: 11 2869 | offset: 11 2870 | } 2871 | } 2872 | 2873 | layer { 2874 | type: 'Crop' 2875 | name: 'crop' 2876 | bottom: 'dem-score-pool3' 2877 | bottom: 'dem-upscore8' 2878 | top: 'dem-score-pool3c' 2879 | crop_param { 2880 | axis: 2 2881 | offset: 11 2882 | offset: 11 2883 | } 2884 | } 2885 | 2886 | ######################################################################################################## 2887 | 2888 | layer { 2889 | name: "fuse-pool3" 2890 | type: "Eltwise" 2891 | bottom: "upscore8" 2892 | bottom: "score-pool3c" 2893 | top: "score-fused-pool3" 2894 | eltwise_param { 2895 | operation: SUM 2896 | } 2897 | } 2898 | 2899 | layer { 2900 | name: "dem-fuse-pool3" 2901 | type: "Eltwise" 2902 | bottom: "dem-upscore8" 2903 | bottom: "dem-score-pool3c" 2904 | top: 
"dem-score-fused-pool3" 2905 | eltwise_param { 2906 | operation: SUM 2907 | } 2908 | } 2909 | 2910 | ########################################## FCN-4 ####################################### 2911 | 2912 | layer { 2913 | name: "upscore4" 2914 | type: "Deconvolution" 2915 | bottom: "score-fused-pool3" 2916 | top: "upscore4" 2917 | param { 2918 | lr_mult: 1 2919 | decay_mult: 1 2920 | } 2921 | convolution_param { 2922 | num_output: 60 2923 | bias_term: false 2924 | kernel_size: 4 2925 | stride: 2 2926 | } 2927 | } 2928 | 2929 | layer { 2930 | name: "dem-upscore4" 2931 | type: "Deconvolution" 2932 | bottom: "dem-score-fused-pool3" 2933 | top: "dem-upscore4" 2934 | param { 2935 | lr_mult: 1 2936 | decay_mult: 1 2937 | } 2938 | convolution_param { 2939 | num_output: 60 2940 | bias_term: false 2941 | kernel_size: 4 2942 | stride: 2 2943 | } 2944 | } 2945 | 2946 | 2947 | layer { 2948 | name: "score-pool2" 2949 | type: "Convolution" 2950 | bottom: "pool2" 2951 | top: "score-pool2" 2952 | param { 2953 | lr_mult: 1 2954 | decay_mult: 1 2955 | } 2956 | param { 2957 | lr_mult: 2 2958 | decay_mult: 0 2959 | } 2960 | convolution_param { 2961 | num_output: 60 2962 | pad: 1 2963 | kernel_size: 3 2964 | engine: CUDNN 2965 | weight_filler { 2966 | type: "xavier" 2967 | } 2968 | bias_filler { 2969 | type: "constant" 2970 | } 2971 | } 2972 | } 2973 | 2974 | 2975 | # decrease strong peaks 2976 | #layer { 2977 | # name: "peak-cutter-pool2" 2978 | # type: "LRN" 2979 | # bottom: "score-pool2" 2980 | # top: "score-pool2_" 2981 | # lrn_param { 2982 | # local_size: 5 2983 | # alpha: 0.01 2984 | # beta: 0.95 2985 | # } 2986 | #} 2987 | 2988 | 2989 | layer { 2990 | name: "dem-score-pool2" 2991 | type: "Convolution" 2992 | bottom: "dem_pool2" 2993 | top: "dem-score-pool2" 2994 | param { 2995 | lr_mult: 1 2996 | decay_mult: 1 2997 | } 2998 | param { 2999 | lr_mult: 1 3000 | decay_mult: 0 3001 | } 3002 | convolution_param { 3003 | num_output: 60 3004 | pad: 1 3005 | kernel_size: 3 3006 | engine: 
CUDNN 3007 | weight_filler { 3008 | type: "xavier" 3009 | } 3010 | bias_filler { 3011 | type: "constant" 3012 | } 3013 | } 3014 | } 3015 | 3016 | 3017 | layer { 3018 | type: 'Crop' 3019 | name: 'crop' 3020 | bottom: 'score-pool2' 3021 | bottom: 'upscore4' 3022 | top: 'score-pool2c' 3023 | crop_param { 3024 | axis: 2 3025 | offset: 24 3026 | offset: 24 3027 | } 3028 | } 3029 | 3030 | layer { 3031 | type: 'Crop' 3032 | name: 'crop' 3033 | bottom: 'dem-score-pool2' 3034 | bottom: 'dem-upscore4' 3035 | top: 'dem-score-pool2c' 3036 | crop_param { 3037 | axis: 2 3038 | offset: 24 3039 | offset: 24 3040 | } 3041 | } 3042 | 3043 | ######################################################################################################### 3044 | 3045 | layer { 3046 | name: "fuse-pool2" 3047 | type: "Eltwise" 3048 | bottom: "upscore4" 3049 | bottom: "score-pool2c" 3050 | top: "final_1" 3051 | eltwise_param { 3052 | operation: SUM 3053 | } 3054 | } 3055 | 3056 | layer { 3057 | name: "dem-fuse-pool2" 3058 | type: "Eltwise" 3059 | bottom: "dem-upscore4" 3060 | bottom: "dem-score-pool2c" 3061 | top: "dem-final_1" 3062 | eltwise_param { 3063 | operation: SUM 3064 | } 3065 | } 3066 | 3067 | layer { 3068 | name: "final" 3069 | type: "Concat" 3070 | bottom: "final_1" 3071 | bottom: "dem-final_1" 3072 | top: "final-merged" 3073 | } 3074 | 3075 | ########################################## FINAL ###################################### 3076 | 3077 | layer { 3078 | name: "_upscore2" 3079 | type: "Deconvolution" 3080 | bottom: "final-merged" 3081 | top: "bigscore" 3082 | param { 3083 | lr_mult: 1 3084 | } 3085 | convolution_param { 3086 | num_output: 120 3087 | bias_term: false 3088 | kernel_size: 4 3089 | stride: 4 3090 | group: 120 3091 | weight_filler{ 3092 | type: "constant" 3093 | value: 1 3094 | } 3095 | 3096 | #weight_filler { type: "bilinear" } 3097 | } 3098 | } 3099 | 3100 | layer { 3101 | name: "relu_final" 3102 | type: "ReLU" 3103 | bottom: "bigscore" 3104 | top: "bigscore" 3105 | 
} 3106 | 3107 | layer { 3108 | name: "reducer" 3109 | type: "Convolution" 3110 | bottom: "bigscore" 3111 | top: "score-reducer" 3112 | param { 3113 | lr_mult: 1 3114 | decay_mult: 1 3115 | } 3116 | convolution_param { 3117 | num_output: 6 3118 | bias_term: false 3119 | kernel_size: 1 3120 | stride: 1 3121 | weight_filler { 3122 | type: "xavier" 3123 | } 3124 | bias_filler { 3125 | type: "constant" 3126 | } 3127 | } 3128 | } 3129 | 3130 | layer { 3131 | type: 'Crop' 3132 | name: 'crop' 3133 | bottom: 'score-reducer' 3134 | bottom: 'data' 3135 | top: 'score' 3136 | crop_param { 3137 | axis: 2 3138 | offset: 20 3139 | offset: 20 3140 | } 3141 | } 3142 | 3143 | layer { 3144 | name: "loss" 3145 | type: "SoftmaxWithLoss" 3146 | bottom: "score" 3147 | bottom: "label" 3148 | top: "loss" 3149 | loss_param { 3150 | normalize: false 3151 | } 3152 | } 3153 | -------------------------------------------------------------------------------- /ISPRS_Caffe_Models/Vaihingen_Models/02_VGG-Model/link_to_weight_Vaihingen: -------------------------------------------------------------------------------- 1 | https://drive.google.com/open?id=0ByVXVsnJKrUzVkV1dXVTNFFzdG8 2 | -------------------------------------------------------------------------------- /ISPRS_Caffe_Models/Vaihingen_Models/03_SegNet-Model/link_to_weight_Vaihingen: -------------------------------------------------------------------------------- 1 | https://drive.google.com/open?id=0ByVXVsnJKrUzVkV1dXVTNFFzdG8 2 | -------------------------------------------------------------------------------- /ISPRS_Caffe_Models/Vaihingen_Models/INFO_Models: -------------------------------------------------------------------------------- 1 | 2 | 3 | =============================================== 4 | FCN-Vaihingen-Model 5 | =============================================== 6 | 7 | - Input size of data 259x259 pixels 8 | 9 | Data for training : 10 | 11 | image-lmdb 12 | dsm-lmdb 13 | dsm-lmdb 14 | labels-lmdb 15 | class-boundaries-lmdb 16 | 17 
| 18 | - Inference size 259x259 pixels 19 | 20 | Data for inference : 21 | 22 | image-lmdb 23 | dsm-lmdb 24 | dsm-lmdb 25 | 26 | 27 | =============================================== 28 | VGG-Vaihingen-Model 29 | =============================================== 30 | 31 | - Input size of data 259x259 pixels 32 | 33 | Data for training : 34 | 35 | image-lmdb 36 | dsm-lmdb 37 | dsm-lmdb 38 | labels-lmdb 39 | class-boundaries-lmdb 40 | 41 | 42 | - Inference size 259x259 pixels 43 | 44 | Data for inference : 45 | 46 | image-lmdb 47 | dsm-lmdb 48 | dsm-lmdb 49 | 50 | =============================================== 51 | SegNet-Vaihingen-Model 52 | =============================================== 53 | 54 | - Input size of data 256x256 pixels 55 | 56 | Data for training : 57 | 58 | image-lmdb 59 | dsm-lmdb 60 | dsm-lmdb 61 | labels-lmdb 62 | class-boundaries-lmdb 63 | 64 | 65 | - Inference size 256x256 pixels 66 | 67 | Data for inference : 68 | 69 | image-lmdb 70 | dsm-lmdb 71 | dsm-lmdb 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /Misc/INFO.txt: -------------------------------------------------------------------------------- 1 | 2 | =========================== INFO =========================== 3 | 4 | 5 | Various tools for data construction and manipulation 6 | 7 | - Random patch extraction 8 | 9 | - Storing of images, labels, DSMs, nDSM, 10 | 11 | - HDF5 data construction 12 | 13 | - LMDB data construction 14 | 15 | - Count number of parameters in fully-convolutional networks 16 | 17 | - Convert data from LMDB to TIFF 18 | 19 | - Convert TIFF data into LMDB 20 | 21 | 22 | -------------------------------------------------------------------------------- /Misc/INFO.txt~: -------------------------------------------------------------------------------- 1 | 2 | =========================== INFO =========================== 3 | 4 | 5 | Various tools for data construction and 
manipulation 6 | 7 | - Random patch extraction 8 | 9 | - Storing of images, labels, DSMs, nDSM, 10 | 11 | - HDF5 data construction 12 | 13 | - LMDB storing format 14 | -------------------------------------------------------------------------------- /Misc/count_total_num_params.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import operator 3 | import functools 4 | #import pdb 5 | 6 | 7 | """ 8 | This method computes the total number of parameters in a CAFFE fully convolutional network (FCN). 9 | 10 | Best USE: Stop training in debug mode and run the script 11 | 12 | 13 | 14 | === TODO === 15 | 16 | * Modify it so that can also be used for fully-connected parts in a network 17 | 18 | """ 19 | 20 | def count_num_params(solver): 21 | 22 | # total parameter counter - initialize 23 | total_counter=0 24 | 25 | all_keys = solver.net.params.keys() 26 | 27 | for ii in range (len(all_keys)): 28 | 29 | # initialize 30 | this_layers_params_num=0 31 | 32 | this_layers_params_num = solver.net.params[all_keys[ii]][0].data.shape 33 | num_of_params=functools.reduce(operator.mul, this_layers_params_num, 1) 34 | 35 | try: 36 | num_of_params += solver.net.params[all_keys[ii]][1].data.shape[0] 37 | 38 | except IndexError: # network does not have a bias parameter 39 | num_of_params += 0 40 | 41 | # sum all parameters of all layers 42 | total_counter += num_of_params 43 | 44 | return total_counter 45 | -------------------------------------------------------------------------------- /Misc/h5_to_lmdb.py: -------------------------------------------------------------------------------- 1 | import caffe 2 | import lmdb 3 | from PIL import Image 4 | import numpy as np 5 | import h5py as h5 6 | 7 | # for debbuging 8 | #import pdb 9 | #pdb.set_trace() 10 | 11 | 12 | def make_img_lmdb(path_to_h5_txt): 13 | 14 | # open text file 15 | h5_txt_file = open(path_to_h5_txt) 16 | 17 | # construct image LMDB 18 | # IMAGE LMDB 19 | image_lmdb 
= lmdb.open('image-lmdb', map_size=int(1e12)) 20 | with image_lmdb.begin(write=True) as in_txn: 21 | 22 | idx_image = 0 # initialize index saver 23 | # loop through different H5 files 24 | for in_idx, in_ in enumerate(h5_txt_file): 25 | h5_file = h5.File(in_.strip(), 'r') 26 | img_data = np.array(h5_file['data']) 27 | 28 | # loop though the H5 data and assign the the the lmdb 29 | for i in range(img_data.shape[0]): 30 | image = img_data[i, :] 31 | im_to_db = caffe.io.array_to_datum(image.astype('float')) 32 | in_txn.put('{:0>10d}'.format(idx_image), im_to_db.SerializeToString()) 33 | idx_image += 1 34 | print "------- Processed: ", i, " from ", img_data.shape[0], " -----" 35 | print "------------------------------------------------------------------" 36 | print " " 37 | print "------ Processing IMAGE H5 ", in_, " ------" 38 | print " " 39 | print "------------------------------------------------------------------" 40 | 41 | image_lmdb.close() 42 | 43 | 44 | def make_dem_lmdb(path_to_h5_txt): 45 | 46 | # open text file 47 | h5_txt_file = open(path_to_h5_txt) 48 | 49 | # DEM LMDB 50 | dem_lmdb = lmdb.open('dem-lmdb', map_size=int(1e12)) 51 | with dem_lmdb.begin(write=True) as in_txn: 52 | 53 | idx_dem = 0 # initialize index saver 54 | # loop through different H5 files 55 | for in_idx, in_ in enumerate(h5_txt_file): 56 | h5_file = h5.File(in_.strip()) 57 | dem_data = np.array(h5_file['dem']) 58 | 59 | # loop though the H5 data and assign the the the lmdb 60 | for i in range(dem_data.shape[0]): 61 | dem = dem_data[i, :] 62 | dem_to_db = caffe.io.array_to_datum(dem.astype('float')) 63 | in_txn.put('{:0>10d}'.format(idx_dem), dem_to_db.SerializeToString()) 64 | idx_dem += 1 65 | print "------- Processed: ", i, " from ", dem_data.shape[0], " -----" 66 | 67 | print "------------------------------------------------------------------" 68 | print " " 69 | print "------ Processing DEM H5: ", in_, " ------" 70 | print " " 71 | print 
"------------------------------------------------------------------" 72 | 73 | dem_lmdb.close() 74 | 75 | 76 | def make_label_lmdb(path_to_h5_txt): 77 | 78 | # open text file 79 | h5_txt_file = open(path_to_h5_txt) 80 | # LABEL LMDB 81 | label_lmdb = lmdb.open('label-lmdb', map_size=int(1e12)) 82 | with label_lmdb.begin(write=True) as in_txn: 83 | 84 | idx_label = 0 # initialize index saver 85 | # loop through different H5 files 86 | for in_idx, in_ in enumerate(h5_txt_file): 87 | h5_file = h5.File(in_.strip()) 88 | label_data = np.array(h5_file['label']) 89 | 90 | # loop though the H5 data and assign the the the lmdb 91 | for i in range(label_data.shape[0]): 92 | label = label_data[i, :] 93 | label_to_db = caffe.io.array_to_datum(label.astype('int')) 94 | in_txn.put('{:0>10d}'.format(idx_label), label_to_db.SerializeToString()) 95 | idx_label += 1 96 | print "------- Processed: ", i, " from ", label_data.shape[0], " -----" 97 | 98 | print "------------------------------------------------------------------" 99 | print " " 100 | print "------ Processing Labels H5: ", in_, " ------" 101 | print " " 102 | print "------------------------------------------------------------------" 103 | 104 | label_lmdb.close() 105 | 106 | 107 | def make_edge_label_lmdb(path_to_h5_txt): 108 | 109 | # open text file 110 | h5_txt_file = open(path_to_h5_txt) 111 | 112 | # Edge-LABEL LMDB 113 | edge_label_lmdb = lmdb.open('edge-label-lmdb', map_size=int(1e12)) 114 | with edge_label_lmdb.begin(write=True) as in_txn: 115 | 116 | idx_label = 0 # initialize index saver 117 | # loop through different H5 files 118 | for in_idx, in_ in enumerate(h5_txt_file): 119 | h5_file = h5.File(in_.strip()) 120 | edge_label_data = np.array(h5_file['label']) 121 | 122 | # loop though the H5 data and assign the the the lmdb 123 | for i in range(edge_label_data.shape[0]): 124 | edge_label = edge_label_data[i, :] 125 | edge_label_to_db = caffe.io.array_to_datum(edge_label.astype('float')) 126 | 
in_txn.put('{:0>10d}'.format(idx_label), edge_label_to_db.SerializeToString()) 127 | idx_label += 1 128 | print "------- Processed: ", i, " from ", edge_label_data.shape[0], " -----" 129 | 130 | print "------------------------------------------------------------------" 131 | print " " 132 | print "------ Processing Labels H5: ", in_, " ------" 133 | print " " 134 | print "------------------------------------------------------------------" 135 | 136 | edge_label_lmdb.close() 137 | 138 | # ================================================================================ # 139 | 140 | # Converts raw LABELS into LMDB data 141 | 142 | # ================================================================================ # 143 | 144 | def make_image_db_from_image(image_file): 145 | image_db = lmdb.open('image-lmdb', map_size=int(1e12)) 146 | with image_db.begin(write=True) as in_txn: 147 | for in_idx, in_ in enumerate(image_file): 148 | # load image: 149 | # - as np.uint8 {0, ..., 255} 150 | # - in BGR (switch from RGB) 151 | # - in Channel x Height x Width order (switch from H x W x C) 152 | im = np.array(Image.open(in_.strip())) # or load whatever nd-array you need 153 | im = im[:, :, ::-1] 154 | im = im[:, :, 0:-1] # take only three bands - from 4 155 | im = im.transpose((2, 0, 1)) 156 | im_dat = caffe.io.array_to_datum(im) 157 | in_txn.put('{:0>10d}'.format(in_idx), im_dat.SerializeToString()) 158 | image_db.close() 159 | 160 | # ================================================================================ # 161 | 162 | # Converts raw Image data into LMDB format 163 | 164 | # ================================================================================ # 165 | 166 | def make_label_db_from_image(label_file): 167 | label_db = lmdb.open('label-lmdb', map_size=int(1e12)) 168 | with label_db.begin(write=True) as in_txn: 169 | for in_idx, in_ in enumerate(label_file): 170 | im = np.array(Image.open(in_.strip())) # or load whatever nd-array you need 171 | im = im[:, :, 
3:4] > 1 #take only single band and binarize it 172 | im = im.astype('uint8') 173 | im = im.transpose((2, 0, 1)) 174 | im_dat = caffe.io.array_to_datum(im) 175 | in_txn.put('{:0>10d}'.format(in_idx), im_dat.SerializeToString()) 176 | label_db.close() 177 | 178 | 179 | # ================================================================================ # 180 | 181 | # Methods that convert as set of LMDB image data into TIF 182 | 183 | # ================================================================================ # 184 | 185 | 186 | def db_float_to_img(db_name, ext): 187 | env = lmdb.open(db_name, readonly=True) 188 | with env.begin() as txn: 189 | cursor = txn.cursor() 190 | for key, value in cursor: 191 | datum = caffe.proto.caffe_pb2.Datum() 192 | datum.ParseFromString(value) 193 | x = caffe.io.datum_to_array(datum) 194 | x = x.transpose((1, 2, 0)) 195 | x = x[:, :, ::-1] 196 | if x.shape[2] == 1: 197 | x = np.concatenate((x, x, x), axis=2) 198 | img = Image.fromarray(x.astype(np.uint8)) 199 | img.save(ext+str(key)+'.tif') 200 | 201 | 202 | def db_int_to_img(db_name,ext): 203 | env = lmdb.open(db_name, readonly=True) 204 | with env.begin() as txn: 205 | cursor = txn.cursor() 206 | for key, value in cursor: 207 | datum = caffe.proto.caffe_pb2.Datum() 208 | datum.ParseFromString(value) 209 | flat_x = np.fromstring(datum.data, dtype=np.float64) 210 | x = flat_x.reshape(datum.channels, datum.height, datum.width) 211 | x = x[:, :, ::-1] 212 | img = Image.fromarray(x.astype(np.uint8)) 213 | img.save(ext+str(key)+'.tif') 214 | 215 | 216 | if __name__ == '__main__': 217 | 218 | """ 219 | Multiple methods for converting HDF5 data into LMDB format 220 | 221 | make_img_lmdb @ construct LMDB for the image data HDF5 222 | 223 | make_dem_lmdb @ construct LMDB for nDSM or DSM HDF5 (separately) 224 | 225 | make_edge_lmdb @ construct LMDB for class-contours HDF5 226 | 227 | make_label_lmdb @ construct LMDB for annotated labels HDF5 228 | 229 | TODO: 230 | 231 | * methods for 
constructing LMDBs directly from image data also included (no need for HDF5) - check them before using 232 | 233 | * method for constructing images from LMDB --- check them before use 234 | 235 | """ 236 | 237 | 238 | # ======================= INPUT ========================= # 239 | 240 | # path to HDF5 data 241 | path_to_h5_txt_data = "/mnt/data1/_Dimitris/00_Data/03_KITTI_Aerial/00_Code/data_creation/data_out/HDF5/train_data/train_data.txt" 242 | 243 | 244 | # ============================================ # 245 | 246 | # Enable ONE at a time to construct LMDBs 247 | 248 | #make_img_lmdb(path_to_h5_txt=path_to_h5_txt_data) 249 | #make_dem_lmdb(path_to_h5_txt=path_to_h5_txt_data) 250 | #make_edge_label_lmdb(path_to_h5_txt=path_to_h5_txt_data) 251 | make_label_lmdb(path_to_h5_txt=path_to_h5_txt_data) 252 | -------------------------------------------------------------------------------- /Misc/patch_extraction_to_HDF5.py: -------------------------------------------------------------------------------- 1 | __author__ = 'deep_unlearn' 2 | 3 | from sklearn.feature_extraction.image import extract_patches_2d 4 | from PIL import Image 5 | import numpy as np 6 | import os 7 | from natsort import natsorted 8 | from osgeo import gdal 9 | import h5py 10 | 11 | 12 | """ 13 | Code for extracting randomly patches from a set of image scenes - 14 | 15 | - If exist considers patches from a DSM & nDSM 16 | - If height information(nDSM, DEM) is not available extracts data from RGB 17 | images and respective labels only 18 | 19 | Results are stored in limited-size H5 format for use with CAFFE-lib 20 | 21 | Optional: The code also accepts a mask with where the allowed foreground for 22 | patch extraction is explicitly designated. This also allows partial 23 | overal with background. 
24 | 25 | """ 26 | 27 | # ======================================== # 28 | 29 | # Enable for degub mode 30 | 31 | #import pdb 32 | #pdb.set_trace() 33 | #from matplotlib import pyplot as plt 34 | 35 | # ###################################### 36 | 37 | 38 | def remove_background(img_patches, 39 | dsm_patches, 40 | ndsm_patches, 41 | label_patches, 42 | mask_patches): 43 | 44 | # initialize storing index 45 | idx_del = [] 46 | 47 | for i in range (mask_patches.shape[0]): 48 | total_sum_mask = 0 49 | current_patch = mask_patches[i, :] 50 | total_sum_mask = current_patch.sum() # gives the total value of mask pixel-values - 255 for non background pixel 51 | 52 | # if non-black pixel as less than 20% of total size remove 53 | if total_sum_mask < 3342336: 54 | 55 | # strore index 56 | idx_del.append(i) 57 | 58 | # delete found indeces 59 | img_patches = np.delete(img_patches, idx_del, axis=0) 60 | label_patches = np.delete(label_patches, idx_del, axis=0) 61 | 62 | if dsm_patches is not None: 63 | dsm_patches = np.delete(dsm_patches, idx_del, axis=0) 64 | 65 | if ndsm_patches is not None: 66 | ndsm_patches = np.delete(ndsm_patches, idx_del, axis=0) 67 | 68 | if mask_patches is not None: 69 | mask_patches = np.delete(mask_patches, idx_del, axis=0) 70 | 71 | assert (img_patches.shape[0]==label_patches.shape[0]) 72 | 73 | return img_patches, dsm_patches, ndsm_patches, label_patches 74 | 75 | 76 | def read_in_images(img_path, dsm_path, ndsm_path, labels_path, mask_path): 77 | 78 | # read in and convert to array 79 | image = Image.open(img_path) 80 | img = np.array(image) 81 | 82 | if dsm_path is not None: 83 | file = gdal.Open(dsm_path) 84 | dsm = np.array(file.GetRasterBand(1).ReadAsArray()) 85 | 86 | # check that dem does not contain nans or infinite - 87 | # if exist replace spikes with average height 88 | if ~(np.isfinite(dsm.sum()) or np.isfinite(dsm).all()): 89 | idx_nan = np.isnan(dsm) 90 | mean_val = dsm[~np.isnan(dsm)].mean() 91 | dsm[idx_nan] = mean_val 92 | else: 93 
| dsm = None 94 | 95 | if ndsm_path is not None: 96 | file = gdal.Open(ndsm_path) 97 | ndsm = np.array(file.GetRasterBand(1).ReadAsArray()) 98 | 99 | # check that ndsm does not contain nans or infinite 100 | if ~(np.isfinite(ndsm.sum()) or np.isfinite(ndsm).all()): 101 | idx_nan = np.isnan(ndsm) 102 | mean_val = ndsm[~np.isnan(ndsm)].mean() 103 | ndsm[idx_nan] = mean_val 104 | else: 105 | ndsm = None 106 | 107 | labels = Image.open(labels_path) 108 | labels = np.array(labels) 109 | 110 | if mask_path is not None: 111 | file = gdal.Open(mask_path) 112 | mask = np.array(file.GetRasterBand(1).ReadAsArray()) 113 | else: 114 | mask = None 115 | 116 | return img, dsm, ndsm, labels, mask 117 | 118 | 119 | def patch_extractor(img=None, 120 | dsm=None, 121 | ndsm=None, 122 | labels=None, 123 | mask_ext=None, 124 | num_patches=None, 125 | patch_size=None, 126 | random_num='None'): 127 | 128 | # extract image patches 129 | print "Random number - patch extraction : ", random_num 130 | 131 | img_patches = extract_patches_2d(img, 132 | (patch_size, patch_size), 133 | num_patches, 134 | random_state=random_num) 135 | 136 | # if DSM exist extract patches 137 | if dsm is not None: 138 | dsm_patches = extract_patches_2d(dsm, 139 | (patch_size, patch_size), 140 | num_patches, 141 | random_state=random_num) 142 | else: 143 | dsm_patches = None 144 | 145 | # if nDSM exist extract patches 146 | if ndsm is not None: 147 | ndsm_patches = extract_patches_2d(ndsm, 148 | (patch_size, patch_size), 149 | num_patches, 150 | random_state=random_num) 151 | else: 152 | ndsm_patches = None 153 | 154 | # if nDSM exist extract patches 155 | if mask_ext is not None: 156 | mask_patches = extract_patches_2d(mask_ext, 157 | (patch_size, patch_size), 158 | num_patches, 159 | random_state=random_num) 160 | mask_patches=mask_patches[:, :, :, None] 161 | else: 162 | mask_patches = None 163 | 164 | # extract label patches 165 | label_patches = extract_patches_2d(labels, 166 | (patch_size, patch_size), 167 | 
num_patches, 168 | random_state=random_num) 169 | 170 | 171 | # =========== Remove Tiles containing only background ============ # 172 | 173 | img_patches, dsm_patches, ndsm_patches, label_patches = remove_background(img_patches, 174 | dsm_patches, 175 | ndsm_patches, 176 | label_patches, 177 | mask_patches) 178 | 179 | return img_patches, dsm_patches, ndsm_patches, label_patches 180 | 181 | 182 | def crop_labels(img_labels=None, keep_area=None, patch_size=None): 183 | 184 | """ 185 | 186 | This method allow to extract sub-patches from patches 187 | Used for smaller context in label 188 | 189 | """ 190 | 191 | # index of center pixel to be extracted 192 | middle_element = np.floor(patch_size/2) 193 | pixels_around_center = np.floor(keep_area/2) 194 | 195 | # initialize matrix to store new sub-labels 196 | img_labels_new = np.zeros((img_labels.shape[0], 197 | keep_area*keep_area*img_labels.shape[3])) 198 | 199 | # Crop labels so that a small sub-window of area "keep_area" is retained after process 200 | for i in xrange(img_labels.shape[0]): 201 | 202 | # read patch sequentially and store it to temporary value 203 | current_temp = img_labels[i, :, :, :] 204 | 205 | # new set of minimized labels 206 | label_temp = current_temp[middle_element - \ 207 | pixels_around_center: middle_element + \ 208 | pixels_around_center + 1, 209 | middle_element - pixels_around_center: middle_element \ 210 | + pixels_around_center + 1] 211 | 212 | label_temp = label_temp.flatten() 213 | # store into new matrix 214 | img_labels_new[i, :] = label_temp 215 | 216 | return img_labels_new 217 | 218 | 219 | def label_replacer(img_labels=None): 220 | 221 | """ 222 | Method replaces standard RGB label-values with single integer 223 | categorical variable. 
Define initial label below 224 | 225 | """ 226 | 227 | # Building (255, 0, 0) --- 0 228 | # Road (255, 105, 180) --- 1 229 | # Sidewalk (0, 0, 255) --- 2 230 | # Parking (255, 255, 0) --- 3 231 | # Background (0,0, 0) --- 4 232 | 233 | # indeces to detect 234 | building = (255, 0, 0) 235 | road = (255, 105, 180) 236 | sidewalk = (0, 0, 255) 237 | parking = (255, 255, 0) 238 | background = (0, 0, 0) 239 | 240 | # initialize matrix to store new labels 241 | img_labels_single_id = np.zeros((img_labels.shape[0], 242 | img_labels.shape[1], 243 | img_labels.shape[2], 1)) 244 | 245 | for i in range(img_labels.shape[0]): 246 | 247 | # store temp image to process 248 | temp_img = img_labels[i, :, :, :] 249 | 250 | xx_idx_building, yy_idx_building = np.where(np.all(temp_img == building, axis=-1)) 251 | xx_idx_road, yy_idx_road = np.where(np.all(temp_img == road, axis=-1)) 252 | xx_idx_sidewalk, yy_idx_sidewalk = np.where(np.all(temp_img == sidewalk, axis=-1)) 253 | xx_idx_parking, yy_idx_parking = np.where(np.all(temp_img == parking, axis=-1)) 254 | xx_idx_background, yy_idx_background = np.where(np.all(temp_img == background, axis=-1)) 255 | 256 | # replace with new single indexes 257 | temp_img[xx_idx_building, yy_idx_building] = 0 258 | temp_img[xx_idx_road, yy_idx_road] = 1 259 | temp_img[xx_idx_sidewalk, yy_idx_sidewalk] = 2 260 | temp_img[xx_idx_parking, yy_idx_parking] = 3 261 | temp_img[xx_idx_background, yy_idx_background] = 4 262 | 263 | img_labels_single_id[i, :, :, :] = temp_img[:, :, 0:1] 264 | 265 | return img_labels_single_id 266 | 267 | 268 | def store_images(img_patches, 269 | img_labels, 270 | path_store_img, 271 | path_store_label): 272 | 273 | """ 274 | Method for storing IMAGE and LABEL patches in folder - used with MemoryDataLayer 275 | in CAFFE 276 | 277 | 278 | Todo: 279 | * Expand this for considering nDSM, DEM patches 280 | 281 | """ 282 | 283 | for ii in range(img_labels.shape[0]): 284 | 285 | temp_img = img_patches[ii, :, :, :] 286 | temp_label = 
img_labels[ii, :, :, 0] 287 | 288 | img = Image.fromarray(temp_img) 289 | label = Image.fromarray(temp_label) 290 | 291 | name_image = 'image_' + str(ii) + '.tif' 292 | name_label = 'label_' + str(ii) + '.tif' 293 | 294 | img.save(os.path.join(path_store_img, name_image), "TIFF") 295 | label.save(os.path.join(path_store_label, name_label), "TIFF") 296 | 297 | 298 | def construct_hdf5_dataset(img_patches, 299 | dsm_patches, 300 | ndsm_patches, 301 | img_labels, 302 | save_path, 303 | save_file_name, 304 | band_mean_vals=None): 305 | 306 | img_patches = img_patches.astype('float32') 307 | img_labels = img_labels.astype('float32') 308 | if dsm_patches is not None: 309 | dsm_patches = dsm_patches.astype('float32') 310 | if ndsm_patches is not None: 311 | ndsm_patches = dsm_patches.astype('float32') 312 | 313 | if band_mean_vals is not None: 314 | img_patches[:, 0, :, :] = img_patches[:, 0, :, :] - band_mean_vals[0] # R 315 | img_patches[:, 1, :, :] = img_patches[:, 1, :, :] - band_mean_vals[1] # G 316 | img_patches[:, 2, :, :] = img_patches[:, 2, :, :] - band_mean_vals[2] # B 317 | 318 | # - transpose channels from RGB to BGR 319 | img_patches = img_patches[:, :, :, ::-1] 320 | 321 | # - convert to Batches x Channel x Height x Width order (switch from B x H x W x C) 322 | img_patches = img_patches.transpose((0, 3, 1, 2)) 323 | if dsm_patches is not None: 324 | dsm_patches = dsm_patches.transpose((0, 3, 1, 2)) 325 | if ndsm_patches is not None: 326 | ndsm_patches = ndsm_patches.transpose((0, 3, 1, 2)) 327 | img_labels = img_labels.transpose((0, 3, 1, 2)) 328 | 329 | # compute total size 330 | total_size = img_patches[0] * img_patches[1] * img_patches[2] * img_patches[3] 331 | 332 | # STORE DATA AS UNCOMPRESSED HDF5 333 | with h5py.File(save_path + '/' + save_file_name + '.h5', 'w') as f: 334 | f['data'] = img_patches 335 | if dsm_patches is not None: 336 | f['dsm'] = dsm_patches 337 | if ndsm_patches is not None: 338 | f['ndsm'] = ndsm_patches 339 | f['label'] = 
img_labels 340 | 341 | with open(save_path + '/' + save_file_name + '.txt', 'w') as f: 342 | f.write(save_path + '/' +save_file_name + '.h5\n') 343 | 344 | 345 | # ================== MAIN ===================== # 346 | 347 | 348 | def main (image_path_folder, 349 | labels_path_folder, 350 | allow_perturbation, 351 | num_patches, 352 | patch_size, 353 | save_name, 354 | save_path, 355 | random_num, 356 | dsm_path_folder=None, 357 | ndsm_path_folder=None, 358 | mask_path_folder=None): 359 | 360 | # store names of images to be processed 361 | image_name_list = natsorted(os.listdir(image_path_folder)) 362 | labels_name_list = natsorted(os.listdir(labels_path_folder)) 363 | 364 | # sort naturally if exist 365 | if dsm_path_folder is not None: 366 | dsm_name_list = natsorted(os.listdir(dsm_path_folder)) 367 | else: 368 | dsm_name_list = None 369 | 370 | if ndsm_path_folder is not None: 371 | ndsm_name_list = natsorted(os.listdir(ndsm_path_folder)) 372 | else: 373 | ndsm_name_list = None 374 | 375 | if mask_path_folder is not None: 376 | mask_name_list = natsorted(os.listdir(mask_path_folder)) 377 | else: 378 | mask_name_list = None 379 | 380 | # CHECK 381 | 382 | # ensure that num of labels = num of images 383 | 384 | if (dsm_name_list is not None) and (ndsm_name_list is not None): 385 | assert (len(labels_name_list) == len(image_name_list) == len(dsm_name_list) == len(ndsm_name_list)) 386 | 387 | else: # only images-labels-mask exist 388 | assert (len(labels_name_list) == len(image_name_list) == len(mask_name_list)) 389 | 390 | # initialize 391 | final_img_patches = [] 392 | final_dsm_patches = [] 393 | final_ndsm_patches = [] 394 | final_labels = [] 395 | 396 | #index 397 | i = 0 398 | 399 | # read in tiff images from folder 400 | for file_name in natsorted(os.listdir(image_path_folder)): 401 | 402 | # temp store image_name and label_name 403 | image_path = os.path.join(image_path_folder, image_name_list[i]) 404 | 405 | labels_path = os.path.join(labels_path_folder, 
labels_name_list[i]) 406 | 407 | if dsm_path_folder is not None: 408 | dsm_path = os.path.join(dsm_path_folder, dsm_name_list[i]) 409 | else: 410 | dsm_path = None 411 | 412 | if ndsm_path_folder is not None: 413 | ndsm_path = os.path.join(ndsm_path_folder, ndsm_name_list[i]) 414 | else: 415 | ndsm_path = None 416 | 417 | if mask_path_folder is not None: 418 | mask_path = os.path.join(mask_path_folder, mask_name_list[i]) 419 | else: 420 | mask_path = None 421 | 422 | # read in image and labels 423 | img, dsm, ndsm, labels, mask = read_in_images(image_path, dsm_path, ndsm_path, labels_path, mask_path) 424 | 425 | # extract number of patches randomly 426 | img_patches, \ 427 | dsm_patches, \ 428 | ndsm_patches, \ 429 | img_labels = patch_extractor(img=img, 430 | dsm=dsm, 431 | ndsm=ndsm, 432 | labels=labels, 433 | mask_ext=mask, 434 | num_patches=num_patches, 435 | patch_size=patch_size, 436 | random_num=random_num) 437 | 438 | img_patches = np.array(img_patches, dtype='float32') 439 | 440 | if dsm_patches is not None: 441 | dsm_patches = np.array(dsm_patches, dtype='float32') 442 | 443 | if ndsm_patches is not None: 444 | ndsm_patches = np.array(ndsm_patches, dtype='float32') 445 | 446 | # replace label values for categorical classes # 447 | # If label is single-plane edge then skip this 448 | if img_labels.ndim > 3: 449 | # replace standard RGB labels with single value labels 450 | img_labels = label_replacer(img_labels) 451 | 452 | # extract sub-patch of labels - smaller than initial size 453 | # TODO: NEEDS MODIFICATION for including dem-data 454 | # img_labels = crop_labels(img_labels, 455 | # keep_area=keep_area, 456 | # patch_size=patch_size) 457 | 458 | # store cumulative data 459 | if i == 0: 460 | final_img_patches = img_patches 461 | final_dsm_patches = dsm_patches 462 | final_ndsm_patches = ndsm_patches 463 | final_labels = img_labels 464 | else: 465 | final_img_patches = np.concatenate((final_img_patches, 466 | img_patches), 467 | axis=0) 468 | if 
final_dsm_patches is not None: 469 | final_dsm_patches = np.concatenate((final_dsm_patches, 470 | dsm_patches), 471 | axis=0) 472 | if final_ndsm_patches is not None: 473 | final_ndsm_patches = np.concatenate((final_ndsm_patches, 474 | ndsm_patches), 475 | axis=0) 476 | final_labels = np.concatenate((final_labels, img_labels), 477 | axis=0) 478 | 479 | # increase index 480 | i += 1 481 | 482 | # add singleton dimension in dem_patches so that makes up a 4D -array 483 | # as required in Caffe 484 | if final_dsm_patches is not None: 485 | final_dsm_patches = final_dsm_patches[:, :, :, np.newaxis] 486 | 487 | if final_ndsm_patches is not None: 488 | final_ndsm_patches = final_ndsm_patches[:, :, :, np.newaxis] 489 | 490 | #if labels are edges also add singleton dimension to them 491 | if final_labels.ndim < 4: 492 | final_labels = final_labels[:, :, :, np.newaxis] 493 | 494 | # Perturb training instances and labels equally 495 | # perturbation index 496 | if allow_perturbation is True: 497 | perturbation_index = np.random.permutation(final_img_patches.shape[0]) 498 | 499 | final_img_patches = final_img_patches[perturbation_index] 500 | 501 | if final_dsm_patches is not None: 502 | final_dsm_patches = final_dsm_patches[perturbation_index] 503 | if final_ndsm_patches is not None: 504 | final_ndsm_patches = final_ndsm_patches[perturbation_index] 505 | 506 | final_labels = final_labels[perturbation_index] 507 | 508 | # ensure labels are integers - then float32 509 | final_labels = np.array(final_labels, dtype='int32') 510 | final_labels = np.array(final_labels, dtype='float32') 511 | 512 | # if saving variables exist save HDf5 file 513 | if 'save_name' and 'save_path' in locals(): 514 | 515 | #Construct HDF5 data 516 | construct_hdf5_dataset(img_patches=final_img_patches, 517 | dsm_patches=final_dsm_patches, 518 | ndsm_patches=final_ndsm_patches, 519 | img_labels=final_labels, 520 | save_file_name=save_name, 521 | save_path=save_path, band_mean_vals=None) 522 | 523 | # 
Store images and labels 524 | # store_images(img_patches=final_patches, img_labels=final_labels, 525 | # path_store_img=save_path_img, path_store_label=save_path_label) 526 | 527 | 528 | ############################################################################################################# 529 | 530 | 531 | if __name__ == '__main__': 532 | 533 | 534 | # =============== INPUTs ================ # 535 | 536 | # Path to images 537 | path_img_fold = "/mnt/data1/_Dimitris/00_RS_Data/03_KITTI_Aerial/Aerial/test_images/images" 538 | 539 | # Path to DSM 540 | path_dsm_fold = None 541 | 542 | # Path to nDSM 543 | path_ndsm_fold = None 544 | 545 | # Path to image masks 546 | path_mask_fold = "/mnt/data1/_Dimitris/00_RS_Data/03_KITTI_Aerial/Aerial/test_images/extent_masks/" 547 | 548 | # Labels 549 | path_label_folder = "/mnt/data1/_Dimitris/00_RS_Data/03_KITTI_Aerial/Aerial/test_images/labels" 550 | 551 | # Boolean - Enable if to randomly perurb data 552 | perturb_mode = True 553 | 554 | """ 555 | IMPORTANT 556 | 557 | TOTAL NUMBER of patches : 558 | 559 | (NUM of patches PER image (variable below)) x (length of random number list - defined below) 560 | 561 | """ 562 | 563 | num_of_patches_per_img = 200 # number of patches PER each input image !!!!!!!! 
564 | 565 | # size in pixels of patches 566 | size_of_patch = 256 567 | 568 | # ONLY enable if want to create HDF5 data - otherwise comment out 569 | saving_path = "./test/" 570 | 571 | random_numbers = [83762, 572 | 38476, 573 | 26152, 574 | 38485, 575 | 2221, 576 | 213875, 577 | 968564, 578 | 3735251, 579 | 78705038, 580 | 5342] 581 | 582 | # ONLY enable if want to create HDF5 data - otherwise comment out 583 | saving_names = ["train_data_256x256_1", 584 | "train_data_256x256_2", 585 | "train_data_256x256_3", 586 | "train_data_256x256_4", 587 | "train_data_256x256_5", 588 | "train_data_256x256_6", 589 | "train_data_256x256_7", 590 | "train_data_256x256_8", 591 | "train_data_256x256_9", 592 | "train_data_256x256_10"] 593 | 594 | # ============================================================ # 595 | 596 | for i in range(len(random_numbers)): 597 | 598 | print "\t\t ", i+1, " out of ", len(random_numbers) 599 | 600 | # read in next random number 601 | random_num = random_numbers[i] 602 | 603 | saving_name = saving_names[i] 604 | main(image_path_folder=path_img_fold, 605 | dsm_path_folder=path_dsm_fold, 606 | ndsm_path_folder=path_dsm_fold, 607 | labels_path_folder=path_label_folder, 608 | mask_path_folder = path_mask_fold, 609 | allow_perturbation=perturb_mode, 610 | num_patches=num_of_patches_per_img, 611 | patch_size=size_of_patch, 612 | save_name=saving_name, 613 | save_path=saving_path, 614 | random_num=random_num) 615 | 616 | print "DONE" 617 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Class-Boundaries-Implementation 2 | 3 | 4 | ======= INFO ======= 5 | 6 | This repository contains the code for replicating results in the following work 7 | 8 | "Classification with An Edge: Improving Semantic Image Segmentation with Boundary Detection" 9 | 10 | https://arxiv.org/abs/1612.01337 11 | 12 | The repository contains code: 13 | 14 | 
- Training an ensemble of three independent fully convolutional networks (VGG16-FCN, Pascal-FCN, SegNet-FCN) 15 | with respective pre-trained models over the ISPRS-Vaihingen & ISPRS-Potsdam dataset - Using Caffe Deep-Learning Framework 16 | 17 | - Some miscellaneous python files for data construction (Patch-extraction, HDF5 & LMDB generation) 18 | 19 | - Some sample code on training models and respective prototxt files 20 | 21 | - A Caffe custom Python-Layer for stochastic data-augmentation of data on every training batch 22 | 23 | 24 | If you make use of the code please cite the following work: 25 | 26 | @article{marmanis2016classification, 27 | title={Classification with an edge: improving semantic image segmentation with boundary detection}, 28 | author={Marmanis, Dimitrios and Schindler, Konrad and Wegner, Jan Dirk and Galliani, Silvano and Datcu, Mihai and Stilla, Uwe}, 29 | journal={arXiv preprint arXiv:1612.01337}, 30 | year={2016} 31 | } 32 | 33 | 34 | -------------------------------------------------------------------------------- /master: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deep-unlearn/ISPRS-Classification-With-an-Edge/de5f904938a4182de2d3fef0e5b0aef3977badbf/master -------------------------------------------------------------------------------- /model_inference/model_inference_single_model.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dmarmanis' 2 | from sklearn.feature_extraction.image import extract_patches_2d as patch_extractor 3 | import matplotlib 4 | 5 | #matplotlib.use('Agg') # to avoid problems in servers with no display variables 6 | import matplotlib.pyplot as plt 7 | from itertools import product 8 | import h5py as h5 9 | #from PIL import Image # not loading all image-extent properly 10 | from osgeo import gdal 11 | import numpy as np 12 | import caffe 13 | import shutil 14 | import pdb 15 | import os 16 | 17 |
18 | """ 19 | This script runs inference of a set of images (and respective DEM data) using a caffe model 20 | and a predefined stride value. 21 | The input parameters are defined in __main__ method - so user should check this out before 22 | running the script. 23 | 24 | Important - mean subtraction per-band is disabled as generally does not perform well over 25 | unseen data 26 | 27 | """ 28 | 29 | def sequential_image_loader(path_to_folder, index_reader): 30 | """ 31 | 32 | Read in images sequentially and compute their mean per band 33 | 34 | """ 35 | 36 | # intialize 37 | list_of_images = [] 38 | 39 | for file in sorted(os.listdir(path_to_folder)): 40 | if file.endswith('tif'): 41 | list_of_images.append(file) 42 | 43 | # add check if list of images is empty - try jpeg 44 | if list_of_images == []: 45 | for file in sorted(os.listdir(path_to_folder)): 46 | if file.endswith('jpg'): 47 | list_of_images.append(file) 48 | 49 | # check if list of images is empty 50 | assert list_of_images != [] 51 | 52 | # find image to be returned in this call 53 | image_to_read = list_of_images[index_reader] 54 | 55 | # read in image - using PIL ---> Images are not loaded always correct 56 | #current_image = Image.open(os.path.join(path_to_folder, image_to_read)) 57 | #img = np.array(current_image) 58 | 59 | # Use GDAL to load in images 60 | data = gdal.Open(os.path.join(path_to_folder, image_to_read), gdal.GA_ReadOnly) 61 | img = data.ReadAsArray() # (bands, xdim, ydim) 62 | 63 | # flip axis if image is multispectral 64 | if img.ndim > 2: 65 | img = np.swapaxes(np.swapaxes(img, 0, 2), 0, 1) # flip axis to make (xdim, ydim, bands) image 66 | 67 | if img.ndim > 2: 68 | # read as BGR as in training data 69 | img = img[:, :, ::-1] 70 | 71 | # initialize for storing values 72 | img_mean = np.zeros((3, 1)) 73 | 74 | # red band mean 75 | img_mean[0] = img[:, :, 0].mean() 76 | # green band mean 77 | img_mean[1] = img[:, :, 1].mean() 78 | # blue band mean 79 | img_mean[2] = img[:, :, 
2].mean() 80 | else: 81 | img_mean = img.mean() 82 | 83 | return img, img_mean, list_of_images 84 | 85 | 86 | def image_preprocessing(img_data, 87 | img_mean, 88 | img_value_scaler, 89 | dsm_data, 90 | dsm_mean, 91 | ndsm_data, 92 | ndsm_mean, 93 | dsm_value_scaler): 94 | 95 | # Preproce IMAGE data 96 | # substract mean per band 97 | # img_data[:, 0, :, :] = img_data[:, 0, :, :] - img_mean[0] 98 | # img_data[:, 1, :, :] = img_data[:, 1, :, :] - img_mean[1] 99 | # img_data[:, 2, :, :] = img_data[:, 2, :, :] - img_mean[2] 100 | 101 | # re-scale 102 | img_data = img_data * img_value_scaler 103 | 104 | # Preprocess DEM data 105 | # dem_data = dem_data - dem_mean 106 | dsm_data = dsm_data * dsm_value_scaler 107 | ndsm_data = ndsm_data * dsm_value_scaler 108 | 109 | return img_data, dsm_data, ndsm_data 110 | 111 | 112 | # code modification to reconstruct patches back by summing them 113 | 114 | 115 | def reconstruct_from_patches_2d(patches,image_size): 116 | """ 117 | 118 | Reconstruct the image from all of its patches. 119 | Patches are assumed to overlap and the image is constructed by filling in 120 | the patches from left to right, top to bottom, SUMMING the overlapping 121 | regions. 122 | Read more in the :ref:`User Guide `. 123 | Parameters 124 | 125 | ---------- 126 | 127 | patches : array, shape = (n_patches, patch_height, patch_width) or 128 | (n_patches, patch_height, patch_width, n_channels) 129 | The complete set of patches. If the patches contain colour information, 130 | channels are indexed along the last dimension: RGB patches would 131 | have `n_channels=3`. 
132 | 133 | image_size : tuple of ints (image_height, image_width) or 134 | (image_height, image_width, n_channels) 135 | the size of the image that will be reconstructed 136 | 137 | Returns 138 | ------- 139 | 140 | image : array, shape = image_size 141 | the reconstructed image 142 | 143 | """ 144 | 145 | i_h, i_w = image_size[:2] 146 | p_h, p_w = patches.shape[1:3] 147 | img = np.zeros(image_size) 148 | # compute the dimensions of the patches array 149 | n_h = i_h - p_h + 1 150 | n_w = i_w - p_w + 1 151 | for p, (i, j) in zip(patches, product(range(n_h), range(n_w))): 152 | img[i:i + p_h, j:j + p_w] += p 153 | return img 154 | 155 | 156 | def image_padder(image, 157 | dsm, 158 | ndsm, 159 | patch_size): 160 | """ 161 | 162 | This function naively pads the image with a complete size patch-size 163 | in this way it is assured all the pixels of the image to be visited at least ones 164 | Maybe smaller pad is enough but in this naive way an investigation of the 165 | image dimensions is unnesessary 166 | 167 | :param image: image to be padeed 168 | :param dem: dem data to be padded 169 | :param patch_size: path size to process the data 170 | :return: a padded image (naively) 171 | 172 | """ 173 | 174 | padding_size = patch_size # 2 * patch_size 175 | 176 | padded_image = np.zeros((image.shape[0] + padding_size, 177 | image.shape[1] + padding_size, image.shape[2]), 178 | dtype='float32') 179 | padded_image[0: image.shape[0], 0:image.shape[1], :] = image 180 | 181 | # --------------- Mirroring Image---------------------------- # 182 | 183 | # mirror last row pixels in pad-areas 184 | x_pad = image[-padding_size:, :, :] 185 | # mirror horizontaly 186 | x_pad = x_pad[::-1, :, :] 187 | 188 | y_pad = image[:, -padding_size:, :] 189 | # mirror vertically 190 | y_pad = y_pad[:, ::-1, :] 191 | 192 | xy_corner = x_pad[:, -padding_size:, :] 193 | xy_corner = xy_corner[:, ::-1, :] 194 | 195 | # add mirror data to padded image 196 | padded_image[image.shape[0]:, :image.shape[1], 
:] = x_pad[:] 197 | padded_image[:image.shape[0], image.shape[1]:, :] = y_pad[:] 198 | padded_image[image.shape[0]:, image.shape[1]:, :] = xy_corner[:] 199 | 200 | # --------------- Mirorring DEMs ---------------------- # 201 | 202 | padded_dsm = np.zeros((dsm.shape[0] + padding_size, dsm.shape[1] + padding_size), dtype='float32') 203 | padded_dsm[0: dsm.shape[0], 0:dsm.shape[1]] = dsm 204 | 205 | # mirror last row pixels in pad-areas 206 | x_pad = dsm[-padding_size:, :] 207 | # mirror horizontaly 208 | x_pad = x_pad[::-1, :] 209 | 210 | y_pad = dsm[:, -padding_size:] 211 | # mirror vertically 212 | y_pad = y_pad[:, ::-1] 213 | 214 | xy_corner = x_pad[:, -padding_size:] 215 | xy_corner = xy_corner[:, ::-1] 216 | 217 | # add mirror data to padded dsm 218 | padded_dsm[dsm.shape[0]:, :dsm.shape[1]] = x_pad[:] 219 | padded_dsm[:dsm.shape[0], dsm.shape[1]:] = y_pad[:] 220 | padded_dsm[dsm.shape[0]:, dsm.shape[1]:] = xy_corner[:] 221 | 222 | padded_ndsm = np.zeros((ndsm.shape[0] + padding_size, ndsm.shape[1] + padding_size), dtype='float32') 223 | padded_ndsm[0: ndsm.shape[0], 0:ndsm.shape[1]] = ndsm 224 | 225 | # mirror last row pixels in pad-areas 226 | x_pad = ndsm[-padding_size:, :] 227 | # mirror horizontaly 228 | x_pad = x_pad[::-1, :] 229 | 230 | y_pad = ndsm[:, -padding_size:] 231 | # mirror vertically 232 | y_pad = y_pad[:, ::-1] 233 | 234 | xy_corner = x_pad[:, -padding_size:] 235 | xy_corner = xy_corner[:, ::-1] 236 | 237 | # add mirror data to padded dsm 238 | padded_ndsm[ndsm.shape[0]:, :ndsm.shape[1]] = x_pad[:] 239 | padded_ndsm[:ndsm.shape[0], ndsm.shape[1]:] = y_pad[:] 240 | padded_ndsm[ndsm.shape[0]:, ndsm.shape[1]:] = xy_corner[:] 241 | 242 | return padded_image, padded_dsm, padded_ndsm 243 | 244 | 245 | def image_depadder(image, patch_size): 246 | # this function de-pads the image which is introduced by 247 | # method "image_padder" (look above method) 248 | 249 | deppader_size = patch_size # 2 * patch_size 250 | 251 | image = image[:-deppader_size, 
:-deppader_size, :] 252 | 253 | return image 254 | 255 | 256 | def model_loader(list_of_models, list_of_weights, gpu_device): 257 | """ 258 | 259 | :param list_of_models: 260 | :param list_of_weights: 261 | :param model_indexer: 262 | :return: loaded caffe model 263 | 264 | """ 265 | # set uo gpu parameters 266 | caffe.set_mode_gpu() 267 | caffe.set_device(gpu_device) 268 | 269 | # load model and initialize 270 | solver = caffe.SGDSolver(list_of_models) 271 | solver.net.copy_from(list_of_weights) 272 | 273 | return solver 274 | 275 | 276 | def image_saver_semantics(model_scores, 277 | save_folder, 278 | image_vis=None, 279 | dem_vis=None, 280 | vmin=None, 281 | vmax=None): 282 | """ 283 | 284 | save images and scores for Semantic_Network 285 | 286 | """ 287 | 288 | # create folder for saving data 289 | os.makedirs(save_folder) 290 | 291 | # block pop-up figure 292 | plt.ioff() 293 | 294 | if image_vis is not None: 295 | fig = plt.figure() 296 | imgplt = plt.imshow(np.floor(image_vis + 297 | np.abs(image_vis.min())).astype('uint8')) 298 | plt.axis('off') 299 | plt.savefig(os.path.join(save_folder, "Image"), dpi=400) 300 | 301 | if dem_vis is not None: 302 | imgplot = plt.imshow(dem_vis) 303 | plt.axis('off') 304 | plt.savefig(os.path.join(save_folder, "DEM"), dpi=400) 305 | 306 | out = model_scores.argmax(axis=2) 307 | imgplot = plt.imshow(out, vmin=0, vmax=6, cmap='gist_ncar') 308 | plt.axis('off') 309 | plt.savefig(os.path.join(save_folder, "Classification"), dpi=400) 310 | 311 | plt.imshow(model_scores[:, :, 0]) 312 | plt.title('Score-Impervious') 313 | plt.axis('off') 314 | plt.savefig(os.path.join(save_folder, "Scores-Impervious-Surfaces"), dpi=400) 315 | 316 | plt.imshow(model_scores[:, :, 1]) 317 | plt.title('Score-Buildings') 318 | plt.axis('off') 319 | plt.savefig(os.path.join(save_folder, "Scores-Buildings"), dpi=400) 320 | 321 | plt.imshow(model_scores[:, :, 2]) 322 | plt.title('Score-Vegetation') 323 | plt.axis('off') 324 | 
plt.savefig(os.path.join(save_folder, "Scores-Low-Vegeration"), dpi=400) 325 | 326 | plt.imshow(model_scores[:, :, 3]) 327 | plt.title('Score-Trees') 328 | plt.axis('off') 329 | plt.savefig(os.path.join(save_folder, "Score-Trees"), dpi=400) 330 | 331 | plt.imshow(model_scores[:, :, 4]) 332 | plt.title('Score-Cars') 333 | plt.axis('off') 334 | plt.savefig(os.path.join(save_folder, "Scores-Cars"), dpi=400) 335 | 336 | plt.close('all') 337 | 338 | 339 | def hdf5_score_saver(score_matrix, 340 | save_data_path, 341 | naming_string_file, 342 | attribute_name): 343 | """ 344 | This function accumulates the scores and save them according to an interval defined by the 345 | "save_interval" variable 346 | 347 | :param net: 348 | :param save_interval: 349 | :param i_scoring: 350 | :param i_idx: 351 | :return: 352 | 353 | """ 354 | 355 | # construct folder for saving outcome 356 | os.makedirs(save_data_path) 357 | 358 | # store score for last entry of matrix 359 | score_matrix = score_matrix.astype('float32') 360 | 361 | # string for saving name 362 | current_save_name = naming_string_file + '_h5_data' 363 | 364 | with h5.File(os.path.join(save_data_path, current_save_name) + '.h5', 'w') as ff: 365 | ff[attribute_name] = score_matrix 366 | 367 | # save txt file with h5 names 368 | with open(os.path.join(save_data_path, current_save_name) + '_h5_data' + '.txt', 'w') as f: 369 | f.write(os.path.join(save_data_path, current_save_name) + '.h5\n') 370 | 371 | 372 | def model_inference(solver, 373 | image_for_inference, 374 | dsm_for_inference, 375 | ndsm_for_inference, 376 | size_of_patches, 377 | stride_step, 378 | image_mean, 379 | dsm_mean, 380 | ndsm_mean, 381 | img_scaler, 382 | dsm_scaler): 383 | if dsm_for_inference is not None: 384 | 385 | # prediction step - for faster inference, will define over how many pixels an inference will be perfomed 386 | # in x and y direction !!!! 
This is defined by the STRIDE_STEP variable in method input 387 | 388 | # image padding to ensure that image will be covered completely through the adaptive prodiction 389 | # overllaping step !!!! 390 | image_for_inference, \ 391 | dsm_for_inference, \ 392 | ndsm_for_inference = image_padder(image_for_inference, 393 | dsm_for_inference, 394 | ndsm_for_inference, 395 | size_of_patches) 396 | 397 | # image_shape 398 | img_height, img_width = image_for_inference.shape[0], image_for_inference.shape[1] 399 | 400 | # this model scores - not the global cummulative scores of all models 401 | this_model_scores = np.zeros((img_height, img_width, 5)) 402 | 403 | # detect point for starting and stopping the inference 404 | # - so that patch size will cover the complete image 405 | start_point = (size_of_patches / 2) + 1 406 | stop_point_x = img_height - (size_of_patches / 2) 407 | # stop_point_y = img_width - (size_of_patches/2) 408 | 409 | # extract patch-size row of data for inference 410 | # for i_row in range(start_point, stop_point_x + 1): 411 | for i_row in range(start_point, stop_point_x + 1, stride_step): 412 | 413 | row_image = image_for_inference[i_row - (size_of_patches / 2) - 1: i_row + (size_of_patches / 2), :] 414 | row_dsm = dsm_for_inference[i_row - (size_of_patches / 2) - 1: i_row + (size_of_patches / 2), :] 415 | row_ndsm = ndsm_for_inference[i_row - (size_of_patches / 2) - 1: i_row + (size_of_patches / 2), :] 416 | 417 | # extract dense grid patches 418 | row_image_patches = patch_extractor(image=row_image, 419 | patch_size=(size_of_patches, size_of_patches), 420 | max_patches=None) 421 | row_dsm_patches = patch_extractor(image=row_dsm, 422 | patch_size=(size_of_patches, size_of_patches), 423 | max_patches=None) 424 | row_ndsm_patches = patch_extractor(image=row_ndsm, 425 | patch_size=(size_of_patches, size_of_patches), 426 | max_patches=None) 427 | 428 | # reshape to CAFFE standards 429 | row_image_patches = np.rollaxis(row_image_patches, 3, 1) 430 | 
row_dsm_patches = np.rollaxis(row_dsm_patches[:, None, :, :], 1, 1) 431 | row_ndsm_patches = np.rollaxis(row_ndsm_patches[:, None, :, :], 1, 1) 432 | 433 | if row_image_patches is None and row_dsm_patches is None and row_ndsm_patches is None: 434 | assert False, "Please check data cause trying to make prediction with no Image-data and/or no DEM-data" 435 | 436 | assert row_image_patches.shape[0] == row_dsm_patches.shape[0] \ 437 | == row_ndsm_patches.shape[0], \ 438 | "Number of DEM and IMAGE patches are not the " \ 439 | "same, please check why and ensure that they " \ 440 | "are equal" 441 | 442 | # convert patches to float-32 443 | row_image_patches = np.array(row_image_patches, dtype='float32') 444 | row_dsm_patches = np.array(row_dsm_patches, dtype='float32') 445 | row_ndsm_patches = np.array(row_ndsm_patches, dtype='float32') 446 | 447 | # subtract mean and normalize data - as in TRAINING protxt 448 | row_image_patches, \ 449 | row_dsm_patches, \ 450 | row_ndsm_patches = image_preprocessing(img_data=row_image_patches, 451 | img_mean=image_mean, 452 | img_value_scaler=img_scaler, 453 | dsm_data=row_dsm_patches, 454 | dsm_mean=dsm_mean, 455 | ndsm_data=row_ndsm_patches, 456 | ndsm_mean=ndsm_mean, 457 | dsm_value_scaler=dsm_scaler) 458 | 459 | 460 | # TODO ---> include self adaptive number of batches for the caffe model - currently the batches is standardize to size 1 461 | 462 | # initialize score patch saver --- FOR 5 CLASSES 463 | score_patches = np.zeros((row_dsm_patches.shape[0], 5, size_of_patches, size_of_patches)) 464 | 465 | # TODO - REPLACE BACK FOR COMPLETE LOOPS 466 | 467 | for i_batch in range(0, row_image_patches.shape[0], stride_step): 468 | # use caffe model for inference 469 | solver.net.blobs['image'].data[...] = row_image_patches[i_batch, :] 470 | solver.net.blobs['dsm'].data[...] = row_dsm_patches[i_batch, :] 471 | solver.net.blobs['ndsm'].data[...] 
= row_ndsm_patches[i_batch, :] 472 | 473 | # apply feedforward pass 474 | solver.net.forward() 475 | 476 | # print "Processing column :", i_batch, " from ", row_image_patches.shape[0] 477 | 478 | # store scoring 479 | # score_patches[i_batch, :, :, :] = solver.net.blobs['score'].data[:, :5, :, :] 480 | score_patches[i_batch, :, :, :] = solver.net.blobs['prob'].data[:, :5, :, :] 481 | 482 | 483 | # reshape to scikit learn standards (b,0,1,c) 484 | score_patches = np.rollaxis(score_patches, 1, 4) 485 | 486 | # reconstruct by overlapping summations 487 | row_scores = reconstruct_from_patches_2d(score_patches, 488 | (row_image.shape[0], row_image.shape[1], 5)) 489 | 490 | # store to score matrix 491 | this_model_scores[i_row - (size_of_patches / 2) - 1: i_row + (size_of_patches / 2), :, :] = row_scores 492 | 493 | print "----------------------------------------------------------------------------------------------------" 494 | print "----------------------------------------------------------------------------------------------------" 495 | print "---------------------- Done with row : ", i_row, " from ", stop_point_x, " -------------------------" 496 | print "----------------------------------------------------------------------------------------------------" 497 | print "----------------------------------------------------------------------------------------------------" 498 | 499 | # remove padding introduced before to ensure complete image inference 500 | this_model_scores = image_depadder(this_model_scores, size_of_patches) 501 | 502 | return this_model_scores 503 | 504 | 505 | def cumulative_model_inference(path_to_folder_with_images, 506 | path_to_folder_with_dsm, 507 | path_to_folder_with_ndsm, 508 | patch_size, 509 | model_list, 510 | weight_list, 511 | img_save_data_path, 512 | hdf5_save_data_path, 513 | string_names, 514 | hdf5_attributes, 515 | stride_value, 516 | img_scaler, 517 | dsm_scaler, 518 | save_visualizations=False, 519 | gpu_device=0): 520 | """ 
521 | THis method applied sequential inference and recostruction of data from patches to a set of models 522 | and summs up their individual score maps 523 | 524 | """ 525 | 526 | # initialization 527 | num_of_images = 0 528 | 529 | # this index will keep track of the current image to be loaded 530 | img_index_reader = 0 531 | 532 | # index for saving folder 533 | save_data_index = 0 534 | 535 | # ------ computation calculation ----- # 536 | # - number of images to process - # 537 | for file in os.listdir(path_to_folder_with_images): 538 | if file.endswith('tif'): 539 | num_of_images += 1 540 | 541 | # ========================== SEQUENTIAL PROCESSING ============================ # 542 | 543 | # Load CAFFE model 544 | solver_model = model_loader(list_of_models=model_list, 545 | list_of_weights=weight_list, 546 | gpu_device=gpu_device) 547 | 548 | # loop number of images number of 549 | for i in range(num_of_images): 550 | 551 | # call image 552 | complete_image, image_mean, img_list = \ 553 | sequential_image_loader(path_to_folder_with_images, img_index_reader) 554 | complete_dsm, dsm_mean, dsm_list = \ 555 | sequential_image_loader(path_to_folder_with_dsm, img_index_reader) 556 | complete_ndsm, ndsm_mean, dem_list = \ 557 | sequential_image_loader(path_to_folder_with_ndsm, img_index_reader) 558 | 559 | assert complete_image.shape[0] == complete_dsm.shape[0] and \ 560 | complete_image.shape[1] == complete_dsm.shape[1] and \ 561 | complete_image.shape[0] == complete_ndsm.shape[0] and \ 562 | complete_image.shape[1] == complete_ndsm.shape[1], \ 563 | "Shape of Image and DEM does not match, please ensure " \ 564 | "that are correctly read-in with associative order" 565 | 566 | # increase index reader 567 | img_index_reader += 1 568 | 569 | # initialize image for storing scores 570 | cumulative_img_scores = np.zeros((complete_image.shape[0], 571 | complete_image.shape[1], 5), 572 | dtype='float32') 573 | 574 | # loop through the various models and perform inference 575 
| image_scores = model_inference(solver=solver_model, 576 | image_for_inference=complete_image, 577 | dsm_for_inference=complete_dsm, 578 | ndsm_for_inference=complete_ndsm, 579 | size_of_patches=patch_size, 580 | stride_step=stride_value, 581 | image_mean=image_mean, 582 | dsm_mean=dsm_mean, 583 | ndsm_mean=ndsm_mean, 584 | img_scaler=img_scaler, 585 | dsm_scaler=dsm_scaler) 586 | 587 | # saving string name - remove img type from string 588 | save_name_string = img_list[save_data_index].split(".", 1)[0] 589 | 590 | if save_visualizations is True: 591 | # save image outcomes 592 | image_saver_semantics(model_scores=image_scores, 593 | save_folder=os.path.join(img_save_data_path, 594 | str(save_name_string)), 595 | image_vis=complete_image, 596 | dem_vis=complete_dsm, 597 | vmin=0, 598 | vmax=6) 599 | 600 | # save scores in hdf5 format matrix 601 | hdf5_score_saver(score_matrix=image_scores, 602 | save_data_path=os.path.join(hdf5_save_data_path, 603 | str(save_name_string)), 604 | naming_string_file=string_names, 605 | attribute_name=hdf5_attributes) 606 | 607 | # increase save folder index 608 | save_data_index += 1 609 | 610 | print "DONE" 611 | 612 | 613 | 614 | # =========================================== MAIN =========================================== # 615 | 616 | if __name__ == '__main__': 617 | # set for debug 618 | pdb.set_trace() 619 | 620 | # ======================== SET INFERENCE INPUTS =============== # 621 | 622 | patch_size = 256 # standardize patch size from training a network 623 | stride = 150 # overlapping stride for annotation-inference 624 | gpu_device = 1 # index of gpu to be used 625 | 626 | # take this values from TRAINING prototxt 627 | dsm_value_scaler = 0.003333333 628 | img_value_scaler = 0.00390625 629 | 630 | # This will create graphics of all inferred data 631 | generate_visualizations = True 632 | 633 | path_to_folder_with_images = '... path to folder with images' 634 | path_to_folder_with_DSMs = '... 
path to folder with DEMs' 635 | path_to_folder_with_nDSMs = '... path to folder with nDSMs' 636 | 637 | # inference models 638 | # -------- MODEL NET PARAMETERS -------- # 639 | model_solver_path = '... path to solver' 640 | model_solver = '... solver file' 641 | 642 | model_weights_path = '... path to folder with networks weights' 643 | model_weights = '... weights file' 644 | 645 | model_save_string_name = 'segnet_scores' 646 | model_attribute_hdf5_name = 'segnet_scores' 647 | 648 | # create storing folders if do not exist 649 | annot_image_dir = './annotated_image_' + str(stride) + '_pix_overlap' 650 | h5_save_data_dir = './h5_data_' + str(stride) + '_pix_overlap' 651 | 652 | if not os.path.exists(annot_image_dir): 653 | os.makedirs(annot_image_dir) 654 | 655 | if not os.path.exists(h5_save_data_dir): 656 | os.makedirs(h5_save_data_dir) 657 | 658 | # apply sequential inference in a set of multiple or single models and store data 659 | cumulative_model_inference(path_to_folder_with_images=path_to_folder_with_images, 660 | path_to_folder_with_dsm=path_to_folder_with_DSMs, 661 | path_to_folder_with_ndsm=path_to_folder_with_nDSMs, 662 | img_scaler=img_value_scaler, 663 | dsm_scaler=dsm_value_scaler, 664 | patch_size=patch_size, 665 | stride_value=stride, 666 | model_list=os.path.join(model_solver_path, model_solver), 667 | weight_list=os.path.join(model_weights_path, model_weights), 668 | img_save_data_path=annot_image_dir, 669 | hdf5_save_data_path=h5_save_data_dir, 670 | string_names=model_save_string_name, 671 | hdf5_attributes=model_attribute_hdf5_name, 672 | save_visualizations=generate_visualizations, 673 | gpu_device=gpu_device) 674 | -------------------------------------------------------------------------------- /model_training/analysis.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Various MISC code for real-time analysis of the CNN network. 
This works along the real-time 4 | signal handler included in the TRAINING script allowing real-time analysis of the state of the network 5 | 6 | """ 7 | 8 | # ======================= BATCH Visualizations ======================= # 9 | 10 | # View Image in particular batch 11 | plt.show(plt.imshow(np.rollaxis(solver.net.blobs['image'].data[0, ::-1],0,3))) 12 | 13 | # View Labels in particular batch 14 | plt.show(plt.imshow(solver.net.blobs['label'].data[0,0], vmin=0, vmax=6, cmap='gist_ncar')) 15 | 16 | # View Prediction of particular state in the network - result without softmax 17 | plt.show(plt.imshow(solver.net.blobs['score'].data[0,:].argmax(0), vmin=0, vmax=6, cmap='gist_ncar')) 18 | 19 | # View class-boundaries of particular batch 20 | vis(solver.net.blobs['edge'].data[0,:]) 21 | 22 | # Visualize absolute difference between infered class-boudnaries and label-boundaries 23 | vis(np.abs(solver.net.blobs['edges'].data[0,:]-solver.net.blobs['edge-label'].data[0,:])) 24 | 25 | # Visualize difference between between infered annotation and label-annotation 26 | plt.show(plt.imshow(np.abs(solver.net.blobs['score'].data[0,:].argmax(0)-solver.net.blobs['label'].data[0,0]), vmin=0, vmax=6, cmap='gist_ncar')) 27 | 28 | 29 | # ================= ERROR GRAPHS ================== # 30 | 31 | # plot step-error of annotation through epochs and fit straight trending line 32 | plt.show(plt.plot(range(len(np.array(batch_error))), np.poly1d(np.polyfit(range(len(np.array(batch_error))), np.array(batch_error),1))(range(len(np.array(batch_error)))), range(len(np.array(batch_error))), np.array(batch_error), '*')) 33 | # plot step-error of edges through epochs and fit straight trending line 34 | plt.show(plt.plot(range(len(np.array(edge_error))), np.poly1d(np.polyfit(range(len(np.array(edge_error))), np.array(edge_error),1))(range(len(np.array(edge_error)))), range(len(np.array(edge_error))), np.array(edge_error), '*')) 35 | 
-------------------------------------------------------------------------------- /model_training/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "... path to prototext architecture model" 2 | test_iter: 1 3 | # make test net, but don't invoke it from the solver itself 4 | test_interval: 100000 5 | display: 20000 6 | average_loss: 2 7 | lr_policy: "step" 8 | # lr for unnormalized softmax -- see train_val definition 9 | base_lr: 1e-11 10 | clip_gradients: 3e5 11 | gamma: 0.1 12 | stepsize: 6000 13 | # high momentum 14 | momentum: 0.90 15 | # no gradient accumulation 16 | iter_size: 1 17 | max_iter: 80000000 18 | weight_decay: 0.00015 19 | snapshot: 1000 20 | snapshot_prefix: "out_models/train" 21 | test_initialization: false 22 | -------------------------------------------------------------------------------- /model_training/training_class_boundary_net.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | os.environ['GLOG_minloglevel'] = '0' 4 | from matplotlib import pyplot as plt 5 | from visualizations import vis 6 | import numpy as np 7 | import caffe 8 | import pdb 9 | import signal 10 | 11 | # enable for explicit debugging 12 | #pdb.set_trace() 13 | 14 | ############ Signal Handler - Real Time Manipulation of CNN ############## 15 | 16 | def signal_handler(signal_number, frame): 17 | pdb.set_trace() 18 | 19 | # ########################## HYPERPARAMETERS ################################ # 20 | 21 | def main(nsteps, print_out_interval, test_interval, num_test_iter): 22 | 23 | # variable initalization 24 | train_loss = np.zeros(nsteps) 25 | test_acc = np.zeros(int(np.ceil(nsteps / test_interval))) 26 | batch_error = [] 27 | edge_error = [] 28 | 29 | # take gradient steps 30 | for it in range(nsteps): 31 | 32 | # when ignite singnal function 33 | signal.signal(signal.SIGINT, signal_handler) 34 | 35 | # take a single step 
SGD step 36 | solver.step(1) 37 | 38 | # store the all SGD train loss 39 | #train_loss[it] = solver.net.blobs['loss'].data 40 | 41 | # store the output on the first test batch 42 | # (start the forward pass at conv1 to avoid loading new data) 43 | solver.test_nets[0].forward(start='edge_dem_conv1_1_') 44 | #output[it] = solver.test_nets[0].blobs['loss'].data[:8] 45 | 46 | # print-out interval 47 | if (it % print_out_interval == 0): 48 | 49 | print "------------------------------------------------------------------------------------" 50 | print "ITERATION : ", it 51 | print "------------------------------------------------------------------------------------" 52 | print " FCN loss : ", solver.net.blobs['loss-fcn'].data 53 | print " DS-5 loss : ", solver.net.blobs['ds_loss5'].data 54 | print " DS-4 loss : ", solver.net.blobs['ds_loss4'].data 55 | print " DS-3 loss : ", solver.net.blobs['ds_loss3'].data 56 | print " DS-2 loss : ", solver.net.blobs['ds_loss2'].data 57 | print " =====================================================" 58 | print " " 59 | print " Fuse loss : ", solver.net.blobs['loss'].data 60 | print " Edges loss : ", solver.net.blobs['edge-loss'].data 61 | print " " 62 | print " =====================================================" 63 | 64 | # store batch error for learning-trend analysis 65 | batch_error.append(np.array(solver.net.blobs['loss'].data)) 66 | edge_error.append(np.array(solver.net.blobs['edge-loss'].data)) 67 | 68 | # run evaluation over validation-set 69 | if (it % test_interval == 0) and (it!=0): 70 | print "------------------------------------------------------------------------------------" 71 | print '####################################################################################' 72 | print '####################################################################################' 73 | print "------------------------------------------------------------------------------------" 74 | 75 | # store values for computing statistics 76 | 
batch_size = solver.test_nets[0].blobs['label'].data.shape[0] 77 | num_pixels_prediction_instance = solver.test_nets[0].blobs['label'].data.shape[2] * solver.test_nets[0].blobs['label'].data.shape[3] 78 | 79 | # Copy all weights from the TRAIN to the TEST NETWORK 80 | all_layers = [k for k in solver.net.params.keys()] 81 | for ll in all_layers: 82 | 83 | # try to copy WEIGHTS & BIASES if exist for layer 84 | try: 85 | solver.test_nets[0].params[ll][0].data[:, :, :, :] = solver.net.params[ll][0].data[:, :, :, :] 86 | solver.test_nets[0].params[ll][1].data[:] = solver.net.params[ll][1].data[:] 87 | 88 | except IndexError: 89 | # do not copy biases if dont exist. Copy only weights 90 | solver.test_nets[0].params[ll][0].data[:, :, :, :] = solver.net.params[ll][0].data[:, :, :, :] 91 | 92 | # initialize 93 | correct = 0 94 | for test_it in range(num_test_iter): 95 | solver.test_nets[0].forward() 96 | 97 | # detect all correct predictions for this batch of images 98 | temp_correct = solver.test_nets[0].blobs['score'].data.argmax(1) == solver.test_nets[0].blobs['label'].data[:, 0, :, :] 99 | 100 | # store their sum 101 | temp_correct = np.float32(temp_correct.sum()) 102 | 103 | # accumulate to total sum 104 | correct += temp_correct 105 | 106 | # clear 107 | temp_correct = 0 108 | 109 | testset_correct_prc = correct / (num_test_iter * batch_size * num_pixels_prediction_instance) 110 | 111 | # clear 112 | correct = 0 113 | 114 | print "------------------------------------------------------------------------------------" 115 | print '####################################################################################' 116 | print '####################################################################################' 117 | 118 | print 'VALIDATION SET ACCURACY : ', testset_correct_prc 119 | print "------------------------------------------------------------------------------------" 120 | print 
'####################################################################################' 121 | print '####################################################################################' 122 | 123 | # store validation set accuracy per iteration 124 | test_acc[it // test_interval] = testset_correct_prc 125 | 126 | # clear 127 | testset_correct_prc = 0 128 | 129 | # ============================================================ # 130 | 131 | 132 | if __name__ == "__main__": 133 | 134 | # --------------- INPUTS -------------- # 135 | 136 | # select gpu 137 | idx_gpu = 1 138 | 139 | # set pre-trained weights for model 140 | base_weights = '... path to weight model' 141 | 142 | # set solver file 143 | solver = '... path to solver' 144 | 145 | # total number of training SGD steps 146 | nsteps = 100000 147 | 148 | # print error every "x" number of SGD steps 149 | print_out_interval = 10 150 | 151 | # test-network every "x" numver of SGD steps 152 | test_interval = 1000 153 | 154 | # number of batches to be tested in TEST-phase 155 | num_test_iter = 2000 156 | 157 | # --------------------------------------# 158 | 159 | # set gpu 160 | caffe.set_mode_gpu() 161 | caffe.set_device(idx_gpu) 162 | 163 | # initialize solver 164 | solver = caffe.SGDSolver(solver) 165 | 166 | # copy base weights for architecture 167 | solver.net.copy_from(base_weights) 168 | 169 | # call main function for training 170 | main(nsteps, print_out_interval, test_interval, num_test_iter) 171 | -------------------------------------------------------------------------------- /model_training/visualizations.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import h5py 3 | import caffe 4 | import numpy as np 5 | 6 | """ 7 | Method for visualizing all channels in a 2D grid - Can be used in real-time during training for visualizing states of particular blobs. 
8 | """ 9 | 10 | def vis(data, padsize=1, padval=0): 11 | data -= data.min() 12 | data /= data.max() + 1e-8 13 | 14 | # force the number of filters to be square 15 | n = int(np.ceil(np.sqrt(data.shape[0]))) 16 | padding = ((0, n ** 2 - data.shape[0]), (0, padsize), (0, padsize)) + ((0, 0),) * (data.ndim - 3) 17 | data = np.pad(data, padding, mode='constant', constant_values=(padval, padval)) 18 | 19 | # tile the filters into an image 20 | data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1))) 21 | data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:]) 22 | 23 | plt.show(plt.imshow(data, cmap='gist_ncar')) 24 | --------------------------------------------------------------------------------