├── Models
│   ├── PSPNet.py
│   ├── __init__.py
│   ├── Utils.py
│   ├── Unet.py
│   ├── Segnet_transpose.py
│   ├── FCN32.py
│   ├── Segnet.py
│   ├── Segnet_crf_res.py
│   ├── Segnet_res.py
│   ├── VGGSegnet.py
│   ├── Segnet_crf_res_l1_v1.py
│   ├── Segnet_crf_res_l1_v2.py
│   ├── FCN8.py
│   └── VGGUnet.py
├── github_imgs
│   ├── data_tree.png
│   ├── segnet_crfasrnn.png
│   ├── run_segnet_crfasrnn.png
│   └── training_segnet_crfasrnn.png
├── cpp
│   ├── compile.sh
│   ├── modified_permutohedral.h
│   ├── high_dim_filter.cc
│   └── modified_permutohedral.cc
├── visualizeDataset.py
├── segnet_crfasrnn_env.yml
├── high_dim_filter_grad.py
├── predict.py
├── compute_test_results.m
├── LoadBatches.py
├── util.py
├── README.md
├── train_modifiedLoss.py
├── train.py
├── crfrnn_model.py
└── crfrnn_layer.py

/Models/PSPNet.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # todo
--------------------------------------------------------------------------------
/github_imgs/data_tree.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/HEAD/github_imgs/data_tree.png
--------------------------------------------------------------------------------
/github_imgs/segnet_crfasrnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/HEAD/github_imgs/segnet_crfasrnn.png
--------------------------------------------------------------------------------
/Models/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import VGGUnet
 3 | import VGGSegnet
 4 | import FCN8
 5 | import FCN32
 6 | import Segnet
 7 | 
 8 | 
 9 | 
--------------------------------------------------------------------------------
/github_imgs/run_segnet_crfasrnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/HEAD/github_imgs/run_segnet_crfasrnn.png
--------------------------------------------------------------------------------
/github_imgs/training_segnet_crfasrnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/HEAD/github_imgs/training_segnet_crfasrnn.png
--------------------------------------------------------------------------------
/Models/Utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | import h5py
 5 | import os
 6 | 
 7 | 
 8 | def loadWeightsPartial( model , weights_path , n_layers ):
 9 |     # copy the weights of the first n_layers layers from a Keras-style HDF5 file
10 |     f = h5py.File(weights_path, 'r')  # open read-only
11 |     for k in range(f.attrs['nb_layers']):
12 |         if k >= n_layers :
13 |             break
14 |         g = f['layer_{}'.format(k)]
15 |         weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
16 |         model.layers[k].set_weights(weights)
17 |     f.close()
18 | 
19 | 
--------------------------------------------------------------------------------
/cpp/compile.sh:
--------------------------------------------------------------------------------
 1 | # -----------------------------------------------------------------------------------------------------------------------
 2 | # * Activate your Tensorflow virtualenv before running this script.
 3 | # * This script assumes gcc version >=5. If you have an older version, remove the -D_GLIBCXX_USE_CXX11_ABI=0 flag below.
 4 | # * On Mac OS X, the additional flag "-undefined dynamic_lookup" is required.
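# *   For illustration only (an added sketch, not a line from the original script):
# *   with that flag added, the g++ invocation below would become, on Mac OS X:
# *       g++ -std=c++11 -undefined dynamic_lookup -shared high_dim_filter.cc modified_permutohedral.cc -o high_dim_filter.so -fPIC -I$TF_INC -I$TF_INC/external/nsync/public -L$TF_LIB -ltensorflow_framework -O2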
5 | # * If this script fails, please refer to https://www.tensorflow.org/extend/adding_an_op#build_the_op_library for help. 6 | # ----------------------------------------------------------------------------------------------------------------------- 7 | 8 | 9 | TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 10 | TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 11 | 12 | 13 | g++ -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=0 -shared high_dim_filter.cc modified_permutohedral.cc -o high_dim_filter.so -fPIC -I$TF_INC -I$TF_INC/external/nsync/public -L$TF_LIB -ltensorflow_framework -O2 14 | 15 | -------------------------------------------------------------------------------- /visualizeDataset.py: -------------------------------------------------------------------------------- 1 | 2 | import glob 3 | import numpy as np 4 | import cv2 5 | import random 6 | import argparse 7 | 8 | def imageSegmentationGenerator( images_path , segs_path , n_classes ): 9 | 10 | assert images_path[-1] == '/' 11 | assert segs_path[-1] == '/' 12 | 13 | images = glob.glob( images_path + "*.jpg" ) + glob.glob( images_path + "*.png" ) + glob.glob( images_path + "*.jpeg" ) 14 | images.sort() 15 | segmentations = glob.glob( segs_path + "*.jpg" ) + glob.glob( segs_path + "*.png" ) + glob.glob( segs_path + "*.jpeg" ) 16 | segmentations.sort() 17 | 18 | colors = [ ( random.randint(0,255),random.randint(0,255),random.randint(0,255) ) for _ in range(n_classes) ] 19 | 20 | assert len( images ) == len(segmentations) 21 | 22 | for im_fn , seg_fn in zip(images,segmentations): 23 | assert( im_fn.split('/')[-1] == seg_fn.split('/')[-1] ) 24 | 25 | img = cv2.imread( im_fn ) 26 | seg = cv2.imread( seg_fn ) 27 | print np.unique( seg ) 28 | 29 | seg_img = np.zeros_like( seg ) 30 | 31 | for c in range(n_classes): 32 | seg_img[:,:,0] += ( (seg[:,:,0] == c )*( colors[c][0] )).astype('uint8') 33 | seg_img[:,:,1] += ((seg[:,:,0] == c )*( colors[c][1] )).astype('uint8') 34 | seg_img[:,:,2] += ((seg[:,:,0] == c )*( colors[c][2] )).astype('uint8') 35 | 36 | cv2.imshow("img" , img ) 37 | cv2.imshow("seg_img" , seg_img ) 38 | cv2.waitKey() 39 | 40 | 41 | 42 | parser = argparse.ArgumentParser() 43 | parser.add_argument("--images", type = str ) 44 | parser.add_argument("--annotations", type = str ) 45 | parser.add_argument("--n_classes", type=int ) 46 | args = parser.parse_args() 47 | 48 | 49 | imageSegmentationGenerator(args.images , args.annotations , args.n_classes ) 50 | -------------------------------------------------------------------------------- /segnet_crfasrnn_env.yml: -------------------------------------------------------------------------------- 1 | name: segnet_crfasrnn 2 | channels: 3 | - menpo 4 | - soumith 5 | - defaults 6 | dependencies: 7 | - blas=1.0=mkl 8 | - ca-certificates=2018.03.07=0 9 | - cairo=1.14.12=h8948797_3 10 | - certifi=2018.10.15=py27_0 11 | - fontconfig=2.13.0=h9420a91_0 12 | - freetype=2.9.1=h8a8886c_1 13 | - glib=2.56.2=hd408876_0 14 | - h5py=2.7.0=np111py27_0 15 | - harfbuzz=0.9.39=1 16 | - hdf5=1.8.17=2 17 | - icu=58.2=h9c2bf20_1 18 | - intel-openmp=2019.1=144 19 | - jpeg=9b=h024ee3a_2 20 | - libedit=3.1.20170329=h6b74fdf_2 21 | - libffi=3.2.1=hd88cf55_4 22 | - libgcc-ng=8.2.0=hdf63c60_1 23 | - libgfortran-ng=7.3.0=hdf63c60_0 24 | - libpng=1.6.35=hbc83047_0 25 | - libstdcxx-ng=8.2.0=hdf63c60_1 26 | - libtiff=4.0.9=he85c1e1_2 27 | - libuuid=1.0.3=h1bed415_2 28 | - libxcb=1.13=h1bed415_1 29 | - libxml2=2.9.8=h26e45fe_1 30 | - linecache2=1.0.0=py27_0 31 | - 
mkl=2018.0.3=1 32 | - mkl_fft=1.0.6=py27h7dd41cf_0 33 | - mkl_random=1.0.1=py27h4414c95_1 34 | - ncurses=6.1=he6710b0_1 35 | - numpy=1.11.3=py27h3dfced4_4 36 | - numpy-base=1.15.4=py27h81de0dd_0 37 | - openssl=1.1.1a=h7b6447c_0 38 | - pcre=8.42=h439df22_0 39 | - pip=18.1=py27_0 40 | - pixman=0.34.0=hceecf20_3 41 | - python=2.7.15=h9bab390_4 42 | - readline=7.0=h7b6447c_5 43 | - setuptools=40.6.2=py27_0 44 | - six=1.11.0=py27_1 45 | - sqlite=3.25.3=h7b6447c_0 46 | - tk=8.6.8=hbc83047_0 47 | - traceback2=1.4.0=py27_0 48 | - unittest2=1.1.0=py27_0 49 | - wheel=0.32.3=py27_0 50 | - xz=5.2.4=h14c3975_4 51 | - zlib=1.2.11=h7b6447c_3 52 | - opencv3=3.2.0=np111py27_0 53 | - pip: 54 | - backports.weakref==1.0.post1 55 | - bleach==1.5.0 56 | - enum34==1.1.6 57 | - funcsigs==1.0.2 58 | - futures==3.2.0 59 | - html5lib==0.9999999 60 | - keras==2.2.4 61 | - keras-applications==1.0.6 62 | - keras-preprocessing==1.0.5 63 | - markdown==3.0.1 64 | - mock==2.0.0 65 | - pbr==5.1.1 66 | - protobuf==3.6.1 67 | - pyyaml==3.13 68 | - scipy==1.1.0 69 | - tensorflow-gpu==1.4.0 70 | - tensorflow-tensorboard==0.4.0 71 | - theano==1.0.3 72 | - werkzeug==0.14.1 73 | 74 | -------------------------------------------------------------------------------- /high_dim_filter_grad.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2017 Sadeep Jayasumana 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | 25 | import tensorflow as tf 26 | from tensorflow.python.framework import ops 27 | custom_module = tf.load_op_library('./cpp/high_dim_filter.so') 28 | 29 | 30 | @ops.RegisterGradient("HighDimFilter") 31 | def _high_dim_filter_grad(op, grad): 32 | """ Gradients for the HighDimFilter op. We only need to calculate the gradients 33 | w.r.t. the first input (unaries) as we never need to backprop errors to the 34 | second input (RGB values of the image). 35 | 36 | Args: 37 | op: The `high_dim_filter` operation that we are differentiating. 38 | grad: Gradients with respect to the output of the `high_dim_filter` op. 39 | 40 | Returns: 41 | Gradients with respect to the input of `high_dim_filter`. 
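    Note (explanatory addition, not from the original op documentation): the
    permutohedral filtering performed by this op is linear in its unary input,
    so its gradient can be computed by running the same filter over the
    incoming gradients with `backwards=True`, which is what the code below does.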
42 | """ 43 | 44 | rgb = op.inputs[1] 45 | grad_vals = custom_module.high_dim_filter(grad, rgb, 46 | bilateral=op.get_attr("bilateral"), 47 | theta_alpha=op.get_attr("theta_alpha"), 48 | theta_beta=op.get_attr("theta_beta"), 49 | theta_gamma=op.get_attr("theta_gamma"), 50 | backwards=True) 51 | 52 | return [grad_vals, tf.zeros_like(rgb)] 53 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import Models , LoadBatches 3 | from keras.models import load_model 4 | import glob 5 | import cv2 6 | import numpy as np 7 | import random 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--save_weights_path", type = str ) 11 | parser.add_argument("--epoch_number", type = int, default = 5 ) 12 | parser.add_argument("--test_images", type = str , default = "") 13 | parser.add_argument("--output_path", type = str , default = "") 14 | parser.add_argument("--input_height", type=int , default = 224 ) 15 | parser.add_argument("--input_width", type=int , default = 224 ) 16 | parser.add_argument("--model_name", type = str , default = "") 17 | parser.add_argument("--n_classes", type=int ) 18 | 19 | args = parser.parse_args() 20 | 21 | n_classes = args.n_classes 22 | model_name = args.model_name 23 | images_path = args.test_images 24 | input_width = args.input_width 25 | input_height = args.input_height 26 | epoch_number = args.epoch_number 27 | 28 | modelFns = { 'vgg_segnet':Models.VGGSegnet.VGGSegnet , 'vgg_unet':Models.VGGUnet.VGGUnet , 'vgg_unet2':Models.VGGUnet.VGGUnet2 , 'fcn8':Models.FCN8.FCN8 , 'fcn32':Models.FCN32.FCN32, 'segnet':Models.Segnet.segnet, 'segnet_transposed':Models.Segnet_transpose.segnet_transposed, 'segnet_res':Models.Segnet_res.segnet_res, 'segnet_res_crf':Models.Segnet_crf_res.segnet_crf_res} 29 | modelFN = modelFns[ model_name ] 30 | 31 | m = modelFN( n_classes , input_height=input_height, input_width=input_width ) 32 | m.load_weights( args.save_weights_path) #+ "." 
+ str( epoch_number ) ) 33 | m.compile(loss='categorical_crossentropy', 34 | optimizer= 'sgd' , 35 | metrics=['accuracy']) 36 | 37 | 38 | output_height = m.outputHeight 39 | output_width = m.outputWidth 40 | 41 | images = glob.glob( images_path + "*.jpg" ) + glob.glob( images_path + "*.png" ) + glob.glob( images_path + "*.jpeg" ) 42 | images.sort() 43 | 44 | colors = [ ( random.randint(0,255),random.randint(0,255),random.randint(0,255) ) for _ in range(n_classes) ] 45 | 46 | for imgName in images: 47 | print (imgName) 48 | outName = imgName.replace( images_path , args.output_path ) 49 | X = LoadBatches.getImageArr(imgName , args.input_width , args.input_height ) 50 | pr = m.predict( np.array([X]) )[0] 51 | pr = pr.reshape(( output_height , output_width , n_classes ) ).argmax( axis=2 ) 52 | #seg_img = np.zeros( ( output_height , output_width , 3 ) ) 53 | #for c in range(n_classes): 54 | # seg_img[:,:,0] += ( (pr[:,: ] == c )*( colors[c][0] )).astype('uint8') 55 | # seg_img[:,:,1] += ((pr[:,: ] == c )*( colors[c][1] )).astype('uint8') 56 | # seg_img[:,:,2] += ((pr[:,: ] == c )*( colors[c][2] )).astype('uint8') 57 | #seg_img = cv2.resize(seg_img , (input_width , input_height )) 58 | #cv2.waitKey(0) 59 | cv2.imwrite( outName , pr ) 60 | 61 | -------------------------------------------------------------------------------- /Models/Unet.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # todo upgrade to keras 2.0 4 | 5 | from keras.models import Sequential 6 | from keras.layers import Reshape 7 | from keras.models import Model 8 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Merge, Permute 9 | from keras.layers import Input, merge, Convolution2D, MaxPooling2D, UpSampling2D, Reshape, core, Dropout 10 | from keras.layers.normalization import BatchNormalization 11 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D , ZeroPadding3D , UpSampling3D 12 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 13 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 14 | from keras.layers.recurrent import LSTM 15 | from keras.layers.advanced_activations import LeakyReLU 16 | from keras.optimizers import Adam , SGD 17 | from keras.layers.embeddings import Embedding 18 | from keras.utils import np_utils 19 | from keras.regularizers import ActivityRegularizer 20 | from keras import backend as K 21 | 22 | 23 | 24 | 25 | 26 | def Unet (nClasses , optimizer=None , input_width=360 , input_height=480 , nChannels=1 ): 27 | 28 | inputs = Input((nChannels, input_height, input_width)) 29 | conv1 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(inputs) 30 | conv1 = Dropout(0.2)(conv1) 31 | conv1 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(conv1) 32 | pool1 = MaxPooling2D(pool_size=(2, 2))(conv1) 33 | 34 | conv2 = Convolution2D(64, 3, 3, activation='relu', border_mode='same')(pool1) 35 | conv2 = Dropout(0.2)(conv2) 36 | conv2 = Convolution2D(64, 3, 3, activation='relu', border_mode='same')(conv2) 37 | pool2 = MaxPooling2D(pool_size=(2, 2))(conv2) 38 | 39 | conv3 = Convolution2D(128, 3, 3, activation='relu', border_mode='same')(pool2) 40 | conv3 = Dropout(0.2)(conv3) 41 | conv3 = Convolution2D(128, 3, 3, activation='relu', border_mode='same')(conv3) 42 | 43 | up1 = merge([UpSampling2D(size=(2, 2))(conv3), conv2], mode='concat', concat_axis=1) 44 | conv4 = Convolution2D(64, 3, 3, activation='relu', 
border_mode='same')(up1) 45 | conv4 = Dropout(0.2)(conv4) 46 | conv4 = Convolution2D(64, 3, 3, activation='relu', border_mode='same')(conv4) 47 | 48 | up2 = merge([UpSampling2D(size=(2, 2))(conv4), conv1], mode='concat', concat_axis=1) 49 | conv5 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(up2) 50 | conv5 = Dropout(0.2)(conv5) 51 | conv5 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(conv5) 52 | 53 | conv6 = Convolution2D(nClasses, 1, 1, activation='relu',border_mode='same')(conv5) 54 | conv6 = core.Reshape((nClasses,input_height*input_width))(conv6) 55 | conv6 = core.Permute((2,1))(conv6) 56 | 57 | 58 | conv7 = core.Activation('softmax')(conv6) 59 | 60 | model = Model(input=inputs, output=conv7) 61 | 62 | if not optimizer is None: 63 | model.compile(loss="categorical_crossentropy", optimizer= optimizer , metrics=['accuracy'] ) 64 | 65 | return model 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /compute_test_results.m: -------------------------------------------------------------------------------- 1 | % if you use another test/train set change number of classes and the 2 | % unlabeled index as well as number of iterations (needs to be equal to the test set size) 3 | 4 | gtPath = './data/dataset1/annotations_prepped_test'; % path to your ground truth images 5 | predPath = './data/predictions/'; %path to your predictions (you get them after you implement saving images in the test_segmentation_camvid.py script - or you write your own) 6 | groundTruths = dir(gtPath); 7 | skip = 2; % first two are '.' and '..' so skip them 8 | predictions = dir(predPath); 9 | 10 | iter = 101; 11 | 12 | numClasses = 11; 13 | unknown_class = 12; 14 | 15 | img_height=50; 16 | img_width=50; 17 | 18 | totalpoints = 0; 19 | cf = zeros(iter,numClasses,numClasses); 20 | globalacc = 0; 21 | 22 | for i = 1:iter 23 | display(num2str(i)); 24 | 25 | %strcat(predPath, '/', predictions(i + skip).name) 26 | %strcat(gtPath, '/', groundTruths(i + skip).name) 27 | %waitforbuttonpress() 28 | 29 | pred = imread(strcat(predPath, '/', predictions(i + skip).name)); % set this to iterate through your segnet prediction images 30 | pred = imresize(pred, [img_height img_width]); 31 | 32 | pred = pred + 1; % i added this cause i labeled my classes from 0 to 11 33 | annot = imresize(imread(strcat(gtPath, '/', groundTruths(i + skip).name)),[img_height img_width]); % set this to iterate through your ground truth annotations 34 | annot = annot + 1; % i added this cause i labeled my classes from 0 to 11 -> so in that case the next line will find every pixel labeled with unknown_class=12 35 | imshow(pred) 36 | imshow(annot) 37 | %waitforbuttonpress() 38 | 39 | pixels_ignore = annot == unknown_class; 40 | pred(pixels_ignore) = 0; 41 | annot(pixels_ignore) = 0; 42 | 43 | totalpoints = totalpoints + sum(annot(:)>0); 44 | 45 | % global and class accuracy computation 46 | for j = 1:numClasses 47 | for k = 1:numClasses 48 | c1 = annot == j; 49 | c1p = pred == k; 50 | index = gather(c1 .* c1p); 51 | cf(i,j,k) = cf(i,j,k) + sum(index(:)); 52 | end 53 | c1 = annot == j; 54 | c1p = pred == j; 55 | index = gather(c1 .* c1p); 56 | globalacc = globalacc + sum(index(:)); 57 | 58 | end 59 | end 60 | 61 | cf = sum(cf,1); 62 | cf = squeeze(cf); 63 | 64 | % Compute confusion matrix 65 | conf = zeros(numClasses); 66 | for i = 1:numClasses 67 | if i ~= unknown_class && sum(cf(i,:)) > 0 68 | conf(i,:) = cf(i,:)/sum(cf(i,:)); 69 | end 70 | end 71 | globalacc = 
sum(globalacc)/sum(totalpoints); 72 | 73 | % Compute intersection over union for each class and its mean 74 | intoverunion = zeros(numClasses,1); 75 | for i = 1:numClasses 76 | if i ~= unknown_class && sum(conf(i,:)) > 0 77 | intoverunion(i) = (cf(i,i))/(sum(cf(i,:))+sum(cf(:,i))-cf(i,i)); 78 | end 79 | end 80 | 81 | display([' Global acc = ' num2str(globalacc*100) '% Class average acc = ' num2str(100*sum(diag(conf))/(numClasses)) '% Mean Int over Union = ' num2str(100*sum(intoverunion)/(numClasses)) '%']); 82 | -------------------------------------------------------------------------------- /LoadBatches.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import cv2 4 | import glob 5 | import itertools 6 | 7 | 8 | def getImageArr( path , width , height , imgNorm="sub_mean" , odering='channels_first' ): 9 | 10 | try: 11 | img = cv2.imread(path, 1) 12 | 13 | if imgNorm == "sub_and_divide": 14 | img = np.float32(cv2.resize(img, ( width , height ))) / 127.5 - 1 15 | elif imgNorm == "sub_mean": 16 | img = cv2.resize(img, ( width , height )) 17 | img = img.astype(np.float32) 18 | img[:,:,0] -= 103.939 19 | img[:,:,1] -= 116.779 20 | img[:,:,2] -= 123.68 21 | elif imgNorm == "divide": 22 | img = cv2.resize(img, ( width , height )) 23 | img = img.astype(np.float32) 24 | img = img/255.0 25 | 26 | #if odering == 'channels_first': 27 | # img = np.rollaxis(img, 2, 0) 28 | return img 29 | except Exception, e: 30 | print path , e 31 | img = np.zeros(( height , width , 3 )) 32 | if odering == 'channels_first': 33 | img = np.rollaxis(img, 2, 0) 34 | return img 35 | 36 | 37 | 38 | 39 | 40 | def getSegmentationArr( path , nClasses , width , height ): 41 | 42 | seg_labels = np.zeros(( height , width , nClasses )) 43 | try: 44 | img = cv2.imread(path, 1) 45 | img = cv2.resize(img, ( width , height )) 46 | img = img[:, : , 0] 47 | 48 | for c in range(nClasses): 49 | seg_labels[: , : , c ] = (img == c ).astype(int) 50 | 51 | except Exception, e: 52 | print e 53 | 54 | seg_labels = np.reshape(seg_labels, ( width*height , nClasses )) 55 | return seg_labels 56 | 57 | 58 | 59 | def imageSegmentationGenerator( images_path , segs_path , batch_size, n_classes , input_height , input_width , output_height , output_width ): 60 | 61 | assert images_path[-1] == '/' 62 | assert segs_path[-1] == '/' 63 | 64 | images = glob.glob( images_path + "*.jpg" ) + glob.glob( images_path + "*.png" ) + glob.glob( images_path + "*.jpeg" ) 65 | images.sort() 66 | segmentations = glob.glob( segs_path + "*.jpg" ) + glob.glob( segs_path + "*.png" ) + glob.glob( segs_path + "*.jpeg" ) 67 | segmentations.sort() 68 | 69 | assert len( images ) == len(segmentations) 70 | for im , seg in zip(images,segmentations): 71 | assert( im.split('/')[-1].split(".")[0] == seg.split('/')[-1].split(".")[0] ) 72 | 73 | zipped = itertools.cycle( zip(images,segmentations) ) 74 | 75 | while True: 76 | X = [] 77 | Y = [] 78 | for _ in range( batch_size) : 79 | im , seg = zipped.next() 80 | X.append( getImageArr(im , input_width , input_height ) ) 81 | Y.append( getSegmentationArr( seg , n_classes , output_width , output_height ) ) 82 | 83 | 84 | yield np.array(X) , np.array(Y) 85 | 86 | 87 | 88 | # import Models , LoadBatches 89 | # G = LoadBatches.imageSegmentationGenerator( "data/clothes_seg/prepped/images_prepped_train/" , "data/clothes_seg/prepped/annotations_prepped_train/" , 1, 10 , 800 , 550 , 400 , 272 ) 90 | # G2 = LoadBatches.imageSegmentationGenerator( 
"data/clothes_seg/prepped/images_prepped_test/" , "data/clothes_seg/prepped/annotations_prepped_test/" , 1, 10 , 800 , 550 , 400 , 272 ) 91 | 92 | # m = Models.VGGSegnet.VGGSegnet( 10 , use_vgg_weights=True , optimizer='adadelta' , input_image_size=( 800 , 550 ) ) 93 | # m.fit_generator( G , 512 , nb_epoch=10 ) 94 | 95 | 96 | -------------------------------------------------------------------------------- /cpp/modified_permutohedral.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file contains a modified version of the "permutohedral.h" code 3 | available at http://graphics.stanford.edu/projects/drf/. Copyright notice of 4 | the original file is included below: 5 | 6 | Copyright (c) 2013, Philipp Krähenbühl 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | * Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | * Neither the name of the Stanford University nor the 17 | names of its contributors may be used to endorse or promote products 18 | derived from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY 21 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY 24 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 27 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | */ 31 | 32 | #ifndef MODIFIED_PERMUTOHEDRAL_HPP_ 33 | #define MODIFIED_PERMUTOHEDRAL_HPP_ 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | #include "tensorflow/core/framework/tensor.h" 43 | 44 | using namespace tensorflow; 45 | 46 | /************************************************/ 47 | /*** ModifiedPermutohedral Lattice ***/ 48 | /************************************************/ 49 | class ModifiedPermutohedral { 50 | protected: 51 | struct Neighbors { 52 | int n1, n2; 53 | 54 | Neighbors(int n1 = 0, int n2 = 0) : n1(n1), n2(n2) { 55 | } 56 | }; 57 | 58 | std::vector offset_, rank_; 59 | std::vector barycentric_; 60 | std::vector blur_neighbors_; 61 | // Number of elements, size of sparse discretized space, dimension of features 62 | int N_, M_, d_; 63 | 64 | void sseCompute(Tensor &out, const Tensor &in, int value_size, 65 | bool reverse = false, bool add = false) const; 66 | 67 | void seqCompute(Tensor &out, const Tensor &in, int value_size, 68 | bool reverse = false, bool add = false) const; 69 | 70 | public: 71 | ModifiedPermutohedral(); 72 | 73 | void init(const float *features, int num_dimensions, int num_points); 74 | 75 | void compute(Tensor &out, const Tensor &in, int value_size, 76 | bool reverse = false, bool add = false) const; 77 | }; 78 | 79 | #endif //_MODIFIED_PERMUTOHEDRAL_HPP_ 80 | -------------------------------------------------------------------------------- /Models/Segnet_transpose.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model 2 | from keras.layers import Reshape 3 | from keras.layers import Input 4 | 5 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Permute 6 | from keras.layers.normalization import BatchNormalization 7 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D, Conv2DTranspose 8 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 9 | 10 | def segnet_transposed(nClasses, optimizer=None, input_height=360, input_width=480): 11 | kernel = 3 12 | filter_size = 64 13 | pad = 1 14 | pool_size = 2 15 | 16 | img_input = Input(shape=(input_height, input_width,3)) 17 | 18 | 19 | 20 | 21 | # encoder 22 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 23 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 24 | x = BatchNormalization()(x) 25 | x = Activation('relu') (x) 26 | l1 = x 27 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 28 | 29 | x = ZeroPadding2D(padding=(pad, pad))(x) 30 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 31 | x = BatchNormalization()(x) 32 | x = Activation('relu')(x) 33 | l2 = x 34 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 35 | 36 | x = ZeroPadding2D(padding=(pad, pad))(x) 37 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 38 | x = BatchNormalization()(x) 39 | x = Activation('relu')(x) 40 | l3 = x 41 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 42 | 43 | x = ZeroPadding2D(padding=(pad, pad))(x) 44 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 45 | x = BatchNormalization()(x) 46 | l4 = x 47 | x = Activation('relu')(x) 48 | 49 | # decoder 50 | x = ZeroPadding2D(padding=(pad, pad))(x) 51 | x = Conv2DTranspose(512, (kernel, kernel), padding='valid')(x) 52 | x = BatchNormalization()(x) 53 | 54 | # x = Add()([l4, x]) 55 | x = UpSampling2D(size=(pool_size, pool_size))(x) 56 | x = ZeroPadding2D(padding=(pad, 
pad))(x) 57 | x = Conv2DTranspose(256, (kernel, kernel), padding='valid')(x) 58 | x = BatchNormalization()(x) 59 | 60 | # x = Add()([l3, x]) 61 | x = UpSampling2D(size=(pool_size, pool_size))(x) 62 | x = ZeroPadding2D(padding=(pad, pad))(x) 63 | x = Conv2DTranspose(128, (kernel, kernel), padding='valid')(x) 64 | x = BatchNormalization()(x) 65 | 66 | x = UpSampling2D(size=(pool_size, pool_size))(x) 67 | x = ZeroPadding2D(padding=(pad, pad))(x) 68 | x = Conv2DTranspose(filter_size, (kernel, kernel), padding='valid')(x) 69 | x = BatchNormalization()(x) 70 | 71 | # x = Add()([l1, x]) 72 | x = Conv2DTranspose(nClasses, (1, 1), padding='valid') (x) 73 | 74 | out = x 75 | a = Model(inputs=img_input, outputs=out) 76 | 77 | model = [] 78 | a.outputHeight = a.output_shape[1] 79 | a.outputWidth = a.output_shape[2] 80 | 81 | out = Reshape((a.outputHeight * a.outputWidth, nClasses), input_shape=(nClasses, a.outputHeight, a.outputWidth))(out) 82 | out = Activation('softmax')(out) 83 | # if not optimizer is None: 84 | # model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy']) 85 | model = Model(inputs=img_input, outputs=out) 86 | model.outputHeight = a.outputHeight 87 | model.outputWidth = a.outputWidth 88 | 89 | return model 90 | 91 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2017 Sadeep Jayasumana 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | 25 | import numpy as np 26 | from PIL import Image 27 | 28 | 29 | # Pascal VOC color palette for labels 30 | _PALETTE = [0, 0, 0, 31 | 128, 0, 0, 32 | 0, 128, 0, 33 | 128, 128, 0, 34 | 0, 0, 128, 35 | 128, 0, 128, 36 | 0, 128, 128, 37 | 128, 128, 128, 38 | 64, 0, 0, 39 | 192, 0, 0, 40 | 64, 128, 0, 41 | 192, 128, 0, 42 | 64, 0, 128, 43 | 192, 0, 128, 44 | 64, 128, 128, 45 | 192, 128, 128, 46 | 0, 64, 0, 47 | 128, 64, 0, 48 | 0, 192, 0, 49 | 128, 192, 0, 50 | 0, 64, 128, 51 | 128, 64, 128, 52 | 0, 192, 128, 53 | 128, 192, 128, 54 | 64, 64, 0, 55 | 192, 64, 0, 56 | 64, 192, 0, 57 | 192, 192, 0] 58 | 59 | 60 | def get_preprocessed_image(file_name): 61 | """ Reads an image from the disk, pre-processes it by subtracting mean etc. and 62 | returns a numpy array that's ready to be fed into a Keras model. 63 | 64 | Note: This method assumes 'channels_last' data format in Keras. 
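    Illustrative usage (the file name here is assumed):
        img, h, w = get_preprocessed_image('example.jpg')
        # img has shape (1, 500, 500, 3); h and w are the image size before padding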
65 | """ 66 | 67 | mean_values = np.array([123.68, 116.779, 103.939], dtype=np.float32) # RGB mean values 68 | mean_values = mean_values.reshape(1, 1, 3) 69 | im = np.array(Image.open(file_name)).astype(np.float32) 70 | assert im.ndim == 3, "Only RGB images are supported." 71 | im = im - mean_values 72 | im = im[:, :, ::-1] 73 | img_h, img_w, img_c = im.shape 74 | assert img_c == 3, "Only RGB images are supported." 75 | if img_h > 500 or img_w > 500: 76 | raise ValueError("Please resize your images to be not bigger than 500 x 500.") 77 | 78 | pad_h = 500 - img_h 79 | pad_w = 500 - img_w 80 | im = np.pad(im, pad_width=((0, pad_h), (0, pad_w), (0, 0)), mode='constant', constant_values=0) 81 | return im.astype(np.float32).reshape(1, 500, 500, 3), img_h, img_w 82 | 83 | 84 | def get_label_image(probs, img_h, img_w): 85 | """ Returns the label image (PNG with Pascal VOC colormap) given the probabilities. 86 | 87 | Note: This method assumes 'channels_last' data format. 88 | """ 89 | 90 | labels = probs.argmax(axis=2).astype("uint8")[:img_h, :img_w] 91 | label_im = Image.fromarray(labels, "P") 92 | label_im.putpalette(_PALETTE) 93 | return label_im 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SegNet + CRF as RNN. 2 | 3 | This project has the implementation of SegNetResCRF, combination of SegNet with CRF as RNN, published in the [2018 IJCNN](https://ieeexplore.ieee.org/xpl/conhome/8465565/proceeding) paper [SegNetRes-CRF: A Deep Convolutional Encoder-Decoder Architecture for Semantic Image Segmentation 4 | ](https://ieeexplore.ieee.org/abstract/document/8489376) 5 | 6 | Some references: 7 | * SegNet implementation: https://github.com/divamgupta/image-segmentation-keras 8 | * CRF as RNN implementation: https://github.com/sadeepj/crfasrnn_keras 9 | 10 | ## Repository working tree: 11 | [![Repo Working Tree](https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/segnet_crfasrnn.png)](https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/segnet_crfasrnn.png " Repo working tree") 12 | 13 | 14 | ## Installation 15 | 16 | Clone this repository and inside it, run: 17 | ``` 18 | conda env create -f segnet_crfasrnn_env.yml 19 | source activate segnet_crfasrnn 20 | ``` 21 | After that you need to run compile high_dim_filter (Go to cpp folder and run compile script): 22 | 23 | ``` 24 | cd cpp 25 | ./compile.sh 26 | ``` 27 | 28 | After that you can run train script as mentioned on Training the Model. 29 | 30 | 31 | 32 | ### Tested with: 33 | pip install --upgrade tensorflow-gpu==1.4 34 | conda install -c menpo opencv3 35 | 36 | ## keras.json content 37 | ```json 38 | { 39 | "epsilon": 1e-07, 40 | "floatx": "float32", 41 | "image_data_format": "channels_last", 42 | "backend": "theano" 43 | } 44 | ``` 45 | 46 | ## Visualizing the prepared data 47 | 48 | You can also visualize your prepared annotations for verification of the prepared data. 
49 | 50 | ```shell 51 | python visualizeDataset.py \ 52 | --images="data/dataset1/images_prepped_train/" \ 53 | --annotations="data/dataset1/annotations_prepped_train/" \ 54 | --n_classes=11 55 | ``` 56 | 57 | ### Dataset working tree: 58 | 59 | [![Data Working Tree](https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/data_tree.png)](https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/data_tree.png "Data working tree") 60 | 61 | 62 | 63 | ## Training the Model 64 | 65 | To train the model run the following command: 66 | 67 | ```shell 68 | TENSORFLOW_FLAGS=device=cuda0,image_data_format=channels_last,floatX=float32 python train.py --save_weights_path="weights/ex1/" --train_images="path/train/" --train_annotations="data_semantics/trainannot/" --val_images="data_semantics/val/" --val_annotations="data_semantics/valannot/" --n_classes=8 --model_name="segnet_res_crf" --input_height=128 --input_width=128 69 | ``` 70 | 71 | [![Run segnet crfasrnn]( 72 | https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/run_segnet_crfasrnn.png)]( 73 | https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/run_segnet_crfasrnn.png "Run segnet crfasrnn") 74 | 75 | 76 | [![Training segnet crfasrnn]( 77 | https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/training_segnet_crfasrnn.png)]( 78 | https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/training_segnet_crfasrnn.png "Training segnet crfasrnn") 79 | 80 | 81 | 82 | 83 | 84 | ## Getting the predictions 85 | 86 | ```shell 87 | TENSORFLOW_FLAGS=device=cuda0,image_data_format=channels_last,floatX=float32 python predict.py --output_path="teste/" --test_images="data_semantics/test/" --n_classes=8 --model_name="segnet_res_crf" --input_height=128 --input_width=128 --save_weights_path="weights_360_480_res_with_crf.hdf5" 88 | ``` 89 | -------------------------------------------------------------------------------- /train_modifiedLoss.py: -------------------------------------------------------------------------------- 1 | import math 2 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint 3 | import argparse 4 | import Models , LoadBatches 5 | 6 | # learning rate schedule 7 | def step_decay(epoch): 8 | initial_lrate = 0.000001 9 | drop = 0.5 10 | epochs_drop = 10.0 11 | lrate = initial_lrate * math.pow(drop, math.floor((1+epoch)/epochs_drop)) 12 | return lrate 13 | 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--save_weights_path", type = str ) 17 | parser.add_argument("--train_images", type = str ) 18 | parser.add_argument("--train_annotations", type = str ) 19 | parser.add_argument("--n_classes", type=int ) 20 | parser.add_argument("--input_height", type=int , default = 224 ) 21 | parser.add_argument("--input_width", type=int , default = 224 ) 22 | 23 | parser.add_argument('--validate',action='store_false') 24 | parser.add_argument("--val_images", type = str , default = "") 25 | parser.add_argument("--val_annotations", type = str , default = "") 26 | 27 | parser.add_argument("--epochs", type = int, default = 100 ) 28 | parser.add_argument("--batch_size", type = int, default = 1 ) 29 | parser.add_argument("--val_batch_size", type = int, default = 1 ) 30 | parser.add_argument("--load_weights", type = str , default = "data/vgg16_weights_th_dim_ordering_th_kernels.h5") 31 | 32 | parser.add_argument("--model_name", type = str , default = "") 33 | parser.add_argument("--optimizer_name", type = str , 
default = "adadelta") 34 | 35 | 36 | args = parser.parse_args() 37 | 38 | train_images_path = args.train_images 39 | train_segs_path = args.train_annotations 40 | train_batch_size = args.batch_size 41 | n_classes = args.n_classes 42 | input_height = args.input_height 43 | input_width = args.input_width 44 | validate = args.validate 45 | save_weights_path = args.save_weights_path 46 | epochs = args.epochs 47 | load_weights = args.load_weights 48 | 49 | optimizer_name = args.optimizer_name 50 | model_name = args.model_name 51 | 52 | if validate: 53 | val_images_path = args.val_images 54 | val_segs_path = args.val_annotations 55 | val_batch_size = args.val_batch_size 56 | 57 | modelFns = { 'vgg_segnet':Models.VGGSegnet.VGGSegnet , 'vgg_unet':Models.VGGUnet.VGGUnet , 'vgg_unet2':Models.VGGUnet.VGGUnet2 , 'fcn8':Models.FCN8.FCN8 , 'fcn32':Models.FCN32.FCN32, 'segnet':Models.Segnet.segnet} 58 | modelFN = modelFns[ model_name ] 59 | 60 | m = modelFN( n_classes , input_height=input_height, input_width=input_width ) 61 | 62 | 63 | #if len( load_weights ) > 0: 64 | # m.load_weights(load_weights, by_name=True) 65 | 66 | 67 | lrate = LearningRateScheduler(step_decay) 68 | filepath="weights_360_480_correct_weights_best.hdf5" 69 | checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') 70 | callbacks_list = [checkpoint] 71 | 72 | 73 | 74 | print "Model output shape" , m.output_shape 75 | 76 | output_height = m.outputHeight 77 | output_width = m.outputWidth 78 | class_weighting= [0.2595, 0.1826, 4.5640, 0.1417, 0.9051, 0.3826, 9.6446, 1.8418, 0.6823, 6.2478, 7.3614, 0] 79 | G = LoadBatches.imageSegmentationGenerator( train_images_path , train_segs_path , train_batch_size, n_classes , input_height , input_width , output_height , output_width ) 80 | 81 | 82 | if validate: 83 | G2 = LoadBatches.imageSegmentationGenerator( val_images_path , val_segs_path , val_batch_size, n_classes , input_height , input_width , output_height , output_width ) 84 | 85 | if not validate: 86 | for ep in range( epochs ): 87 | m.fit_generator( G , 512 , epochs=1 ) 88 | m.save_weights( save_weights_path + "." + str( ep ) ) 89 | m.save( save_weights_path + ".model." + str( ep ) ) 90 | else: 91 | # for ep in range( epochs ): 92 | m.fit_generator( G , 512 , validation_data=G2 , validation_steps=200 , callbacks=callbacks_list,class_weight=class_weighting, epochs=epochs, verbose=1) 93 | # m.save_weights( save_weights_path + "." + str( ep ) ) 94 | # m.save( save_weights_path + ".model." 
+ str( ep ) ) -------------------------------------------------------------------------------- /Models/FCN32.py: -------------------------------------------------------------------------------- 1 | 2 | # https://github.com/wkentaro/pytorch-fcn/blob/master/torchfcn/models/fcn32s.py 3 | # fc weights into the 1x1 convs , get_upsampling_weight 4 | 5 | 6 | 7 | from keras.models import * 8 | from keras.layers import * 9 | 10 | 11 | import os 12 | file_path = os.path.dirname( os.path.abspath(__file__) ) 13 | 14 | VGG_Weights_path = file_path+"/../data/vgg16_weights_th_dim_ordering_th_kernels.h5" 15 | 16 | IMAGE_ORDERING = 'channels_first' 17 | 18 | 19 | def FCN32( n_classes , input_height=416, input_width=608 , vgg_level=3): 20 | 21 | assert input_height%32 == 0 22 | assert input_width%32 == 0 23 | 24 | # https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5 25 | img_input = Input(shape=(3,input_height,input_width)) 26 | 27 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1', data_format=IMAGE_ORDERING )(img_input) 28 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2', data_format=IMAGE_ORDERING )(x) 29 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', data_format=IMAGE_ORDERING )(x) 30 | f1 = x 31 | # Block 2 32 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1', data_format=IMAGE_ORDERING )(x) 33 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2', data_format=IMAGE_ORDERING )(x) 34 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', data_format=IMAGE_ORDERING )(x) 35 | f2 = x 36 | 37 | # Block 3 38 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1', data_format=IMAGE_ORDERING )(x) 39 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2', data_format=IMAGE_ORDERING )(x) 40 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3', data_format=IMAGE_ORDERING )(x) 41 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', data_format=IMAGE_ORDERING )(x) 42 | f3 = x 43 | 44 | # Block 4 45 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1', data_format=IMAGE_ORDERING )(x) 46 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2', data_format=IMAGE_ORDERING )(x) 47 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3', data_format=IMAGE_ORDERING )(x) 48 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', data_format=IMAGE_ORDERING )(x) 49 | f4 = x 50 | 51 | # Block 5 52 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1', data_format=IMAGE_ORDERING )(x) 53 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2', data_format=IMAGE_ORDERING )(x) 54 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3', data_format=IMAGE_ORDERING )(x) 55 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool', data_format=IMAGE_ORDERING )(x) 56 | f5 = x 57 | 58 | x = Flatten(name='flatten')(x) 59 | x = Dense(4096, activation='relu', name='fc1')(x) 60 | x = Dense(4096, activation='relu', name='fc2')(x) 61 | x = Dense( 1000 , activation='softmax', name='predictions')(x) 62 | 63 | vgg = Model( img_input , x ) 64 | vgg.load_weights(VGG_Weights_path) 65 | 66 | o = f5 67 | 68 | o = ( Conv2D( 4096 , ( 7 , 7 ) , activation='relu' , padding='same', 
data_format=IMAGE_ORDERING))(o) 69 | o = Dropout(0.5)(o) 70 | o = ( Conv2D( 4096 , ( 1 , 1 ) , activation='relu' , padding='same', data_format=IMAGE_ORDERING))(o) 71 | o = Dropout(0.5)(o) 72 | 73 | o = ( Conv2D( n_classes , ( 1 , 1 ) ,kernel_initializer='he_normal' , data_format=IMAGE_ORDERING))(o) 74 | o = Conv2DTranspose( n_classes , kernel_size=(64,64) , strides=(32,32) , use_bias=False , data_format=IMAGE_ORDERING )(o) 75 | o_shape = Model(img_input , o ).output_shape 76 | 77 | outputHeight = o_shape[2] 78 | outputWidth = o_shape[3] 79 | 80 | print "koko" , o_shape 81 | 82 | o = (Reshape(( -1 , outputHeight*outputWidth )))(o) 83 | o = (Permute((2, 1)))(o) 84 | o = (Activation('softmax'))(o) 85 | model = Model( img_input , o ) 86 | model.outputWidth = outputWidth 87 | model.outputHeight = outputHeight 88 | 89 | return model 90 | 91 | 92 | if __name__ == '__main__': 93 | m = FCN32( 101 ) 94 | from keras.utils import plot_model 95 | plot_model( m , show_shapes=True , to_file='model.png') 96 | -------------------------------------------------------------------------------- /Models/Segnet.py: -------------------------------------------------------------------------------- 1 | from keras.layers.merge import Add 2 | from keras.models import Model 3 | from keras.layers import Reshape 4 | from keras.layers import Input 5 | 6 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Permute 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D 9 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 10 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 11 | from keras.layers.recurrent import LSTM 12 | from keras.layers.advanced_activations import LeakyReLU 13 | from keras.optimizers import Adam, SGD 14 | from keras.layers.embeddings import Embedding 15 | from keras.utils import np_utils 16 | # from keras.regularizers import ActivityRegularizer 17 | from keras import backend as K 18 | 19 | from crfrnn_layer import CrfRnnLayer 20 | 21 | 22 | def segnet(nClasses, optimizer=None, input_height=360, input_width=480): 23 | kernel = 3 24 | filter_size = 64 25 | pad = 1 26 | pool_size = 2 27 | 28 | img_input = Input(shape=(input_height, input_width,3)) 29 | 30 | 31 | 32 | 33 | # encoder 34 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 35 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 36 | x = BatchNormalization()(x) 37 | x = Activation('relu') (x) 38 | l1 = x 39 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 40 | 41 | x = ZeroPadding2D(padding=(pad, pad))(x) 42 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 43 | x = BatchNormalization()(x) 44 | x = Activation('relu')(x) 45 | l2 = x 46 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 47 | 48 | x = ZeroPadding2D(padding=(pad, pad))(x) 49 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 50 | x = BatchNormalization()(x) 51 | x = Activation('relu')(x) 52 | l3 = x 53 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 54 | 55 | x = ZeroPadding2D(padding=(pad, pad))(x) 56 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 57 | x = BatchNormalization()(x) 58 | l4 = x 59 | x = Activation('relu')(x) 60 | 61 | 62 | 63 | 64 | # decoder 65 | x = ZeroPadding2D(padding=(pad, pad))(x) 66 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 67 | x = BatchNormalization()(x) 68 | 69 | # x = 
Add()([l4, x]) 70 | x = UpSampling2D(size=(pool_size, pool_size))(x) 71 | x = ZeroPadding2D(padding=(pad, pad))(x) 72 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 73 | x = BatchNormalization()(x) 74 | 75 | # x = Add()([l3, x]) 76 | x = UpSampling2D(size=(pool_size, pool_size))(x) 77 | x = ZeroPadding2D(padding=(pad, pad))(x) 78 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 79 | x = BatchNormalization()(x) 80 | 81 | # x = Add()([l2, x]) 82 | x = UpSampling2D(size=(pool_size, pool_size))(x) 83 | x = ZeroPadding2D(padding=(pad, pad))(x) 84 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 85 | x = BatchNormalization()(x) 86 | 87 | # x = Add()([l1, x]) 88 | x = Convolution2D(nClasses, (1, 1), padding='valid') (x) 89 | 90 | out = CrfRnnLayer(image_dims=(input_height, input_width), 91 | num_classes=nClasses, 92 | theta_alpha=160., 93 | theta_beta=3., 94 | theta_gamma=3., 95 | num_iterations=5, 96 | name='crfrnn')([x, img_input]) 97 | 98 | a = Model(inputs=img_input, outputs=out) 99 | 100 | model = [] 101 | a.outputHeight = a.output_shape[1] 102 | a.outputWidth = a.output_shape[2] 103 | 104 | out = Reshape((a.outputHeight * a.outputWidth, nClasses), input_shape=(nClasses, a.outputHeight, a.outputWidth))(out) 105 | out = Activation('softmax')(out) 106 | # if not optimizer is None: 107 | # model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy']) 108 | model = Model(inputs=img_input, outputs=out) 109 | model.outputHeight = a.outputHeight 110 | model.outputWidth = a.outputWidth 111 | 112 | return model 113 | -------------------------------------------------------------------------------- /Models/Segnet_crf_res.py: -------------------------------------------------------------------------------- 1 | from keras.layers.merge import Add 2 | from keras.models import Model 3 | from keras.layers import Reshape 4 | from keras.layers import Input 5 | 6 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Permute 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D 9 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 10 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 11 | from keras.layers.recurrent import LSTM 12 | from keras.layers.advanced_activations import LeakyReLU 13 | from keras.optimizers import Adam, SGD 14 | from keras.layers.embeddings import Embedding 15 | from keras.utils import np_utils 16 | # from keras.regularizers import ActivityRegularizer 17 | from keras import backend as K 18 | 19 | from crfrnn_layer import CrfRnnLayer 20 | 21 | 22 | def segnet_crf_res(nClasses, optimizer=None, input_height=360, input_width=480): 23 | kernel = 3 24 | filter_size = 64 25 | pad = 1 26 | pool_size = 2 27 | 28 | img_input = Input(shape=(input_height, input_width,3)) 29 | 30 | 31 | 32 | 33 | # encoder 34 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 35 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 36 | x = BatchNormalization()(x) 37 | x = Activation('relu') (x) 38 | l1 = x 39 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 40 | 41 | x = ZeroPadding2D(padding=(pad, pad))(x) 42 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 43 | x = BatchNormalization()(x) 44 | x = Activation('relu')(x) 45 | l2 = x 46 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 47 | 48 | x = 
ZeroPadding2D(padding=(pad, pad))(x) 49 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 50 | x = BatchNormalization()(x) 51 | x = Activation('relu')(x) 52 | l3 = x 53 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 54 | 55 | x = ZeroPadding2D(padding=(pad, pad))(x) 56 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 57 | x = BatchNormalization()(x) 58 | l4 = x 59 | x = Activation('relu')(x) 60 | 61 | 62 | 63 | 64 | # decoder 65 | x = ZeroPadding2D(padding=(pad, pad))(x) 66 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 67 | x = BatchNormalization()(x) 68 | 69 | x = Add()([l4, x]) 70 | x = UpSampling2D(size=(pool_size, pool_size))(x) 71 | x = ZeroPadding2D(padding=(pad, pad))(x) 72 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 73 | x = BatchNormalization()(x) 74 | 75 | x = Add()([l3, x]) 76 | x = UpSampling2D(size=(pool_size, pool_size))(x) 77 | x = ZeroPadding2D(padding=(pad, pad))(x) 78 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 79 | x = BatchNormalization()(x) 80 | 81 | x = Add()([l2, x]) 82 | x = UpSampling2D(size=(pool_size, pool_size))(x) 83 | x = ZeroPadding2D(padding=(pad, pad))(x) 84 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 85 | x = BatchNormalization()(x) 86 | 87 | x = Add()([l1, x]) 88 | x = Convolution2D(nClasses, (1, 1), padding='valid') (x) 89 | 90 | out = CrfRnnLayer(image_dims=(input_height, input_width), 91 | num_classes=nClasses, 92 | theta_alpha=160., 93 | theta_beta=3., 94 | theta_gamma=3., 95 | num_iterations=5, 96 | name='crfrnn')([x, img_input]) 97 | # out = x 98 | a = Model(inputs=img_input, outputs=out) 99 | 100 | model = [] 101 | a.outputHeight = a.output_shape[1] 102 | a.outputWidth = a.output_shape[2] 103 | 104 | out = Reshape((a.outputHeight * a.outputWidth, nClasses), input_shape=(nClasses, a.outputHeight, a.outputWidth))(out) 105 | out = Activation('softmax')(out) 106 | # if not optimizer is None: 107 | # model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy']) 108 | model = Model(inputs=img_input, outputs=out) 109 | model.outputHeight = a.outputHeight 110 | model.outputWidth = a.outputWidth 111 | 112 | return model 113 | -------------------------------------------------------------------------------- /Models/Segnet_res.py: -------------------------------------------------------------------------------- 1 | from keras.layers.merge import Add 2 | from keras.models import Model 3 | from keras.layers import Reshape 4 | from keras.layers import Input 5 | 6 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Permute 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D 9 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 10 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 11 | from keras.layers.recurrent import LSTM 12 | from keras.layers.advanced_activations import LeakyReLU 13 | from keras.optimizers import Adam, SGD 14 | from keras.layers.embeddings import Embedding 15 | from keras.utils import np_utils 16 | # from keras.regularizers import ActivityRegularizer 17 | from keras import backend as K 18 | 19 | from crfrnn_layer import CrfRnnLayer 20 | 21 | 22 | def segnet_res(nClasses, optimizer=None, input_height=360, input_width=480): 23 | kernel = 3 24 | filter_size = 64 25 | pad = 1 26 | pool_size = 2 27 | 28 | img_input = 
Input(shape=(input_height, input_width,3)) 29 | 30 | 31 | 32 | 33 | # encoder 34 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 35 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 36 | x = BatchNormalization()(x) 37 | x = Activation('relu') (x) 38 | l1 = x 39 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 40 | 41 | x = ZeroPadding2D(padding=(pad, pad))(x) 42 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 43 | x = BatchNormalization()(x) 44 | x = Activation('relu')(x) 45 | l2 = x 46 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 47 | 48 | x = ZeroPadding2D(padding=(pad, pad))(x) 49 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 50 | x = BatchNormalization()(x) 51 | x = Activation('relu')(x) 52 | l3 = x 53 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 54 | 55 | x = ZeroPadding2D(padding=(pad, pad))(x) 56 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 57 | x = BatchNormalization()(x) 58 | l4 = x 59 | x = Activation('relu')(x) 60 | 61 | 62 | 63 | 64 | # decoder 65 | x = ZeroPadding2D(padding=(pad, pad))(x) 66 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 67 | x = BatchNormalization()(x) 68 | 69 | x = Add()([l4, x]) 70 | x = UpSampling2D(size=(pool_size, pool_size))(x) 71 | x = ZeroPadding2D(padding=(pad, pad))(x) 72 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 73 | x = BatchNormalization()(x) 74 | 75 | x = Add()([l3, x]) 76 | x = UpSampling2D(size=(pool_size, pool_size))(x) 77 | x = ZeroPadding2D(padding=(pad, pad))(x) 78 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 79 | x = BatchNormalization()(x) 80 | 81 | x = Add()([l2, x]) 82 | x = UpSampling2D(size=(pool_size, pool_size))(x) 83 | x = ZeroPadding2D(padding=(pad, pad))(x) 84 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 85 | x = BatchNormalization()(x) 86 | 87 | x = Add()([l1, x]) 88 | x = Convolution2D(nClasses, (1, 1), padding='valid') (x) 89 | 90 | # out = CrfRnnLayer(image_dims=(input_height, input_width), 91 | # num_classes=nClasses, 92 | # theta_alpha=160., 93 | # theta_beta=3., 94 | # theta_gamma=3., 95 | # num_iterations=5, 96 | # name='crfrnn')([x, img_input]) 97 | out = x 98 | a = Model(inputs=img_input, outputs=out) 99 | 100 | model = [] 101 | a.outputHeight = a.output_shape[1] 102 | a.outputWidth = a.output_shape[2] 103 | 104 | out = Reshape((a.outputHeight * a.outputWidth, nClasses), input_shape=(nClasses, a.outputHeight, a.outputWidth))(out) 105 | out = Activation('softmax')(out) 106 | # if not optimizer is None: 107 | # model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy']) 108 | model = Model(inputs=img_input, outputs=out) 109 | model.outputHeight = a.outputHeight 110 | model.outputWidth = a.outputWidth 111 | 112 | return model 113 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import math 2 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint 3 | import argparse 4 | import Models , LoadBatches 5 | from Models.Segnet_crf_res import segnet_crf_res 6 | from Models.VGGSegnet import VGGSegnet 7 | from Models.VGGUnet import VGGUnet 8 | from Models.VGGUnet import VGGUnet2 9 | from Models.FCN8 import FCN8 10 | from Models.FCN32 import FCN32 11 | from Models.Segnet import segnet 12 | from Models.Segnet_transpose import segnet_transposed 13 | from Models.Segnet_res import 
segnet_res 14 | 15 | 16 | # learning rate schedule 17 | def step_decay(epoch): 18 | initial_lrate = 0.000001 19 | drop = 0.5 20 | epochs_drop = 10.0 21 | lrate = initial_lrate * math.pow(drop, math.floor((1+epoch)/epochs_drop)) 22 | return lrate 23 | 24 | 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument("--save_weights_path", type = str ) 27 | parser.add_argument("--train_images", type = str ) 28 | parser.add_argument("--train_annotations", type = str ) 29 | parser.add_argument("--n_classes", type=int ) 30 | parser.add_argument("--input_height", type=int , default = 224 ) 31 | parser.add_argument("--input_width", type=int , default = 224 ) 32 | 33 | parser.add_argument('--validate',action='store_false') 34 | parser.add_argument("--val_images", type = str , default = "") 35 | parser.add_argument("--val_annotations", type = str , default = "") 36 | 37 | parser.add_argument("--epochs", type = int, default = 100 ) 38 | parser.add_argument("--batch_size", type = int, default = 1 ) 39 | parser.add_argument("--val_batch_size", type = int, default = 1 ) 40 | parser.add_argument("--load_weights", type = str , default = "data/vgg16_weights_th_dim_ordering_th_kernels.h5") 41 | 42 | parser.add_argument("--model_name", type = str , default = "") 43 | parser.add_argument("--optimizer_name", type = str , default = "adadelta") 44 | 45 | 46 | args = parser.parse_args() 47 | 48 | train_images_path = args.train_images 49 | train_segs_path = args.train_annotations 50 | train_batch_size = args.batch_size 51 | n_classes = args.n_classes 52 | input_height = args.input_height 53 | input_width = args.input_width 54 | validate = args.validate 55 | save_weights_path = args.save_weights_path 56 | epochs = args.epochs 57 | load_weights = args.load_weights 58 | 59 | optimizer_name = args.optimizer_name 60 | model_name = args.model_name 61 | 62 | if validate: 63 | val_images_path = args.val_images 64 | val_segs_path = args.val_annotations 65 | val_batch_size = args.val_batch_size 66 | 67 | modelFns = { 'vgg_segnet':VGGSegnet , 'vgg_unet':VGGUnet , 'vgg_unet2':VGGUnet2 , 'fcn8':FCN8 , 'fcn32':FCN32, 'segnet':segnet, 'segnet_transposed':segnet_transposed, 'segnet_res':segnet_res, 'segnet_res_crf':segnet_crf_res} 68 | modelFN = modelFns[ model_name ] 69 | 70 | m = modelFN( n_classes , input_height=input_height, input_width=input_width ) 71 | m.compile(loss='categorical_crossentropy', 72 | optimizer= optimizer_name , 73 | metrics=['accuracy']) 74 | 75 | 76 | #if len( load_weights ) > 0: 77 | # m.load_weights(load_weights) 78 | 79 | 80 | lrate = LearningRateScheduler(step_decay) 81 | filepath="weights_360_480_res_with_crf.hdf5" 82 | 83 | checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') 84 | callbacks_list = [checkpoint] 85 | 86 | 87 | 88 | print "Model output shape" , m.output_shape 89 | 90 | output_height = m.outputHeight 91 | output_width = m.outputWidth 92 | class_weighting= [0.2595, 0.1826, 4.5640, 0.1417, 0.9051, 0.3826, 9.6446, 1.8418, 0.6823, 6.2478, 7.3614] 93 | G = LoadBatches.imageSegmentationGenerator( train_images_path , train_segs_path , train_batch_size, n_classes , input_height , input_width , output_height , output_width ) 94 | 95 | 96 | if validate: 97 | G2 = LoadBatches.imageSegmentationGenerator( val_images_path , val_segs_path , val_batch_size, n_classes , input_height , input_width , output_height , output_width ) 98 | 99 | if not validate: 100 | for ep in range( epochs ): 101 | m.fit_generator( G , 512 , epochs=1 ) 102 | 
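# Each iteration below writes one weights file and one full-model file, with
# the epoch index appended to save_weights_path. A minimal sketch of restoring
# such a checkpoint for inference (`best_ep` is a hypothetical epoch index;
# the model must be rebuilt with the same modelFN arguments so that the layer
# shapes match):
#
#   m2 = modelFN( n_classes , input_height=input_height, input_width=input_width )
#   m2.load_weights( save_weights_path + "." + str( best_ep ) )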
m.save_weights( save_weights_path + "." + str( ep ) ) 103 | m.save( save_weights_path + ".model." + str( ep ) ) 104 | else: 105 | # for ep in range( epochs ): 106 | m.fit_generator( G , 512 , validation_data=G2 , validation_steps=200 , callbacks=callbacks_list,class_weight=class_weighting, epochs=epochs, verbose=1) 107 | # m.save_weights( save_weights_path + "." + str( ep ) ) 108 | # m.save( save_weights_path + ".model." + str( ep ) ) 109 | -------------------------------------------------------------------------------- /Models/VGGSegnet.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | from keras.models import * 7 | from keras.layers import * 8 | 9 | 10 | import os 11 | file_path = os.path.dirname( os.path.abspath(__file__) ) 12 | VGG_Weights_path = file_path+"/../data/vgg16_weights_th_dim_ordering_th_kernels.h5" 13 | 14 | 15 | def VGGSegnet( n_classes , input_height=416, input_width=608 , vgg_level=3): 16 | 17 | img_input = Input(shape=(3,input_height,input_width)) 18 | 19 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1', data_format='channels_first' )(img_input) 20 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2', data_format='channels_first' )(x) 21 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', data_format='channels_first' )(x) 22 | f1 = x 23 | # Block 2 24 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1', data_format='channels_first' )(x) 25 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2', data_format='channels_first' )(x) 26 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', data_format='channels_first' )(x) 27 | f2 = x 28 | 29 | # Block 3 30 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1', data_format='channels_first' )(x) 31 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2', data_format='channels_first' )(x) 32 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3', data_format='channels_first' )(x) 33 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', data_format='channels_first' )(x) 34 | f3 = x 35 | 36 | # Block 4 37 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1', data_format='channels_first' )(x) 38 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2', data_format='channels_first' )(x) 39 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3', data_format='channels_first' )(x) 40 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', data_format='channels_first' )(x) 41 | f4 = x 42 | 43 | # Block 5 44 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1', data_format='channels_first' )(x) 45 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2', data_format='channels_first' )(x) 46 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3', data_format='channels_first' )(x) 47 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool', data_format='channels_first' )(x) 48 | f5 = x 49 | 50 | x = Flatten(name='flatten')(x) 51 | x = Dense(4096, activation='relu', name='fc1')(x) 52 | x = Dense(4096, activation='relu', name='fc2')(x) 53 | x = Dense( 1000 , activation='softmax', name='predictions')(x) 54 | 55 | vgg = Model( img_input , x ) 56 | vgg.load_weights(VGG_Weights_path) 57 | 58 | levels = [f1 
, f2 , f3 , f4 , f5 ] 59 | 60 | o = levels[ vgg_level ] 61 | 62 | o = ( ZeroPadding2D( (1,1) , data_format='channels_first' ))(o) 63 | o = ( Conv2D(512, (3, 3), padding='valid', data_format='channels_first'))(o) 64 | o = ( BatchNormalization())(o) 65 | 66 | o = ( UpSampling2D( (2,2), data_format='channels_first'))(o) 67 | o = ( ZeroPadding2D( (1,1), data_format='channels_first'))(o) 68 | o = ( Conv2D( 256, (3, 3), padding='valid', data_format='channels_first'))(o) 69 | o = ( BatchNormalization())(o) 70 | 71 | o = ( UpSampling2D((2,2) , data_format='channels_first' ) )(o) 72 | o = ( ZeroPadding2D((1,1) , data_format='channels_first' ))(o) 73 | o = ( Conv2D( 128 , (3, 3), padding='valid' , data_format='channels_first' ))(o) 74 | o = ( BatchNormalization())(o) 75 | 76 | o = ( UpSampling2D((2,2) , data_format='channels_first' ))(o) 77 | o = ( ZeroPadding2D((1,1) , data_format='channels_first' ))(o) 78 | o = ( Conv2D( 64 , (3, 3), padding='valid' , data_format='channels_first' ))(o) 79 | o = ( BatchNormalization())(o) 80 | 81 | 82 | o = Conv2D( n_classes , (3, 3) , padding='same', data_format='channels_first' )( o ) 83 | o_shape = Model(img_input , o ).output_shape 84 | outputHeight = o_shape[2] 85 | outputWidth = o_shape[3] 86 | 87 | o = (Reshape(( -1 , outputHeight*outputWidth )))(o) 88 | o = (Permute((2, 1)))(o) 89 | o = (Activation('softmax'))(o) 90 | model = Model( img_input , o ) 91 | model.outputWidth = outputWidth 92 | model.outputHeight = outputHeight 93 | 94 | return model 95 | 96 | 97 | 98 | 99 | if __name__ == '__main__': 100 | m = VGGSegnet( 101 ) 101 | from keras.utils import plot_model 102 | plot_model( m , show_shapes=True , to_file='model.png') 103 | 104 | -------------------------------------------------------------------------------- /Models/Segnet_crf_res_l1_v1.py: -------------------------------------------------------------------------------- 1 | from keras.layers.merge import Add 2 | from keras.models import Model 3 | from keras.layers import Reshape 4 | from keras.layers import Input 5 | 6 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Permute 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D 9 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 10 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 11 | from keras.layers.recurrent import LSTM 12 | from keras.layers.advanced_activations import LeakyReLU 13 | from keras.optimizers import Adam, SGD 14 | from keras.layers.embeddings import Embedding 15 | from keras.utils import np_utils 16 | # from keras.regularizers import ActivityRegularizer 17 | from keras import backend as K 18 | 19 | from crfrnn_layer import CrfRnnLayer 20 | 21 | def l1_reg(weight_matrix): 22 | return 0.01 * K.sum(K.abs(weight_matrix)) 23 | 24 | 25 | def penalized_loss(bottleNeckFeatures): 26 | def custom_loss(y_true, y_pred): 27 | loss1=K.categorical_crossentropy(y_pred, y_true) 28 | loss2=l1_reg(bottleNeckFeatures) 29 | return loss1+loss2 30 | return custom_loss 31 | 32 | 33 | def segnet(nClasses, optimizer=None, input_height=360, input_width=480): 34 | kernel = 3 35 | filter_size = 64 36 | pad = 1 37 | pool_size = 2 38 | 39 | img_input = Input(shape=(input_height, input_width,3)) 40 | 41 | # encoder 42 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 43 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 44 | x = 
BatchNormalization()(x) 45 | x = Activation('relu') (x) 46 | l1 = x 47 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 48 | 49 | x = ZeroPadding2D(padding=(pad, pad))(x) 50 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 51 | x = BatchNormalization()(x) 52 | x = Activation('relu')(x) 53 | l2 = x 54 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 55 | 56 | x = ZeroPadding2D(padding=(pad, pad))(x) 57 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 58 | x = BatchNormalization()(x) 59 | x = Activation('relu')(x) 60 | l3 = x 61 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 62 | 63 | x = ZeroPadding2D(padding=(pad, pad))(x) 64 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 65 | x = BatchNormalization()(x) 66 | l4 = x 67 | x = Activation('relu')(x) 68 | 69 | # decoder 70 | x = ZeroPadding2D(padding=(pad, pad))(x) 71 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 72 | x = BatchNormalization()(x) 73 | 74 | x = Add()([l4, x]) 75 | x = UpSampling2D(size=(pool_size, pool_size))(x) 76 | x = ZeroPadding2D(padding=(pad, pad))(x) 77 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 78 | x = BatchNormalization()(x) 79 | 80 | x = Add()([l3, x]) 81 | x = UpSampling2D(size=(pool_size, pool_size))(x) 82 | x = ZeroPadding2D(padding=(pad, pad))(x) 83 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 84 | x = BatchNormalization()(x) 85 | 86 | x = Add()([l2, x]) 87 | x = UpSampling2D(size=(pool_size, pool_size))(x) 88 | x = ZeroPadding2D(padding=(pad, pad))(x) 89 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 90 | x = BatchNormalization()(x) 91 | 92 | x = Add()([l1, x]) 93 | x = Convolution2D(nClasses, (1, 1), padding='valid') (x) 94 | 95 | beforeCrfRNN = x 96 | 97 | out = CrfRnnLayer(image_dims=(input_height, input_width), 98 | num_classes=nClasses, 99 | theta_alpha=160., 100 | theta_beta=3., 101 | theta_gamma=3., 102 | num_iterations=5, 103 | name='crfrnn')([x, img_input]) 104 | 105 | a = Model(inputs=img_input, outputs=out) 106 | 107 | model = [] 108 | a.outputHeight = a.output_shape[1] 109 | a.outputWidth = a.output_shape[2] 110 | 111 | out = Reshape((a.outputHeight * a.outputWidth, nClasses), input_shape=(nClasses, a.outputHeight, a.outputWidth))(out) 112 | out = Activation('softmax')(out) 113 | 114 | 115 | 116 | model = Model(inputs=img_input, outputs=out) 117 | model.outputHeight = a.outputHeight 118 | model.outputWidth = a.outputWidth 119 | 120 | print beforeCrfRNN.shape 121 | print img_input.shape 122 | print out.shape 123 | print x.shape 124 | 125 | model.compile(loss=penalized_loss(bottleNeckFeatures=l4), optimizer="adadelta", metrics=['accuracy']) 126 | 127 | return model -------------------------------------------------------------------------------- /Models/Segnet_crf_res_l1_v2.py: -------------------------------------------------------------------------------- 1 | from keras.layers.merge import Add 2 | from keras.models import Model 3 | from keras.layers import Reshape 4 | from keras.layers import Input 5 | 6 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Permute 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D 9 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 10 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 11 | from keras.layers.recurrent import LSTM 12 | 
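# The loss functions defined below close over a bottleneck tensor so that an
# L1 activity penalty on the encoder features can be added to the usual
# cross-entropy term. A minimal standalone sketch of the same closure pattern;
# note that in Keras 2 the backend signature is
# K.categorical_crossentropy(target, output), i.e. y_true first, whereas the
# definitions below pass y_pred first:
#
#   def penalized(features):                 # features: any Keras tensor
#       def loss(y_true, y_pred):
#           ce = K.categorical_crossentropy(y_true, y_pred)
#           return ce + 0.01 * K.sum(K.abs(features))
#       return loss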
from keras.layers.advanced_activations import LeakyReLU 13 | from keras.optimizers import Adam, SGD 14 | from keras.layers.embeddings import Embedding 15 | from keras.utils import np_utils 16 | from keras import backend as K 17 | 18 | from crfrnn_layer import CrfRnnLayer 19 | 20 | def l1_reg(weight_matrix): 21 | return 0.01 * K.sum(K.abs(weight_matrix)) 22 | 23 | def penalized_loss2(bottleNeckFeatures): 24 | def custom_loss(y_true, y_pred): 25 | loss1=K.categorical_crossentropy(y_pred, y_true) 26 | loss2=l1_reg(bottleNeckFeatures) 27 | return loss1+loss2 28 | return custom_loss 29 | 30 | def penalized_loss(bottleNeckFeatures): 31 | def custom_loss(y_true, y_pred): 32 | loss1=K.categorical_crossentropy(y_pred, y_true) 33 | loss2=l1_reg(bottleNeckFeatures) 34 | return loss1+(0.1*loss2) 35 | return custom_loss 36 | 37 | def segnet(nClasses, optimizer=None, input_height=360, input_width=480): 38 | kernel = 3 39 | filter_size = 64 40 | pad = 1 41 | pool_size = 2 42 | 43 | img_input = Input(shape=(input_height, input_width,3)) 44 | 45 | # encoder 46 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 47 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 48 | x = BatchNormalization()(x) 49 | x = Activation('relu') (x) 50 | l1 = x 51 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 52 | 53 | x = ZeroPadding2D(padding=(pad, pad))(x) 54 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 55 | x = BatchNormalization()(x) 56 | x = Activation('relu')(x) 57 | l2 = x 58 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 59 | 60 | x = ZeroPadding2D(padding=(pad, pad))(x) 61 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 62 | x = BatchNormalization()(x) 63 | x = Activation('relu')(x) 64 | l3 = x 65 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 66 | 67 | x = ZeroPadding2D(padding=(pad, pad))(x) 68 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 69 | x = BatchNormalization()(x) 70 | l4 = x 71 | x = Activation('relu')(x) 72 | 73 | # decoder 74 | x = ZeroPadding2D(padding=(pad, pad))(x) 75 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 76 | x = BatchNormalization()(x) 77 | 78 | x = Add()([l4, x]) 79 | x = UpSampling2D(size=(pool_size, pool_size))(x) 80 | x = ZeroPadding2D(padding=(pad, pad))(x) 81 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 82 | x = BatchNormalization()(x) 83 | 84 | x = Add()([l3, x]) 85 | x = UpSampling2D(size=(pool_size, pool_size))(x) 86 | x = ZeroPadding2D(padding=(pad, pad))(x) 87 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 88 | x = BatchNormalization()(x) 89 | 90 | x = Add()([l2, x]) 91 | x = UpSampling2D(size=(pool_size, pool_size))(x) 92 | x = ZeroPadding2D(padding=(pad, pad))(x) 93 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 94 | x = BatchNormalization()(x) 95 | 96 | x = Add()([l1, x]) 97 | x = Convolution2D(nClasses, (1, 1), padding='valid') (x) 98 | 99 | beforeCrfRNN = x 100 | 101 | out = CrfRnnLayer(image_dims=(input_height, input_width), 102 | num_classes=nClasses, 103 | theta_alpha=160., 104 | theta_beta=3., 105 | theta_gamma=3., 106 | num_iterations=5, 107 | name='crfrnn')([x, img_input]) 108 | 109 | a = Model(inputs=img_input, outputs=out) 110 | 111 | model = [] 112 | a.outputHeight = a.output_shape[1] 113 | a.outputWidth = a.output_shape[2] 114 | 115 | out = Reshape((a.outputHeight * a.outputWidth, nClasses), input_shape=(nClasses, a.outputHeight, a.outputWidth))(out) 116 | out = Activation('softmax')(out) 117 | 118 
| model = Model(inputs=img_input, outputs=out) 119 | model.outputHeight = a.outputHeight 120 | model.outputWidth = a.outputWidth 121 | 122 | print beforeCrfRNN.shape 123 | print img_input.shape 124 | print out.shape 125 | print x.shape 126 | 127 | model.compile(loss=penalized_loss(bottleNeckFeatures=l4), optimizer="adadelta", metrics=['accuracy']) 128 | 129 | return model -------------------------------------------------------------------------------- /crfrnn_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2017 Sadeep Jayasumana 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | from keras.layers.convolutional import UpSampling2D 25 | from keras.layers.core import Activation, Reshape, Permute 26 | from keras.models import Model 27 | from keras.layers import Conv2D, MaxPooling2D, Input, ZeroPadding2D, \ 28 | Dropout, Conv2DTranspose, Cropping2D, Add, BatchNormalization 29 | from crfrnn_layer import CrfRnnLayer 30 | 31 | 32 | def get_crfrnn_model_def (nClasses , optimizer=None , input_height=360, input_width=480 ): 33 | """ Returns the Keras CRF-RNN model definition. 34 | 35 | Note: the 500 x 500 / Cropping2D remark in the original CRF-RNN model does not apply to 36 | this SegNet-style variant, which has no Cropping2D layers; for other input sizes, adjust the 37 | hard-coded image_dims=(32, 32) of the CrfRnnLayer below.
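    A hypothetical example call, using the CamVid-style defaults that appear elsewhere in
    this repository (11 classes, 360 x 480 inputs):

        model = get_crfrnn_model_def(nClasses=11, input_height=360, input_width=480)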
38 | """ 39 | 40 | channels, height, weight = 3, input_height, input_width 41 | 42 | # Input 43 | input_shape = (height, weight, 3) 44 | img_input = Input(shape=input_shape) 45 | 46 | kernel = 3 47 | filter_size = 64 48 | pad = 1 49 | pool_size = 2 50 | 51 | 52 | # Add plenty of zero padding 53 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 54 | 55 | 56 | # VGG-16 convolution block 1 57 | x = Conv2D(filter_size, (kernel, kernel), padding='valid', name='conv1_1')(x) 58 | x = BatchNormalization()(x) 59 | x = Activation('relu')(x) 60 | x = MaxPooling2D((pool_size, pool_size), name='pool1')(x) 61 | 62 | 63 | # VGG-16 convolution block 1 64 | x = Conv2D(128, (kernel, kernel), padding='valid', name='conv1_2')(x) 65 | x = BatchNormalization()(x) 66 | x = Activation('relu')(x) 67 | x = MaxPooling2D((pool_size, pool_size), name='pool2')(x) 68 | 69 | # VGG-16 convolution block 1 70 | x = Conv2D(256, (kernel, kernel), padding='valid', name='conv1_3')(x) 71 | x = BatchNormalization()(x) 72 | x = Activation('relu')(x) 73 | x = MaxPooling2D((pool_size, pool_size), name='pool3')(x) 74 | pool3 = x 75 | 76 | # VGG-16 convolution block 1 77 | x = Conv2D(512, (kernel, kernel), padding='valid', name='conv1_4')(x) 78 | x = BatchNormalization()(x) 79 | x = Activation('relu')(x) 80 | x = MaxPooling2D((pool_size, pool_size), name='pool4')(x) 81 | pool4 = x 82 | 83 | #decoder 84 | x = ZeroPadding2D(padding=(pad, pad))(x) 85 | x = Conv2D(512, (kernel, kernel), padding='valid', name='conv2_1')(x) 86 | x = BatchNormalization()(x) 87 | 88 | 89 | x = UpSampling2D((pool_size, pool_size))(x) 90 | x = ZeroPadding2D(padding=(pad, pad))(x) 91 | x = Conv2D(256, (kernel, kernel), padding='valid', name='conv2_2')(x) 92 | x = BatchNormalization()(x) 93 | 94 | 95 | 96 | x = UpSampling2D((pool_size, pool_size))(x) 97 | x = ZeroPadding2D(padding=(pad, pad))(x) 98 | x = Conv2D(128, (kernel, kernel), padding='valid', name='conv2_3')(x) 99 | x = BatchNormalization()(x) 100 | 101 | x = UpSampling2D((pool_size, pool_size))(x) 102 | x = ZeroPadding2D(padding=(pad, pad))(x) 103 | x = Conv2D(filter_size, (kernel, kernel), padding='valid', name='conv2_4')(x) 104 | x = BatchNormalization()(x) 105 | 106 | 107 | 108 | 109 | x = Conv2D(nClasses, (1, 1), padding='valid', name='conv3_1')(x) 110 | #x = Conv2D(100,(kernel,kernel),padding='valid')(x) 111 | 112 | #out_height = x.shape[1] 113 | #out_width = x.shape[2] 114 | 115 | #x = Reshape((nClasses,32*32), input_shape=(32, 32, nClasses))(x) 116 | 117 | #x = Permute((2,1))(x) 118 | 119 | #x = Activation('softmax')(x) 120 | print x 121 | #x = UpSampling2D(size=(4,4))(x) 122 | 123 | 124 | output = CrfRnnLayer(image_dims=(32, 32), 125 | num_classes=nClasses, 126 | theta_alpha=160., 127 | theta_beta=3., 128 | theta_gamma=3., 129 | num_iterations=10, 130 | name='crfrnn')([x, img_input]) 131 | 132 | # Build the model 133 | model = Model(img_input, output, name='crfrnn_net') 134 | model.outputHeight = 32 135 | model.outputWidth = 32 136 | return model 137 | -------------------------------------------------------------------------------- /cpp/high_dim_filter.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * MIT License 3 | * 4 | * Copyright (c) 2017 Sadeep Jayasumana 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #include "tensorflow/core/framework/op.h" 26 | #include "tensorflow/core/framework/shape_inference.h" 27 | #include "tensorflow/core/framework/op_kernel.h" 28 | #include "tensorflow/core/framework/tensor_shape.h" 29 | #include "modified_permutohedral.h" 30 | 31 | using namespace tensorflow; 32 | 33 | void compute_spatial_kernel(float * const output_kernel, const int width, 34 | const int height, const float theta_gamma) { 35 | 36 | const int num_pixels = width * height; 37 | for (int p = 0; p < num_pixels; ++p) { 38 | output_kernel[2 * p] = static_cast<float>(p % width) / theta_gamma; 39 | output_kernel[2 * p + 1] = static_cast<float>(p / width) / theta_gamma; 40 | } 41 | } 42 | 43 | void compute_bilateral_kernel(float * const output_kernel, const Tensor& rgb_tensor, 44 | const float theta_alpha, const float theta_beta) { 45 | 46 | const int height = rgb_tensor.dim_size(1); 47 | const int width = rgb_tensor.dim_size(2); 48 | const int num_pixels = height * width; 49 | auto rgb = rgb_tensor.flat<float>(); 50 | 51 | for (int p = 0; p < num_pixels; ++p) { 52 | // Spatial terms 53 | output_kernel[5 * p] = static_cast<float>(p % width) / theta_alpha; 54 | output_kernel[5 * p + 1] = static_cast<float>(p / width) / theta_alpha; 55 | 56 | // Color terms 57 | output_kernel[5 * p + 2] = static_cast<float>(rgb(p) / theta_beta); 58 | output_kernel[5 * p + 3] = static_cast<float>(rgb(num_pixels + p) / theta_beta); 59 | output_kernel[5 * p + 4] = static_cast<float>(rgb(2 * num_pixels + p) / theta_beta); 60 | } 61 | } 62 | 63 | REGISTER_OP("HighDimFilter") 64 | .Attr("bilateral: bool") 65 | .Attr("theta_alpha: float = 1.0") 66 | .Attr("theta_beta: float = 1.0") 67 | .Attr("theta_gamma: float = 1.0") 68 | .Attr("backwards: bool = false") 69 | .Input("raw: float32") 70 | .Input("rgb: float32") 71 | .Output("filtered: float32") 72 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 73 | c->set_output(0, c->input(0)); 74 | return Status::OK(); 75 | }); 76 | 77 | class HighDimFilterOp : public OpKernel { 78 | public: 79 | explicit HighDimFilterOp(OpKernelConstruction* context) : OpKernel(context) { 80 | 81 | OP_REQUIRES_OK(context, 82 | context->GetAttr("bilateral", &bilateral_)); 83 | OP_REQUIRES_OK(context, 84 | context->GetAttr("theta_alpha", &theta_alpha_)); 85 | OP_REQUIRES_OK(context, 86 | context->GetAttr("theta_beta", &theta_beta_)); 87 | OP_REQUIRES_OK(context, 88 | context->GetAttr("theta_gamma", &theta_gamma_)); 89 | OP_REQUIRES_OK(context, 90 | context->GetAttr("backwards", &backwards_)); 91 | } 92 | 93 | void Compute(OpKernelContext* context) override { 94 | 95 | // Grab the unary tensor 96 | const Tensor& input_tensor = context->input(0); 97 | // Grab the RGB image tensor 98 | const
Tensor& image_tensor = context->input(1); 99 | 100 | const int channels = input_tensor.dim_size(0); 101 | const int height = input_tensor.dim_size(1); 102 | const int width = input_tensor.dim_size(2); 103 | const int num_pixels = width * height; 104 | 105 | // Create the output tensor 106 | Tensor* output_tensor = NULL; 107 | OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), 108 | &output_tensor)); 109 | ModifiedPermutohedral mp; 110 | 111 | if (bilateral_) { 112 | float * const kernel_vals = new float[5 * num_pixels]; 113 | compute_bilateral_kernel(kernel_vals, image_tensor, 114 | theta_alpha_, theta_beta_); 115 | mp.init(kernel_vals, 5, num_pixels); 116 | mp.compute(*output_tensor, input_tensor, channels, backwards_); 117 | 118 | delete[] kernel_vals; 119 | } else { 120 | float * const kernel_vals = new float[2 * num_pixels]; 121 | compute_spatial_kernel(kernel_vals, width, height, theta_gamma_); 122 | mp.init(kernel_vals, 2, num_pixels); 123 | mp.compute(*output_tensor, input_tensor, channels, backwards_); 124 | 125 | delete[] kernel_vals; 126 | } 127 | 128 | } 129 | 130 | private: 131 | bool bilateral_; 132 | float theta_alpha_; 133 | float theta_beta_; 134 | float theta_gamma_; 135 | bool backwards_; 136 | }; 137 | 138 | REGISTER_KERNEL_BUILDER(Name("HighDimFilter").Device(DEVICE_CPU), HighDimFilterOp); 139 | -------------------------------------------------------------------------------- /Models/FCN8.py: -------------------------------------------------------------------------------- 1 | 2 | # https://github.com/wkentaro/pytorch-fcn/blob/master/torchfcn/models/fcn32s.py 3 | # fc weights into the 1x1 convs , get_upsampling_weight 4 | 5 | 6 | 7 | from keras.models import * 8 | from keras.layers import * 9 | 10 | 11 | import os 12 | file_path = os.path.dirname( os.path.abspath(__file__) ) 13 | 14 | VGG_Weights_path = file_path+"/../data/vgg16_weights_th_dim_ordering_th_kernels.h5" 15 | 16 | IMAGE_ORDERING = 'channels_first' 17 | 18 | # crop o1 wrt o2 19 | def crop( o1 , o2 , i ): 20 | o_shape2 = Model( i , o2 ).output_shape 21 | outputHeight2 = o_shape2[2] 22 | outputWidth2 = o_shape2[3] 23 | 24 | o_shape1 = Model( i , o1 ).output_shape 25 | outputHeight1 = o_shape1[2] 26 | outputWidth1 = o_shape1[3] 27 | 28 | cx = abs( outputWidth1 - outputWidth2 ) 29 | cy = abs( outputHeight2 - outputHeight1 ) 30 | 31 | if outputWidth1 > outputWidth2: 32 | o1 = Cropping2D( cropping=((0,0) , ( 0 , cx )), data_format=IMAGE_ORDERING )(o1) 33 | else: 34 | o2 = Cropping2D( cropping=((0,0) , ( 0 , cx )), data_format=IMAGE_ORDERING )(o2) 35 | 36 | if outputHeight1 > outputHeight2 : 37 | o1 = Cropping2D( cropping=((0,cy) , ( 0 , 0 )), data_format=IMAGE_ORDERING )(o1) 38 | else: 39 | o2 = Cropping2D( cropping=((0, cy ) , ( 0 , 0 )), data_format=IMAGE_ORDERING )(o2) 40 | 41 | return o1 , o2 42 | 43 | def FCN8( nClasses , input_height=416, input_width=608 , vgg_level=3): 44 | 45 | # assert input_height%32 == 0 46 | # assert input_width%32 == 0 47 | 48 | # https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5 49 | img_input = Input(shape=(3,input_height,input_width)) 50 | 51 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1', data_format=IMAGE_ORDERING )(img_input) 52 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2', data_format=IMAGE_ORDERING )(x) 53 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', data_format=IMAGE_ORDERING )(x) 54 | f1 = x 55 | # Block 
2 56 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1', data_format=IMAGE_ORDERING )(x) 57 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2', data_format=IMAGE_ORDERING )(x) 58 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', data_format=IMAGE_ORDERING )(x) 59 | f2 = x 60 | 61 | # Block 3 62 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1', data_format=IMAGE_ORDERING )(x) 63 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2', data_format=IMAGE_ORDERING )(x) 64 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3', data_format=IMAGE_ORDERING )(x) 65 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', data_format=IMAGE_ORDERING )(x) 66 | f3 = x 67 | 68 | # Block 4 69 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1', data_format=IMAGE_ORDERING )(x) 70 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2', data_format=IMAGE_ORDERING )(x) 71 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3', data_format=IMAGE_ORDERING )(x) 72 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', data_format=IMAGE_ORDERING )(x) 73 | f4 = x 74 | 75 | # Block 5 76 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1', data_format=IMAGE_ORDERING )(x) 77 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2', data_format=IMAGE_ORDERING )(x) 78 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3', data_format=IMAGE_ORDERING )(x) 79 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool', data_format=IMAGE_ORDERING )(x) 80 | f5 = x 81 | 82 | x = Flatten(name='flatten')(x) 83 | x = Dense(4096, activation='relu', name='fc1')(x) 84 | x = Dense(4096, activation='relu', name='fc2')(x) 85 | x = Dense( 1000 , activation='softmax', name='predictions')(x) 86 | 87 | vgg = Model( img_input , x ) 88 | vgg.load_weights(VGG_Weights_path) 89 | 90 | o = f5 91 | 92 | o = ( Conv2D( 4096 , ( 7 , 7 ) , activation='relu' , padding='same', data_format=IMAGE_ORDERING))(o) 93 | o = Dropout(0.5)(o) 94 | o = ( Conv2D( 4096 , ( 1 , 1 ) , activation='relu' , padding='same', data_format=IMAGE_ORDERING))(o) 95 | o = Dropout(0.5)(o) 96 | 97 | o = ( Conv2D( nClasses , ( 1 , 1 ) ,kernel_initializer='he_normal' , data_format=IMAGE_ORDERING))(o) 98 | o = Conv2DTranspose( nClasses , kernel_size=(4,4) , strides=(2,2) , use_bias=False, data_format=IMAGE_ORDERING )(o) 99 | 100 | o2 = f4 101 | o2 = ( Conv2D( nClasses , ( 1 , 1 ) ,kernel_initializer='he_normal' , data_format=IMAGE_ORDERING))(o2) 102 | 103 | o , o2 = crop( o , o2 , img_input ) 104 | 105 | o = Add()([ o , o2 ]) 106 | 107 | o = Conv2DTranspose( nClasses , kernel_size=(4,4) , strides=(2,2) , use_bias=False, data_format=IMAGE_ORDERING )(o) 108 | o2 = f3 109 | o2 = ( Conv2D( nClasses , ( 1 , 1 ) ,kernel_initializer='he_normal' , data_format=IMAGE_ORDERING))(o2) 110 | o2 , o = crop( o2 , o , img_input ) 111 | o = Add()([ o2 , o ]) 112 | 113 | 114 | o = Conv2DTranspose( nClasses , kernel_size=(16,16) , strides=(8,8) , use_bias=False, data_format=IMAGE_ORDERING )(o) 115 | 116 | o_shape = Model(img_input , o ).output_shape 117 | 118 | outputHeight = o_shape[2] 119 | outputWidth = o_shape[3] 120 | 121 | o = (Reshape(( -1 , outputHeight*outputWidth )))(o) 122 | o = (Permute((2, 1)))(o) 123 | o = (Activation('softmax'))(o) 124 | model = Model( 
img_input , o ) 125 | model.outputWidth = outputWidth 126 | model.outputHeight = outputHeight 127 | 128 | return model 129 | 130 | 131 | 132 | if __name__ == '__main__': 133 | m = FCN8( 101 ) 134 | from keras.utils import plot_model 135 | plot_model( m , show_shapes=True , to_file='model.png') 136 | -------------------------------------------------------------------------------- /crfrnn_layer.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2017 Sadeep Jayasumana 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | 25 | import numpy as np 26 | import tensorflow as tf 27 | from keras.engine.topology import Layer 28 | custom_module = tf.load_op_library('./cpp/high_dim_filter.so') 29 | import high_dim_filter_grad # Register gradients for the custom op 30 | 31 | 32 | class CrfRnnLayer(Layer): 33 | """ Implements the CRF-RNN layer described in: 34 | 35 | Conditional Random Fields as Recurrent Neural Networks, 36 | S. Zheng, S. Jayasumana, B. Romera-Paredes, V. Vineet, Z. Su, D. Du, C. Huang and P. 
Torr, 37 | ICCV 2015 38 | """ 39 | 40 | def __init__(self, image_dims, num_classes, 41 | theta_alpha, theta_beta, theta_gamma, 42 | num_iterations, **kwargs): 43 | self.image_dims = image_dims 44 | self.num_classes = num_classes 45 | self.theta_alpha = theta_alpha 46 | self.theta_beta = theta_beta 47 | self.theta_gamma = theta_gamma 48 | self.num_iterations = num_iterations 49 | self.spatial_ker_weights = None 50 | self.bilateral_ker_weights = None 51 | self.compatibility_matrix = None 52 | super(CrfRnnLayer, self).__init__(**kwargs) 53 | 54 | def build(self, input_shape): 55 | # Weights of the spatial kernel 56 | self.spatial_ker_weights = self.add_weight(name='spatial_ker_weights', 57 | shape=(self.num_classes, self.num_classes), 58 | initializer='uniform', 59 | trainable=True) 60 | 61 | # Weights of the bilateral kernel 62 | self.bilateral_ker_weights = self.add_weight(name='bilateral_ker_weights', 63 | shape=(self.num_classes, self.num_classes), 64 | initializer='uniform', 65 | trainable=True) 66 | 67 | # Compatibility matrix 68 | self.compatibility_matrix = self.add_weight(name='compatibility_matrix', 69 | shape=(self.num_classes, self.num_classes), 70 | initializer='uniform', 71 | trainable=True) 72 | 73 | super(CrfRnnLayer, self).build(input_shape) 74 | 75 | def call(self, inputs): 76 | print inputs[0][0,:,:,:] 77 | unaries = tf.transpose(inputs[0][0, :, :, :], perm=(2, 0, 1)) 78 | rgb = tf.transpose(inputs[1][0, :, :, :], perm=(2, 0, 1)) 79 | 80 | c, h, w = self.num_classes, self.image_dims[0], self.image_dims[1] 81 | all_ones = np.ones((c, h, w), dtype=np.float32) 82 | 83 | # Prepare filter normalization coefficients 84 | spatial_norm_vals = custom_module.high_dim_filter(all_ones, rgb, bilateral=False, 85 | theta_gamma=self.theta_gamma) 86 | bilateral_norm_vals = custom_module.high_dim_filter(all_ones, rgb, bilateral=True, 87 | theta_alpha=self.theta_alpha, 88 | theta_beta=self.theta_beta) 89 | q_values = unaries 90 | 91 | for i in range(self.num_iterations): 92 | softmax_out = tf.nn.softmax(q_values, dim=0) 93 | 94 | # Spatial filtering 95 | spatial_out = custom_module.high_dim_filter(softmax_out, rgb, bilateral=False, 96 | theta_gamma=self.theta_gamma) 97 | spatial_out = spatial_out / spatial_norm_vals 98 | 99 | # Bilateral filtering 100 | bilateral_out = custom_module.high_dim_filter(softmax_out, rgb, bilateral=True, 101 | theta_alpha=self.theta_alpha, 102 | theta_beta=self.theta_beta) 103 | bilateral_out = bilateral_out / bilateral_norm_vals 104 | 105 | # Weighting filter outputs 106 | message_passing = (tf.matmul(self.spatial_ker_weights, 107 | tf.reshape(spatial_out, (c, -1))) + 108 | tf.matmul(self.bilateral_ker_weights, 109 | tf.reshape(bilateral_out, (c, -1)))) 110 | 111 | # Compatibility transform 112 | pairwise = tf.matmul(self.compatibility_matrix, message_passing) 113 | 114 | # Adding unary potentials 115 | pairwise = tf.reshape(pairwise, (c, h, w)) 116 | q_values = unaries - pairwise 117 | 118 | return tf.transpose(tf.reshape(q_values, (1, c, h, w)), perm=(0, 2, 3, 1)) 119 | 120 | def compute_output_shape(self, input_shape): 121 | return input_shape 122 | -------------------------------------------------------------------------------- /Models/VGGUnet.py: -------------------------------------------------------------------------------- 1 | from keras.models import * 2 | from keras.layers import * 3 | 4 | import os 5 | file_path = os.path.dirname( os.path.abspath(__file__) ) 6 | 7 | 8 | VGG_Weights_path = file_path+"/../data/vgg16_weights_th_dim_ordering_th_kernels.h5" 
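# The VGG16 weights file is expected in ../data; a minimal sketch of fetching
# it if missing (assumes network access; the URL is the fchollet release
# already cited in the comment below):
#
#   from keras.utils.data_utils import get_file
#   VGG_Weights_path = get_file(
#       'vgg16_weights_th_dim_ordering_th_kernels.h5',
#       origin='https://github.com/fchollet/deep-learning-models/releases/'
#              'download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5')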
9 | 10 | IMAGE_ORDERING = 'channels_first' 11 | 12 | 13 | def VGGUnet( n_classes , input_height=416, input_width=608 , vgg_level=3): 14 | 15 | assert input_height%32 == 0 16 | assert input_width%32 == 0 17 | 18 | # https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5 19 | img_input = Input(shape=(3,input_height,input_width)) 20 | 21 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1', data_format=IMAGE_ORDERING )(img_input) 22 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2', data_format=IMAGE_ORDERING )(x) 23 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', data_format=IMAGE_ORDERING )(x) 24 | f1 = x 25 | # Block 2 26 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1', data_format=IMAGE_ORDERING )(x) 27 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2', data_format=IMAGE_ORDERING )(x) 28 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', data_format=IMAGE_ORDERING )(x) 29 | f2 = x 30 | 31 | # Block 3 32 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1', data_format=IMAGE_ORDERING )(x) 33 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2', data_format=IMAGE_ORDERING )(x) 34 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3', data_format=IMAGE_ORDERING )(x) 35 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', data_format=IMAGE_ORDERING )(x) 36 | f3 = x 37 | 38 | # Block 4 39 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1', data_format=IMAGE_ORDERING )(x) 40 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2', data_format=IMAGE_ORDERING )(x) 41 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3', data_format=IMAGE_ORDERING )(x) 42 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', data_format=IMAGE_ORDERING )(x) 43 | f4 = x 44 | 45 | # Block 5 46 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1', data_format=IMAGE_ORDERING )(x) 47 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2', data_format=IMAGE_ORDERING )(x) 48 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3', data_format=IMAGE_ORDERING )(x) 49 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool', data_format=IMAGE_ORDERING )(x) 50 | f5 = x 51 | 52 | x = Flatten(name='flatten')(x) 53 | x = Dense(4096, activation='relu', name='fc1')(x) 54 | x = Dense(4096, activation='relu', name='fc2')(x) 55 | x = Dense( 1000 , activation='softmax', name='predictions')(x) 56 | 57 | vgg = Model( img_input , x ) 58 | vgg.load_weights(VGG_Weights_path) 59 | 60 | levels = [f1 , f2 , f3 , f4 , f5 ] 61 | 62 | o = f4 63 | 64 | o = ( ZeroPadding2D( (1,1) , data_format=IMAGE_ORDERING ))(o) 65 | o = ( Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) 66 | o = ( BatchNormalization())(o) 67 | 68 | o = (UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) 69 | o = ( concatenate([ o ,f3],axis=1 ) ) 70 | o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) 71 | o = ( Conv2D( 256, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) 72 | o = ( BatchNormalization())(o) 73 | 74 | o = (UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) 75 | o = ( concatenate([o,f2],axis=1 ) ) 76 | o = ( ZeroPadding2D((1,1) , 
data_format=IMAGE_ORDERING ))(o) 77 | o = ( Conv2D( 128 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING ) )(o) 78 | o = ( BatchNormalization())(o) 79 | 80 | o = (UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) 81 | o = ( concatenate([o,f1],axis=1 ) ) 82 | o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) 83 | o = ( Conv2D( 64 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING ))(o) 84 | o = ( BatchNormalization())(o) 85 | 86 | 87 | o = Conv2D( n_classes , (3, 3) , padding='same', data_format=IMAGE_ORDERING )( o ) 88 | o_shape = Model(img_input , o ).output_shape 89 | outputHeight = o_shape[2] 90 | outputWidth = o_shape[3] 91 | 92 | o = (Reshape(( n_classes , outputHeight*outputWidth )))(o) 93 | o = (Permute((2, 1)))(o) 94 | o = (Activation('softmax'))(o) 95 | model = Model( img_input , o ) 96 | model.outputWidth = outputWidth 97 | model.outputHeight = outputHeight 98 | 99 | 100 | 101 | return model 102 | 103 | 104 | def VGGUnet2( n_classes , input_height=416, input_width=608 , vgg_level=3): 105 | 106 | assert input_height%32 == 0 107 | assert input_width%32 == 0 108 | 109 | # https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5 110 | img_input = Input(shape=(3,input_height,input_width)) 111 | 112 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1', data_format=IMAGE_ORDERING )(img_input) 113 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2', data_format=IMAGE_ORDERING )(x) 114 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', data_format=IMAGE_ORDERING )(x) 115 | f1 = x 116 | # Block 2 117 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1', data_format=IMAGE_ORDERING )(x) 118 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2', data_format=IMAGE_ORDERING )(x) 119 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', data_format=IMAGE_ORDERING )(x) 120 | f2 = x 121 | 122 | # Block 3 123 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1', data_format=IMAGE_ORDERING )(x) 124 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2', data_format=IMAGE_ORDERING )(x) 125 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3', data_format=IMAGE_ORDERING )(x) 126 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', data_format=IMAGE_ORDERING )(x) 127 | f3 = x 128 | 129 | # Block 4 130 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1', data_format=IMAGE_ORDERING )(x) 131 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2', data_format=IMAGE_ORDERING )(x) 132 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3', data_format=IMAGE_ORDERING )(x) 133 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', data_format=IMAGE_ORDERING )(x) 134 | f4 = x 135 | 136 | # Block 5 137 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1', data_format=IMAGE_ORDERING )(x) 138 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2', data_format=IMAGE_ORDERING )(x) 139 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3', data_format=IMAGE_ORDERING )(x) 140 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool', data_format=IMAGE_ORDERING )(x) 141 | f5 = x 142 | 143 | x = Flatten(name='flatten')(x) 144 | x = Dense(4096, 
activation='relu', name='fc1')(x) 145 | x = Dense(4096, activation='relu', name='fc2')(x) 146 | x = Dense( 1000 , activation='softmax', name='predictions')(x)  # 1000 classes, to match the VGG16 weights loaded below 147 | 148 | vgg = Model( img_input , x ) 149 | vgg.load_weights(VGG_Weights_path) 150 | 151 | levels = [f1 , f2 , f3 , f4 , f5 ] 152 | 153 | o = f4 154 | 155 | o = ( ZeroPadding2D( (1,1) , data_format=IMAGE_ORDERING ))(o) 156 | o = ( Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) 157 | o = ( BatchNormalization())(o) 158 | 159 | o = (UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) 160 | o = ( concatenate([ o ,f3],axis=1 ) ) 161 | o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) 162 | o = ( Conv2D( 256, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) 163 | o = ( BatchNormalization())(o) 164 | 165 | o = (UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) 166 | o = ( concatenate([o,f2],axis=1 ) ) 167 | o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) 168 | o = ( Conv2D( 128 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING ) )(o) 169 | o = ( BatchNormalization())(o) 170 | 171 | o = (UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) 172 | # o = ( concatenate([o,f1],axis=1 ) ) 173 | o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) 174 | o = ( Conv2D( 64 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING ))(o) 175 | o = ( BatchNormalization())(o) 176 | 177 | 178 | o = Conv2D( n_classes , (3, 3) , padding='same', data_format=IMAGE_ORDERING )( o ) 179 | o_shape = Model(img_input , o ).output_shape 180 | outputHeight = o_shape[2] 181 | outputWidth = o_shape[3] 182 | 183 | o = (Reshape(( n_classes , outputHeight*outputWidth )))(o) 184 | o = (Permute((2, 1)))(o) 185 | o = (Activation('softmax'))(o) 186 | model = Model( img_input , o ) 187 | model.outputWidth = outputWidth 188 | model.outputHeight = outputHeight 189 | 190 | 191 | 192 | return model 193 | 194 | -------------------------------------------------------------------------------- /cpp/modified_permutohedral.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file contains a modified version of the "permutohedral.cpp" code 3 | available at http://graphics.stanford.edu/projects/drf/. Copyright notice of 4 | the original file is included below: 5 | 6 | Copyright (c) 2013, Philipp Krähenbühl 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | * Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | * Neither the name of the Stanford University nor the 17 | names of its contributors may be used to endorse or promote products 18 | derived from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY 21 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED.
IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY 24 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 27 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | 32 | //#include "stdafx.h" 33 | #include "modified_permutohedral.h" 34 | 35 | #ifdef __SSE__ 36 | // SSE Permutoheral lattice 37 | # define SSE_PERMUTOHEDRAL 38 | #endif 39 | 40 | #if defined(SSE_PERMUTOHEDRAL) 41 | # include 42 | # include 43 | # ifdef __SSE4_1__ 44 | # include 45 | # endif 46 | #endif 47 | 48 | 49 | using namespace tensorflow; 50 | 51 | /************************************************/ 52 | /*** Hash Table ***/ 53 | /************************************************/ 54 | 55 | class HashTableCopy{ 56 | protected: 57 | size_t key_size_, filled_, capacity_; 58 | std::vector< short > keys_; 59 | std::vector< int > table_; 60 | void grow(){ 61 | // Create the new memory and copy the values in 62 | int old_capacity = capacity_; 63 | capacity_ *= 2; 64 | std::vector old_keys( (old_capacity+10)*key_size_ ); 65 | std::copy( keys_.begin(), keys_.end(), old_keys.begin() ); 66 | std::vector old_table( capacity_, -1 ); 67 | 68 | // Swap the memory 69 | table_.swap( old_table ); 70 | keys_.swap( old_keys ); 71 | 72 | // Reinsert each element 73 | for( int i=0; i= 0){ 75 | int e = old_table[i]; 76 | size_t h = hash( getKey(e) ) % capacity_; 77 | for(; table_[h] >= 0; h = h= capacity_) grow(); 101 | // Get the hash value 102 | size_t h = hash( k ) % capacity_; 103 | // Find the element with he right key, using linear probing 104 | while(1){ 105 | int e = table_[h]; 106 | if (e==-1){ 107 | if (create){ 108 | // Insert a new key and return the new id 109 | for( size_t i=0; i0; j-- ){ 204 | __m128 cf = f[j-1]*scale_factor[j-1]; 205 | elevated[j] = sm - _mm_set1_ps(j)*cf; 206 | sm += cf; 207 | } 208 | elevated[0] = sm; 209 | 210 | // Find the closest 0-colored simplex through rounding 211 | __m128 sum = Zero; 212 | for( int i=0; i<=d_; i++ ){ 213 | __m128 v = invdplus1 * elevated[i]; 214 | #ifdef __SSE4_1__ 215 | v = _mm_round_ps( v, _MM_FROUND_TO_NEAREST_INT ); 216 | #else 217 | v = _mm_cvtepi32_ps( _mm_cvtps_epi32( v ) ); 218 | #endif 219 | rem0[i] = v*dplus1; 220 | sum += v; 221 | } 222 | 223 | // Find the simplex we are in and store it in rank (where rank describes what position coorinate i has in the sorted order of the features values) 224 | for( int i=0; i<=d_; i++ ) 225 | rank[i] = Zero; 226 | for( int i=0; i0; j-- ){ 367 | float cf = f[j-1]*scale_factor[j-1]; 368 | elevated[j] = sm - j*cf; 369 | sm += cf; 370 | } 371 | elevated[0] = sm; 372 | 373 | // Find the closest 0-colored simplex through rounding 374 | float down_factor = 1.0f / (d_+1); 375 | float up_factor = (d_+1); 376 | int sum = 0; 377 | for( int i=0; i<=d_; i++ ){ 378 | //int rd1 = round( down_factor * elevated[i]); 379 | int rd2; 380 | float v = down_factor * elevated[i]; 381 | float up = ceilf(v)*up_factor; 382 | float down = floorf(v)*up_factor; 383 | if (up - elevated[i] < elevated[i] - down) rd2 = (short)up; 384 | else rd2 = (short)down; 385 | 386 | //if(rd1!=rd2) 387 | // break; 388 | 389 | rem0[i] = rd2; 390 | sum += rd2*down_factor; 391 | } 392 | 393 | // Find the simplex we are in 
and store it in rank (where rank describes what position coorinate i has in the sorted order of the features values) 394 | for( int i=0; i<=d_; i++ ) 395 | rank[i] = 0; 396 | for( int i=0; i d_ ){ 413 | rank[i] -= d_+1; 414 | rem0[i] -= d_+1; 415 | } 416 | } 417 | 418 | // Compute the barycentric coordinates (p.10 in [Adams etal 2010]) 419 | for( int i=0; i<=d_+1; i++ ) 420 | barycentric[i] = 0; 421 | for( int i=0; i<=d_; i++ ){ 422 | float v = (elevated[i] - rem0[i])*down_factor; 423 | barycentric[d_-rank[i] ] += v; 424 | barycentric[d_-rank[i]+1] -= v; 425 | } 426 | // Wrap around 427 | barycentric[0] += 1.0 + barycentric[d_+1]; 428 | 429 | // Compute all vertices and their offset 430 | for( int remainder=0; remainder<=d_; remainder++ ){ 431 | for( int i=0; i 0 (used for blurring) 481 | float * values = new float[ (M_+2)*value_size ]; 482 | float * new_values = new float[ (M_+2)*value_size ]; 483 | 484 | for( int i=0; i<(M_+2)*value_size; i++ ) 485 | values[i] = new_values[i] = 0; 486 | 487 | auto in = in_tensor.flat(); 488 | // Splatting 489 | for( int i=0; i=0; reverse?j--:j++ ){ 499 | for( int i=0; i(); 515 | 516 | // Slicing 517 | for( int i=0; i(); 542 | auto out = out_tensor.flat(); 543 | 544 | const int sse_value_size = (value_size-1)*sizeof(float) / sizeof(__m128) + 1; 545 | // Shift all values by 1 such that -1 -> 0 (used for blurring) 546 | __m128 * sse_val = (__m128*) _mm_malloc( sse_value_size*sizeof(__m128), 16 ); 547 | __m128 * values = (__m128*) _mm_malloc( (M_+2)*sse_value_size*sizeof(__m128), 16 ); 548 | __m128 * new_values = (__m128*) _mm_malloc( (M_+2)*sse_value_size*sizeof(__m128), 16 ); 549 | 550 | __m128 Zero = _mm_set1_ps( 0 ); 551 | 552 | for( int i=0; i<(M_+2)*sse_value_size; i++ ) 553 | values[i] = new_values[i] = Zero; 554 | for( int i=0; i=0; reverse?j--:j++ ){ 578 | for( int i=0; i
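// The compute paths above follow the standard permutohedral pipeline: splat
// input values onto lattice vertices with barycentric weights, blur along
// each of the d_+1 lattice directions, and slice the filtered values back out
// at the original positions. A minimal sketch of one blur sweep, following
// Krähenbühl's original permutohedral.cpp and assuming the neighbour table
// (blur_neighbors_, with fields n1/n2) built in init(); indices are offset by
// one because slot 0 holds the zero value used for out-of-lattice reads, and
// `j` is the outer loop index over the lattice directions:
//
//   for (int i = 0; i < M_; ++i) {
//     const float* old_val = values + (i + 1) * value_size;
//     float*       new_val = new_values + (i + 1) * value_size;
//     const float* n1_val  = values + (blur_neighbors_[j * M_ + i].n1 + 1) * value_size;
//     const float* n2_val  = values + (blur_neighbors_[j * M_ + i].n2 + 1) * value_size;
//     for (int k = 0; k < value_size; ++k)
//       new_val[k] = old_val[k] + 0.5f * (n1_val[k] + n2_val[k]);
//   }
//   std::swap(values, new_values);   // ping-pong the buffers between directions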