├── Models
│   ├── PSPNet.py
│   ├── __init__.py
│   ├── Utils.py
│   ├── Unet.py
│   ├── Segnet_transpose.py
│   ├── FCN32.py
│   ├── Segnet.py
│   ├── Segnet_crf_res.py
│   ├── Segnet_res.py
│   ├── VGGSegnet.py
│   ├── Segnet_crf_res_l1_v1.py
│   ├── Segnet_crf_res_l1_v2.py
│   ├── FCN8.py
│   └── VGGUnet.py
├── github_imgs
│   ├── data_tree.png
│   ├── segnet_crfasrnn.png
│   ├── run_segnet_crfasrnn.png
│   └── training_segnet_crfasrnn.png
├── cpp
│   ├── compile.sh
│   ├── modified_permutohedral.h
│   ├── high_dim_filter.cc
│   └── modified_permutohedral.cc
├── visualizeDataset.py
├── segnet_crfasrnn_env.yml
├── high_dim_filter_grad.py
├── predict.py
├── compute_test_results.m
├── LoadBatches.py
├── util.py
├── README.md
├── train_modifiedLoss.py
├── train.py
├── crfrnn_model.py
└── crfrnn_layer.py

/Models/PSPNet.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # todo
--------------------------------------------------------------------------------
/github_imgs/data_tree.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/HEAD/github_imgs/data_tree.png
--------------------------------------------------------------------------------
/github_imgs/segnet_crfasrnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/HEAD/github_imgs/segnet_crfasrnn.png
--------------------------------------------------------------------------------
/Models/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import VGGUnet
 3 | import VGGSegnet
 4 | import FCN8
 5 | import FCN32
 6 | import Segnet
 7 | 
 8 | 
 9 | 
--------------------------------------------------------------------------------
/github_imgs/run_segnet_crfasrnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/HEAD/github_imgs/run_segnet_crfasrnn.png
--------------------------------------------------------------------------------
/github_imgs/training_segnet_crfasrnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/HEAD/github_imgs/training_segnet_crfasrnn.png
--------------------------------------------------------------------------------
/Models/Utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | import h5py
 5 | import os
 6 | 
 7 | 
 8 | def loadWeightsPartial( model , weights_path , n_layers ):
 9 |     # copy the weights of the first n_layers layers from a Keras-style HDF5 file
10 |     f = h5py.File(weights_path, 'r')  # open read-only
11 |     for k in range(f.attrs['nb_layers']):
12 |         if k >= n_layers :
13 |             break
14 |         g = f['layer_{}'.format(k)]
15 |         weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
16 |         model.layers[k].set_weights(weights)
17 |     f.close()
18 | 
19 | 
--------------------------------------------------------------------------------
/cpp/compile.sh:
--------------------------------------------------------------------------------
 1 | # -----------------------------------------------------------------------------------------------------------------------
 2 | # * Activate your Tensorflow virtualenv before running this script.
 3 | # * This script assumes gcc version >=5. If you have an older version, remove the -D_GLIBCXX_USE_CXX11_ABI=0 flag below.
 4 | # * On Mac OS X, the additional flag "-undefined dynamic_lookup" is required.
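# *   For illustration only (an added sketch, not a line from the original script):
# *   with that flag added, the g++ invocation below would become, on Mac OS X:
# *       g++ -std=c++11 -undefined dynamic_lookup -shared high_dim_filter.cc modified_permutohedral.cc -o high_dim_filter.so -fPIC -I$TF_INC -I$TF_INC/external/nsync/public -L$TF_LIB -ltensorflow_framework -O2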
5 | # * If this script fails, please refer to https://www.tensorflow.org/extend/adding_an_op#build_the_op_library for help. 6 | # ----------------------------------------------------------------------------------------------------------------------- 7 | 8 | 9 | TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 10 | TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 11 | 12 | 13 | g++ -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=0 -shared high_dim_filter.cc modified_permutohedral.cc -o high_dim_filter.so -fPIC -I$TF_INC -I$TF_INC/external/nsync/public -L$TF_LIB -ltensorflow_framework -O2 14 | 15 | -------------------------------------------------------------------------------- /visualizeDataset.py: -------------------------------------------------------------------------------- 1 | 2 | import glob 3 | import numpy as np 4 | import cv2 5 | import random 6 | import argparse 7 | 8 | def imageSegmentationGenerator( images_path , segs_path , n_classes ): 9 | 10 | assert images_path[-1] == '/' 11 | assert segs_path[-1] == '/' 12 | 13 | images = glob.glob( images_path + "*.jpg" ) + glob.glob( images_path + "*.png" ) + glob.glob( images_path + "*.jpeg" ) 14 | images.sort() 15 | segmentations = glob.glob( segs_path + "*.jpg" ) + glob.glob( segs_path + "*.png" ) + glob.glob( segs_path + "*.jpeg" ) 16 | segmentations.sort() 17 | 18 | colors = [ ( random.randint(0,255),random.randint(0,255),random.randint(0,255) ) for _ in range(n_classes) ] 19 | 20 | assert len( images ) == len(segmentations) 21 | 22 | for im_fn , seg_fn in zip(images,segmentations): 23 | assert( im_fn.split('/')[-1] == seg_fn.split('/')[-1] ) 24 | 25 | img = cv2.imread( im_fn ) 26 | seg = cv2.imread( seg_fn ) 27 | print np.unique( seg ) 28 | 29 | seg_img = np.zeros_like( seg ) 30 | 31 | for c in range(n_classes): 32 | seg_img[:,:,0] += ( (seg[:,:,0] == c )*( colors[c][0] )).astype('uint8') 33 | seg_img[:,:,1] += ((seg[:,:,0] == c )*( colors[c][1] )).astype('uint8') 34 | seg_img[:,:,2] += ((seg[:,:,0] == c )*( colors[c][2] )).astype('uint8') 35 | 36 | cv2.imshow("img" , img ) 37 | cv2.imshow("seg_img" , seg_img ) 38 | cv2.waitKey() 39 | 40 | 41 | 42 | parser = argparse.ArgumentParser() 43 | parser.add_argument("--images", type = str ) 44 | parser.add_argument("--annotations", type = str ) 45 | parser.add_argument("--n_classes", type=int ) 46 | args = parser.parse_args() 47 | 48 | 49 | imageSegmentationGenerator(args.images , args.annotations , args.n_classes ) 50 | -------------------------------------------------------------------------------- /segnet_crfasrnn_env.yml: -------------------------------------------------------------------------------- 1 | name: segnet_crfasrnn 2 | channels: 3 | - menpo 4 | - soumith 5 | - defaults 6 | dependencies: 7 | - blas=1.0=mkl 8 | - ca-certificates=2018.03.07=0 9 | - cairo=1.14.12=h8948797_3 10 | - certifi=2018.10.15=py27_0 11 | - fontconfig=2.13.0=h9420a91_0 12 | - freetype=2.9.1=h8a8886c_1 13 | - glib=2.56.2=hd408876_0 14 | - h5py=2.7.0=np111py27_0 15 | - harfbuzz=0.9.39=1 16 | - hdf5=1.8.17=2 17 | - icu=58.2=h9c2bf20_1 18 | - intel-openmp=2019.1=144 19 | - jpeg=9b=h024ee3a_2 20 | - libedit=3.1.20170329=h6b74fdf_2 21 | - libffi=3.2.1=hd88cf55_4 22 | - libgcc-ng=8.2.0=hdf63c60_1 23 | - libgfortran-ng=7.3.0=hdf63c60_0 24 | - libpng=1.6.35=hbc83047_0 25 | - libstdcxx-ng=8.2.0=hdf63c60_1 26 | - libtiff=4.0.9=he85c1e1_2 27 | - libuuid=1.0.3=h1bed415_2 28 | - libxcb=1.13=h1bed415_1 29 | - libxml2=2.9.8=h26e45fe_1 30 | - linecache2=1.0.0=py27_0 31 | - 
mkl=2018.0.3=1 32 | - mkl_fft=1.0.6=py27h7dd41cf_0 33 | - mkl_random=1.0.1=py27h4414c95_1 34 | - ncurses=6.1=he6710b0_1 35 | - numpy=1.11.3=py27h3dfced4_4 36 | - numpy-base=1.15.4=py27h81de0dd_0 37 | - openssl=1.1.1a=h7b6447c_0 38 | - pcre=8.42=h439df22_0 39 | - pip=18.1=py27_0 40 | - pixman=0.34.0=hceecf20_3 41 | - python=2.7.15=h9bab390_4 42 | - readline=7.0=h7b6447c_5 43 | - setuptools=40.6.2=py27_0 44 | - six=1.11.0=py27_1 45 | - sqlite=3.25.3=h7b6447c_0 46 | - tk=8.6.8=hbc83047_0 47 | - traceback2=1.4.0=py27_0 48 | - unittest2=1.1.0=py27_0 49 | - wheel=0.32.3=py27_0 50 | - xz=5.2.4=h14c3975_4 51 | - zlib=1.2.11=h7b6447c_3 52 | - opencv3=3.2.0=np111py27_0 53 | - pip: 54 | - backports.weakref==1.0.post1 55 | - bleach==1.5.0 56 | - enum34==1.1.6 57 | - funcsigs==1.0.2 58 | - futures==3.2.0 59 | - html5lib==0.9999999 60 | - keras==2.2.4 61 | - keras-applications==1.0.6 62 | - keras-preprocessing==1.0.5 63 | - markdown==3.0.1 64 | - mock==2.0.0 65 | - pbr==5.1.1 66 | - protobuf==3.6.1 67 | - pyyaml==3.13 68 | - scipy==1.1.0 69 | - tensorflow-gpu==1.4.0 70 | - tensorflow-tensorboard==0.4.0 71 | - theano==1.0.3 72 | - werkzeug==0.14.1 73 | 74 | -------------------------------------------------------------------------------- /high_dim_filter_grad.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2017 Sadeep Jayasumana 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | 25 | import tensorflow as tf 26 | from tensorflow.python.framework import ops 27 | custom_module = tf.load_op_library('./cpp/high_dim_filter.so') 28 | 29 | 30 | @ops.RegisterGradient("HighDimFilter") 31 | def _high_dim_filter_grad(op, grad): 32 | """ Gradients for the HighDimFilter op. We only need to calculate the gradients 33 | w.r.t. the first input (unaries) as we never need to backprop errors to the 34 | second input (RGB values of the image). 35 | 36 | Args: 37 | op: The `high_dim_filter` operation that we are differentiating. 38 | grad: Gradients with respect to the output of the `high_dim_filter` op. 39 | 40 | Returns: 41 | Gradients with respect to the input of `high_dim_filter`. 
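    Note (explanatory addition, not from the original op documentation): the
    permutohedral filtering performed by this op is linear in its unary input,
    so its gradient can be computed by running the same filter over the
    incoming gradients with `backwards=True`, which is what the code below does.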
42 | """ 43 | 44 | rgb = op.inputs[1] 45 | grad_vals = custom_module.high_dim_filter(grad, rgb, 46 | bilateral=op.get_attr("bilateral"), 47 | theta_alpha=op.get_attr("theta_alpha"), 48 | theta_beta=op.get_attr("theta_beta"), 49 | theta_gamma=op.get_attr("theta_gamma"), 50 | backwards=True) 51 | 52 | return [grad_vals, tf.zeros_like(rgb)] 53 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import Models , LoadBatches 3 | from keras.models import load_model 4 | import glob 5 | import cv2 6 | import numpy as np 7 | import random 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--save_weights_path", type = str ) 11 | parser.add_argument("--epoch_number", type = int, default = 5 ) 12 | parser.add_argument("--test_images", type = str , default = "") 13 | parser.add_argument("--output_path", type = str , default = "") 14 | parser.add_argument("--input_height", type=int , default = 224 ) 15 | parser.add_argument("--input_width", type=int , default = 224 ) 16 | parser.add_argument("--model_name", type = str , default = "") 17 | parser.add_argument("--n_classes", type=int ) 18 | 19 | args = parser.parse_args() 20 | 21 | n_classes = args.n_classes 22 | model_name = args.model_name 23 | images_path = args.test_images 24 | input_width = args.input_width 25 | input_height = args.input_height 26 | epoch_number = args.epoch_number 27 | 28 | modelFns = { 'vgg_segnet':Models.VGGSegnet.VGGSegnet , 'vgg_unet':Models.VGGUnet.VGGUnet , 'vgg_unet2':Models.VGGUnet.VGGUnet2 , 'fcn8':Models.FCN8.FCN8 , 'fcn32':Models.FCN32.FCN32, 'segnet':Models.Segnet.segnet, 'segnet_transposed':Models.Segnet_transpose.segnet_transposed, 'segnet_res':Models.Segnet_res.segnet_res, 'segnet_res_crf':Models.Segnet_crf_res.segnet_crf_res} 29 | modelFN = modelFns[ model_name ] 30 | 31 | m = modelFN( n_classes , input_height=input_height, input_width=input_width ) 32 | m.load_weights( args.save_weights_path) #+ "." 
+ str( epoch_number ) ) 33 | m.compile(loss='categorical_crossentropy', 34 | optimizer= 'sgd' , 35 | metrics=['accuracy']) 36 | 37 | 38 | output_height = m.outputHeight 39 | output_width = m.outputWidth 40 | 41 | images = glob.glob( images_path + "*.jpg" ) + glob.glob( images_path + "*.png" ) + glob.glob( images_path + "*.jpeg" ) 42 | images.sort() 43 | 44 | colors = [ ( random.randint(0,255),random.randint(0,255),random.randint(0,255) ) for _ in range(n_classes) ] 45 | 46 | for imgName in images: 47 | print (imgName) 48 | outName = imgName.replace( images_path , args.output_path ) 49 | X = LoadBatches.getImageArr(imgName , args.input_width , args.input_height ) 50 | pr = m.predict( np.array([X]) )[0] 51 | pr = pr.reshape(( output_height , output_width , n_classes ) ).argmax( axis=2 ) 52 | #seg_img = np.zeros( ( output_height , output_width , 3 ) ) 53 | #for c in range(n_classes): 54 | # seg_img[:,:,0] += ( (pr[:,: ] == c )*( colors[c][0] )).astype('uint8') 55 | # seg_img[:,:,1] += ((pr[:,: ] == c )*( colors[c][1] )).astype('uint8') 56 | # seg_img[:,:,2] += ((pr[:,: ] == c )*( colors[c][2] )).astype('uint8') 57 | #seg_img = cv2.resize(seg_img , (input_width , input_height )) 58 | #cv2.waitKey(0) 59 | cv2.imwrite( outName , pr ) 60 | 61 | -------------------------------------------------------------------------------- /Models/Unet.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # todo upgrade to keras 2.0 4 | 5 | from keras.models import Sequential 6 | from keras.layers import Reshape 7 | from keras.models import Model 8 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Merge, Permute 9 | from keras.layers import Input, merge, Convolution2D, MaxPooling2D, UpSampling2D, Reshape, core, Dropout 10 | from keras.layers.normalization import BatchNormalization 11 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D , ZeroPadding3D , UpSampling3D 12 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 13 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 14 | from keras.layers.recurrent import LSTM 15 | from keras.layers.advanced_activations import LeakyReLU 16 | from keras.optimizers import Adam , SGD 17 | from keras.layers.embeddings import Embedding 18 | from keras.utils import np_utils 19 | from keras.regularizers import ActivityRegularizer 20 | from keras import backend as K 21 | 22 | 23 | 24 | 25 | 26 | def Unet (nClasses , optimizer=None , input_width=360 , input_height=480 , nChannels=1 ): 27 | 28 | inputs = Input((nChannels, input_height, input_width)) 29 | conv1 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(inputs) 30 | conv1 = Dropout(0.2)(conv1) 31 | conv1 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(conv1) 32 | pool1 = MaxPooling2D(pool_size=(2, 2))(conv1) 33 | 34 | conv2 = Convolution2D(64, 3, 3, activation='relu', border_mode='same')(pool1) 35 | conv2 = Dropout(0.2)(conv2) 36 | conv2 = Convolution2D(64, 3, 3, activation='relu', border_mode='same')(conv2) 37 | pool2 = MaxPooling2D(pool_size=(2, 2))(conv2) 38 | 39 | conv3 = Convolution2D(128, 3, 3, activation='relu', border_mode='same')(pool2) 40 | conv3 = Dropout(0.2)(conv3) 41 | conv3 = Convolution2D(128, 3, 3, activation='relu', border_mode='same')(conv3) 42 | 43 | up1 = merge([UpSampling2D(size=(2, 2))(conv3), conv2], mode='concat', concat_axis=1) 44 | conv4 = Convolution2D(64, 3, 3, activation='relu', 
border_mode='same')(up1) 45 | conv4 = Dropout(0.2)(conv4) 46 | conv4 = Convolution2D(64, 3, 3, activation='relu', border_mode='same')(conv4) 47 | 48 | up2 = merge([UpSampling2D(size=(2, 2))(conv4), conv1], mode='concat', concat_axis=1) 49 | conv5 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(up2) 50 | conv5 = Dropout(0.2)(conv5) 51 | conv5 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(conv5) 52 | 53 | conv6 = Convolution2D(nClasses, 1, 1, activation='relu',border_mode='same')(conv5) 54 | conv6 = core.Reshape((nClasses,input_height*input_width))(conv6) 55 | conv6 = core.Permute((2,1))(conv6) 56 | 57 | 58 | conv7 = core.Activation('softmax')(conv6) 59 | 60 | model = Model(input=inputs, output=conv7) 61 | 62 | if not optimizer is None: 63 | model.compile(loss="categorical_crossentropy", optimizer= optimizer , metrics=['accuracy'] ) 64 | 65 | return model 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /compute_test_results.m: -------------------------------------------------------------------------------- 1 | % if you use another test/train set change number of classes and the 2 | % unlabeled index as well as number of iterations (needs to be equal to the test set size) 3 | 4 | gtPath = './data/dataset1/annotations_prepped_test'; % path to your ground truth images 5 | predPath = './data/predictions/'; %path to your predictions (you get them after you implement saving images in the test_segmentation_camvid.py script - or you write your own) 6 | groundTruths = dir(gtPath); 7 | skip = 2; % first two are '.' and '..' so skip them 8 | predictions = dir(predPath); 9 | 10 | iter = 101; 11 | 12 | numClasses = 11; 13 | unknown_class = 12; 14 | 15 | img_height=50; 16 | img_width=50; 17 | 18 | totalpoints = 0; 19 | cf = zeros(iter,numClasses,numClasses); 20 | globalacc = 0; 21 | 22 | for i = 1:iter 23 | display(num2str(i)); 24 | 25 | %strcat(predPath, '/', predictions(i + skip).name) 26 | %strcat(gtPath, '/', groundTruths(i + skip).name) 27 | %waitforbuttonpress() 28 | 29 | pred = imread(strcat(predPath, '/', predictions(i + skip).name)); % set this to iterate through your segnet prediction images 30 | pred = imresize(pred, [img_height img_width]); 31 | 32 | pred = pred + 1; % i added this cause i labeled my classes from 0 to 11 33 | annot = imresize(imread(strcat(gtPath, '/', groundTruths(i + skip).name)),[img_height img_width]); % set this to iterate through your ground truth annotations 34 | annot = annot + 1; % i added this cause i labeled my classes from 0 to 11 -> so in that case the next line will find every pixel labeled with unknown_class=12 35 | imshow(pred) 36 | imshow(annot) 37 | %waitforbuttonpress() 38 | 39 | pixels_ignore = annot == unknown_class; 40 | pred(pixels_ignore) = 0; 41 | annot(pixels_ignore) = 0; 42 | 43 | totalpoints = totalpoints + sum(annot(:)>0); 44 | 45 | % global and class accuracy computation 46 | for j = 1:numClasses 47 | for k = 1:numClasses 48 | c1 = annot == j; 49 | c1p = pred == k; 50 | index = gather(c1 .* c1p); 51 | cf(i,j,k) = cf(i,j,k) + sum(index(:)); 52 | end 53 | c1 = annot == j; 54 | c1p = pred == j; 55 | index = gather(c1 .* c1p); 56 | globalacc = globalacc + sum(index(:)); 57 | 58 | end 59 | end 60 | 61 | cf = sum(cf,1); 62 | cf = squeeze(cf); 63 | 64 | % Compute confusion matrix 65 | conf = zeros(numClasses); 66 | for i = 1:numClasses 67 | if i ~= unknown_class && sum(cf(i,:)) > 0 68 | conf(i,:) = cf(i,:)/sum(cf(i,:)); 69 | end 70 | end 71 | globalacc = 
sum(globalacc)/sum(totalpoints); 72 | 73 | % Compute intersection over union for each class and its mean 74 | intoverunion = zeros(numClasses,1); 75 | for i = 1:numClasses 76 | if i ~= unknown_class && sum(conf(i,:)) > 0 77 | intoverunion(i) = (cf(i,i))/(sum(cf(i,:))+sum(cf(:,i))-cf(i,i)); 78 | end 79 | end 80 | 81 | display([' Global acc = ' num2str(globalacc*100) '% Class average acc = ' num2str(100*sum(diag(conf))/(numClasses)) '% Mean Int over Union = ' num2str(100*sum(intoverunion)/(numClasses)) '%']); 82 | -------------------------------------------------------------------------------- /LoadBatches.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import cv2 4 | import glob 5 | import itertools 6 | 7 | 8 | def getImageArr( path , width , height , imgNorm="sub_mean" , odering='channels_first' ): 9 | 10 | try: 11 | img = cv2.imread(path, 1) 12 | 13 | if imgNorm == "sub_and_divide": 14 | img = np.float32(cv2.resize(img, ( width , height ))) / 127.5 - 1 15 | elif imgNorm == "sub_mean": 16 | img = cv2.resize(img, ( width , height )) 17 | img = img.astype(np.float32) 18 | img[:,:,0] -= 103.939 19 | img[:,:,1] -= 116.779 20 | img[:,:,2] -= 123.68 21 | elif imgNorm == "divide": 22 | img = cv2.resize(img, ( width , height )) 23 | img = img.astype(np.float32) 24 | img = img/255.0 25 | 26 | #if odering == 'channels_first': 27 | # img = np.rollaxis(img, 2, 0) 28 | return img 29 | except Exception, e: 30 | print path , e 31 | img = np.zeros(( height , width , 3 )) 32 | if odering == 'channels_first': 33 | img = np.rollaxis(img, 2, 0) 34 | return img 35 | 36 | 37 | 38 | 39 | 40 | def getSegmentationArr( path , nClasses , width , height ): 41 | 42 | seg_labels = np.zeros(( height , width , nClasses )) 43 | try: 44 | img = cv2.imread(path, 1) 45 | img = cv2.resize(img, ( width , height )) 46 | img = img[:, : , 0] 47 | 48 | for c in range(nClasses): 49 | seg_labels[: , : , c ] = (img == c ).astype(int) 50 | 51 | except Exception, e: 52 | print e 53 | 54 | seg_labels = np.reshape(seg_labels, ( width*height , nClasses )) 55 | return seg_labels 56 | 57 | 58 | 59 | def imageSegmentationGenerator( images_path , segs_path , batch_size, n_classes , input_height , input_width , output_height , output_width ): 60 | 61 | assert images_path[-1] == '/' 62 | assert segs_path[-1] == '/' 63 | 64 | images = glob.glob( images_path + "*.jpg" ) + glob.glob( images_path + "*.png" ) + glob.glob( images_path + "*.jpeg" ) 65 | images.sort() 66 | segmentations = glob.glob( segs_path + "*.jpg" ) + glob.glob( segs_path + "*.png" ) + glob.glob( segs_path + "*.jpeg" ) 67 | segmentations.sort() 68 | 69 | assert len( images ) == len(segmentations) 70 | for im , seg in zip(images,segmentations): 71 | assert( im.split('/')[-1].split(".")[0] == seg.split('/')[-1].split(".")[0] ) 72 | 73 | zipped = itertools.cycle( zip(images,segmentations) ) 74 | 75 | while True: 76 | X = [] 77 | Y = [] 78 | for _ in range( batch_size) : 79 | im , seg = zipped.next() 80 | X.append( getImageArr(im , input_width , input_height ) ) 81 | Y.append( getSegmentationArr( seg , n_classes , output_width , output_height ) ) 82 | 83 | 84 | yield np.array(X) , np.array(Y) 85 | 86 | 87 | 88 | # import Models , LoadBatches 89 | # G = LoadBatches.imageSegmentationGenerator( "data/clothes_seg/prepped/images_prepped_train/" , "data/clothes_seg/prepped/annotations_prepped_train/" , 1, 10 , 800 , 550 , 400 , 272 ) 90 | # G2 = LoadBatches.imageSegmentationGenerator( 
"data/clothes_seg/prepped/images_prepped_test/" , "data/clothes_seg/prepped/annotations_prepped_test/" , 1, 10 , 800 , 550 , 400 , 272 ) 91 | 92 | # m = Models.VGGSegnet.VGGSegnet( 10 , use_vgg_weights=True , optimizer='adadelta' , input_image_size=( 800 , 550 ) ) 93 | # m.fit_generator( G , 512 , nb_epoch=10 ) 94 | 95 | 96 | -------------------------------------------------------------------------------- /cpp/modified_permutohedral.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file contains a modified version of the "permutohedral.h" code 3 | available at http://graphics.stanford.edu/projects/drf/. Copyright notice of 4 | the original file is included below: 5 | 6 | Copyright (c) 2013, Philipp Krähenbühl 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | * Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | * Neither the name of the Stanford University nor the 17 | names of its contributors may be used to endorse or promote products 18 | derived from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY 21 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY 24 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 27 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | */ 31 | 32 | #ifndef MODIFIED_PERMUTOHEDRAL_HPP_ 33 | #define MODIFIED_PERMUTOHEDRAL_HPP_ 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | #include "tensorflow/core/framework/tensor.h" 43 | 44 | using namespace tensorflow; 45 | 46 | /************************************************/ 47 | /*** ModifiedPermutohedral Lattice ***/ 48 | /************************************************/ 49 | class ModifiedPermutohedral { 50 | protected: 51 | struct Neighbors { 52 | int n1, n2; 53 | 54 | Neighbors(int n1 = 0, int n2 = 0) : n1(n1), n2(n2) { 55 | } 56 | }; 57 | 58 | std::vector offset_, rank_; 59 | std::vector barycentric_; 60 | std::vector blur_neighbors_; 61 | // Number of elements, size of sparse discretized space, dimension of features 62 | int N_, M_, d_; 63 | 64 | void sseCompute(Tensor &out, const Tensor &in, int value_size, 65 | bool reverse = false, bool add = false) const; 66 | 67 | void seqCompute(Tensor &out, const Tensor &in, int value_size, 68 | bool reverse = false, bool add = false) const; 69 | 70 | public: 71 | ModifiedPermutohedral(); 72 | 73 | void init(const float *features, int num_dimensions, int num_points); 74 | 75 | void compute(Tensor &out, const Tensor &in, int value_size, 76 | bool reverse = false, bool add = false) const; 77 | }; 78 | 79 | #endif //_MODIFIED_PERMUTOHEDRAL_HPP_ 80 | -------------------------------------------------------------------------------- /Models/Segnet_transpose.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model 2 | from keras.layers import Reshape 3 | from keras.layers import Input 4 | 5 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Permute 6 | from keras.layers.normalization import BatchNormalization 7 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D, Conv2DTranspose 8 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 9 | 10 | def segnet_transposed(nClasses, optimizer=None, input_height=360, input_width=480): 11 | kernel = 3 12 | filter_size = 64 13 | pad = 1 14 | pool_size = 2 15 | 16 | img_input = Input(shape=(input_height, input_width,3)) 17 | 18 | 19 | 20 | 21 | # encoder 22 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 23 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 24 | x = BatchNormalization()(x) 25 | x = Activation('relu') (x) 26 | l1 = x 27 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 28 | 29 | x = ZeroPadding2D(padding=(pad, pad))(x) 30 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 31 | x = BatchNormalization()(x) 32 | x = Activation('relu')(x) 33 | l2 = x 34 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 35 | 36 | x = ZeroPadding2D(padding=(pad, pad))(x) 37 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 38 | x = BatchNormalization()(x) 39 | x = Activation('relu')(x) 40 | l3 = x 41 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 42 | 43 | x = ZeroPadding2D(padding=(pad, pad))(x) 44 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 45 | x = BatchNormalization()(x) 46 | l4 = x 47 | x = Activation('relu')(x) 48 | 49 | # decoder 50 | x = ZeroPadding2D(padding=(pad, pad))(x) 51 | x = Conv2DTranspose(512, (kernel, kernel), padding='valid')(x) 52 | x = BatchNormalization()(x) 53 | 54 | # x = Add()([l4, x]) 55 | x = UpSampling2D(size=(pool_size, pool_size))(x) 56 | x = ZeroPadding2D(padding=(pad, 
pad))(x) 57 | x = Conv2DTranspose(256, (kernel, kernel), padding='valid')(x) 58 | x = BatchNormalization()(x) 59 | 60 | # x = Add()([l3, x]) 61 | x = UpSampling2D(size=(pool_size, pool_size))(x) 62 | x = ZeroPadding2D(padding=(pad, pad))(x) 63 | x = Conv2DTranspose(128, (kernel, kernel), padding='valid')(x) 64 | x = BatchNormalization()(x) 65 | 66 | x = UpSampling2D(size=(pool_size, pool_size))(x) 67 | x = ZeroPadding2D(padding=(pad, pad))(x) 68 | x = Conv2DTranspose(filter_size, (kernel, kernel), padding='valid')(x) 69 | x = BatchNormalization()(x) 70 | 71 | # x = Add()([l1, x]) 72 | x = Conv2DTranspose(nClasses, (1, 1), padding='valid') (x) 73 | 74 | out = x 75 | a = Model(inputs=img_input, outputs=out) 76 | 77 | model = [] 78 | a.outputHeight = a.output_shape[1] 79 | a.outputWidth = a.output_shape[2] 80 | 81 | out = Reshape((a.outputHeight * a.outputWidth, nClasses), input_shape=(nClasses, a.outputHeight, a.outputWidth))(out) 82 | out = Activation('softmax')(out) 83 | # if not optimizer is None: 84 | # model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy']) 85 | model = Model(inputs=img_input, outputs=out) 86 | model.outputHeight = a.outputHeight 87 | model.outputWidth = a.outputWidth 88 | 89 | return model 90 | 91 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2017 Sadeep Jayasumana 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | 25 | import numpy as np 26 | from PIL import Image 27 | 28 | 29 | # Pascal VOC color palette for labels 30 | _PALETTE = [0, 0, 0, 31 | 128, 0, 0, 32 | 0, 128, 0, 33 | 128, 128, 0, 34 | 0, 0, 128, 35 | 128, 0, 128, 36 | 0, 128, 128, 37 | 128, 128, 128, 38 | 64, 0, 0, 39 | 192, 0, 0, 40 | 64, 128, 0, 41 | 192, 128, 0, 42 | 64, 0, 128, 43 | 192, 0, 128, 44 | 64, 128, 128, 45 | 192, 128, 128, 46 | 0, 64, 0, 47 | 128, 64, 0, 48 | 0, 192, 0, 49 | 128, 192, 0, 50 | 0, 64, 128, 51 | 128, 64, 128, 52 | 0, 192, 128, 53 | 128, 192, 128, 54 | 64, 64, 0, 55 | 192, 64, 0, 56 | 64, 192, 0, 57 | 192, 192, 0] 58 | 59 | 60 | def get_preprocessed_image(file_name): 61 | """ Reads an image from the disk, pre-processes it by subtracting mean etc. and 62 | returns a numpy array that's ready to be fed into a Keras model. 63 | 64 | Note: This method assumes 'channels_last' data format in Keras. 
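    Illustrative usage (the file name here is assumed):
        img, h, w = get_preprocessed_image('example.jpg')
        # img has shape (1, 500, 500, 3); h and w are the image size before padding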
65 | """ 66 | 67 | mean_values = np.array([123.68, 116.779, 103.939], dtype=np.float32) # RGB mean values 68 | mean_values = mean_values.reshape(1, 1, 3) 69 | im = np.array(Image.open(file_name)).astype(np.float32) 70 | assert im.ndim == 3, "Only RGB images are supported." 71 | im = im - mean_values 72 | im = im[:, :, ::-1] 73 | img_h, img_w, img_c = im.shape 74 | assert img_c == 3, "Only RGB images are supported." 75 | if img_h > 500 or img_w > 500: 76 | raise ValueError("Please resize your images to be not bigger than 500 x 500.") 77 | 78 | pad_h = 500 - img_h 79 | pad_w = 500 - img_w 80 | im = np.pad(im, pad_width=((0, pad_h), (0, pad_w), (0, 0)), mode='constant', constant_values=0) 81 | return im.astype(np.float32).reshape(1, 500, 500, 3), img_h, img_w 82 | 83 | 84 | def get_label_image(probs, img_h, img_w): 85 | """ Returns the label image (PNG with Pascal VOC colormap) given the probabilities. 86 | 87 | Note: This method assumes 'channels_last' data format. 88 | """ 89 | 90 | labels = probs.argmax(axis=2).astype("uint8")[:img_h, :img_w] 91 | label_im = Image.fromarray(labels, "P") 92 | label_im.putpalette(_PALETTE) 93 | return label_im 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SegNet + CRF as RNN. 2 | 3 | This project has the implementation of SegNetResCRF, combination of SegNet with CRF as RNN, published in the [2018 IJCNN](https://ieeexplore.ieee.org/xpl/conhome/8465565/proceeding) paper [SegNetRes-CRF: A Deep Convolutional Encoder-Decoder Architecture for Semantic Image Segmentation 4 | ](https://ieeexplore.ieee.org/abstract/document/8489376) 5 | 6 | Some references: 7 | * SegNet implementation: https://github.com/divamgupta/image-segmentation-keras 8 | * CRF as RNN implementation: https://github.com/sadeepj/crfasrnn_keras 9 | 10 | ## Repository working tree: 11 | [![Repo Working Tree](https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/segnet_crfasrnn.png)](https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/segnet_crfasrnn.png " Repo working tree") 12 | 13 | 14 | ## Installation 15 | 16 | Clone this repository and inside it, run: 17 | ``` 18 | conda env create -f segnet_crfasrnn_env.yml 19 | source activate segnet_crfasrnn 20 | ``` 21 | After that you need to run compile high_dim_filter (Go to cpp folder and run compile script): 22 | 23 | ``` 24 | cd cpp 25 | ./compile.sh 26 | ``` 27 | 28 | After that you can run train script as mentioned on Training the Model. 29 | 30 | 31 | 32 | ### Tested with: 33 | pip install --upgrade tensorflow-gpu==1.4 34 | conda install -c menpo opencv3 35 | 36 | ## keras.json content 37 | ```json 38 | { 39 | "epsilon": 1e-07, 40 | "floatx": "float32", 41 | "image_data_format": "channels_last", 42 | "backend": "theano" 43 | } 44 | ``` 45 | 46 | ## Visualizing the prepared data 47 | 48 | You can also visualize your prepared annotations for verification of the prepared data. 
49 | 50 | ```shell 51 | python visualizeDataset.py \ 52 | --images="data/dataset1/images_prepped_train/" \ 53 | --annotations="data/dataset1/annotations_prepped_train/" \ 54 | --n_classes=11 55 | ``` 56 | 57 | ### Dataset working tree: 58 | 59 | [![Data Working Tree](https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/data_tree.png)](https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/data_tree.png "Data working tree") 60 | 61 | 62 | 63 | ## Training the Model 64 | 65 | To train the model run the following command: 66 | 67 | ```shell 68 | TENSORFLOW_FLAGS=device=cuda0,image_data_format=channels_last,floatX=float32 python train.py --save_weights_path="weights/ex1/" --train_images="path/train/" --train_annotations="data_semantics/trainannot/" --val_images="data_semantics/val/" --val_annotations="data_semantics/valannot/" --n_classes=8 --model_name="segnet_res_crf" --input_height=128 --input_width=128 69 | ``` 70 | 71 | [![Run segnet crfasrnn]( 72 | https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/run_segnet_crfasrnn.png)]( 73 | https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/run_segnet_crfasrnn.png "Run segnet crfasrnn") 74 | 75 | 76 | [![Training segnet crfasrnn]( 77 | https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/training_segnet_crfasrnn.png)]( 78 | https://raw.githubusercontent.com/laoj2/segnet_crfasrnn/master/github_imgs/training_segnet_crfasrnn.png "Training segnet crfasrnn") 79 | 80 | 81 | 82 | 83 | 84 | ## Getting the predictions 85 | 86 | ```shell 87 | TENSORFLOW_FLAGS=device=cuda0,image_data_format=channels_last,floatX=float32 python predict.py --output_path="teste/" --test_images="data_semantics/test/" --n_classes=8 --model_name="segnet_res_crf" --input_height=128 --input_width=128 --save_weights_path="weights_360_480_res_with_crf.hdf5" 88 | ``` 89 | -------------------------------------------------------------------------------- /train_modifiedLoss.py: -------------------------------------------------------------------------------- 1 | import math 2 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint 3 | import argparse 4 | import Models , LoadBatches 5 | 6 | # learning rate schedule 7 | def step_decay(epoch): 8 | initial_lrate = 0.000001 9 | drop = 0.5 10 | epochs_drop = 10.0 11 | lrate = initial_lrate * math.pow(drop, math.floor((1+epoch)/epochs_drop)) 12 | return lrate 13 | 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--save_weights_path", type = str ) 17 | parser.add_argument("--train_images", type = str ) 18 | parser.add_argument("--train_annotations", type = str ) 19 | parser.add_argument("--n_classes", type=int ) 20 | parser.add_argument("--input_height", type=int , default = 224 ) 21 | parser.add_argument("--input_width", type=int , default = 224 ) 22 | 23 | parser.add_argument('--validate',action='store_false') 24 | parser.add_argument("--val_images", type = str , default = "") 25 | parser.add_argument("--val_annotations", type = str , default = "") 26 | 27 | parser.add_argument("--epochs", type = int, default = 100 ) 28 | parser.add_argument("--batch_size", type = int, default = 1 ) 29 | parser.add_argument("--val_batch_size", type = int, default = 1 ) 30 | parser.add_argument("--load_weights", type = str , default = "data/vgg16_weights_th_dim_ordering_th_kernels.h5") 31 | 32 | parser.add_argument("--model_name", type = str , default = "") 33 | parser.add_argument("--optimizer_name", type = str , 
default = "adadelta") 34 | 35 | 36 | args = parser.parse_args() 37 | 38 | train_images_path = args.train_images 39 | train_segs_path = args.train_annotations 40 | train_batch_size = args.batch_size 41 | n_classes = args.n_classes 42 | input_height = args.input_height 43 | input_width = args.input_width 44 | validate = args.validate 45 | save_weights_path = args.save_weights_path 46 | epochs = args.epochs 47 | load_weights = args.load_weights 48 | 49 | optimizer_name = args.optimizer_name 50 | model_name = args.model_name 51 | 52 | if validate: 53 | val_images_path = args.val_images 54 | val_segs_path = args.val_annotations 55 | val_batch_size = args.val_batch_size 56 | 57 | modelFns = { 'vgg_segnet':Models.VGGSegnet.VGGSegnet , 'vgg_unet':Models.VGGUnet.VGGUnet , 'vgg_unet2':Models.VGGUnet.VGGUnet2 , 'fcn8':Models.FCN8.FCN8 , 'fcn32':Models.FCN32.FCN32, 'segnet':Models.Segnet.segnet} 58 | modelFN = modelFns[ model_name ] 59 | 60 | m = modelFN( n_classes , input_height=input_height, input_width=input_width ) 61 | 62 | 63 | #if len( load_weights ) > 0: 64 | # m.load_weights(load_weights, by_name=True) 65 | 66 | 67 | lrate = LearningRateScheduler(step_decay) 68 | filepath="weights_360_480_correct_weights_best.hdf5" 69 | checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') 70 | callbacks_list = [checkpoint] 71 | 72 | 73 | 74 | print "Model output shape" , m.output_shape 75 | 76 | output_height = m.outputHeight 77 | output_width = m.outputWidth 78 | class_weighting= [0.2595, 0.1826, 4.5640, 0.1417, 0.9051, 0.3826, 9.6446, 1.8418, 0.6823, 6.2478, 7.3614, 0] 79 | G = LoadBatches.imageSegmentationGenerator( train_images_path , train_segs_path , train_batch_size, n_classes , input_height , input_width , output_height , output_width ) 80 | 81 | 82 | if validate: 83 | G2 = LoadBatches.imageSegmentationGenerator( val_images_path , val_segs_path , val_batch_size, n_classes , input_height , input_width , output_height , output_width ) 84 | 85 | if not validate: 86 | for ep in range( epochs ): 87 | m.fit_generator( G , 512 , epochs=1 ) 88 | m.save_weights( save_weights_path + "." + str( ep ) ) 89 | m.save( save_weights_path + ".model." + str( ep ) ) 90 | else: 91 | # for ep in range( epochs ): 92 | m.fit_generator( G , 512 , validation_data=G2 , validation_steps=200 , callbacks=callbacks_list,class_weight=class_weighting, epochs=epochs, verbose=1) 93 | # m.save_weights( save_weights_path + "." + str( ep ) ) 94 | # m.save( save_weights_path + ".model." 
+ str( ep ) ) -------------------------------------------------------------------------------- /Models/FCN32.py: -------------------------------------------------------------------------------- 1 | 2 | # https://github.com/wkentaro/pytorch-fcn/blob/master/torchfcn/models/fcn32s.py 3 | # fc weights into the 1x1 convs , get_upsampling_weight 4 | 5 | 6 | 7 | from keras.models import * 8 | from keras.layers import * 9 | 10 | 11 | import os 12 | file_path = os.path.dirname( os.path.abspath(__file__) ) 13 | 14 | VGG_Weights_path = file_path+"/../data/vgg16_weights_th_dim_ordering_th_kernels.h5" 15 | 16 | IMAGE_ORDERING = 'channels_first' 17 | 18 | 19 | def FCN32( n_classes , input_height=416, input_width=608 , vgg_level=3): 20 | 21 | assert input_height%32 == 0 22 | assert input_width%32 == 0 23 | 24 | # https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5 25 | img_input = Input(shape=(3,input_height,input_width)) 26 | 27 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1', data_format=IMAGE_ORDERING )(img_input) 28 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2', data_format=IMAGE_ORDERING )(x) 29 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', data_format=IMAGE_ORDERING )(x) 30 | f1 = x 31 | # Block 2 32 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1', data_format=IMAGE_ORDERING )(x) 33 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2', data_format=IMAGE_ORDERING )(x) 34 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', data_format=IMAGE_ORDERING )(x) 35 | f2 = x 36 | 37 | # Block 3 38 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1', data_format=IMAGE_ORDERING )(x) 39 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2', data_format=IMAGE_ORDERING )(x) 40 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3', data_format=IMAGE_ORDERING )(x) 41 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', data_format=IMAGE_ORDERING )(x) 42 | f3 = x 43 | 44 | # Block 4 45 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1', data_format=IMAGE_ORDERING )(x) 46 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2', data_format=IMAGE_ORDERING )(x) 47 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3', data_format=IMAGE_ORDERING )(x) 48 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', data_format=IMAGE_ORDERING )(x) 49 | f4 = x 50 | 51 | # Block 5 52 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1', data_format=IMAGE_ORDERING )(x) 53 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2', data_format=IMAGE_ORDERING )(x) 54 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3', data_format=IMAGE_ORDERING )(x) 55 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool', data_format=IMAGE_ORDERING )(x) 56 | f5 = x 57 | 58 | x = Flatten(name='flatten')(x) 59 | x = Dense(4096, activation='relu', name='fc1')(x) 60 | x = Dense(4096, activation='relu', name='fc2')(x) 61 | x = Dense( 1000 , activation='softmax', name='predictions')(x) 62 | 63 | vgg = Model( img_input , x ) 64 | vgg.load_weights(VGG_Weights_path) 65 | 66 | o = f5 67 | 68 | o = ( Conv2D( 4096 , ( 7 , 7 ) , activation='relu' , padding='same', 
data_format=IMAGE_ORDERING))(o) 69 | o = Dropout(0.5)(o) 70 | o = ( Conv2D( 4096 , ( 1 , 1 ) , activation='relu' , padding='same', data_format=IMAGE_ORDERING))(o) 71 | o = Dropout(0.5)(o) 72 | 73 | o = ( Conv2D( n_classes , ( 1 , 1 ) ,kernel_initializer='he_normal' , data_format=IMAGE_ORDERING))(o) 74 | o = Conv2DTranspose( n_classes , kernel_size=(64,64) , strides=(32,32) , use_bias=False , data_format=IMAGE_ORDERING )(o) 75 | o_shape = Model(img_input , o ).output_shape 76 | 77 | outputHeight = o_shape[2] 78 | outputWidth = o_shape[3] 79 | 80 | print "koko" , o_shape 81 | 82 | o = (Reshape(( -1 , outputHeight*outputWidth )))(o) 83 | o = (Permute((2, 1)))(o) 84 | o = (Activation('softmax'))(o) 85 | model = Model( img_input , o ) 86 | model.outputWidth = outputWidth 87 | model.outputHeight = outputHeight 88 | 89 | return model 90 | 91 | 92 | if __name__ == '__main__': 93 | m = FCN32( 101 ) 94 | from keras.utils import plot_model 95 | plot_model( m , show_shapes=True , to_file='model.png') 96 | -------------------------------------------------------------------------------- /Models/Segnet.py: -------------------------------------------------------------------------------- 1 | from keras.layers.merge import Add 2 | from keras.models import Model 3 | from keras.layers import Reshape 4 | from keras.layers import Input 5 | 6 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Permute 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D 9 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 10 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 11 | from keras.layers.recurrent import LSTM 12 | from keras.layers.advanced_activations import LeakyReLU 13 | from keras.optimizers import Adam, SGD 14 | from keras.layers.embeddings import Embedding 15 | from keras.utils import np_utils 16 | # from keras.regularizers import ActivityRegularizer 17 | from keras import backend as K 18 | 19 | from crfrnn_layer import CrfRnnLayer 20 | 21 | 22 | def segnet(nClasses, optimizer=None, input_height=360, input_width=480): 23 | kernel = 3 24 | filter_size = 64 25 | pad = 1 26 | pool_size = 2 27 | 28 | img_input = Input(shape=(input_height, input_width,3)) 29 | 30 | 31 | 32 | 33 | # encoder 34 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 35 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 36 | x = BatchNormalization()(x) 37 | x = Activation('relu') (x) 38 | l1 = x 39 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 40 | 41 | x = ZeroPadding2D(padding=(pad, pad))(x) 42 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 43 | x = BatchNormalization()(x) 44 | x = Activation('relu')(x) 45 | l2 = x 46 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 47 | 48 | x = ZeroPadding2D(padding=(pad, pad))(x) 49 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 50 | x = BatchNormalization()(x) 51 | x = Activation('relu')(x) 52 | l3 = x 53 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 54 | 55 | x = ZeroPadding2D(padding=(pad, pad))(x) 56 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 57 | x = BatchNormalization()(x) 58 | l4 = x 59 | x = Activation('relu')(x) 60 | 61 | 62 | 63 | 64 | # decoder 65 | x = ZeroPadding2D(padding=(pad, pad))(x) 66 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 67 | x = BatchNormalization()(x) 68 | 69 | # x = 
Add()([l4, x]) 70 | x = UpSampling2D(size=(pool_size, pool_size))(x) 71 | x = ZeroPadding2D(padding=(pad, pad))(x) 72 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 73 | x = BatchNormalization()(x) 74 | 75 | # x = Add()([l3, x]) 76 | x = UpSampling2D(size=(pool_size, pool_size))(x) 77 | x = ZeroPadding2D(padding=(pad, pad))(x) 78 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 79 | x = BatchNormalization()(x) 80 | 81 | # x = Add()([l2, x]) 82 | x = UpSampling2D(size=(pool_size, pool_size))(x) 83 | x = ZeroPadding2D(padding=(pad, pad))(x) 84 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 85 | x = BatchNormalization()(x) 86 | 87 | # x = Add()([l1, x]) 88 | x = Convolution2D(nClasses, (1, 1), padding='valid') (x) 89 | 90 | out = CrfRnnLayer(image_dims=(input_height, input_width), 91 | num_classes=nClasses, 92 | theta_alpha=160., 93 | theta_beta=3., 94 | theta_gamma=3., 95 | num_iterations=5, 96 | name='crfrnn')([x, img_input]) 97 | 98 | a = Model(inputs=img_input, outputs=out) 99 | 100 | model = [] 101 | a.outputHeight = a.output_shape[1] 102 | a.outputWidth = a.output_shape[2] 103 | 104 | out = Reshape((a.outputHeight * a.outputWidth, nClasses), input_shape=(nClasses, a.outputHeight, a.outputWidth))(out) 105 | out = Activation('softmax')(out) 106 | # if not optimizer is None: 107 | # model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy']) 108 | model = Model(inputs=img_input, outputs=out) 109 | model.outputHeight = a.outputHeight 110 | model.outputWidth = a.outputWidth 111 | 112 | return model 113 | -------------------------------------------------------------------------------- /Models/Segnet_crf_res.py: -------------------------------------------------------------------------------- 1 | from keras.layers.merge import Add 2 | from keras.models import Model 3 | from keras.layers import Reshape 4 | from keras.layers import Input 5 | 6 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Permute 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D 9 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 10 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 11 | from keras.layers.recurrent import LSTM 12 | from keras.layers.advanced_activations import LeakyReLU 13 | from keras.optimizers import Adam, SGD 14 | from keras.layers.embeddings import Embedding 15 | from keras.utils import np_utils 16 | # from keras.regularizers import ActivityRegularizer 17 | from keras import backend as K 18 | 19 | from crfrnn_layer import CrfRnnLayer 20 | 21 | 22 | def segnet_crf_res(nClasses, optimizer=None, input_height=360, input_width=480): 23 | kernel = 3 24 | filter_size = 64 25 | pad = 1 26 | pool_size = 2 27 | 28 | img_input = Input(shape=(input_height, input_width,3)) 29 | 30 | 31 | 32 | 33 | # encoder 34 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 35 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 36 | x = BatchNormalization()(x) 37 | x = Activation('relu') (x) 38 | l1 = x 39 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 40 | 41 | x = ZeroPadding2D(padding=(pad, pad))(x) 42 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 43 | x = BatchNormalization()(x) 44 | x = Activation('relu')(x) 45 | l2 = x 46 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 47 | 48 | x = 
ZeroPadding2D(padding=(pad, pad))(x) 49 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 50 | x = BatchNormalization()(x) 51 | x = Activation('relu')(x) 52 | l3 = x 53 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 54 | 55 | x = ZeroPadding2D(padding=(pad, pad))(x) 56 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 57 | x = BatchNormalization()(x) 58 | l4 = x 59 | x = Activation('relu')(x) 60 | 61 | 62 | 63 | 64 | # decoder 65 | x = ZeroPadding2D(padding=(pad, pad))(x) 66 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 67 | x = BatchNormalization()(x) 68 | 69 | x = Add()([l4, x]) 70 | x = UpSampling2D(size=(pool_size, pool_size))(x) 71 | x = ZeroPadding2D(padding=(pad, pad))(x) 72 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 73 | x = BatchNormalization()(x) 74 | 75 | x = Add()([l3, x]) 76 | x = UpSampling2D(size=(pool_size, pool_size))(x) 77 | x = ZeroPadding2D(padding=(pad, pad))(x) 78 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 79 | x = BatchNormalization()(x) 80 | 81 | x = Add()([l2, x]) 82 | x = UpSampling2D(size=(pool_size, pool_size))(x) 83 | x = ZeroPadding2D(padding=(pad, pad))(x) 84 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 85 | x = BatchNormalization()(x) 86 | 87 | x = Add()([l1, x]) 88 | x = Convolution2D(nClasses, (1, 1), padding='valid') (x) 89 | 90 | out = CrfRnnLayer(image_dims=(input_height, input_width), 91 | num_classes=nClasses, 92 | theta_alpha=160., 93 | theta_beta=3., 94 | theta_gamma=3., 95 | num_iterations=5, 96 | name='crfrnn')([x, img_input]) 97 | # out = x 98 | a = Model(inputs=img_input, outputs=out) 99 | 100 | model = [] 101 | a.outputHeight = a.output_shape[1] 102 | a.outputWidth = a.output_shape[2] 103 | 104 | out = Reshape((a.outputHeight * a.outputWidth, nClasses), input_shape=(nClasses, a.outputHeight, a.outputWidth))(out) 105 | out = Activation('softmax')(out) 106 | # if not optimizer is None: 107 | # model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy']) 108 | model = Model(inputs=img_input, outputs=out) 109 | model.outputHeight = a.outputHeight 110 | model.outputWidth = a.outputWidth 111 | 112 | return model 113 | -------------------------------------------------------------------------------- /Models/Segnet_res.py: -------------------------------------------------------------------------------- 1 | from keras.layers.merge import Add 2 | from keras.models import Model 3 | from keras.layers import Reshape 4 | from keras.layers import Input 5 | 6 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Permute 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D 9 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 10 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 11 | from keras.layers.recurrent import LSTM 12 | from keras.layers.advanced_activations import LeakyReLU 13 | from keras.optimizers import Adam, SGD 14 | from keras.layers.embeddings import Embedding 15 | from keras.utils import np_utils 16 | # from keras.regularizers import ActivityRegularizer 17 | from keras import backend as K 18 | 19 | from crfrnn_layer import CrfRnnLayer 20 | 21 | 22 | def segnet_res(nClasses, optimizer=None, input_height=360, input_width=480): 23 | kernel = 3 24 | filter_size = 64 25 | pad = 1 26 | pool_size = 2 27 | 28 | img_input = 
Input(shape=(input_height, input_width,3)) 29 | 30 | 31 | 32 | 33 | # encoder 34 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 35 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 36 | x = BatchNormalization()(x) 37 | x = Activation('relu') (x) 38 | l1 = x 39 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 40 | 41 | x = ZeroPadding2D(padding=(pad, pad))(x) 42 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 43 | x = BatchNormalization()(x) 44 | x = Activation('relu')(x) 45 | l2 = x 46 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 47 | 48 | x = ZeroPadding2D(padding=(pad, pad))(x) 49 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 50 | x = BatchNormalization()(x) 51 | x = Activation('relu')(x) 52 | l3 = x 53 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 54 | 55 | x = ZeroPadding2D(padding=(pad, pad))(x) 56 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 57 | x = BatchNormalization()(x) 58 | l4 = x 59 | x = Activation('relu')(x) 60 | 61 | 62 | 63 | 64 | # decoder 65 | x = ZeroPadding2D(padding=(pad, pad))(x) 66 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 67 | x = BatchNormalization()(x) 68 | 69 | x = Add()([l4, x]) 70 | x = UpSampling2D(size=(pool_size, pool_size))(x) 71 | x = ZeroPadding2D(padding=(pad, pad))(x) 72 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 73 | x = BatchNormalization()(x) 74 | 75 | x = Add()([l3, x]) 76 | x = UpSampling2D(size=(pool_size, pool_size))(x) 77 | x = ZeroPadding2D(padding=(pad, pad))(x) 78 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 79 | x = BatchNormalization()(x) 80 | 81 | x = Add()([l2, x]) 82 | x = UpSampling2D(size=(pool_size, pool_size))(x) 83 | x = ZeroPadding2D(padding=(pad, pad))(x) 84 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 85 | x = BatchNormalization()(x) 86 | 87 | x = Add()([l1, x]) 88 | x = Convolution2D(nClasses, (1, 1), padding='valid') (x) 89 | 90 | # out = CrfRnnLayer(image_dims=(input_height, input_width), 91 | # num_classes=nClasses, 92 | # theta_alpha=160., 93 | # theta_beta=3., 94 | # theta_gamma=3., 95 | # num_iterations=5, 96 | # name='crfrnn')([x, img_input]) 97 | out = x 98 | a = Model(inputs=img_input, outputs=out) 99 | 100 | model = [] 101 | a.outputHeight = a.output_shape[1] 102 | a.outputWidth = a.output_shape[2] 103 | 104 | out = Reshape((a.outputHeight * a.outputWidth, nClasses), input_shape=(nClasses, a.outputHeight, a.outputWidth))(out) 105 | out = Activation('softmax')(out) 106 | # if not optimizer is None: 107 | # model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy']) 108 | model = Model(inputs=img_input, outputs=out) 109 | model.outputHeight = a.outputHeight 110 | model.outputWidth = a.outputWidth 111 | 112 | return model 113 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import math 2 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint 3 | import argparse 4 | import Models , LoadBatches 5 | from Models.Segnet_crf_res import segnet_crf_res 6 | from Models.VGGSegnet import VGGSegnet 7 | from Models.VGGUnet import VGGUnet 8 | from Models.VGGUnet import VGGUnet2 9 | from Models.FCN8 import FCN8 10 | from Models.FCN32 import FCN32 11 | from Models.Segnet import segnet 12 | from Models.Segnet_transpose import segnet_transposed 13 | from Models.Segnet_res import 
segnet_res 14 | 15 | 16 | # learning rate schedule 17 | def step_decay(epoch): 18 | initial_lrate = 0.000001 19 | drop = 0.5 20 | epochs_drop = 10.0 21 | lrate = initial_lrate * math.pow(drop, math.floor((1+epoch)/epochs_drop)) 22 | return lrate 23 | 24 | 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument("--save_weights_path", type = str ) 27 | parser.add_argument("--train_images", type = str ) 28 | parser.add_argument("--train_annotations", type = str ) 29 | parser.add_argument("--n_classes", type=int ) 30 | parser.add_argument("--input_height", type=int , default = 224 ) 31 | parser.add_argument("--input_width", type=int , default = 224 ) 32 | 33 | parser.add_argument('--validate',action='store_false') 34 | parser.add_argument("--val_images", type = str , default = "") 35 | parser.add_argument("--val_annotations", type = str , default = "") 36 | 37 | parser.add_argument("--epochs", type = int, default = 100 ) 38 | parser.add_argument("--batch_size", type = int, default = 1 ) 39 | parser.add_argument("--val_batch_size", type = int, default = 1 ) 40 | parser.add_argument("--load_weights", type = str , default = "data/vgg16_weights_th_dim_ordering_th_kernels.h5") 41 | 42 | parser.add_argument("--model_name", type = str , default = "") 43 | parser.add_argument("--optimizer_name", type = str , default = "adadelta") 44 | 45 | 46 | args = parser.parse_args() 47 | 48 | train_images_path = args.train_images 49 | train_segs_path = args.train_annotations 50 | train_batch_size = args.batch_size 51 | n_classes = args.n_classes 52 | input_height = args.input_height 53 | input_width = args.input_width 54 | validate = args.validate 55 | save_weights_path = args.save_weights_path 56 | epochs = args.epochs 57 | load_weights = args.load_weights 58 | 59 | optimizer_name = args.optimizer_name 60 | model_name = args.model_name 61 | 62 | if validate: 63 | val_images_path = args.val_images 64 | val_segs_path = args.val_annotations 65 | val_batch_size = args.val_batch_size 66 | 67 | modelFns = { 'vgg_segnet':VGGSegnet , 'vgg_unet':VGGUnet , 'vgg_unet2':VGGUnet2 , 'fcn8':FCN8 , 'fcn32':FCN32, 'segnet':segnet, 'segnet_transposed':segnet_transposed, 'segnet_res':segnet_res, 'segnet_res_crf':segnet_crf_res} 68 | modelFN = modelFns[ model_name ] 69 | 70 | m = modelFN( n_classes , input_height=input_height, input_width=input_width ) 71 | m.compile(loss='categorical_crossentropy', 72 | optimizer= optimizer_name , 73 | metrics=['accuracy']) 74 | 75 | 76 | #if len( load_weights ) > 0: 77 | # m.load_weights(load_weights) 78 | 79 | 80 | lrate = LearningRateScheduler(step_decay) 81 | filepath="weights_360_480_res_with_crf.hdf5" 82 | 83 | checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') 84 | callbacks_list = [checkpoint] 85 | 86 | 87 | 88 | print "Model output shape" , m.output_shape 89 | 90 | output_height = m.outputHeight 91 | output_width = m.outputWidth 92 | class_weighting= [0.2595, 0.1826, 4.5640, 0.1417, 0.9051, 0.3826, 9.6446, 1.8418, 0.6823, 6.2478, 7.3614] 93 | G = LoadBatches.imageSegmentationGenerator( train_images_path , train_segs_path , train_batch_size, n_classes , input_height , input_width , output_height , output_width ) 94 | 95 | 96 | if validate: 97 | G2 = LoadBatches.imageSegmentationGenerator( val_images_path , val_segs_path , val_batch_size, n_classes , input_height , input_width , output_height , output_width ) 98 | 99 | if not validate: 100 | for ep in range( epochs ): 101 | m.fit_generator( G , 512 , epochs=1 ) 102 | 
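# Each iteration below writes one weights file and one full-model file, with
# the epoch index appended to save_weights_path. A minimal sketch of restoring
# such a checkpoint for inference (`best_ep` is a hypothetical epoch index;
# the model must be rebuilt with the same modelFN arguments so that the layer
# shapes match):
#
#   m2 = modelFN( n_classes , input_height=input_height, input_width=input_width )
#   m2.load_weights( save_weights_path + "." + str( best_ep ) )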
m.save_weights( save_weights_path + "." + str( ep ) ) 103 | m.save( save_weights_path + ".model." + str( ep ) ) 104 | else: 105 | # for ep in range( epochs ): 106 | m.fit_generator( G , 512 , validation_data=G2 , validation_steps=200 , callbacks=callbacks_list,class_weight=class_weighting, epochs=epochs, verbose=1) 107 | # m.save_weights( save_weights_path + "." + str( ep ) ) 108 | # m.save( save_weights_path + ".model." + str( ep ) ) 109 | -------------------------------------------------------------------------------- /Models/VGGSegnet.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | from keras.models import * 7 | from keras.layers import * 8 | 9 | 10 | import os 11 | file_path = os.path.dirname( os.path.abspath(__file__) ) 12 | VGG_Weights_path = file_path+"/../data/vgg16_weights_th_dim_ordering_th_kernels.h5" 13 | 14 | 15 | def VGGSegnet( n_classes , input_height=416, input_width=608 , vgg_level=3): 16 | 17 | img_input = Input(shape=(3,input_height,input_width)) 18 | 19 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1', data_format='channels_first' )(img_input) 20 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2', data_format='channels_first' )(x) 21 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', data_format='channels_first' )(x) 22 | f1 = x 23 | # Block 2 24 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1', data_format='channels_first' )(x) 25 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2', data_format='channels_first' )(x) 26 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', data_format='channels_first' )(x) 27 | f2 = x 28 | 29 | # Block 3 30 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1', data_format='channels_first' )(x) 31 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2', data_format='channels_first' )(x) 32 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3', data_format='channels_first' )(x) 33 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', data_format='channels_first' )(x) 34 | f3 = x 35 | 36 | # Block 4 37 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1', data_format='channels_first' )(x) 38 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2', data_format='channels_first' )(x) 39 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3', data_format='channels_first' )(x) 40 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', data_format='channels_first' )(x) 41 | f4 = x 42 | 43 | # Block 5 44 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1', data_format='channels_first' )(x) 45 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2', data_format='channels_first' )(x) 46 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3', data_format='channels_first' )(x) 47 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool', data_format='channels_first' )(x) 48 | f5 = x 49 | 50 | x = Flatten(name='flatten')(x) 51 | x = Dense(4096, activation='relu', name='fc1')(x) 52 | x = Dense(4096, activation='relu', name='fc2')(x) 53 | x = Dense( 1000 , activation='softmax', name='predictions')(x) 54 | 55 | vgg = Model( img_input , x ) 56 | vgg.load_weights(VGG_Weights_path) 57 | 58 | levels = [f1 
, f2 , f3 , f4 , f5 ] 59 | 60 | o = levels[ vgg_level ] 61 | 62 | o = ( ZeroPadding2D( (1,1) , data_format='channels_first' ))(o) 63 | o = ( Conv2D(512, (3, 3), padding='valid', data_format='channels_first'))(o) 64 | o = ( BatchNormalization())(o) 65 | 66 | o = ( UpSampling2D( (2,2), data_format='channels_first'))(o) 67 | o = ( ZeroPadding2D( (1,1), data_format='channels_first'))(o) 68 | o = ( Conv2D( 256, (3, 3), padding='valid', data_format='channels_first'))(o) 69 | o = ( BatchNormalization())(o) 70 | 71 | o = ( UpSampling2D((2,2) , data_format='channels_first' ) )(o) 72 | o = ( ZeroPadding2D((1,1) , data_format='channels_first' ))(o) 73 | o = ( Conv2D( 128 , (3, 3), padding='valid' , data_format='channels_first' ))(o) 74 | o = ( BatchNormalization())(o) 75 | 76 | o = ( UpSampling2D((2,2) , data_format='channels_first' ))(o) 77 | o = ( ZeroPadding2D((1,1) , data_format='channels_first' ))(o) 78 | o = ( Conv2D( 64 , (3, 3), padding='valid' , data_format='channels_first' ))(o) 79 | o = ( BatchNormalization())(o) 80 | 81 | 82 | o = Conv2D( n_classes , (3, 3) , padding='same', data_format='channels_first' )( o ) 83 | o_shape = Model(img_input , o ).output_shape 84 | outputHeight = o_shape[2] 85 | outputWidth = o_shape[3] 86 | 87 | o = (Reshape(( -1 , outputHeight*outputWidth )))(o) 88 | o = (Permute((2, 1)))(o) 89 | o = (Activation('softmax'))(o) 90 | model = Model( img_input , o ) 91 | model.outputWidth = outputWidth 92 | model.outputHeight = outputHeight 93 | 94 | return model 95 | 96 | 97 | 98 | 99 | if __name__ == '__main__': 100 | m = VGGSegnet( 101 ) 101 | from keras.utils import plot_model 102 | plot_model( m , show_shapes=True , to_file='model.png') 103 | 104 | -------------------------------------------------------------------------------- /Models/Segnet_crf_res_l1_v1.py: -------------------------------------------------------------------------------- 1 | from keras.layers.merge import Add 2 | from keras.models import Model 3 | from keras.layers import Reshape 4 | from keras.layers import Input 5 | 6 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Permute 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D 9 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 10 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 11 | from keras.layers.recurrent import LSTM 12 | from keras.layers.advanced_activations import LeakyReLU 13 | from keras.optimizers import Adam, SGD 14 | from keras.layers.embeddings import Embedding 15 | from keras.utils import np_utils 16 | # from keras.regularizers import ActivityRegularizer 17 | from keras import backend as K 18 | 19 | from crfrnn_layer import CrfRnnLayer 20 | 21 | def l1_reg(weight_matrix): 22 | return 0.01 * K.sum(K.abs(weight_matrix)) 23 | 24 | 25 | def penalized_loss(bottleNeckFeatures): 26 | def custom_loss(y_true, y_pred): 27 | loss1=K.categorical_crossentropy(y_pred, y_true) 28 | loss2=l1_reg(bottleNeckFeatures) 29 | return loss1+loss2 30 | return custom_loss 31 | 32 | 33 | def segnet(nClasses, optimizer=None, input_height=360, input_width=480): 34 | kernel = 3 35 | filter_size = 64 36 | pad = 1 37 | pool_size = 2 38 | 39 | img_input = Input(shape=(input_height, input_width,3)) 40 | 41 | # encoder 42 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 43 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 44 | x = 
BatchNormalization()(x) 45 | x = Activation('relu') (x) 46 | l1 = x 47 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 48 | 49 | x = ZeroPadding2D(padding=(pad, pad))(x) 50 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 51 | x = BatchNormalization()(x) 52 | x = Activation('relu')(x) 53 | l2 = x 54 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 55 | 56 | x = ZeroPadding2D(padding=(pad, pad))(x) 57 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 58 | x = BatchNormalization()(x) 59 | x = Activation('relu')(x) 60 | l3 = x 61 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 62 | 63 | x = ZeroPadding2D(padding=(pad, pad))(x) 64 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 65 | x = BatchNormalization()(x) 66 | l4 = x 67 | x = Activation('relu')(x) 68 | 69 | # decoder 70 | x = ZeroPadding2D(padding=(pad, pad))(x) 71 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 72 | x = BatchNormalization()(x) 73 | 74 | x = Add()([l4, x]) 75 | x = UpSampling2D(size=(pool_size, pool_size))(x) 76 | x = ZeroPadding2D(padding=(pad, pad))(x) 77 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 78 | x = BatchNormalization()(x) 79 | 80 | x = Add()([l3, x]) 81 | x = UpSampling2D(size=(pool_size, pool_size))(x) 82 | x = ZeroPadding2D(padding=(pad, pad))(x) 83 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 84 | x = BatchNormalization()(x) 85 | 86 | x = Add()([l2, x]) 87 | x = UpSampling2D(size=(pool_size, pool_size))(x) 88 | x = ZeroPadding2D(padding=(pad, pad))(x) 89 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 90 | x = BatchNormalization()(x) 91 | 92 | x = Add()([l1, x]) 93 | x = Convolution2D(nClasses, (1, 1), padding='valid') (x) 94 | 95 | beforeCrfRNN = x 96 | 97 | out = CrfRnnLayer(image_dims=(input_height, input_width), 98 | num_classes=nClasses, 99 | theta_alpha=160., 100 | theta_beta=3., 101 | theta_gamma=3., 102 | num_iterations=5, 103 | name='crfrnn')([x, img_input]) 104 | 105 | a = Model(inputs=img_input, outputs=out) 106 | 107 | model = [] 108 | a.outputHeight = a.output_shape[1] 109 | a.outputWidth = a.output_shape[2] 110 | 111 | out = Reshape((a.outputHeight * a.outputWidth, nClasses), input_shape=(nClasses, a.outputHeight, a.outputWidth))(out) 112 | out = Activation('softmax')(out) 113 | 114 | 115 | 116 | model = Model(inputs=img_input, outputs=out) 117 | model.outputHeight = a.outputHeight 118 | model.outputWidth = a.outputWidth 119 | 120 | print beforeCrfRNN.shape 121 | print img_input.shape 122 | print out.shape 123 | print x.shape 124 | 125 | model.compile(loss=penalized_loss(bottleNeckFeatures=l4), optimizer="adadelta", metrics=['accuracy']) 126 | 127 | return model -------------------------------------------------------------------------------- /Models/Segnet_crf_res_l1_v2.py: -------------------------------------------------------------------------------- 1 | from keras.layers.merge import Add 2 | from keras.models import Model 3 | from keras.layers import Reshape 4 | from keras.layers import Input 5 | 6 | from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Permute 7 | from keras.layers.normalization import BatchNormalization 8 | from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D 9 | from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D 10 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 11 | from keras.layers.recurrent import LSTM 12 | 
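# The loss functions defined below close over a bottleneck tensor so that an
# L1 activity penalty on the encoder features can be added to the usual
# cross-entropy term. A minimal standalone sketch of the same closure pattern;
# note that in Keras 2 the backend signature is
# K.categorical_crossentropy(target, output), i.e. y_true first, whereas the
# definitions below pass y_pred first:
#
#   def penalized(features):                 # features: any Keras tensor
#       def loss(y_true, y_pred):
#           ce = K.categorical_crossentropy(y_true, y_pred)
#           return ce + 0.01 * K.sum(K.abs(features))
#       return loss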
from keras.layers.advanced_activations import LeakyReLU 13 | from keras.optimizers import Adam, SGD 14 | from keras.layers.embeddings import Embedding 15 | from keras.utils import np_utils 16 | from keras import backend as K 17 | 18 | from crfrnn_layer import CrfRnnLayer 19 | 20 | def l1_reg(weight_matrix): 21 | return 0.01 * K.sum(K.abs(weight_matrix)) 22 | 23 | def penalized_loss2(bottleNeckFeatures): 24 | def custom_loss(y_true, y_pred): 25 | loss1=K.categorical_crossentropy(y_pred, y_true) 26 | loss2=l1_reg(bottleNeckFeatures) 27 | return loss1+loss2 28 | return custom_loss 29 | 30 | def penalized_loss(bottleNeckFeatures): 31 | def custom_loss(y_true, y_pred): 32 | loss1=K.categorical_crossentropy(y_pred, y_true) 33 | loss2=l1_reg(bottleNeckFeatures) 34 | return loss1+(0.1*loss2) 35 | return custom_loss 36 | 37 | def segnet(nClasses, optimizer=None, input_height=360, input_width=480): 38 | kernel = 3 39 | filter_size = 64 40 | pad = 1 41 | pool_size = 2 42 | 43 | img_input = Input(shape=(input_height, input_width,3)) 44 | 45 | # encoder 46 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 47 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 48 | x = BatchNormalization()(x) 49 | x = Activation('relu') (x) 50 | l1 = x 51 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 52 | 53 | x = ZeroPadding2D(padding=(pad, pad))(x) 54 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 55 | x = BatchNormalization()(x) 56 | x = Activation('relu')(x) 57 | l2 = x 58 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 59 | 60 | x = ZeroPadding2D(padding=(pad, pad))(x) 61 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 62 | x = BatchNormalization()(x) 63 | x = Activation('relu')(x) 64 | l3 = x 65 | x = MaxPooling2D(pool_size=(pool_size, pool_size))(x) 66 | 67 | x = ZeroPadding2D(padding=(pad, pad))(x) 68 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 69 | x = BatchNormalization()(x) 70 | l4 = x 71 | x = Activation('relu')(x) 72 | 73 | # decoder 74 | x = ZeroPadding2D(padding=(pad, pad))(x) 75 | x = Convolution2D(512, (kernel, kernel), padding='valid')(x) 76 | x = BatchNormalization()(x) 77 | 78 | x = Add()([l4, x]) 79 | x = UpSampling2D(size=(pool_size, pool_size))(x) 80 | x = ZeroPadding2D(padding=(pad, pad))(x) 81 | x = Convolution2D(256, (kernel, kernel), padding='valid')(x) 82 | x = BatchNormalization()(x) 83 | 84 | x = Add()([l3, x]) 85 | x = UpSampling2D(size=(pool_size, pool_size))(x) 86 | x = ZeroPadding2D(padding=(pad, pad))(x) 87 | x = Convolution2D(128, (kernel, kernel), padding='valid')(x) 88 | x = BatchNormalization()(x) 89 | 90 | x = Add()([l2, x]) 91 | x = UpSampling2D(size=(pool_size, pool_size))(x) 92 | x = ZeroPadding2D(padding=(pad, pad))(x) 93 | x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x) 94 | x = BatchNormalization()(x) 95 | 96 | x = Add()([l1, x]) 97 | x = Convolution2D(nClasses, (1, 1), padding='valid') (x) 98 | 99 | beforeCrfRNN = x 100 | 101 | out = CrfRnnLayer(image_dims=(input_height, input_width), 102 | num_classes=nClasses, 103 | theta_alpha=160., 104 | theta_beta=3., 105 | theta_gamma=3., 106 | num_iterations=5, 107 | name='crfrnn')([x, img_input]) 108 | 109 | a = Model(inputs=img_input, outputs=out) 110 | 111 | model = [] 112 | a.outputHeight = a.output_shape[1] 113 | a.outputWidth = a.output_shape[2] 114 | 115 | out = Reshape((a.outputHeight * a.outputWidth, nClasses), input_shape=(nClasses, a.outputHeight, a.outputWidth))(out) 116 | out = Activation('softmax')(out) 117 | 118 
| model = Model(inputs=img_input, outputs=out) 119 | model.outputHeight = a.outputHeight 120 | model.outputWidth = a.outputWidth 121 | 122 | print beforeCrfRNN.shape 123 | print img_input.shape 124 | print out.shape 125 | print x.shape 126 | 127 | model.compile(loss=penalized_loss(bottleNeckFeatures=l4), optimizer="adadelta", metrics=['accuracy']) 128 | 129 | return model -------------------------------------------------------------------------------- /crfrnn_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2017 Sadeep Jayasumana 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | from keras.layers.convolutional import UpSampling2D 25 | from keras.layers.core import Activation, Reshape, Permute 26 | from keras.models import Model 27 | from keras.layers import Conv2D, MaxPooling2D, Input, ZeroPadding2D, \ 28 | Dropout, Conv2DTranspose, Cropping2D, Add, BatchNormalization 29 | from crfrnn_layer import CrfRnnLayer 30 | 31 | 32 | def get_crfrnn_model_def (nClasses , optimizer=None , input_height=360, input_width=480 ): 33 | """ Returns the Keras CRF-RNN model definition. 34 | 35 | Note: the 500 x 500 / Cropping2D remark in the original CRF-RNN model does not apply to 36 | this SegNet-style variant, which has no Cropping2D layers; for other input sizes, adjust the 37 | hard-coded image_dims=(32, 32) of the CrfRnnLayer below.
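    A hypothetical example call, using the CamVid-style defaults that appear elsewhere in
    this repository (11 classes, 360 x 480 inputs):

        model = get_crfrnn_model_def(nClasses=11, input_height=360, input_width=480)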
38 | """ 39 | 40 | channels, height, weight = 3, input_height, input_width 41 | 42 | # Input 43 | input_shape = (height, weight, 3) 44 | img_input = Input(shape=input_shape) 45 | 46 | kernel = 3 47 | filter_size = 64 48 | pad = 1 49 | pool_size = 2 50 | 51 | 52 | # Add plenty of zero padding 53 | x = ZeroPadding2D(padding=(pad, pad))(img_input) 54 | 55 | 56 | # VGG-16 convolution block 1 57 | x = Conv2D(filter_size, (kernel, kernel), padding='valid', name='conv1_1')(x) 58 | x = BatchNormalization()(x) 59 | x = Activation('relu')(x) 60 | x = MaxPooling2D((pool_size, pool_size), name='pool1')(x) 61 | 62 | 63 | # VGG-16 convolution block 1 64 | x = Conv2D(128, (kernel, kernel), padding='valid', name='conv1_2')(x) 65 | x = BatchNormalization()(x) 66 | x = Activation('relu')(x) 67 | x = MaxPooling2D((pool_size, pool_size), name='pool2')(x) 68 | 69 | # VGG-16 convolution block 1 70 | x = Conv2D(256, (kernel, kernel), padding='valid', name='conv1_3')(x) 71 | x = BatchNormalization()(x) 72 | x = Activation('relu')(x) 73 | x = MaxPooling2D((pool_size, pool_size), name='pool3')(x) 74 | pool3 = x 75 | 76 | # VGG-16 convolution block 1 77 | x = Conv2D(512, (kernel, kernel), padding='valid', name='conv1_4')(x) 78 | x = BatchNormalization()(x) 79 | x = Activation('relu')(x) 80 | x = MaxPooling2D((pool_size, pool_size), name='pool4')(x) 81 | pool4 = x 82 | 83 | #decoder 84 | x = ZeroPadding2D(padding=(pad, pad))(x) 85 | x = Conv2D(512, (kernel, kernel), padding='valid', name='conv2_1')(x) 86 | x = BatchNormalization()(x) 87 | 88 | 89 | x = UpSampling2D((pool_size, pool_size))(x) 90 | x = ZeroPadding2D(padding=(pad, pad))(x) 91 | x = Conv2D(256, (kernel, kernel), padding='valid', name='conv2_2')(x) 92 | x = BatchNormalization()(x) 93 | 94 | 95 | 96 | x = UpSampling2D((pool_size, pool_size))(x) 97 | x = ZeroPadding2D(padding=(pad, pad))(x) 98 | x = Conv2D(128, (kernel, kernel), padding='valid', name='conv2_3')(x) 99 | x = BatchNormalization()(x) 100 | 101 | x = UpSampling2D((pool_size, pool_size))(x) 102 | x = ZeroPadding2D(padding=(pad, pad))(x) 103 | x = Conv2D(filter_size, (kernel, kernel), padding='valid', name='conv2_4')(x) 104 | x = BatchNormalization()(x) 105 | 106 | 107 | 108 | 109 | x = Conv2D(nClasses, (1, 1), padding='valid', name='conv3_1')(x) 110 | #x = Conv2D(100,(kernel,kernel),padding='valid')(x) 111 | 112 | #out_height = x.shape[1] 113 | #out_width = x.shape[2] 114 | 115 | #x = Reshape((nClasses,32*32), input_shape=(32, 32, nClasses))(x) 116 | 117 | #x = Permute((2,1))(x) 118 | 119 | #x = Activation('softmax')(x) 120 | print x 121 | #x = UpSampling2D(size=(4,4))(x) 122 | 123 | 124 | output = CrfRnnLayer(image_dims=(32, 32), 125 | num_classes=nClasses, 126 | theta_alpha=160., 127 | theta_beta=3., 128 | theta_gamma=3., 129 | num_iterations=10, 130 | name='crfrnn')([x, img_input]) 131 | 132 | # Build the model 133 | model = Model(img_input, output, name='crfrnn_net') 134 | model.outputHeight = 32 135 | model.outputWidth = 32 136 | return model 137 | -------------------------------------------------------------------------------- /cpp/high_dim_filter.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * MIT License 3 | * 4 | * Copyright (c) 2017 Sadeep Jayasumana 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #include "tensorflow/core/framework/op.h" 26 | #include "tensorflow/core/framework/shape_inference.h" 27 | #include "tensorflow/core/framework/op_kernel.h" 28 | #include "tensorflow/core/framework/tensor_shape.h" 29 | #include "modified_permutohedral.h" 30 | 31 | using namespace tensorflow; 32 | 33 | void compute_spatial_kernel(float * const output_kernel, const int width, 34 | const int height, const float theta_gamma) { 35 | 36 | const int num_pixels = width * height; 37 | for (int p = 0; p < num_pixels; ++p) { 38 | output_kernel[2 * p] = static_cast<float>(p % width) / theta_gamma; 39 | output_kernel[2 * p + 1] = static_cast<float>(p / width) / theta_gamma; 40 | } 41 | } 42 | 43 | void compute_bilateral_kernel(float * const output_kernel, const Tensor& rgb_tensor, 44 | const float theta_alpha, const float theta_beta) { 45 | 46 | const int height = rgb_tensor.dim_size(1); 47 | const int width = rgb_tensor.dim_size(2); 48 | const int num_pixels = height * width; 49 | auto rgb = rgb_tensor.flat<float>(); 50 | 51 | for (int p = 0; p < num_pixels; ++p) { 52 | // Spatial terms 53 | output_kernel[5 * p] = static_cast<float>(p % width) / theta_alpha; 54 | output_kernel[5 * p + 1] = static_cast<float>(p / width) / theta_alpha; 55 | 56 | // Color terms 57 | output_kernel[5 * p + 2] = static_cast<float>(rgb(p) / theta_beta); 58 | output_kernel[5 * p + 3] = static_cast<float>(rgb(num_pixels + p) / theta_beta); 59 | output_kernel[5 * p + 4] = static_cast<float>(rgb(2 * num_pixels + p) / theta_beta); 60 | } 61 | } 62 | 63 | REGISTER_OP("HighDimFilter") 64 | .Attr("bilateral: bool") 65 | .Attr("theta_alpha: float = 1.0") 66 | .Attr("theta_beta: float = 1.0") 67 | .Attr("theta_gamma: float = 1.0") 68 | .Attr("backwards: bool = false") 69 | .Input("raw: float32") 70 | .Input("rgb: float32") 71 | .Output("filtered: float32") 72 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 73 | c->set_output(0, c->input(0)); 74 | return Status::OK(); 75 | }); 76 | 77 | class HighDimFilterOp : public OpKernel { 78 | public: 79 | explicit HighDimFilterOp(OpKernelConstruction* context) : OpKernel(context) { 80 | 81 | OP_REQUIRES_OK(context, 82 | context->GetAttr("bilateral", &bilateral_)); 83 | OP_REQUIRES_OK(context, 84 | context->GetAttr("theta_alpha", &theta_alpha_)); 85 | OP_REQUIRES_OK(context, 86 | context->GetAttr("theta_beta", &theta_beta_)); 87 | OP_REQUIRES_OK(context, 88 | context->GetAttr("theta_gamma", &theta_gamma_)); 89 | OP_REQUIRES_OK(context, 90 | context->GetAttr("backwards", &backwards_)); 91 | } 92 | 93 | void Compute(OpKernelContext* context) override { 94 | 95 | // Grab the unary tensor 96 | const Tensor& input_tensor = context->input(0); 97 | // Grab the RGB image tensor 98 | const
Tensor& image_tensor = context->input(1); 99 | 100 | const int channels = input_tensor.dim_size(0); 101 | const int height = input_tensor.dim_size(1); 102 | const int width = input_tensor.dim_size(2); 103 | const int num_pixels = width * height; 104 | 105 | // Create the output tensor 106 | Tensor* output_tensor = NULL; 107 | OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), 108 | &output_tensor)); 109 | ModifiedPermutohedral mp; 110 | 111 | if (bilateral_) { 112 | float * const kernel_vals = new float[5 * num_pixels]; 113 | compute_bilateral_kernel(kernel_vals, image_tensor, 114 | theta_alpha_, theta_beta_); 115 | mp.init(kernel_vals, 5, num_pixels); 116 | mp.compute(*output_tensor, input_tensor, channels, backwards_); 117 | 118 | delete[] kernel_vals; 119 | } else { 120 | float * const kernel_vals = new float[2 * num_pixels]; 121 | compute_spatial_kernel(kernel_vals, width, height, theta_gamma_); 122 | mp.init(kernel_vals, 2, num_pixels); 123 | mp.compute(*output_tensor, input_tensor, channels, backwards_); 124 | 125 | delete[] kernel_vals; 126 | } 127 | 128 | } 129 | 130 | private: 131 | bool bilateral_; 132 | float theta_alpha_; 133 | float theta_beta_; 134 | float theta_gamma_; 135 | bool backwards_; 136 | }; 137 | 138 | REGISTER_KERNEL_BUILDER(Name("HighDimFilter").Device(DEVICE_CPU), HighDimFilterOp); 139 | -------------------------------------------------------------------------------- /Models/FCN8.py: -------------------------------------------------------------------------------- 1 | 2 | # https://github.com/wkentaro/pytorch-fcn/blob/master/torchfcn/models/fcn32s.py 3 | # fc weights into the 1x1 convs , get_upsampling_weight 4 | 5 | 6 | 7 | from keras.models import * 8 | from keras.layers import * 9 | 10 | 11 | import os 12 | file_path = os.path.dirname( os.path.abspath(__file__) ) 13 | 14 | VGG_Weights_path = file_path+"/../data/vgg16_weights_th_dim_ordering_th_kernels.h5" 15 | 16 | IMAGE_ORDERING = 'channels_first' 17 | 18 | # crop o1 wrt o2 19 | def crop( o1 , o2 , i ): 20 | o_shape2 = Model( i , o2 ).output_shape 21 | outputHeight2 = o_shape2[2] 22 | outputWidth2 = o_shape2[3] 23 | 24 | o_shape1 = Model( i , o1 ).output_shape 25 | outputHeight1 = o_shape1[2] 26 | outputWidth1 = o_shape1[3] 27 | 28 | cx = abs( outputWidth1 - outputWidth2 ) 29 | cy = abs( outputHeight2 - outputHeight1 ) 30 | 31 | if outputWidth1 > outputWidth2: 32 | o1 = Cropping2D( cropping=((0,0) , ( 0 , cx )), data_format=IMAGE_ORDERING )(o1) 33 | else: 34 | o2 = Cropping2D( cropping=((0,0) , ( 0 , cx )), data_format=IMAGE_ORDERING )(o2) 35 | 36 | if outputHeight1 > outputHeight2 : 37 | o1 = Cropping2D( cropping=((0,cy) , ( 0 , 0 )), data_format=IMAGE_ORDERING )(o1) 38 | else: 39 | o2 = Cropping2D( cropping=((0, cy ) , ( 0 , 0 )), data_format=IMAGE_ORDERING )(o2) 40 | 41 | return o1 , o2 42 | 43 | def FCN8( nClasses , input_height=416, input_width=608 , vgg_level=3): 44 | 45 | # assert input_height%32 == 0 46 | # assert input_width%32 == 0 47 | 48 | # https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5 49 | img_input = Input(shape=(3,input_height,input_width)) 50 | 51 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1', data_format=IMAGE_ORDERING )(img_input) 52 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2', data_format=IMAGE_ORDERING )(x) 53 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', data_format=IMAGE_ORDERING )(x) 54 | f1 = x 55 | # Block 
2 56 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1', data_format=IMAGE_ORDERING )(x) 57 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2', data_format=IMAGE_ORDERING )(x) 58 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', data_format=IMAGE_ORDERING )(x) 59 | f2 = x 60 | 61 | # Block 3 62 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1', data_format=IMAGE_ORDERING )(x) 63 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2', data_format=IMAGE_ORDERING )(x) 64 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3', data_format=IMAGE_ORDERING )(x) 65 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', data_format=IMAGE_ORDERING )(x) 66 | f3 = x 67 | 68 | # Block 4 69 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1', data_format=IMAGE_ORDERING )(x) 70 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2', data_format=IMAGE_ORDERING )(x) 71 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3', data_format=IMAGE_ORDERING )(x) 72 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', data_format=IMAGE_ORDERING )(x) 73 | f4 = x 74 | 75 | # Block 5 76 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1', data_format=IMAGE_ORDERING )(x) 77 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2', data_format=IMAGE_ORDERING )(x) 78 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3', data_format=IMAGE_ORDERING )(x) 79 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool', data_format=IMAGE_ORDERING )(x) 80 | f5 = x 81 | 82 | x = Flatten(name='flatten')(x) 83 | x = Dense(4096, activation='relu', name='fc1')(x) 84 | x = Dense(4096, activation='relu', name='fc2')(x) 85 | x = Dense( 1000 , activation='softmax', name='predictions')(x) 86 | 87 | vgg = Model( img_input , x ) 88 | vgg.load_weights(VGG_Weights_path) 89 | 90 | o = f5 91 | 92 | o = ( Conv2D( 4096 , ( 7 , 7 ) , activation='relu' , padding='same', data_format=IMAGE_ORDERING))(o) 93 | o = Dropout(0.5)(o) 94 | o = ( Conv2D( 4096 , ( 1 , 1 ) , activation='relu' , padding='same', data_format=IMAGE_ORDERING))(o) 95 | o = Dropout(0.5)(o) 96 | 97 | o = ( Conv2D( nClasses , ( 1 , 1 ) ,kernel_initializer='he_normal' , data_format=IMAGE_ORDERING))(o) 98 | o = Conv2DTranspose( nClasses , kernel_size=(4,4) , strides=(2,2) , use_bias=False, data_format=IMAGE_ORDERING )(o) 99 | 100 | o2 = f4 101 | o2 = ( Conv2D( nClasses , ( 1 , 1 ) ,kernel_initializer='he_normal' , data_format=IMAGE_ORDERING))(o2) 102 | 103 | o , o2 = crop( o , o2 , img_input ) 104 | 105 | o = Add()([ o , o2 ]) 106 | 107 | o = Conv2DTranspose( nClasses , kernel_size=(4,4) , strides=(2,2) , use_bias=False, data_format=IMAGE_ORDERING )(o) 108 | o2 = f3 109 | o2 = ( Conv2D( nClasses , ( 1 , 1 ) ,kernel_initializer='he_normal' , data_format=IMAGE_ORDERING))(o2) 110 | o2 , o = crop( o2 , o , img_input ) 111 | o = Add()([ o2 , o ]) 112 | 113 | 114 | o = Conv2DTranspose( nClasses , kernel_size=(16,16) , strides=(8,8) , use_bias=False, data_format=IMAGE_ORDERING )(o) 115 | 116 | o_shape = Model(img_input , o ).output_shape 117 | 118 | outputHeight = o_shape[2] 119 | outputWidth = o_shape[3] 120 | 121 | o = (Reshape(( -1 , outputHeight*outputWidth )))(o) 122 | o = (Permute((2, 1)))(o) 123 | o = (Activation('softmax'))(o) 124 | model = Model( 
img_input , o ) 125 | model.outputWidth = outputWidth 126 | model.outputHeight = outputHeight 127 | 128 | return model 129 | 130 | 131 | 132 | if __name__ == '__main__': 133 | m = FCN8( 101 ) 134 | from keras.utils import plot_model 135 | plot_model( m , show_shapes=True , to_file='model.png') 136 | -------------------------------------------------------------------------------- /crfrnn_layer.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2017 Sadeep Jayasumana 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | """ 24 | 25 | import numpy as np 26 | import tensorflow as tf 27 | from keras.engine.topology import Layer 28 | custom_module = tf.load_op_library('./cpp/high_dim_filter.so') 29 | import high_dim_filter_grad # Register gradients for the custom op 30 | 31 | 32 | class CrfRnnLayer(Layer): 33 | """ Implements the CRF-RNN layer described in: 34 | 35 | Conditional Random Fields as Recurrent Neural Networks, 36 | S. Zheng, S. Jayasumana, B. Romera-Paredes, V. Vineet, Z. Su, D. Du, C. Huang and P. 
Torr, 37 | ICCV 2015 38 | """ 39 | 40 | def __init__(self, image_dims, num_classes, 41 | theta_alpha, theta_beta, theta_gamma, 42 | num_iterations, **kwargs): 43 | self.image_dims = image_dims 44 | self.num_classes = num_classes 45 | self.theta_alpha = theta_alpha 46 | self.theta_beta = theta_beta 47 | self.theta_gamma = theta_gamma 48 | self.num_iterations = num_iterations 49 | self.spatial_ker_weights = None 50 | self.bilateral_ker_weights = None 51 | self.compatibility_matrix = None 52 | super(CrfRnnLayer, self).__init__(**kwargs) 53 | 54 | def build(self, input_shape): 55 | # Weights of the spatial kernel 56 | self.spatial_ker_weights = self.add_weight(name='spatial_ker_weights', 57 | shape=(self.num_classes, self.num_classes), 58 | initializer='uniform', 59 | trainable=True) 60 | 61 | # Weights of the bilateral kernel 62 | self.bilateral_ker_weights = self.add_weight(name='bilateral_ker_weights', 63 | shape=(self.num_classes, self.num_classes), 64 | initializer='uniform', 65 | trainable=True) 66 | 67 | # Compatibility matrix 68 | self.compatibility_matrix = self.add_weight(name='compatibility_matrix', 69 | shape=(self.num_classes, self.num_classes), 70 | initializer='uniform', 71 | trainable=True) 72 | 73 | super(CrfRnnLayer, self).build(input_shape) 74 | 75 | def call(self, inputs): 76 | print inputs[0][0,:,:,:] 77 | unaries = tf.transpose(inputs[0][0, :, :, :], perm=(2, 0, 1)) 78 | rgb = tf.transpose(inputs[1][0, :, :, :], perm=(2, 0, 1)) 79 | 80 | c, h, w = self.num_classes, self.image_dims[0], self.image_dims[1] 81 | all_ones = np.ones((c, h, w), dtype=np.float32) 82 | 83 | # Prepare filter normalization coefficients 84 | spatial_norm_vals = custom_module.high_dim_filter(all_ones, rgb, bilateral=False, 85 | theta_gamma=self.theta_gamma) 86 | bilateral_norm_vals = custom_module.high_dim_filter(all_ones, rgb, bilateral=True, 87 | theta_alpha=self.theta_alpha, 88 | theta_beta=self.theta_beta) 89 | q_values = unaries 90 | 91 | for i in range(self.num_iterations): 92 | softmax_out = tf.nn.softmax(q_values, dim=0) 93 | 94 | # Spatial filtering 95 | spatial_out = custom_module.high_dim_filter(softmax_out, rgb, bilateral=False, 96 | theta_gamma=self.theta_gamma) 97 | spatial_out = spatial_out / spatial_norm_vals 98 | 99 | # Bilateral filtering 100 | bilateral_out = custom_module.high_dim_filter(softmax_out, rgb, bilateral=True, 101 | theta_alpha=self.theta_alpha, 102 | theta_beta=self.theta_beta) 103 | bilateral_out = bilateral_out / bilateral_norm_vals 104 | 105 | # Weighting filter outputs 106 | message_passing = (tf.matmul(self.spatial_ker_weights, 107 | tf.reshape(spatial_out, (c, -1))) + 108 | tf.matmul(self.bilateral_ker_weights, 109 | tf.reshape(bilateral_out, (c, -1)))) 110 | 111 | # Compatibility transform 112 | pairwise = tf.matmul(self.compatibility_matrix, message_passing) 113 | 114 | # Adding unary potentials 115 | pairwise = tf.reshape(pairwise, (c, h, w)) 116 | q_values = unaries - pairwise 117 | 118 | return tf.transpose(tf.reshape(q_values, (1, c, h, w)), perm=(0, 2, 3, 1)) 119 | 120 | def compute_output_shape(self, input_shape): 121 | return input_shape 122 | -------------------------------------------------------------------------------- /Models/VGGUnet.py: -------------------------------------------------------------------------------- 1 | from keras.models import * 2 | from keras.layers import * 3 | 4 | import os 5 | file_path = os.path.dirname( os.path.abspath(__file__) ) 6 | 7 | 8 | VGG_Weights_path = file_path+"/../data/vgg16_weights_th_dim_ordering_th_kernels.h5" 
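# The VGG16 weights file is expected in ../data; a minimal sketch of fetching
# it if missing (assumes network access; the URL is the fchollet release
# already cited in the comment below):
#
#   from keras.utils.data_utils import get_file
#   VGG_Weights_path = get_file(
#       'vgg16_weights_th_dim_ordering_th_kernels.h5',
#       origin='https://github.com/fchollet/deep-learning-models/releases/'
#              'download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5')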
9 | 10 | IMAGE_ORDERING = 'channels_first' 11 | 12 | 13 | def VGGUnet( n_classes , input_height=416, input_width=608 , vgg_level=3): 14 | 15 | assert input_height%32 == 0 16 | assert input_width%32 == 0 17 | 18 | # https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5 19 | img_input = Input(shape=(3,input_height,input_width)) 20 | 21 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1', data_format=IMAGE_ORDERING )(img_input) 22 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2', data_format=IMAGE_ORDERING )(x) 23 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', data_format=IMAGE_ORDERING )(x) 24 | f1 = x 25 | # Block 2 26 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1', data_format=IMAGE_ORDERING )(x) 27 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2', data_format=IMAGE_ORDERING )(x) 28 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', data_format=IMAGE_ORDERING )(x) 29 | f2 = x 30 | 31 | # Block 3 32 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1', data_format=IMAGE_ORDERING )(x) 33 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2', data_format=IMAGE_ORDERING )(x) 34 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3', data_format=IMAGE_ORDERING )(x) 35 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', data_format=IMAGE_ORDERING )(x) 36 | f3 = x 37 | 38 | # Block 4 39 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1', data_format=IMAGE_ORDERING )(x) 40 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2', data_format=IMAGE_ORDERING )(x) 41 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3', data_format=IMAGE_ORDERING )(x) 42 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', data_format=IMAGE_ORDERING )(x) 43 | f4 = x 44 | 45 | # Block 5 46 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1', data_format=IMAGE_ORDERING )(x) 47 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2', data_format=IMAGE_ORDERING )(x) 48 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3', data_format=IMAGE_ORDERING )(x) 49 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool', data_format=IMAGE_ORDERING )(x) 50 | f5 = x 51 | 52 | x = Flatten(name='flatten')(x) 53 | x = Dense(4096, activation='relu', name='fc1')(x) 54 | x = Dense(4096, activation='relu', name='fc2')(x) 55 | x = Dense( 1000 , activation='softmax', name='predictions')(x) 56 | 57 | vgg = Model( img_input , x ) 58 | vgg.load_weights(VGG_Weights_path) 59 | 60 | levels = [f1 , f2 , f3 , f4 , f5 ] 61 | 62 | o = f4 63 | 64 | o = ( ZeroPadding2D( (1,1) , data_format=IMAGE_ORDERING ))(o) 65 | o = ( Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) 66 | o = ( BatchNormalization())(o) 67 | 68 | o = (UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) 69 | o = ( concatenate([ o ,f3],axis=1 ) ) 70 | o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) 71 | o = ( Conv2D( 256, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) 72 | o = ( BatchNormalization())(o) 73 | 74 | o = (UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) 75 | o = ( concatenate([o,f2],axis=1 ) ) 76 | o = ( ZeroPadding2D((1,1) , 
data_format=IMAGE_ORDERING ))(o) 77 | o = ( Conv2D( 128 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING ) )(o) 78 | o = ( BatchNormalization())(o) 79 | 80 | o = (UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) 81 | o = ( concatenate([o,f1],axis=1 ) ) 82 | o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) 83 | o = ( Conv2D( 64 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING ))(o) 84 | o = ( BatchNormalization())(o) 85 | 86 | 87 | o = Conv2D( n_classes , (3, 3) , padding='same', data_format=IMAGE_ORDERING )( o ) 88 | o_shape = Model(img_input , o ).output_shape 89 | outputHeight = o_shape[2] 90 | outputWidth = o_shape[3] 91 | 92 | o = (Reshape(( n_classes , outputHeight*outputWidth )))(o) 93 | o = (Permute((2, 1)))(o) 94 | o = (Activation('softmax'))(o) 95 | model = Model( img_input , o ) 96 | model.outputWidth = outputWidth 97 | model.outputHeight = outputHeight 98 | 99 | 100 | 101 | return model 102 | 103 | 104 | def VGGUnet2( n_classes , input_height=416, input_width=608 , vgg_level=3): 105 | 106 | assert input_height%32 == 0 107 | assert input_width%32 == 0 108 | 109 | # https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5 110 | img_input = Input(shape=(3,input_height,input_width)) 111 | 112 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1', data_format=IMAGE_ORDERING )(img_input) 113 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2', data_format=IMAGE_ORDERING )(x) 114 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', data_format=IMAGE_ORDERING )(x) 115 | f1 = x 116 | # Block 2 117 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1', data_format=IMAGE_ORDERING )(x) 118 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2', data_format=IMAGE_ORDERING )(x) 119 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', data_format=IMAGE_ORDERING )(x) 120 | f2 = x 121 | 122 | # Block 3 123 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1', data_format=IMAGE_ORDERING )(x) 124 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2', data_format=IMAGE_ORDERING )(x) 125 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3', data_format=IMAGE_ORDERING )(x) 126 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', data_format=IMAGE_ORDERING )(x) 127 | f3 = x 128 | 129 | # Block 4 130 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1', data_format=IMAGE_ORDERING )(x) 131 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2', data_format=IMAGE_ORDERING )(x) 132 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3', data_format=IMAGE_ORDERING )(x) 133 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', data_format=IMAGE_ORDERING )(x) 134 | f4 = x 135 | 136 | # Block 5 137 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1', data_format=IMAGE_ORDERING )(x) 138 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2', data_format=IMAGE_ORDERING )(x) 139 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3', data_format=IMAGE_ORDERING )(x) 140 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool', data_format=IMAGE_ORDERING )(x) 141 | f5 = x 142 | 143 | x = Flatten(name='flatten')(x) 144 | x = Dense(4096, 
activation='relu', name='fc1')(x) 145 | x = Dense(4096, activation='relu', name='fc2')(x) 146 | x = Dense( 1000 , activation='softmax', name='predictions')(x)  # 1000 classes, to match the VGG16 weights loaded below 147 | 148 | vgg = Model( img_input , x ) 149 | vgg.load_weights(VGG_Weights_path) 150 | 151 | levels = [f1 , f2 , f3 , f4 , f5 ] 152 | 153 | o = f4 154 | 155 | o = ( ZeroPadding2D( (1,1) , data_format=IMAGE_ORDERING ))(o) 156 | o = ( Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) 157 | o = ( BatchNormalization())(o) 158 | 159 | o = (UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) 160 | o = ( concatenate([ o ,f3],axis=1 ) ) 161 | o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) 162 | o = ( Conv2D( 256, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) 163 | o = ( BatchNormalization())(o) 164 | 165 | o = (UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) 166 | o = ( concatenate([o,f2],axis=1 ) ) 167 | o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) 168 | o = ( Conv2D( 128 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING ) )(o) 169 | o = ( BatchNormalization())(o) 170 | 171 | o = (UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) 172 | # o = ( concatenate([o,f1],axis=1 ) ) 173 | o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) 174 | o = ( Conv2D( 64 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING ))(o) 175 | o = ( BatchNormalization())(o) 176 | 177 | 178 | o = Conv2D( n_classes , (3, 3) , padding='same', data_format=IMAGE_ORDERING )( o ) 179 | o_shape = Model(img_input , o ).output_shape 180 | outputHeight = o_shape[2] 181 | outputWidth = o_shape[3] 182 | 183 | o = (Reshape(( n_classes , outputHeight*outputWidth )))(o) 184 | o = (Permute((2, 1)))(o) 185 | o = (Activation('softmax'))(o) 186 | model = Model( img_input , o ) 187 | model.outputWidth = outputWidth 188 | model.outputHeight = outputHeight 189 | 190 | 191 | 192 | return model 193 | 194 | -------------------------------------------------------------------------------- /cpp/modified_permutohedral.cc: -------------------------------------------------------------------------------- 1 | /* 2 | This file contains a modified version of the "permutohedral.cpp" code 3 | available at http://graphics.stanford.edu/projects/drf/. Copyright notice of 4 | the original file is included below: 5 | 6 | Copyright (c) 2013, Philipp Krähenbühl 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | * Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | * Neither the name of the Stanford University nor the 17 | names of its contributors may be used to endorse or promote products 18 | derived from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY Philipp Krähenbühl ''AS IS'' AND ANY 21 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED.
IN NO EVENT SHALL Philipp Krähenbühl BE LIABLE FOR ANY 24 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 27 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | 32 | //#include "stdafx.h" 33 | #include "modified_permutohedral.h" 34 | 35 | #ifdef __SSE__ 36 | // SSE Permutoheral lattice 37 | # define SSE_PERMUTOHEDRAL 38 | #endif 39 | 40 | #if defined(SSE_PERMUTOHEDRAL) 41 | # include 42 | # include 43 | # ifdef __SSE4_1__ 44 | # include 45 | # endif 46 | #endif 47 | 48 | 49 | using namespace tensorflow; 50 | 51 | /************************************************/ 52 | /*** Hash Table ***/ 53 | /************************************************/ 54 | 55 | class HashTableCopy{ 56 | protected: 57 | size_t key_size_, filled_, capacity_; 58 | std::vector< short > keys_; 59 | std::vector< int > table_; 60 | void grow(){ 61 | // Create the new memory and copy the values in 62 | int old_capacity = capacity_; 63 | capacity_ *= 2; 64 | std::vector old_keys( (old_capacity+10)*key_size_ ); 65 | std::copy( keys_.begin(), keys_.end(), old_keys.begin() ); 66 | std::vector old_table( capacity_, -1 ); 67 | 68 | // Swap the memory 69 | table_.swap( old_table ); 70 | keys_.swap( old_keys ); 71 | 72 | // Reinsert each element 73 | for( int i=0; i= 0){ 75 | int e = old_table[i]; 76 | size_t h = hash( getKey(e) ) % capacity_; 77 | for(; table_[h] >= 0; h = h= capacity_) grow(); 101 | // Get the hash value 102 | size_t h = hash( k ) % capacity_; 103 | // Find the element with he right key, using linear probing 104 | while(1){ 105 | int e = table_[h]; 106 | if (e==-1){ 107 | if (create){ 108 | // Insert a new key and return the new id 109 | for( size_t i=0; i0; j-- ){ 204 | __m128 cf = f[j-1]*scale_factor[j-1]; 205 | elevated[j] = sm - _mm_set1_ps(j)*cf; 206 | sm += cf; 207 | } 208 | elevated[0] = sm; 209 | 210 | // Find the closest 0-colored simplex through rounding 211 | __m128 sum = Zero; 212 | for( int i=0; i<=d_; i++ ){ 213 | __m128 v = invdplus1 * elevated[i]; 214 | #ifdef __SSE4_1__ 215 | v = _mm_round_ps( v, _MM_FROUND_TO_NEAREST_INT ); 216 | #else 217 | v = _mm_cvtepi32_ps( _mm_cvtps_epi32( v ) ); 218 | #endif 219 | rem0[i] = v*dplus1; 220 | sum += v; 221 | } 222 | 223 | // Find the simplex we are in and store it in rank (where rank describes what position coorinate i has in the sorted order of the features values) 224 | for( int i=0; i<=d_; i++ ) 225 | rank[i] = Zero; 226 | for( int i=0; i0; j-- ){ 367 | float cf = f[j-1]*scale_factor[j-1]; 368 | elevated[j] = sm - j*cf; 369 | sm += cf; 370 | } 371 | elevated[0] = sm; 372 | 373 | // Find the closest 0-colored simplex through rounding 374 | float down_factor = 1.0f / (d_+1); 375 | float up_factor = (d_+1); 376 | int sum = 0; 377 | for( int i=0; i<=d_; i++ ){ 378 | //int rd1 = round( down_factor * elevated[i]); 379 | int rd2; 380 | float v = down_factor * elevated[i]; 381 | float up = ceilf(v)*up_factor; 382 | float down = floorf(v)*up_factor; 383 | if (up - elevated[i] < elevated[i] - down) rd2 = (short)up; 384 | else rd2 = (short)down; 385 | 386 | //if(rd1!=rd2) 387 | // break; 388 | 389 | rem0[i] = rd2; 390 | sum += rd2*down_factor; 391 | } 392 | 393 | // Find the simplex we are in 
and store it in rank (where rank describes what position coorinate i has in the sorted order of the features values) 394 | for( int i=0; i<=d_; i++ ) 395 | rank[i] = 0; 396 | for( int i=0; i d_ ){ 413 | rank[i] -= d_+1; 414 | rem0[i] -= d_+1; 415 | } 416 | } 417 | 418 | // Compute the barycentric coordinates (p.10 in [Adams etal 2010]) 419 | for( int i=0; i<=d_+1; i++ ) 420 | barycentric[i] = 0; 421 | for( int i=0; i<=d_; i++ ){ 422 | float v = (elevated[i] - rem0[i])*down_factor; 423 | barycentric[d_-rank[i] ] += v; 424 | barycentric[d_-rank[i]+1] -= v; 425 | } 426 | // Wrap around 427 | barycentric[0] += 1.0 + barycentric[d_+1]; 428 | 429 | // Compute all vertices and their offset 430 | for( int remainder=0; remainder<=d_; remainder++ ){ 431 | for( int i=0; i 0 (used for blurring) 481 | float * values = new float[ (M_+2)*value_size ]; 482 | float * new_values = new float[ (M_+2)*value_size ]; 483 | 484 | for( int i=0; i<(M_+2)*value_size; i++ ) 485 | values[i] = new_values[i] = 0; 486 | 487 | auto in = in_tensor.flat(); 488 | // Splatting 489 | for( int i=0; i=0; reverse?j--:j++ ){ 499 | for( int i=0; i(); 515 | 516 | // Slicing 517 | for( int i=0; i(); 542 | auto out = out_tensor.flat(); 543 | 544 | const int sse_value_size = (value_size-1)*sizeof(float) / sizeof(__m128) + 1; 545 | // Shift all values by 1 such that -1 -> 0 (used for blurring) 546 | __m128 * sse_val = (__m128*) _mm_malloc( sse_value_size*sizeof(__m128), 16 ); 547 | __m128 * values = (__m128*) _mm_malloc( (M_+2)*sse_value_size*sizeof(__m128), 16 ); 548 | __m128 * new_values = (__m128*) _mm_malloc( (M_+2)*sse_value_size*sizeof(__m128), 16 ); 549 | 550 | __m128 Zero = _mm_set1_ps( 0 ); 551 | 552 | for( int i=0; i<(M_+2)*sse_value_size; i++ ) 553 | values[i] = new_values[i] = Zero; 554 | for( int i=0; i=0; reverse?j--:j++ ){ 578 | for( int i=0; i
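// The compute paths above follow the standard permutohedral pipeline: splat
// input values onto lattice vertices with barycentric weights, blur along
// each of the d_+1 lattice directions, and slice the filtered values back out
// at the original positions. A minimal sketch of one blur sweep, following
// Krähenbühl's original permutohedral.cpp and assuming the neighbour table
// (blur_neighbors_, with fields n1/n2) built in init(); indices are offset by
// one because slot 0 holds the zero value used for out-of-lattice reads, and
// `j` is the outer loop index over the lattice directions:
//
//   for (int i = 0; i < M_; ++i) {
//     const float* old_val = values + (i + 1) * value_size;
//     float*       new_val = new_values + (i + 1) * value_size;
//     const float* n1_val  = values + (blur_neighbors_[j * M_ + i].n1 + 1) * value_size;
//     const float* n2_val  = values + (blur_neighbors_[j * M_ + i].n2 + 1) * value_size;
//     for (int k = 0; k < value_size; ++k)
//       new_val[k] = old_val[k] + 0.5f * (n1_val[k] + n2_val[k]);
//   }
//   std::swap(values, new_values);   // ping-pong the buffers between directions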