├── .gitignore ├── LICENSE ├── misc │   ├── save_colorful_grayscale.m │   ├── report.m │   └── Evaluation.m ├── src │   ├── train.py │   ├── train_small.py │   ├── test.py │   ├── demo.py │   ├── util.py │   ├── Dataloader.py │   └── Model.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *pyc 2 | *npy 3 | *txt 4 | *DS_Store 5 | data 6 | model 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Yuliang (Zack) Zou 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /misc/save_colorful_grayscale.m: -------------------------------------------------------------------------------- 1 | % Description: save grayscale segmentation to RGB 2 | % 3 | % Author: Chen Gao 4 | % chengao@umich.edu 5 | % 6 | % Date: March 7, 2017 7 | % 8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 9 | 10 | function save_colorful_grayscale(in_directory,out_directory) 11 | 12 | cmap = VOClabelcolormap(256); 13 | contents = dir([in_directory,'*.png']); 14 | 15 | 16 | for idx = 1:numel(contents) 17 | 18 | idx 19 | filename = contents(idx).name; 20 | img_gray = imread([in_directory filename]); 21 | imwrite(img_gray,cmap,[out_directory filename]); 22 | 23 | end 24 | 25 | 26 | function cmap = VOClabelcolormap(N) 27 | 28 | if nargin==0 29 | N = 256; 30 | end 31 | cmap = zeros(N,3); 32 | for i=1:N 33 | id = i-1; r=0;g=0;b=0; 34 | for j=0:7 35 | r = bitor(r, bitshift(bitget(id,1),7 - j)); 36 | g = bitor(g, bitshift(bitget(id,2),7 - j)); 37 | b = bitor(b, bitshift(bitget(id,3),7 - j)); 38 | id = bitshift(id,-3); 39 | end 40 | cmap(i,1)=r; cmap(i,2)=g; cmap(i,3)=b; 41 | end 42 | cmap = cmap / 255; -------------------------------------------------------------------------------- /misc/report.m: -------------------------------------------------------------------------------- 1 | run('/Users/chengao/Desktop/tf_fcn-master/data/VOCdevkit/VOCcode/VOCinit.m') 2 | [accuracies,avacc,conf,rawcounts] = Evaluation(VOCopts,'32rgb'); 3 | 4 | figure(1) 5 | hBar = bar(accuracies / 100); 6 | Xt = 1 : length(accuracies); 7 | Xl = [0 22]; 8 | set(gca, 'XTick', Xt, 'XLim', Xl,'FontSize', 15); 9 | 10 | label = [' background'; 11 | ' aeroplane'; 12 | ' bicycle'; 13 | ' bird'; 14 | ' boat'; 15 | ' bottle'; 16 | ' bus';
17 | ' car'; 18 | ' cat'; 19 | ' chair'; 20 | ' cow'; 21 | 'diningtable'; 22 | ' dog'; 23 | ' horse'; 24 | ' motorbike'; 25 | ' person'; 26 | 'pottedplant'; 27 | ' sheep'; 28 | ' sofa'; 29 | ' train'; 30 | ' tvmonitor']; 31 | 32 | ax = axis; 33 | axis(axis); 34 | Yl = ax(3:4); % Y-axis limits 35 | 36 | t = text(Xt,Yl(1) * ones(1,length(Xt)),label(1:21,:),'FontSize',20); 37 | set(t,'HorizontalAlignment','right','VerticalAlignment','top', ... 38 | 'Rotation',45); 39 | 40 | set(gca,'XTickLabel','') 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | # Training code 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-02-21 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from Model import FCN32, FCN16, FCN8 9 | from Dataloader import Dataloader 10 | import ipdb 11 | 12 | 13 | config = { 14 | 'batch_num':5, 15 | 'iter':100000, 16 | 'num_classes':21, 17 | 'max_size':(640,640), 18 | 'weight_decay': 0.0005, 19 | 'base_lr': 0.0001, 20 | 'momentum': 0.9 21 | } 22 | 23 | if __name__ == '__main__': 24 | # Load pre-trained model 25 | model_path = '../model/FCN32_adam_iter_20000_500.npy' 26 | data_dict = np.load(model_path).item() 27 | 28 | # Set up model and data loader 29 | model = FCN16(config) 30 | loss_list = [] 31 | f = open('./FCN16.txt', 'w') 32 | DECAY = False # decay flag 33 | init = tf.initialize_all_variables() 34 | 35 | data_loader = Dataloader('train', config) 36 | 37 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as session: 38 | session.run(init) 39 | model.load(data_dict, session) 40 | saver = tf.train.Saver() 41 | 42 | loss = 0 43 | for i in xrange(config['iter']): 44 | minibatch = data_loader.get_next_minibatch() 45 | feed_dict = {model.img: minibatch[0], 46 | model.seg: minibatch[1], 47 | model.mask: minibatch[2]} 48 | _, temp_loss = session.run([model.train_op, model.loss], feed_dict=feed_dict) 49 | loss += temp_loss 50 | 51 | loss_list.append(temp_loss) 52 | f.write(str(temp_loss) + '\n') 53 | print str(i) + ': ' + str(temp_loss) 54 | 55 | # Learning rate decay 56 | if len(loss_list) > 100 and not DECAY: 57 | avg = sum(loss_list[-100::]) / 100.0 58 | if avg <= 0.4: 59 | model.base_lr /= 10 60 | DECAY = True 61 | 62 | # Monitor 63 | if i % 20 == 0 and i != 0: 64 | loss /= 20 65 | print 'Iter: {}'.format(i) + '/{}'.format(config['iter']) + ', loss = ' + str(loss) 66 | loss = 0 67 | 68 | # Write to saver 69 | if i % 5000 == 0 and i != 0: 70 | saver.save(session, '../model/FCN16_adam_iter_'+str(i)+'.ckpt') 71 | 72 | f.close() 73 | 74 | -------------------------------------------------------------------------------- /src/train_small.py: -------------------------------------------------------------------------------- 1 | # Training code for small size input 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-02-21 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from Model import FCN32, FCN16, FCN8 9 | from Dataloader import Dataloader_small 10 | import ipdb 11 | 12 | 13 | config = { 14 | 'batch_num':20, 15 | 'iter':100000, 16 | 'num_classes':21, 17 | 'max_size':(256,256), 18 | 'weight_decay': 0.005, 19 | 'base_lr': 0.0001, 20 | 'momentum': 0.9 21 | } 22 | 23 | if __name__ == '__main__': 24 | # Load pre-trained model 25 | model_path = '../model/VGG_imagenet.npy' 26 | data_dict = np.load(model_path).item() 27 | 28 | # Set up model and data loader 29 | model = FCN32(config) 30 
| loss_list = [] 31 | f = open('./FCN32_small.txt', 'w') 32 | DECAY = False # decay flag 33 | init = tf.initialize_all_variables() 34 | 35 | data_loader = Dataloader_small('train', config) 36 | 37 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as session: 38 | session.run(init) 39 | model.load(data_dict, session) 40 | saver = tf.train.Saver() 41 | 42 | loss = 0 43 | for i in xrange(config['iter']): 44 | minibatch = data_loader.get_next_minibatch() 45 | feed_dict = {model.img: minibatch[0], 46 | model.seg: minibatch[1], 47 | model.mask: minibatch[2]} 48 | _, temp_loss = session.run([model.train_op, model.loss], feed_dict=feed_dict) 49 | loss += temp_loss 50 | 51 | loss_list.append(temp_loss) 52 | f.write(str(temp_loss) + '\n') 53 | print str(i) + ': ' + str(temp_loss) 54 | 55 | # Learning rate decay 56 | if len(loss_list) > 100 and not DECAY: 57 | avg = sum(loss_list[-100::]) / 100.0 58 | if avg <= 0.4: 59 | model.base_lr /= 10 60 | DECAY = True 61 | 62 | # Monitor 63 | if i % 20 == 0 and i != 0: 64 | loss /= 20 65 | print 'Iter: {}'.format(i) + '/{}'.format(config['iter']) + ', loss = ' + str(loss) 66 | loss = 0 67 | 68 | # Write to saver 69 | if i % 5000 == 0 and i != 0: 70 | saver.save(session, '../model/FCN32_small_adam_iter_'+str(i)+'.ckpt') 71 | 72 | f.close() 73 | 74 | -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- 1 | # Generate segmentation results 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-03-07 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from Model import FCN32_test, FCN16_test, FCN8_test 9 | from Dataloader import Dataloader, Dataloader_test 10 | from util import get_original_size, seg_gray_to_rgb 11 | import cv2 12 | from os import makedirs 13 | from os.path import exists, join 14 | import ipdb 15 | 16 | 17 | config = { 18 | 'batch_num':1, 19 | 'iter':100000, 20 | 'num_classes':21, 21 | 'max_size':(640,640), 22 | 'weight_decay': 0.0005, 23 | 'base_lr': 0.0001, 24 | 'momentum': 0.9 25 | } 26 | 27 | if __name__ == '__main__': 28 | # Specify which set to test 29 | split = 'val' 30 | model = FCN8_test(config) 31 | # Import, since we don't want the random shuffle 32 | data_loader = Dataloader_test(split, config) 33 | 34 | saver = tf.train.Saver() 35 | ckpt = '../model/FCN8_adam_iter_10000.ckpt' 36 | ID = ckpt.split('/')[-1][:-5] 37 | 38 | res_dir = '../result/' 39 | dump_path = join(res_dir, ID) 40 | dump_path = join(dump_path, split) 41 | rgb_path = join(dump_path, 'rgb') 42 | gray_path = join(dump_path, 'gray') 43 | 44 | if not exists(rgb_path): 45 | makedirs(rgb_path) 46 | if not exists(gray_path): 47 | makedirs(gray_path) 48 | 49 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as session: 50 | saver.restore(session, ckpt) 51 | print 'Model restored.' 
52 | 53 | # Iterate the whole set once 54 | for i in range(data_loader.num_images): 55 | minibatch = data_loader.get_minibatch_at(i) 56 | feed_dict = {model.img: minibatch[0]} 57 | pred = session.run(model.get_output('deconv'), feed_dict=feed_dict) 58 | 59 | mask = minibatch[2][0] 60 | seg = np.argmax(pred[0], axis=2) 61 | 62 | row, col = minibatch[3] 63 | seg_valid = np.zeros((row, col)) 64 | seg_valid[:, :] = seg[0:row, 0:col] 65 | seg_rgb = seg_gray_to_rgb(seg_valid, data_loader.gray_to_rgb) 66 | 67 | im_name = data_loader._seg_at(i).split('/')[-1] 68 | cv2.imwrite(join(rgb_path, im_name), seg_rgb[:,:,::-1]) 69 | cv2.imwrite(join(gray_path, im_name), seg_valid) 70 | 71 | print str(i) + '/' + str(data_loader.num_images) + ' done!' 72 | -------------------------------------------------------------------------------- /src/demo.py: -------------------------------------------------------------------------------- 1 | # Demo 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-03-03 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from Model import FCN32_test, FCN16_test, FCN8_test 9 | from Dataloader import Dataloader, Dataloader_small 10 | import matplotlib.pyplot as plt 11 | import cv2 12 | import ipdb 13 | 14 | 15 | # BGR mean pixel value 16 | MEAN_PIXEL = np.array([103.939, 116.779, 123.68]) 17 | 18 | CLASSES = ('__background__', 19 | 'aeroplane', 'bicycle', 'bird', 'boat', 20 | 'bottle', 'bus', 'car', 'cat', 'chair', 21 | 'cow', 'diningtable', 'dog', 'horse', 22 | 'motorbike', 'person', 'pottedplant', 23 | 'sheep', 'sofa', 'train', 'tvmonitor') 24 | 25 | config = { 26 | 'batch_num':5, 27 | 'iter':100000, 28 | 'num_classes':21, 29 | 'max_size':(640,640), 30 | 'weight_decay': 0.0005, 31 | 'base_lr': 0.001, 32 | 'momentum': 0.9 33 | } 34 | 35 | if __name__ == '__main__': 36 | model = FCN8_test(config) 37 | data_loader = Dataloader('val', config) 38 | 39 | saver = tf.train.Saver() 40 | ckpt = '../model/FCN8_adam_iter_10000.ckpt' 41 | # Extract ckpt into npy, if needed 42 | # with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as session: 43 | # model.extract(ckpt, session, saver) 44 | # ipdb.set_trace() 45 | 46 | dump_path = '../demo/' 47 | 48 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as session: 49 | saver.restore(session, ckpt) 50 | print 'Model restored.' 
51 | 52 | minibatch = data_loader.get_next_minibatch() 53 | feed_dict = {model.img: minibatch[0], 54 | model.seg: minibatch[1], 55 | model.mask: minibatch[2]} 56 | pred = session.run(model.get_output('deconv'), feed_dict=feed_dict) 57 | 58 | for i in range(config['batch_num']): 59 | mask = minibatch[2][i] 60 | seg = np.argmax(pred[i], axis=2) 61 | img = minibatch[0][i] 62 | gt = minibatch[1][i][:,:,0] 63 | f, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=False) 64 | ax1.imshow(seg) 65 | img = img + MEAN_PIXEL 66 | ax2.imshow(img[:,:,::-1]) 67 | ax3.imshow(gt) 68 | plt.show() 69 | cv2.imwrite(dump_path + str(i) + '_seg.png', seg) 70 | cv2.imwrite(dump_path + str(i) + '_img.png', img) 71 | 72 | -------------------------------------------------------------------------------- /misc/Evaluation.m: -------------------------------------------------------------------------------- 1 | % Description: Evaluation 2 | % 3 | % Author: Chen Gao 4 | % chengao@umich.edu 5 | % 6 | % Date: March 7, 2017 7 | % 8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 9 | 10 | function [accuracies,avacc,conf,rawcounts] = Evaluation(VOCopts,id) 11 | 12 | % image test set 13 | [gtids,~]=textread(sprintf(VOCopts.seg.imgsetpath,VOCopts.testset),'%s %d'); 14 | 15 | % number of labels = number of classes plus one for the background 16 | num = VOCopts.nclasses+1; 17 | confcounts = zeros(num); 18 | count=0; 19 | tic; 20 | for i=1:length(gtids) 21 | % display progress 22 | if toc>1 23 | fprintf('test confusion: %d/%d\n',i,length(gtids)); 24 | drawnow; 25 | tic; 26 | end 27 | 28 | imname = gtids{i}; 29 | 30 | % ground truth label file 31 | gtfile = sprintf(VOCopts.seg.clsimgpath,imname); 32 | [gtim,~] = imread(gtfile); 33 | gtim = double(gtim); 34 | 35 | % results file 36 | resfile = sprintf(VOCopts.seg.clsrespath,id,VOCopts.testset,imname); 37 | [resim,~] = imread(resfile); 38 | resim = double(resim); 39 | 40 | % Check validity of results image 41 | maxlabel = max(resim(:)); 42 | if (maxlabel>VOCopts.nclasses), 43 | error('Results image ''%s'' has out of range value %d (the value should be <= %d)',imname,maxlabel,VOCopts.nclasses); 44 | end 45 | 46 | szgtim = size(gtim); szresim = size(resim); 47 | if any(szgtim~=szresim) 48 | error('Results image ''%s'' is the wrong size, was %d x %d, should be %d x %d.',imname,szresim(1),szresim(2),szgtim(1),szgtim(2)); 49 | end 50 | 51 | %pixel locations to include in computation 52 | locs = gtim<255; 53 | 54 | % joint histogram 55 | sumim = 1+gtim+resim*num; 56 | hs = histc(sumim(locs),1:num*num); 57 | count = count + numel(find(locs)); 58 | confcounts(:) = confcounts(:) + hs(:); 59 | end 60 | 61 | % confusion matrix - first index is true label, second is inferred label 62 | %conf = zeros(num); 63 | conf = 100*confcounts./repmat(1E-20+sum(confcounts,2),[1 size(confcounts,2)]); 64 | rawcounts = confcounts; 65 | 66 | % Percentage correct labels measure is no longer being used. 
Uncomment if 67 | % you wish to see it anyway 68 | %overall_acc = 100*sum(diag(confcounts)) / sum(confcounts(:)); 69 | %fprintf('Percentage of pixels correctly labelled overall: %6.3f%%\n',overall_acc); 70 | 71 | accuracies = zeros(VOCopts.nclasses,1); 72 | fprintf('Accuracy for each class (intersection/union measure)\n'); 73 | for j=1:num 74 | 75 | gtj=sum(confcounts(j,:)); 76 | resj=sum(confcounts(:,j)); 77 | gtjresj=confcounts(j,j); 78 | % The accuracy is: true positive / (true positive + false positive + false negative) 79 | % which is equivalent to the following percentage: 80 | accuracies(j)=100*gtjresj/(gtj+resj-gtjresj); 81 | 82 | clname = 'background'; 83 | if (j>1), clname = VOCopts.classes{j-1};end; 84 | fprintf(' %14s: %6.3f%%\n',clname,accuracies(j)); 85 | end 86 | accuracies = accuracies(1:end); 87 | avacc = mean(accuracies); 88 | fprintf('-------------------------\n'); 89 | fprintf('Average accuracy: %6.3f%%\n',avacc); 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tf_fcn 2 | 3 | A TensorFlow implementation of: 4 | 5 | [CVPR 2015] Long et al. [Fully Convolutional Networks for Semantic Segmentation](https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf) 6 | 7 | **NOTE:** In this repository, we only implement the VGG16 version. 8 | 9 | 10 | ## Requirements 11 | 12 | 1. TensorFlow r0.10 (r0.11 should also work; later versions are untested) 13 | 14 | 2. OpenCV 2 and its Python bindings 15 | 16 | 3. ipdb: the IPython debugger 17 | 18 | 4. (Optional) pathos. Check the other branch for further details. 19 | 20 | 21 | ## Prepare dataset 22 | 23 | In this implementation, we use the [VOC2011 dataset](http://host.robots.ox.ac.uk/pascal/VOC/voc2011/). Set it up as follows: 24 | 25 | 1. `mkdir data` to create the dataset directory 26 | 27 | 2. Download the **train/val dataset** and **Development kit** tar files and put them under the `data` folder. Unzip the Development kit tar file, then unzip the train/val tar file and rename the resulting folder to `VOC2011`. 28 | 29 | 3. You should end up with this basic structure (under the `data` directory): 30 | 31 | ```bash 32 | $ VOCdevkit/ # development kit 33 | $ VOCdevkit/VOCcode # VOC utility code 34 | $ VOCdevkit/VOC2011 # image sets, annotations, etc. 35 | # ... and several other directories ... 36 | ``` 37 | 38 | You may also download the **test set** if you want to evaluate your prediction results on this dataset. 39 | 40 | ## Pre-trained model 41 | 42 | ```bash 43 | mkdir model 44 | ``` 45 | 46 | We use an ImageNet pre-trained model to initialize the network. Please download the npy file [here](https://drive.google.com/file/d/0B2SnTpv8L4iLRTFZb0FWenRJTlU/view?usp=sharing) and put it under the `model` folder. 47 | 48 | 49 | ## How to train 50 | 51 | Input images have different sizes, so to batch them into minibatches we use one of two strategies: 1) pad every image to a large fixed size (640, 640), or 2) resize every image to a small fixed size (256, 256). 52 | 53 | ```bash 54 | cd src 55 | python train.py # padding 56 | python train_small.py # resize 57 | ``` 58 | 59 | You can run either one, and you can change the `config` dictionary to use custom settings. 60 | 61 | 62 | ## Demo 63 | 64 | ```bash 65 | cd src 66 | python demo.py 67 | ``` 68 | 69 | You can change the `config` dictionary to use custom settings.
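For reference, here is the `config` used by `src/demo.py`, annotated with our reading of how each key is consumed in `Model.py` and `util.py` (note that `momentum` is only used by the `MomentumOptimizer` line that is commented out in `Model.py`; training defaults to Adam):

```python
config = {
    'batch_num': 5,          # minibatch size
    'iter': 100000,          # number of training iterations (training only)
    'num_classes': 21,       # 20 VOC classes + background
    'max_size': (640, 640),  # inputs are zero-padded to this size
    'weight_decay': 0.0005,  # L2 penalty on the weights (training only)
    'base_lr': 0.001,        # Adam learning rate (training only)
    'momentum': 0.9          # only used if you switch to MomentumOptimizer
}
```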
70 | 71 | 72 | ## Generate Predictions 73 | 74 | First, run the following: 75 | 76 | ```bash 77 | cd src 78 | python test.py 79 | ``` 80 | 81 | You may want to change which model is used; check the code for further details. 82 | 83 | After that, you should find the following structure in the `result` folder: 84 | 85 | ```bash 86 | $ FCN8_adam_iter_10000/ # folder name depends on the model you used 87 | $ FCN8_adam_iter_10000/gray/ # gray-scale segmentation result 88 | $ FCN8_adam_iter_10000/rgb/ # rgb segmentation result 89 | # ... and maybe several other directories ... 90 | ``` 91 | 92 | Then you can use the evaluation code provided with VOC2011 (see the next section for details). 93 | 94 | If you want to evaluate your model on the test split, you may submit your prediction results to [their server](http://host.robots.ox.ac.uk:8080/). 95 | 96 | 97 | ## Evaluation 98 | 99 | 1. `cd misc` 100 | 101 | 2. Run `save_colorful_grayscale(in_directory,out_directory)` (our generated results are grayscale PNGs, but the eval code expects indexed PNGs) 102 | 103 | 3. Run `report.m` 104 | 105 | **Note:** 106 | 107 | 1. Make sure `VOCinit.m` is at `/tf_fcn-master/data/VOCdevkit/VOCcode/` 108 | 109 | 2. Make sure the segmentation results are stored in `/tf_fcn-master/data/VOCdevkit/results/VOC2011/Segmentation/%s_val_cls/`, where `%s` identifies your results (the folder must be named `%s_val_cls`) 110 | 111 | 3. Make sure the second input `id` of `Evaluation(VOCopts,id)` is the string `%s` above. 112 | 113 | 114 | 115 | ## Models 116 | 117 | Padding to (640, 640): 118 | 119 | - FCN32_adam_20000: [ckpt](https://drive.google.com/file/d/0B3vJudZqxciYbTRuY21WZXREV0E/view?usp=sharing), [npy](https://drive.google.com/file/d/0B2SnTpv8L4iLNEVFd2RHcUZOX00/view?usp=sharing) 120 | 121 | - FCN16_adam_5000: [ckpt](https://drive.google.com/file/d/0B2SnTpv8L4iLT2VuREZwUHg4cjg/view?usp=sharing) 122 | 123 | - FCN8_adam_10000: [ckpt](https://drive.google.com/file/d/0B2SnTpv8L4iLRExqQTVONWxTX0U/view?usp=sharing) 124 | 125 | 126 | Padding to (500, 500): 127 | 128 | - FCN32_adam_35000: [ckpt](https://drive.google.com/file/d/0B3vJudZqxciYVWZfbXdybzFhWDA/view?usp=sharing) (you can extract an npy file with the `extract` method defined in `Model.py`) 129 | 130 | - FCN8_adam_30000: [ckpt](https://drive.google.com/file/d/0B3vJudZqxciYVWZfbXdybzFhWDA/view?usp=sharing) 131 | 132 | 133 | **Note:** When you train a shortcut model (FCN16 or FCN8), you will need an FCN32 model npy file as initialization, instead of the ImageNet pre-trained npy file.
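For example, to produce that FCN32 npy file from a trained checkpoint, you can restore it and call `extract`. A minimal sketch, following the commented-out snippet in `src/demo.py` (the checkpoint path is an example; use the one you actually have):

```python
# Sketch: dump a trained FCN32 ckpt to the npy file that train.py
# loads as `model_path` when initializing FCN16/FCN8.
import tensorflow as tf
from Model import FCN32_test

config = {'batch_num': 5, 'iter': 100000, 'num_classes': 21,
          'max_size': (640, 640), 'weight_decay': 0.0005,
          'base_lr': 0.0001, 'momentum': 0.9}

model = FCN32_test(config)
saver = tf.train.Saver()
ckpt = '../model/FCN32_adam_iter_20000.ckpt'

with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as session:
    # Restores the ckpt and saves '../model/FCN32_adam_iter_20000.npy'.
    # extract() pauses at an ipdb.set_trace() before returning; type `c`
    # to continue.
    npy_path = model.extract(ckpt, session, saver)
```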
-------------------------------------------------------------------------------- /src/util.py: -------------------------------------------------------------------------------- 1 | # Define some util functions 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-02-19 5 | 6 | import numpy as np 7 | import cv2 8 | import ipdb 9 | 10 | # BGR mean pixel value 11 | MEAN_PIXEL = np.array([103.939, 116.779, 123.68]) 12 | 13 | """Padding image and segmentation ground truth to (640, 640)""" 14 | def prep_im_for_blob(im_name, seg_name, rgb_to_gray, max_size=(640,640)): 15 | im = cv2.imread(im_name) # OpenCV color map default BGR 16 | im = im - MEAN_PIXEL 17 | seg = cv2.imread(seg_name)[:,:,::-1] 18 | 19 | row, col, _ = im.shape 20 | im_blob = np.zeros((max_size[0], max_size[1], 3)) 21 | im_blob[0:row,0:col,:] = im 22 | 23 | seg_blob = np.zeros((max_size[0], max_size[1], 1)) 24 | mask = np.zeros_like(seg_blob) 25 | for i in xrange(row): 26 | for j in xrange(col): 27 | seg_blob[i,j] = rgb_to_gray[tuple(seg[i,j,:])] 28 | # Discard 255 edge class 29 | if seg_blob[i,j] != 255: 30 | mask[i,j] = 1 31 | else: 32 | seg_blob[i,j] = 0 33 | 34 | return {'im_blob':im_blob, 'seg_blob':seg_blob, 'mask':mask, 'original_size':(row,col)} 35 | 36 | """Minus mean pixel value""" 37 | def prep_im(im_name): 38 | im = cv2.imread(im_name) # OpenCV color map default BGR 39 | im = np.array([im - MEAN_PIXEL]) 40 | return im 41 | 42 | """For multi-processing dataloader""" 43 | def prep_run_wrapper(args): 44 | return prep_im_for_blob(*args) 45 | 46 | """Resize input image to (256, 256)""" 47 | def prep_small_im_for_blob(im_name, seg_name, rgb_to_gray, max_size=(256,256)): 48 | im = cv2.imread(im_name) # OpenCV color map default BGR 49 | seg = cv2.imread(seg_name)[:,:,::-1] 50 | 51 | row, col, _ = im.shape 52 | im_blob = np.zeros((max_size[0], max_size[1], 3)) 53 | im_blob = cv2.resize(im, max_size, interpolation=cv2.INTER_NEAREST) 54 | im_blob = im_blob - MEAN_PIXEL 55 | 56 | 57 | seg_gray = np.zeros((row, col)) 58 | seg_blob = np.zeros((max_size[0], max_size[1])) 59 | mask = np.zeros_like(seg_blob) 60 | 61 | for i in xrange(row): 62 | for j in xrange(col): 63 | seg_gray[i,j] = rgb_to_gray[tuple(seg[i,j])] 64 | seg_blob = cv2.resize(seg_gray, max_size, interpolation=cv2.INTER_NEAREST) 65 | 66 | for i in xrange(max_size[0]): 67 | for j in xrange(max_size[1]): 68 | if seg_blob[i,j] != 255: 69 | mask[i, j] = 1 70 | else: 71 | seg_blob[i,j] = 0 72 | seg_blob = np.array([seg_blob]).transpose((1,2,0)) 73 | mask = np.array([mask]).transpose((1,2,0)) 74 | 75 | return {'im_blob':im_blob, 'seg_blob':seg_blob, 'mask':mask, 'original_size':(row,col)} 76 | 77 | """For multi-processing dataloader""" 78 | def prep_small_run_wrapper(args): 79 | return prep_small_im_for_blob(*args) 80 | 81 | 82 | """Create color mappings, check VOClabelcolormap.m for reference""" 83 | def colormap(N=256): 84 | # Create double side mappings 85 | gray_to_rgb = {} 86 | rgb_to_gray = {} 87 | 88 | for i in range(N): 89 | temp = i 90 | r = 0 91 | g = 0 92 | b = 0 93 | for j in range(8): 94 | r = r | ((temp & 1) << (7-j)) 95 | g = g | (((temp >> 1) & 1) << (7-j)) 96 | b = b | (((temp >> 2) & 1) << (7-j)) 97 | temp = temp >> 3 98 | gray_to_rgb[i] = (r,g,b) 99 | 100 | for key, val in gray_to_rgb.iteritems(): 101 | rgb_to_gray[val] = key 102 | 103 | return gray_to_rgb, rgb_to_gray 104 | 105 | """Get original size""" 106 | def get_original_size(mask, max_size=(640,640)): 107 | row = None 108 | col = None 109 | for i in range(max_size[0]-1, -1, -1): 110 | if 
mask[i,0,0] == 1: 111 | row = i + 1 112 | break 113 | 114 | for i in range(max_size[1]-1, -1, -1): 115 | if mask[0,i,0] == 1: 116 | col = i + 1 117 | break 118 | 119 | if row is None or col is None: 120 | ipdb.set_trace() 121 | return row, col 122 | 123 | """Transform gray scale segmentation result to rgb format""" 124 | def seg_gray_to_rgb(seg, gray_to_rgb): 125 | row, col = seg.shape 126 | rgb = np.zeros((row, col, 3)) 127 | 128 | for i in range(row): 129 | for j in range(col): 130 | r, g, b = gray_to_rgb[seg[i, j]] 131 | rgb[i, j, 0] = r 132 | rgb[i, j, 1] = g 133 | rgb[i, j, 2] = b 134 | 135 | return rgb 136 | 137 | 138 | """ 139 | Helper functions for bilinear upsampling 140 | credit: http://warmspringwinds.github.io/tensorflow/tf-slim/2016/11/22/upsampling-and-image-segmentation-with-tensorflow-and-tf-slim/ 141 | """ 142 | def get_kernel_size(factor): 143 | """ 144 | Find the kernel size given the desired factor of upsampling. 145 | """ 146 | return 2 * factor - factor % 2 147 | 148 | def upsample_filt(size): 149 | """ 150 | Make a 2D bilinear kernel suitable for upsampling of the given (h, w) size. 151 | """ 152 | factor = (size + 1) // 2 153 | if size % 2 == 1: 154 | center = factor - 1 155 | else: 156 | center = factor - 0.5 157 | og = np.ogrid[:size, :size] 158 | return (1 - abs(og[0] - center) / factor) * \ 159 | (1 - abs(og[1] - center) / factor) 160 | 161 | def bilinear_upsample_weights(factor, number_of_classes): 162 | """ 163 | Create weights matrix for transposed convolution with bilinear filter 164 | initialization. 165 | """ 166 | filter_size = get_kernel_size(factor) 167 | 168 | weights = np.zeros((filter_size, 169 | filter_size, 170 | number_of_classes, 171 | number_of_classes), dtype=np.float32) 172 | 173 | upsample_kernel = upsample_filt(filter_size) 174 | 175 | for i in xrange(number_of_classes): 176 | weights[:, :, i, i] = upsample_kernel 177 | 178 | return weights 179 | 180 | 181 | 182 | if __name__ == '__main__': 183 | root = '../data/VOCdevkit/VOC2011/' 184 | im_name = root + 'JPEGImages/2007_000033.jpg' 185 | seg_name = root + 'SegmentationClass/2007_000033.png' 186 | _, rgb_to_gray = colormap() 187 | 188 | # im_blob, seg_blob = prep_im_for_blob(im_name, seg_name, rgb_to_gray) 189 | data = prep_small_im_for_blob(im_name, seg_name, rgb_to_gray) 190 | import matplotlib.pyplot as plt 191 | im_blob = data['im_blob'] 192 | seg_blob = data['seg_blob'] 193 | plt.imshow(im_blob) 194 | plt.show() 195 | plt.imshow(seg_blob[:,:,0]) 196 | plt.show() 197 | ipdb.set_trace() 198 | -------------------------------------------------------------------------------- /src/Dataloader.py: -------------------------------------------------------------------------------- 1 | # Define the data loader for segmentation task 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-02-14 5 | 6 | import ipdb 7 | import numpy as np 8 | from os.path import join 9 | from util import colormap, prep_im_for_blob, prep_run_wrapper, prep_small_run_wrapper 10 | import multiprocessing 11 | 12 | """ 13 | The Dataloader for VOC2011 to load and preprocess input image and segmentation 14 | ground truth. 
15 | """ 16 | class Dataloader(object): 17 | def __init__(self, split, config): 18 | # Validate split input 19 | if split != 'train' and split != 'val' and split != 'trainval' and split != 'test': 20 | raise Exception('Please enter a valid split variable!') 21 | 22 | root = '../data/VOCdevkit/VOC2011/' 23 | self.img_path = join(root, 'JPEGImages/') 24 | self.seg_path = join(root, 'SegmentationClass/') 25 | self.split = split 26 | img_set = join(root, 'ImageSets/Segmentation/' + split + '.txt') 27 | with open(img_set) as f: 28 | self.img_list = f.read().rstrip().split('\n') 29 | 30 | self.num_images = len(self.img_list) 31 | self.temp_pointer = 0 # First idx of the current batch 32 | self._shuffle() 33 | 34 | self.batch_num = config['batch_num'] 35 | self.max_size = config['max_size'] 36 | 37 | # Create two-way mappings between gray labels and RGB colors 38 | self.gray_to_rgb, self.rgb_to_gray = colormap() 39 | 40 | 41 | def _shuffle(self): 42 | self.img_list = np.random.permutation(self.img_list) 43 | 44 | def _img_at(self, i): 45 | return self.img_path + self.img_list[i] + '.jpg' 46 | 47 | def _seg_at(self, i): 48 | return self.seg_path + self.img_list[i] + '.png' 49 | 50 | """Use padding to get same shapes""" 51 | def get_next_minibatch(self): 52 | img_blobs = [] 53 | seg_blobs = [] 54 | mask_blobs = [] 55 | ori_sizes = [] 56 | 57 | process_size = self.batch_num 58 | # Load the minibatch in parallel, one worker process per sample 59 | # (process_size == batch_num here, so this loop body runs once) 60 | for _ in xrange(self.batch_num/process_size): 61 | # Reshuffle once we run past the end of the image list 62 | 63 | if self.temp_pointer+process_size > self.num_images: 64 | self.temp_pointer = 0 65 | self._shuffle() 66 | 67 | temp_range = range(self.temp_pointer, self.temp_pointer+process_size, 1) 68 | temp_imName = [self._img_at(x) for x in temp_range] 69 | temp_segName = [self._seg_at(x) for x in temp_range] 70 | temp_map = [self.rgb_to_gray,]*process_size 71 | temp_size = [self.max_size,]*process_size 72 | 73 | p = multiprocessing.Pool(process_size) 74 | 75 | temp_result = p.map(prep_run_wrapper, zip(temp_imName, temp_segName, temp_map, temp_size)) 76 | p.close() 77 | p.join() 78 | 79 | for x in temp_result: 80 | img_blobs.append(x['im_blob']) 81 | seg_blobs.append(x['seg_blob']) 82 | mask_blobs.append(x['mask']) 83 | ori_sizes.append(x['original_size']) 84 | 85 | self.temp_pointer += process_size 86 | 87 | 88 | return [img_blobs, seg_blobs, mask_blobs, ori_sizes] 89 | 90 | """No shuffle, batch size fixed to 1""" 91 | class Dataloader_test(Dataloader): 92 | def __init__(self, split, config): 93 | # Validate split input 94 | if split != 'train' and split != 'val' and split != 'trainval' and split != 'test': 95 | raise Exception('Please enter a valid split variable!') 96 | 97 | root = '../data/VOCdevkit/VOC2011/' 98 | self.img_path = join(root, 'JPEGImages/') 99 | self.seg_path = join(root, 'SegmentationClass/') 100 | self.split = split 101 | img_set = join(root, 'ImageSets/Segmentation/' + split + '.txt') 102 | with open(img_set) as f: 103 | self.img_list = f.read().rstrip().split('\n') 104 | 105 | self.num_images = len(self.img_list) 106 | self.temp_pointer = 0 # First idx of the current batch 107 | 108 | self.batch_num = 1 109 | self.max_size = config['max_size'] 110 | 111 | # Create two-way mappings between gray labels and RGB colors 112 | self.gray_to_rgb, self.rgb_to_gray = colormap() 113 | 114 | """Get minibatch by index""" 115 | def get_minibatch_at(self, i): 116 | img_name = self._img_at(i) 117 | seg_name = self._seg_at(i) 118 | data = prep_im_for_blob(img_name, seg_name, self.rgb_to_gray, self.max_size) 119 | img_blob = data['im_blob'] 120 | # seg_blob = data['seg_blob'] 121 | mask = data['mask'] 122 | ori_size = data['original_size'] 123 | 124 | img_blobs = np.array([img_blob]) 125 | # seg_blobs = np.array([seg_blob]) 126 | mask_blobs = np.array([mask]) 127 | seg_blobs = None 128 | # mask_blobs = None 129 | 130 | return [img_blobs, seg_blobs, mask_blobs, ori_size] 131 | 132 | 133 | """Small size dataloader""" 134 | class Dataloader_small(Dataloader): 135 | def __init__(self, split, config): 136 | Dataloader.__init__(self, split, config) 137 | 138 | """Override""" 139 | def get_next_minibatch(self): 140 | img_blobs = [] 141 | seg_blobs = [] 142 | mask_blobs = [] 143 | ori_sizes = [] 144 | 145 | process_size = 5 146 | # Load the minibatch with 5 worker processes; this requires 147 | # batch_num to be a multiple of 5 148 | for _ in xrange(self.batch_num/process_size): 149 | # Reshuffle once we run past the end of the image list 150 | 151 | if self.temp_pointer+process_size > self.num_images: 152 | self.temp_pointer = 0 153 | self._shuffle() 154 | 155 | temp_range = range(self.temp_pointer, self.temp_pointer+process_size, 1) 156 | temp_imName = [self._img_at(x) for x in temp_range] 157 | temp_segName = [self._seg_at(x) for x in temp_range] 158 | temp_map = [self.rgb_to_gray,]*process_size 159 | 160 | p = multiprocessing.Pool(process_size) 161 | 162 | # Use prep_small_run_wrapper instead! 163 | temp_result = p.map(prep_small_run_wrapper, zip(temp_imName, temp_segName, temp_map)) 164 | p.close() 165 | p.join() 166 | 167 | for x in temp_result: 168 | img_blobs.append(x['im_blob']) 169 | seg_blobs.append(x['seg_blob']) 170 | mask_blobs.append(x['mask']) 171 | ori_sizes.append(x['original_size']) 172 | 173 | self.temp_pointer += process_size 174 | 175 | 176 | return [img_blobs, seg_blobs, mask_blobs, ori_sizes] 177 | 178 | if __name__ == '__main__': 179 | config = { 180 | 'batch_num':1, 181 | 'iter':100000, 182 | 'num_classes':21, 183 | 'max_size':(640,640), 184 | 'weight_decay': 0.0005, 185 | 'base_lr': 0.001, 186 | 'momentum': 0.9 187 | } 188 | 189 | # dataloader = Dataloader('train', 10) 190 | # minibatch = dataloader.get_next_minibatch() 191 | dataloader = Dataloader('val', config) 192 | minibatch = dataloader.get_next_minibatch() 193 | 194 | 195 | ipdb.set_trace() 196 | -------------------------------------------------------------------------------- /src/Model.py: -------------------------------------------------------------------------------- 1 | # Define the vgg16 style model 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-02-19 5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | from util import bilinear_upsample_weights 9 | import ipdb 10 | 11 | """Define a base class, containing some useful layer functions""" 12 | class Network(object): 13 | def __init__(self, inputs): 14 | self.inputs = [] 15 | self.layers = {} 16 | self.outputs = {} 17 | 18 | """Extract parameters from ckpt file to npy file""" 19 | def extract(self, data_path, session, saver): 20 | raise NotImplementedError('Must be subclassed.') 21 | 22 | """Load pre-trained model from numpy data_dict""" 23 | def load(self, data_dict, session, ignore_missing=True): 24 | fc_shapes = {'fc6':(7,7,512,4096), 'fc7':(1,1,4096,4096)} 25 | fc_scopes = {'fc6':'conv6', 'fc7':'conv7'} 26 | for key in data_dict: 27 | # Special cases: fc6 and fc7 28 | if key == 'fc6' or key == 'fc7': 29 | w = np.reshape(data_dict[key]['weights'], fc_shapes[key]) 30 | b = data_dict[key]['biases'] 31 | with
tf.variable_scope(fc_scopes[key], reuse=True): 32 | var1 = tf.get_variable('weights') 33 | session.run(var1.assign(w)) 34 | print "Assign pretrain model weights to " + fc_scopes[key] 35 | var2 = tf.get_variable('biases') 36 | session.run(var2.assign(b)) 37 | print "Assign pretrain model biases to " + fc_scopes[key] 38 | continue 39 | 40 | with tf.variable_scope(key, reuse=True): 41 | for subkey in data_dict[key]: 42 | try: 43 | var = tf.get_variable(subkey) 44 | session.run(var.assign(data_dict[key][subkey])) 45 | print "Assign pretrain model " + subkey + " to " + key 46 | except ValueError: 47 | print "Ignore " + key 48 | if not ignore_missing: 49 | raise 50 | 51 | """Get outputs given key names""" 52 | def get_output(self, key): 53 | if key not in self.outputs: 54 | raise KeyError 55 | return self.outputs[key] 56 | 57 | """Get parameters given key names""" 58 | def get_param(self, key): 59 | if key not in self.layers: 60 | raise KeyError 61 | return self.layers[key]['weights'], self.layers[key]['biases'] 62 | 63 | """Add conv part of vgg16""" 64 | def add_conv(self, inputs, num_classes, stage='TRAIN'): 65 | # Dropout is different for training and testing 66 | if stage == 'TRAIN': 67 | keep_prob = 0.5 68 | elif stage == 'TEST': 69 | keep_prob = 1 70 | else: 71 | raise ValueError 72 | 73 | # Conv1 74 | with tf.variable_scope('conv1_1') as scope: 75 | w_conv1_1 = tf.get_variable('weights', [3, 3, 3, 64], 76 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 77 | b_conv1_1 = tf.get_variable('biases', [64], 78 | initializer=tf.constant_initializer(0)) 79 | z_conv1_1 = tf.nn.conv2d(inputs, w_conv1_1, strides=[1, 1, 1, 1], 80 | padding='SAME') + b_conv1_1 81 | a_conv1_1 = tf.nn.relu(z_conv1_1) 82 | 83 | with tf.variable_scope('conv1_2') as scope: 84 | w_conv1_2 = tf.get_variable('weights', [3, 3, 64, 64], 85 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 86 | b_conv1_2 = tf.get_variable('biases', [64], 87 | initializer=tf.constant_initializer(0)) 88 | z_conv1_2 = tf.nn.conv2d(a_conv1_1, w_conv1_2, strides=[1, 1, 1, 1], 89 | padding='SAME') + b_conv1_2 90 | a_conv1_2 = tf.nn.relu(z_conv1_2) 91 | 92 | pool1 = tf.nn.max_pool(a_conv1_2, ksize=[1,2,2,1], strides=[1,2,2,1], 93 | padding='SAME', name='pool1') 94 | 95 | # Conv2 96 | with tf.variable_scope('conv2_1') as scope: 97 | w_conv2_1 = tf.get_variable('weights', [3, 3, 64, 128], 98 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 99 | b_conv2_1 = tf.get_variable('biases', [128], 100 | initializer=tf.constant_initializer(0)) 101 | z_conv2_1 = tf.nn.conv2d(pool1, w_conv2_1, strides=[1, 1, 1, 1], 102 | padding='SAME') + b_conv2_1 103 | a_conv2_1 = tf.nn.relu(z_conv2_1) 104 | 105 | with tf.variable_scope('conv2_2') as scope: 106 | w_conv2_2 = tf.get_variable('weights', [3, 3, 128, 128], 107 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 108 | b_conv2_2 = tf.get_variable('biases', [128], 109 | initializer=tf.constant_initializer(0)) 110 | z_conv2_2 = tf.nn.conv2d(a_conv2_1, w_conv2_2, strides=[1, 1, 1, 1], 111 | padding='SAME') + b_conv2_2 112 | a_conv2_2 = tf.nn.relu(z_conv2_2) 113 | 114 | pool2 = tf.nn.max_pool(a_conv2_2, ksize=[1,2,2,1], strides=[1,2,2,1], 115 | padding='SAME', name='pool2') 116 | 117 | # Conv3 118 | with tf.variable_scope('conv3_1') as scope: 119 | w_conv3_1 = tf.get_variable('weights', [3, 3, 128, 256], 120 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 121 | b_conv3_1 = tf.get_variable('biases', [256], 122 | 
initializer=tf.constant_initializer(0)) 123 | z_conv3_1 = tf.nn.conv2d(pool2, w_conv3_1, strides= [1, 1, 1, 1], 124 | padding='SAME') + b_conv3_1 125 | a_conv3_1 = tf.nn.relu(z_conv3_1) 126 | 127 | with tf.variable_scope('conv3_2') as scope: 128 | w_conv3_2 = tf.get_variable('weights', [3, 3, 256, 256], 129 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 130 | b_conv3_2 = tf.get_variable('biases', [256], 131 | initializer=tf.constant_initializer(0)) 132 | z_conv3_2 = tf.nn.conv2d(a_conv3_1, w_conv3_2, strides= [1, 1, 1, 1], 133 | padding='SAME') + b_conv3_2 134 | a_conv3_2 = tf.nn.relu(z_conv3_2) 135 | 136 | with tf.variable_scope('conv3_3') as scope: 137 | w_conv3_3 = tf.get_variable('weights', [3, 3, 256, 256], 138 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 139 | b_conv3_3 = tf.get_variable('biases', [256], 140 | initializer=tf.constant_initializer(0)) 141 | z_conv3_3 = tf.nn.conv2d(a_conv3_2, w_conv3_3, strides= [1, 1, 1, 1], 142 | padding='SAME') + b_conv3_3 143 | a_conv3_3 = tf.nn.relu(z_conv3_3) 144 | 145 | pool3 = tf.nn.max_pool(a_conv3_3, ksize=[1,2,2,1], strides=[1,2,2,1], 146 | padding='SAME', name='pool3') 147 | 148 | # Conv4 149 | with tf.variable_scope('conv4_1') as scope: 150 | w_conv4_1 = tf.get_variable('weights', [3, 3, 256, 512], 151 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 152 | b_conv4_1 = tf.get_variable('biases', [512], 153 | initializer=tf.constant_initializer(0)) 154 | z_conv4_1 = tf.nn.conv2d(pool3, w_conv4_1, strides= [1, 1, 1, 1], 155 | padding='SAME') + b_conv4_1 156 | a_conv4_1 = tf.nn.relu(z_conv4_1) 157 | 158 | with tf.variable_scope('conv4_2') as scope: 159 | w_conv4_2 = tf.get_variable('weights', [3, 3, 512, 512], 160 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 161 | b_conv4_2 = tf.get_variable('biases', [512], 162 | initializer=tf.constant_initializer(0)) 163 | z_conv4_2 = tf.nn.conv2d(a_conv4_1, w_conv4_2, strides= [1, 1, 1, 1], 164 | padding='SAME') + b_conv4_2 165 | a_conv4_2 = tf.nn.relu(z_conv4_2) 166 | 167 | with tf.variable_scope('conv4_3') as scope: 168 | w_conv4_3 = tf.get_variable('weights', [3, 3, 512, 512], 169 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 170 | b_conv4_3 = tf.get_variable('biases', [512], 171 | initializer=tf.constant_initializer(0)) 172 | z_conv4_3 = tf.nn.conv2d(a_conv4_2, w_conv4_3, strides= [1, 1, 1, 1], 173 | padding='SAME') + b_conv4_3 174 | a_conv4_3 = tf.nn.relu(z_conv4_3) 175 | 176 | pool4 = tf.nn.max_pool(a_conv4_3, ksize=[1,2,2,1], strides=[1,2,2,1], 177 | padding='SAME', name='pool4') 178 | 179 | # Conv5 180 | with tf.variable_scope('conv5_1') as scope: 181 | w_conv5_1 = tf.get_variable('weights', [3, 3, 512, 512], 182 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 183 | b_conv5_1 = tf.get_variable('biases', [512], 184 | initializer=tf.constant_initializer(0)) 185 | z_conv5_1 = tf.nn.conv2d(pool4, w_conv5_1, strides= [1, 1, 1, 1], 186 | padding='SAME') + b_conv5_1 187 | a_conv5_1 = tf.nn.relu(z_conv5_1) 188 | 189 | with tf.variable_scope('conv5_2') as scope: 190 | w_conv5_2 = tf.get_variable('weights', [3, 3, 512, 512], 191 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 192 | b_conv5_2 = tf.get_variable('biases', [512], 193 | initializer=tf.constant_initializer(0)) 194 | z_conv5_2 = tf.nn.conv2d(a_conv5_1, w_conv5_2, strides= [1, 1, 1, 1], 195 | padding='SAME') + b_conv5_2 196 | a_conv5_2 = tf.nn.relu(z_conv5_2) 197 | 198 | with tf.variable_scope('conv5_3') as scope: 199 | 
w_conv5_3 = tf.get_variable('weights', [3, 3, 512, 512], 200 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 201 | b_conv5_3 = tf.get_variable('biases', [512], 202 | initializer=tf.constant_initializer(0)) 203 | z_conv5_3 = tf.nn.conv2d(a_conv5_2, w_conv5_3, strides= [1, 1, 1, 1], 204 | padding='SAME') + b_conv5_3 205 | a_conv5_3 = tf.nn.relu(z_conv5_3) 206 | 207 | pool5 = tf.nn.max_pool(a_conv5_3, ksize=[1,2,2,1], strides=[1,2,2,1], 208 | padding='SAME', name='pool5') 209 | 210 | # Transform fully-connected layers to convolutional layers 211 | with tf.variable_scope('conv6') as scope: 212 | w_conv6 = tf.get_variable('weights', [7, 7, 512, 4096], 213 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 214 | b_conv6 = tf.get_variable('biases', [4096], 215 | initializer=tf.constant_initializer(0)) 216 | z_conv6 = tf.nn.conv2d(pool5, w_conv6, strides= [1, 1, 1, 1], 217 | padding='SAME') + b_conv6 218 | a_conv6 = tf.nn.relu(z_conv6) 219 | d_conv6 = tf.nn.dropout(a_conv6, keep_prob) 220 | 221 | with tf.variable_scope('conv7') as scope: 222 | w_conv7 = tf.get_variable('weights', [1, 1, 4096, 4096], 223 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 224 | b_conv7 = tf.get_variable('biases', [4096], 225 | initializer=tf.constant_initializer(0)) 226 | z_conv7 = tf.nn.conv2d(d_conv6, w_conv7, strides= [1, 1, 1, 1], 227 | padding='SAME') + b_conv7 228 | a_conv7 = tf.nn.relu(z_conv7) 229 | d_conv7 = tf.nn.dropout(a_conv7, keep_prob) 230 | 231 | # Replace the original classifier layer 232 | with tf.variable_scope('conv8') as scope: 233 | w_conv8 = tf.get_variable('weights', [1, 1, 4096, num_classes], 234 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 235 | b_conv8 = tf.get_variable('biases', [num_classes], 236 | initializer=tf.constant_initializer(0)) 237 | z_conv8 = tf.nn.conv2d(d_conv7, w_conv8, strides= [1, 1, 1, 1], 238 | padding='SAME') + b_conv8 239 | 240 | # Add to store dicts 241 | self.outputs['conv1_1'] = a_conv1_1 242 | self.outputs['conv1_2'] = a_conv1_2 243 | self.outputs['pool1'] = pool1 244 | self.outputs['conv2_1'] = a_conv2_1 245 | self.outputs['conv2_2'] = a_conv2_2 246 | self.outputs['pool2'] = pool2 247 | self.outputs['conv3_1'] = a_conv3_1 248 | self.outputs['conv3_2'] = a_conv3_2 249 | self.outputs['conv3_3'] = a_conv3_3 250 | self.outputs['pool3'] = pool3 251 | self.outputs['conv4_1'] = a_conv4_1 252 | self.outputs['conv4_2'] = a_conv4_2 253 | self.outputs['conv4_3'] = a_conv4_3 254 | self.outputs['pool4'] = pool4 255 | self.outputs['conv5_1'] = a_conv5_1 256 | self.outputs['conv5_2'] = a_conv5_2 257 | self.outputs['conv5_3'] = a_conv5_3 258 | self.outputs['pool5'] = pool5 259 | self.outputs['conv6'] = d_conv6 260 | self.outputs['conv7'] = d_conv7 261 | self.outputs['conv8'] = z_conv8 262 | 263 | self.layers['conv1_1'] = {'weights':w_conv1_1, 'biases':b_conv1_1} 264 | self.layers['conv1_2'] = {'weights':w_conv1_2, 'biases':b_conv1_2} 265 | self.layers['conv2_1'] = {'weights':w_conv2_1, 'biases':b_conv2_1} 266 | self.layers['conv2_2'] = {'weights':w_conv2_2, 'biases':b_conv2_2} 267 | self.layers['conv3_1'] = {'weights':w_conv3_1, 'biases':b_conv3_1} 268 | self.layers['conv3_2'] = {'weights':w_conv3_2, 'biases':b_conv3_2} 269 | self.layers['conv3_3'] = {'weights':w_conv3_3, 'biases':b_conv3_3} 270 | self.layers['conv4_1'] = {'weights':w_conv4_1, 'biases':b_conv4_1} 271 | self.layers['conv4_2'] = {'weights':w_conv4_2, 'biases':b_conv4_2} 272 | self.layers['conv4_3'] = {'weights':w_conv4_3, 'biases':b_conv4_3} 273 | 
self.layers['conv5_1'] = {'weights':w_conv5_1, 'biases':b_conv5_1} 274 | self.layers['conv5_2'] = {'weights':w_conv5_2, 'biases':b_conv5_2} 275 | self.layers['conv5_3'] = {'weights':w_conv5_3, 'biases':b_conv5_3} 276 | self.layers['conv6'] = {'weights':w_conv6, 'biases':b_conv6} 277 | self.layers['conv7'] = {'weights':w_conv7, 'biases':b_conv7} 278 | self.layers['conv8'] = {'weights':w_conv8, 'biases':b_conv8} 279 | 280 | 281 | """Baseline model""" 282 | class FCN32(Network): 283 | def __init__(self, config): 284 | self.num_classes = config['num_classes'] 285 | self.batch_num = config['batch_num'] 286 | self.max_size = config['max_size'] 287 | self.weight_decay = config['weight_decay'] 288 | self.base_lr = config['base_lr'] 289 | self.momentum = config['momentum'] 290 | 291 | self.img = tf.placeholder(tf.float32, 292 | [self.batch_num, self.max_size[0], self.max_size[1], 3]) 293 | self.seg = tf.placeholder(tf.int32, 294 | [self.batch_num, self.max_size[0], self.max_size[1], 1]) 295 | self.mask = tf.placeholder(tf.float32, 296 | [self.batch_num, self.max_size[0], self.max_size[1], 1]) 297 | 298 | self.layers = {} 299 | self.outputs = {} 300 | self.set_up() 301 | 302 | def set_up(self): 303 | self.add_conv(self.img, self.num_classes) 304 | self.add_deconv(bilinear=False) 305 | self.add_loss_op() 306 | self.add_weight_decay() 307 | self.add_train_op() 308 | 309 | """Extract parameters from ckpt file to npy file""" 310 | def extract(self, data_path, session, saver): 311 | saver.restore(session, data_path) 312 | scopes = ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 313 | 'conv3_2', 'conv3_3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 314 | 'conv5_2', 'conv5_3', 'conv6', 'conv7', 'conv8'] 315 | data_dict = {} 316 | for scope in scopes: 317 | [w, b] = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope) 318 | data_dict[scope] = {'weights':w.eval(), 'biases':b.eval()} 319 | file_name = data_path[0:-5] 320 | np.save(file_name, data_dict) 321 | ipdb.set_trace() 322 | return file_name + '.npy' 323 | 324 | 325 | """Add the deconv(upsampling) layer to get dense prediction""" 326 | def add_deconv(self, bilinear=False): 327 | conv8 = self.get_output('conv8') 328 | 329 | with tf.variable_scope('deconv') as scope: 330 | # Learn from scratch 331 | if not bilinear: 332 | w_deconv = tf.get_variable('weights', [64, 64, self.num_classes, self.num_classes], 333 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 334 | # Using fiexed bilinearing upsampling filter 335 | else: 336 | w_deconv = tf.get_variable('weights', trainable=True, 337 | initializer=bilinear_upsample_weights(32, self.num_classes)) 338 | 339 | b_deconv = tf.get_variable('biases', [self.num_classes], 340 | initializer=tf.constant_initializer(0)) 341 | z_deconv = tf.nn.conv2d_transpose(conv8, w_deconv, 342 | [self.batch_num, self.max_size[0], self.max_size[1], self.num_classes], 343 | strides=[1,32,32,1], padding='SAME', name='z') + b_deconv 344 | 345 | # Add to store dicts 346 | self.outputs['deconv'] = z_deconv 347 | self.layers['deconv'] = {'weights':w_deconv, 'biases':b_deconv} 348 | 349 | """Add pixelwise softmax loss""" 350 | def add_loss_op(self): 351 | pred = self.get_output('deconv') 352 | pred_reshape = tf.reshape(pred, [-1, self.num_classes]) 353 | gt_reshape = tf.reshape(self.seg, [-1]) 354 | 355 | loss_reshape = tf.nn.sparse_softmax_cross_entropy_with_logits(pred_reshape, gt_reshape) 356 | loss = tf.reshape(loss_reshape, [self.batch_num, self.max_size[0], self.max_size[1], 1]) 357 | loss_valid = 
tf.reduce_sum(loss * self.mask, (1,2,3)) 358 | 359 | valid_pixels = tf.reduce_sum(self.mask, (1,2,3)) 360 | loss_avg = tf.reduce_mean(loss_valid / valid_pixels) 361 | 362 | self.loss = loss_avg 363 | 364 | """Add weight decay""" 365 | def add_weight_decay(self): 366 | for key in self.layers: 367 | w = self.layers[key]['weights'] 368 | self.loss += self.weight_decay * tf.nn.l2_loss(w) 369 | 370 | """Set up training optimization""" 371 | def add_train_op(self): 372 | # self.train_op = tf.train.MomentumOptimizer(self.base_lr, 373 | # self.momentum).minimize(self.loss) 374 | self.train_op = tf.train.AdamOptimizer(self.base_lr).minimize(self.loss) 375 | 376 | 377 | """A better model""" 378 | class FCN16(FCN32): 379 | def __init__(self, config): 380 | FCN32.__init__(self, config) 381 | 382 | def set_up(self): 383 | self.add_conv(self.img, self.num_classes) 384 | self.add_shortcut(bilinear=True) 385 | self.add_deconv(bilinear=False) 386 | self.add_loss_op() 387 | self.add_weight_decay() 388 | self.add_train_op() 389 | 390 | """Extract parameters from ckpt file to npy file""" 391 | def extract(self, data_path, session, saver): 392 | saver.restore(session, data_path) 393 | scopes = ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 394 | 'conv3_2', 'conv3_3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 395 | 'conv5_2', 'conv5_3', 'conv6', 'conv7', 'conv8', '2x_conv8', 396 | 'pool4_1x1'] 397 | data_dict = {} 398 | for scope in scopes: 399 | [w, b] = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope) 400 | data_dict[scope] = {'weights':w.eval(), 'biases':b.eval()} 401 | file_name = data_path[0:-5] 402 | np.save(file_name, data_dict) 403 | ipdb.set_trace() 404 | return file_name + '.npy' 405 | 406 | def add_shortcut(self, bilinear=True): 407 | conv8 = self.get_output('conv8') 408 | pool4 = self.get_output('pool4') 409 | 410 | target_size = int(pool4.get_shape()[1]) 411 | 412 | with tf.variable_scope('2x_conv8') as scope: 413 | # Learn from scratch 414 | if not bilinear: 415 | w_deconv = tf.get_variable('weights', [4, 4, self.num_classes, self.num_classes], 416 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 417 | # Using fiexed bilinearing upsampling filter 418 | else: 419 | w_deconv = tf.get_variable('weights', trainable=True, 420 | initializer=bilinear_upsample_weights(2, self.num_classes)) 421 | 422 | b_deconv = tf.get_variable('biases', [self.num_classes], 423 | initializer=tf.constant_initializer(0)) 424 | z_deconv = tf.nn.conv2d_transpose(conv8, w_deconv, 425 | [self.batch_num, target_size, target_size, self.num_classes], 426 | strides=[1,2,2,1], padding='SAME', name='z') + b_deconv 427 | 428 | with tf.variable_scope('pool4_1x1') as scope: 429 | w_pool4 = tf.get_variable('weights', [1, 1, 512, self.num_classes], 430 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 431 | b_pool4 = tf.get_variable('biases', [self.num_classes], 432 | initializer=tf.constant_initializer(0)) 433 | z_pool4 = tf.nn.conv2d(pool4, w_pool4, strides= [1, 1, 1, 1], 434 | padding='SAME') + b_pool4 435 | 436 | # Element-wise sum 437 | fusion = z_deconv + z_pool4 438 | 439 | # Add to store dicts 440 | self.outputs['2x_conv8'] = z_deconv 441 | self.outputs['pool4_1x1'] = z_pool4 442 | self.outputs['fusion'] = fusion 443 | self.layers['2x_conv8'] = {'weights':w_deconv, 'biases':b_deconv} 444 | self.layers['pool4_1x1'] = {'weights':w_pool4, 'biases':b_pool4} 445 | 446 | 447 | """Add the deconv(upsampling) layer to get dense prediction""" 448 | def add_deconv(self, 
bilinear=False): 449 | fusion = self.get_output('fusion') 450 | 451 | with tf.variable_scope('deconv') as scope: 452 | # Learn from scratch 453 | if not bilinear: 454 | w_deconv = tf.get_variable('weights', [32, 32, self.num_classes, self.num_classes], 455 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 456 | # Using fiexed bilinearing upsampling filter 457 | else: 458 | w_deconv = tf.get_variable('weights', trainable=True, 459 | initializer=bilinear_upsample_weights(16, self.num_classes)) 460 | 461 | b_deconv = tf.get_variable('biases', [self.num_classes], 462 | initializer=tf.constant_initializer(0)) 463 | z_deconv = tf.nn.conv2d_transpose(fusion, w_deconv, 464 | [self.batch_num, self.max_size[0], self.max_size[1], self.num_classes], 465 | strides=[1,16,16,1], padding='SAME', name='z') + b_deconv 466 | 467 | # Add to store dicts 468 | self.outputs['deconv'] = z_deconv 469 | self.layers['deconv'] = {'weights':w_deconv, 'biases':b_deconv} 470 | 471 | 472 | """The best model""" 473 | class FCN8(FCN16): 474 | def __init__(self, config): 475 | FCN16.__init__(self, config) 476 | 477 | def add_shortcut(self, bilinear=True): 478 | conv8 = self.get_output('conv8') 479 | pool4 = self.get_output('pool4') 480 | 481 | target_size = int(pool4.get_shape()[1]) 482 | 483 | with tf.variable_scope('2x_conv8') as scope: 484 | # Learn from scratch 485 | if not bilinear: 486 | w_deconv = tf.get_variable('weights', [4, 4, self.num_classes, self.num_classes], 487 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 488 | # Using fiexed bilinearing upsampling filter 489 | else: 490 | w_deconv = tf.get_variable('weights', trainable=True, 491 | initializer=bilinear_upsample_weights(2, self.num_classes)) 492 | 493 | b_deconv = tf.get_variable('biases', [self.num_classes], 494 | initializer=tf.constant_initializer(0)) 495 | z_deconv = tf.nn.conv2d_transpose(conv8, w_deconv, 496 | [self.batch_num, target_size, target_size, self.num_classes], 497 | strides=[1,2,2,1], padding='SAME', name='z') + b_deconv 498 | 499 | 500 | with tf.variable_scope('pool4_1x1') as scope: 501 | w_pool4 = tf.get_variable('weights', [1, 1, 512, self.num_classes], 502 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 503 | b_pool4 = tf.get_variable('biases', [self.num_classes], 504 | initializer=tf.constant_initializer(0)) 505 | z_pool4 = tf.nn.conv2d(pool4, w_pool4, strides= [1, 1, 1, 1], 506 | padding='SAME') + b_pool4 507 | 508 | # Element-wise sum 509 | fusion1 = z_deconv + z_pool4 510 | 511 | ## Second fusion stage 512 | pool3 = self.get_output('pool3') 513 | 514 | with tf.variable_scope('pool3_1x1') as scope: 515 | w_pool3 = tf.get_variable('weights', [1, 1, 256, self.num_classes], 516 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 517 | b_pool3 = tf.get_variable('biases', [self.num_classes], 518 | initializer=tf.constant_initializer(0)) 519 | z_pool3 = tf.nn.conv2d(pool3, w_pool3, strides= [1, 1, 1, 1], 520 | padding='SAME') + b_pool3 521 | 522 | target_size = int(pool3.get_shape()[1]) 523 | 524 | with tf.variable_scope('2x_fusion') as scope: 525 | # Learn from scratch 526 | if not bilinear: 527 | w_deconv2 = tf.get_variable('weights', [4, 4, self.num_classes, self.num_classes], 528 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 529 | # Using fiexed bilinearing upsampling filter 530 | else: 531 | w_deconv2 = tf.get_variable('weights', trainable=True, 532 | initializer=bilinear_upsample_weights(2, self.num_classes)) 533 | 534 | b_deconv2 = 
tf.get_variable('biases', [self.num_classes], 535 | initializer=tf.constant_initializer(0)) 536 | z_deconv2 = tf.nn.conv2d_transpose(fusion1, w_deconv2, 537 | [self.batch_num, target_size, target_size, self.num_classes], 538 | strides=[1,2,2,1], padding='SAME', name='z') + b_deconv2 539 | 540 | fusion2 = z_pool3 + z_deconv2 541 | 542 | # Add to store dicts 543 | self.outputs['2x_conv8'] = z_deconv 544 | self.outputs['pool4_1x1'] = z_pool4 545 | self.outputs['pool3_1x1'] = z_pool3 546 | self.outputs['2x_fusion'] = z_deconv2 547 | self.outputs['fusion'] = fusion2 548 | self.layers['2x_conv8'] = {'weights':w_deconv, 'biases':b_deconv} 549 | self.layers['pool4_1x1'] = {'weights':w_pool4, 'biases':b_pool4} 550 | self.layers['pool3_1x1'] = {'weights':w_pool3, 'biases':b_pool3} 551 | self.layers['2x_fusion'] = {'weights':w_deconv2, 'biases':b_deconv2} 552 | 553 | 554 | """Add the deconv(upsampling) layer to get dense prediction""" 555 | def add_deconv(self, bilinear=False): 556 | fusion = self.get_output('fusion') 557 | 558 | with tf.variable_scope('deconv') as scope: 559 | # Learn from scratch 560 | if not bilinear: 561 | w_deconv = tf.get_variable('weights', [16, 16, self.num_classes, self.num_classes], 562 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 563 | # Using fiexed bilinearing upsampling filter 564 | else: 565 | w_deconv = tf.get_variable('weights', trainable=True, 566 | initializer=bilinear_upsample_weights(16, self.num_classes)) 567 | 568 | b_deconv = tf.get_variable('biases', [self.num_classes], 569 | initializer=tf.constant_initializer(0)) 570 | z_deconv = tf.nn.conv2d_transpose(fusion, w_deconv, 571 | [self.batch_num, self.max_size[0], self.max_size[1], self.num_classes], 572 | strides=[1,8,8,1], padding='SAME', name='z') + b_deconv 573 | 574 | # Add to store dicts 575 | self.outputs['deconv'] = z_deconv 576 | self.layers['deconv'] = {'weights':w_deconv, 'biases':b_deconv} 577 | 578 | 579 | class FCN32_test(FCN32): 580 | def __init__(self, config): 581 | FCN32.__init__(self, config) 582 | 583 | def set_up(self): 584 | self.add_conv(self.img, self.num_classes, 'TEST') 585 | self.add_deconv(bilinear=False) 586 | 587 | 588 | class FCN16_test(FCN16): 589 | def __init__(self, config): 590 | FCN16.__init__(self, config) 591 | 592 | def set_up(self): 593 | self.add_conv(self.img, self.num_classes, 'TEST') 594 | self.add_shortcut(bilinear=True) 595 | self.add_deconv(bilinear=False) 596 | 597 | 598 | class FCN8_test(FCN8): 599 | def __init__(self, config): 600 | FCN8.__init__(self, config) 601 | 602 | def set_up(self): 603 | self.add_conv(self.img, self.num_classes, 'TEST') 604 | self.add_shortcut(bilinear=True) 605 | self.add_deconv(bilinear=False) 606 | 607 | 608 | if __name__ == '__main__': 609 | config = { 610 | 'batch_num':5, 611 | 'iter':100000, 612 | 'num_classes':21, 613 | 'max_size':(500,500), 614 | 'weight_decay': 0.0005, 615 | 'base_lr': 0.0001, 616 | 'momentum': 0.9 617 | } 618 | 619 | model = FCN32(config) 620 | #model = FCN16(config) 621 | # model = FCN8(config) 622 | 623 | --------------------------------------------------------------------------------
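A quick way to see what the `bilinear=True` branches in `Model.py` initialize: the helpers in `src/util.py` build one fixed bilinear interpolation kernel per class, with no cross-class mixing. A minimal sanity-check sketch, assuming the VOC setting of 21 classes (run from `src/` so `util` is importable):

```python
import numpy as np
from util import get_kernel_size, upsample_filt, bilinear_upsample_weights

# Factor-2 upsampling (the '2x_conv8' shortcut) uses a 2*2 - 2%2 = 4x4 kernel.
assert get_kernel_size(2) == 4

# One bilinear kernel sits on each (i, i) slice of the weight tensor.
w = bilinear_upsample_weights(2, 21)
assert w.shape == (4, 4, 21, 21)
assert np.allclose(w[:, :, 0, 0], upsample_filt(4))  # same kernel for every class
assert np.count_nonzero(w[:, :, 0, 1]) == 0          # no cross-class mixing
```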