├── .gitignore ├── LICENSE ├── misc │   ├── save_colorful_grayscale.m │   ├── report.m │   └── Evaluation.m ├── src │   ├── train.py │   ├── train_small.py │   ├── test.py │   ├── demo.py │   ├── util.py │   ├── Dataloader.py │   └── Model.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *pyc 2 | *npy 3 | *txt 4 | *DS_Store 5 | data 6 | model 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Yuliang (Zack) Zou 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /misc/save_colorful_grayscale.m: -------------------------------------------------------------------------------- 1 | % Description: save grayscale segmentation to RGB 2 | % 3 | % Author: Chen Gao 4 | % chengao@umich.edu 5 | % 6 | % Date: March 7, 2017 7 | % 8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 9 | 10 | function save_colorful_grayscale(in_directory,out_directory) 11 | 12 | cmap = VOClabelcolormap(256); 13 | contents = dir([in_directory,'*.png']); 14 | 15 | 16 | for idx = 1:numel(contents) 17 | 18 | idx 19 | filename = contents(idx).name; 20 | img_gray = imread([in_directory filename]); 21 | imwrite(img_gray,cmap,[out_directory filename]); 22 | 23 | end 24 | 25 | 26 | function cmap = VOClabelcolormap(N) 27 | 28 | if nargin==0 29 | N = 256; 30 | end 31 | cmap = zeros(N,3); 32 | for i=1:N 33 | id = i-1; r=0;g=0;b=0; 34 | for j=0:7 35 | r = bitor(r, bitshift(bitget(id,1),7 - j)); 36 | g = bitor(g, bitshift(bitget(id,2),7 - j)); 37 | b = bitor(b, bitshift(bitget(id,3),7 - j)); 38 | id = bitshift(id,-3); 39 | end 40 | cmap(i,1)=r; cmap(i,2)=g; cmap(i,3)=b; 41 | end 42 | cmap = cmap / 255; -------------------------------------------------------------------------------- /misc/report.m: -------------------------------------------------------------------------------- 1 | run('/Users/chengao/Desktop/tf_fcn-master/data/VOCdevkit/VOCcode/VOCinit.m') 2 | [accuracies,avacc,conf,rawcounts] = Evaluation(VOCopts,'32rgb'); 3 | 4 | figure(1) 5 | hBar = bar(accuracies / 100); 6 | Xt = 1 : length(accuracies); 7 | Xl = [0 22]; 8 | set(gca, 'XTick', Xt, 'XLim', Xl,'FontSize', 15); 9 | 10 | label = [' background'; 11 | ' aeroplane'; 12 | ' bicycle'; 13 | ' bird'; 14 | ' boat'; 15 | ' bottle'; 16 | ' bus';
17 | ' car'; 18 | ' cat'; 19 | ' chair'; 20 | ' cow'; 21 | 'diningtable'; 22 | ' dog'; 23 | ' horse'; 24 | ' motorbike'; 25 | ' person'; 26 | 'pottedplant'; 27 | ' sheep'; 28 | ' sofa'; 29 | ' train'; 30 | ' tvmonitor']; 31 | 32 | ax = axis; 33 | axis(axis); 34 | Yl = ax(3:4); % Y-axis limits 35 | 36 | t = text(Xt,Yl(1) * ones(1,length(Xt)),label(1:21,:),'FontSize',20); 37 | set(t,'HorizontalAlignment','right','VerticalAlignment','top', ... 38 | 'Rotation',45); 39 | 40 | set(gca,'XTickLabel','') 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | # Training code 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-02-21 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from Model import FCN32, FCN16, FCN8 9 | from Dataloader import Dataloader 10 | import ipdb 11 | 12 | 13 | config = { 14 | 'batch_num':5, 15 | 'iter':100000, 16 | 'num_classes':21, 17 | 'max_size':(640,640), 18 | 'weight_decay': 0.0005, 19 | 'base_lr': 0.0001, 20 | 'momentum': 0.9 21 | } 22 | 23 | if __name__ == '__main__': 24 | # Load pre-trained model 25 | model_path = '../model/FCN32_adam_iter_20000_500.npy' 26 | data_dict = np.load(model_path).item() 27 | 28 | # Set up model and data loader 29 | model = FCN16(config) 30 | loss_list = [] 31 | f = open('./FCN16.txt', 'w') 32 | DECAY = False # decay flag 33 | init = tf.initialize_all_variables() 34 | 35 | data_loader = Dataloader('train', config) 36 | 37 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as session: 38 | session.run(init) 39 | model.load(data_dict, session) 40 | saver = tf.train.Saver() 41 | 42 | loss = 0 43 | for i in xrange(config['iter']): 44 | minibatch = data_loader.get_next_minibatch() 45 | feed_dict = {model.img: minibatch[0], 46 | model.seg: minibatch[1], 47 | model.mask: minibatch[2]} 48 | _, temp_loss = session.run([model.train_op, model.loss], feed_dict=feed_dict) 49 | loss += temp_loss 50 | 51 | loss_list.append(temp_loss) 52 | f.write(str(temp_loss) + '\n') 53 | print str(i) + ': ' + str(temp_loss) 54 | 55 | # Learning rate decay 56 | if len(loss_list) > 100 and not DECAY: 57 | avg = sum(loss_list[-100::]) / 100.0 58 | if avg <= 0.4: 59 | model.base_lr /= 10 60 | DECAY = True 61 | 62 | # Monitor 63 | if i % 20 == 0 and i != 0: 64 | loss /= 20 65 | print 'Iter: {}'.format(i) + '/{}'.format(config['iter']) + ', loss = ' + str(loss) 66 | loss = 0 67 | 68 | # Write to saver 69 | if i % 5000 == 0 and i != 0: 70 | saver.save(session, '../model/FCN16_adam_iter_'+str(i)+'.ckpt') 71 | 72 | f.close() 73 | 74 | -------------------------------------------------------------------------------- /src/train_small.py: -------------------------------------------------------------------------------- 1 | # Training code for small size input 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-02-21 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from Model import FCN32, FCN16, FCN8 9 | from Dataloader import Dataloader_small 10 | import ipdb 11 | 12 | 13 | config = { 14 | 'batch_num':20, 15 | 'iter':100000, 16 | 'num_classes':21, 17 | 'max_size':(256,256), 18 | 'weight_decay': 0.005, 19 | 'base_lr': 0.0001, 20 | 'momentum': 0.9 21 | } 22 | 23 | if __name__ == '__main__': 24 | # Load pre-trained model 25 | model_path = '../model/VGG_imagenet.npy' 26 | data_dict = np.load(model_path).item() 27 | 28 | # Set up model and data loader 29 | model = FCN32(config) 30 
| loss_list = [] 31 | f = open('./FCN32_small.txt', 'w') 32 | DECAY = False # decay flag 33 | init = tf.initialize_all_variables() 34 | 35 | data_loader = Dataloader_small('train', config) 36 | 37 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as session: 38 | session.run(init) 39 | model.load(data_dict, session) 40 | saver = tf.train.Saver() 41 | 42 | loss = 0 43 | for i in xrange(config['iter']): 44 | minibatch = data_loader.get_next_minibatch() 45 | feed_dict = {model.img: minibatch[0], 46 | model.seg: minibatch[1], 47 | model.mask: minibatch[2]} 48 | _, temp_loss = session.run([model.train_op, model.loss], feed_dict=feed_dict) 49 | loss += temp_loss 50 | 51 | loss_list.append(temp_loss) 52 | f.write(str(temp_loss) + '\n') 53 | print str(i) + ': ' + str(temp_loss) 54 | 55 | # Learning rate decay 56 | if len(loss_list) > 100 and not DECAY: 57 | avg = sum(loss_list[-100::]) / 100.0 58 | if avg <= 0.4: 59 | model.base_lr /= 10 60 | DECAY = True 61 | 62 | # Monitor 63 | if i % 20 == 0 and i != 0: 64 | loss /= 20 65 | print 'Iter: {}'.format(i) + '/{}'.format(config['iter']) + ', loss = ' + str(loss) 66 | loss = 0 67 | 68 | # Write to saver 69 | if i % 5000 == 0 and i != 0: 70 | saver.save(session, '../model/FCN32_small_adam_iter_'+str(i)+'.ckpt') 71 | 72 | f.close() 73 | 74 | -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- 1 | # Generate segmentation results 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-03-07 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from Model import FCN32_test, FCN16_test, FCN8_test 9 | from Dataloader import Dataloader, Dataloader_test 10 | from util import get_original_size, seg_gray_to_rgb 11 | import cv2 12 | from os import makedirs 13 | from os.path import exists, join 14 | import ipdb 15 | 16 | 17 | config = { 18 | 'batch_num':1, 19 | 'iter':100000, 20 | 'num_classes':21, 21 | 'max_size':(640,640), 22 | 'weight_decay': 0.0005, 23 | 'base_lr': 0.0001, 24 | 'momentum': 0.9 25 | } 26 | 27 | if __name__ == '__main__': 28 | # Specify which set to test 29 | split = 'val' 30 | model = FCN8_test(config) 31 | # Import, since we don't want the random shuffle 32 | data_loader = Dataloader_test(split, config) 33 | 34 | saver = tf.train.Saver() 35 | ckpt = '../model/FCN8_adam_iter_10000.ckpt' 36 | ID = ckpt.split('/')[-1][:-5] 37 | 38 | res_dir = '../result/' 39 | dump_path = join(res_dir, ID) 40 | dump_path = join(dump_path, split) 41 | rgb_path = join(dump_path, 'rgb') 42 | gray_path = join(dump_path, 'gray') 43 | 44 | if not exists(rgb_path): 45 | makedirs(rgb_path) 46 | if not exists(gray_path): 47 | makedirs(gray_path) 48 | 49 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as session: 50 | saver.restore(session, ckpt) 51 | print 'Model restored.' 
52 | 53 | # Iterate the whole set once 54 | for i in range(data_loader.num_images): 55 | minibatch = data_loader.get_minibatch_at(i) 56 | feed_dict = {model.img: minibatch[0]} 57 | pred = session.run(model.get_output('deconv'), feed_dict=feed_dict) 58 | 59 | mask = minibatch[2][0] 60 | seg = np.argmax(pred[0], axis=2) 61 | 62 | row, col = minibatch[3] 63 | seg_valid = np.zeros((row, col)) 64 | seg_valid[:, :] = seg[0:row, 0:col] 65 | seg_rgb = seg_gray_to_rgb(seg_valid, data_loader.gray_to_rgb) 66 | 67 | im_name = data_loader._seg_at(i).split('/')[-1] 68 | cv2.imwrite(join(rgb_path, im_name), seg_rgb[:,:,::-1]) 69 | cv2.imwrite(join(gray_path, im_name), seg_valid) 70 | 71 | print str(i) + '/' + str(data_loader.num_images) + ' done!' 72 | -------------------------------------------------------------------------------- /src/demo.py: -------------------------------------------------------------------------------- 1 | # Demo 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-03-03 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from Model import FCN32_test, FCN16_test, FCN8_test 9 | from Dataloader import Dataloader, Dataloader_small 10 | import matplotlib.pyplot as plt 11 | import cv2 12 | import ipdb 13 | 14 | 15 | # BGR mean pixel value 16 | MEAN_PIXEL = np.array([103.939, 116.779, 123.68]) 17 | 18 | CLASSES = ('__background__', 19 | 'aeroplane', 'bicycle', 'bird', 'boat', 20 | 'bottle', 'bus', 'car', 'cat', 'chair', 21 | 'cow', 'diningtable', 'dog', 'horse', 22 | 'motorbike', 'person', 'pottedplant', 23 | 'sheep', 'sofa', 'train', 'tvmonitor') 24 | 25 | config = { 26 | 'batch_num':5, 27 | 'iter':100000, 28 | 'num_classes':21, 29 | 'max_size':(640,640), 30 | 'weight_decay': 0.0005, 31 | 'base_lr': 0.001, 32 | 'momentum': 0.9 33 | } 34 | 35 | if __name__ == '__main__': 36 | model = FCN8_test(config) 37 | data_loader = Dataloader('val', config) 38 | 39 | saver = tf.train.Saver() 40 | ckpt = '../model/FCN8_adam_iter_10000.ckpt' 41 | # Extract ckpt into npy, if needed 42 | # with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as session: 43 | # model.extract(ckpt, session, saver) 44 | # ipdb.set_trace() 45 | 46 | dump_path = '../demo/' 47 | 48 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as session: 49 | saver.restore(session, ckpt) 50 | print 'Model restored.' 
51 | 52 | minibatch = data_loader.get_next_minibatch() 53 | feed_dict = {model.img: minibatch[0], 54 | model.seg: minibatch[1], 55 | model.mask: minibatch[2]} 56 | pred = session.run(model.get_output('deconv'), feed_dict=feed_dict) 57 | 58 | for i in range(config['batch_num']): 59 | mask = minibatch[2][i] 60 | seg = np.argmax(pred[i], axis=2) 61 | img = minibatch[0][i] 62 | gt = minibatch[1][i][:,:,0] 63 | f, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=False) 64 | ax1.imshow(seg) 65 | img = img + MEAN_PIXEL 66 | ax2.imshow(img[:,:,::-1]) 67 | ax3.imshow(gt) 68 | plt.show() 69 | cv2.imwrite(dump_path + str(i) + '_seg.png', seg) 70 | cv2.imwrite(dump_path + str(i) + '_img.png', img) 71 | 72 | -------------------------------------------------------------------------------- /misc/Evaluation.m: -------------------------------------------------------------------------------- 1 | % Description: Evaluation 2 | % 3 | % Author: Chen Gao 4 | % chengao@umich.edu 5 | % 6 | % Date: March 7, 2017 7 | % 8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 9 | 10 | function [accuracies,avacc,conf,rawcounts] = Evaluation(VOCopts,id) 11 | 12 | % image test set 13 | [gtids,~]=textread(sprintf(VOCopts.seg.imgsetpath,VOCopts.testset),'%s %d'); 14 | 15 | % number of labels = number of classes plus one for the background 16 | num = VOCopts.nclasses+1; 17 | confcounts = zeros(num); 18 | count=0; 19 | tic; 20 | for i=1:length(gtids) 21 | % display progress 22 | if toc>1 23 | fprintf('test confusion: %d/%d\n',i,length(gtids)); 24 | drawnow; 25 | tic; 26 | end 27 | 28 | imname = gtids{i}; 29 | 30 | % ground truth label file 31 | gtfile = sprintf(VOCopts.seg.clsimgpath,imname); 32 | [gtim,~] = imread(gtfile); 33 | gtim = double(gtim); 34 | 35 | % results file 36 | resfile = sprintf(VOCopts.seg.clsrespath,id,VOCopts.testset,imname); 37 | [resim,~] = imread(resfile); 38 | resim = double(resim); 39 | 40 | % Check validity of results image 41 | maxlabel = max(resim(:)); 42 | if (maxlabel>VOCopts.nclasses), 43 | error('Results image ''%s'' has out of range value %d (the value should be <= %d)',imname,maxlabel,VOCopts.nclasses); 44 | end 45 | 46 | szgtim = size(gtim); szresim = size(resim); 47 | if any(szgtim~=szresim) 48 | error('Results image ''%s'' is the wrong size, was %d x %d, should be %d x %d.',imname,szresim(1),szresim(2),szgtim(1),szgtim(2)); 49 | end 50 | 51 | %pixel locations to include in computation 52 | locs = gtim<255; 53 | 54 | % joint histogram 55 | sumim = 1+gtim+resim*num; 56 | hs = histc(sumim(locs),1:num*num); 57 | count = count + numel(find(locs)); 58 | confcounts(:) = confcounts(:) + hs(:); 59 | end 60 | 61 | % confusion matrix - first index is true label, second is inferred label 62 | %conf = zeros(num); 63 | conf = 100*confcounts./repmat(1E-20+sum(confcounts,2),[1 size(confcounts,2)]); 64 | rawcounts = confcounts; 65 | 66 | % Percentage correct labels measure is no longer being used. 
Uncomment if 67 | % you wish to see it anyway 68 | %overall_acc = 100*sum(diag(confcounts)) / sum(confcounts(:)); 69 | %fprintf('Percentage of pixels correctly labelled overall: %6.3f%%\n',overall_acc); 70 | 71 | accuracies = zeros(VOCopts.nclasses,1); 72 | fprintf('Accuracy for each class (intersection/union measure)\n'); 73 | for j=1:num 74 | 75 | gtj=sum(confcounts(j,:)); 76 | resj=sum(confcounts(:,j)); 77 | gtjresj=confcounts(j,j); 78 | % The accuracy is: true positive / (true positive + false positive + false negative) 79 | % which is equivalent to the following percentage: 80 | accuracies(j)=100*gtjresj/(gtj+resj-gtjresj); 81 | 82 | clname = 'background'; 83 | if (j>1), clname = VOCopts.classes{j-1};end; 84 | fprintf(' %14s: %6.3f%%\n',clname,accuracies(j)); 85 | end 86 | accuracies = accuracies(1:end); 87 | avacc = mean(accuracies); 88 | fprintf('-------------------------\n'); 89 | fprintf('Average accuracy: %6.3f%%\n',avacc); 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tf_fcn 2 | 3 | A TensorFlow implementation of: 4 | 5 | [CVPR 2015] Long et al. [Fully Convolutional Networks for Semantic Segmentation](https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf) 6 | 7 | **NOTE:** In this repository, we only implement the VGG16 version. 8 | 9 | 10 | ## Requirements 11 | 12 | 1. TensorFlow r0.10 (r0.11 should also work; later versions are untested) 13 | 14 | 2. OpenCV 2 and its Python bindings 15 | 16 | 3. ipdb: the IPython debugger 17 | 18 | 4. (Optional) pathos. Check the other branch for further details. 19 | 20 | 21 | ## Prepare dataset 22 | 23 | In this implementation, we use the [VOC2011 dataset](http://host.robots.ox.ac.uk/pascal/VOC/voc2011/). Set it up as follows: 24 | 25 | 1. `mkdir data` to create the dataset directory 26 | 27 | 2. Download the **train/val dataset** and **Development kit** tar files and put them under the `data` folder. Unzip the Development kit tar file, then unzip the train/val tar file and rename the resulting folder to `VOC2011`. 28 | 29 | 3. You should end up with this basic structure (under the `data` directory): 30 | 31 | ```bash 32 | $ VOCdevkit/ # development kit 33 | $ VOCdevkit/VOCcode # VOC utility code 34 | $ VOCdevkit/VOC2011 # image sets, annotations, etc. 35 | # ... and several other directories ... 36 | ``` 37 | 38 | You may also download the **test set** if you want to evaluate your prediction results on this dataset. 39 | 40 | ## Pre-trained model 41 | 42 | ```bash 43 | mkdir model 44 | ``` 45 | 46 | We use an ImageNet pre-trained model to initialize the network. Please download the npy file [here](https://drive.google.com/file/d/0B2SnTpv8L4iLRTFZb0FWenRJTlU/view?usp=sharing) and put it under the `model` folder. 47 | 48 | 49 | ## How to train 50 | 51 | Input images have different sizes, so to batch them into minibatches we use one of two strategies: 1) pad every image to a large fixed size (640, 640), or 2) resize every image to a small fixed size (256, 256). 52 | 53 | ```bash 54 | cd src 55 | python train.py # padding 56 | python train_small.py # resize 57 | ``` 58 | 59 | You can run either one, and you can change the `config` dictionary to use custom settings. 60 | 61 | 62 | ## Demo 63 | 64 | ```bash 65 | cd src 66 | python demo.py 67 | ``` 68 | 69 | You can change the `config` dictionary to use custom settings.
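For reference, here is the `config` used by `src/demo.py`, annotated with our reading of how each key is consumed in `Model.py` and `util.py` (note that `momentum` is only used by the `MomentumOptimizer` line that is commented out in `Model.py`; training defaults to Adam):

```python
config = {
    'batch_num': 5,          # minibatch size
    'iter': 100000,          # number of training iterations (training only)
    'num_classes': 21,       # 20 VOC classes + background
    'max_size': (640, 640),  # inputs are zero-padded to this size
    'weight_decay': 0.0005,  # L2 penalty on the weights (training only)
    'base_lr': 0.001,        # Adam learning rate (training only)
    'momentum': 0.9          # only used if you switch to MomentumOptimizer
}
```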
70 | 71 | 72 | ## Generate Predictions 73 | 74 | First, run the following: 75 | 76 | ```bash 77 | cd src 78 | python test.py 79 | ``` 80 | 81 | You may want to change which model is used; check the code for further details. 82 | 83 | After that, you should find the following structure in the `result` folder: 84 | 85 | ```bash 86 | $ FCN8_adam_iter_10000/ # folder name depends on the model you used 87 | $ FCN8_adam_iter_10000/gray/ # gray-scale segmentation result 88 | $ FCN8_adam_iter_10000/rgb/ # rgb segmentation result 89 | # ... and maybe several other directories ... 90 | ``` 91 | 92 | Then you can use the evaluation code provided with VOC2011 (see the next section for details). 93 | 94 | If you want to evaluate your model on the test split, you may submit your prediction results to [their server](http://host.robots.ox.ac.uk:8080/). 95 | 96 | 97 | ## Evaluation 98 | 99 | 1. `cd misc` 100 | 101 | 2. Run `save_colorful_grayscale(in_directory,out_directory)` (our generated results are grayscale PNGs, but the eval code expects indexed PNGs) 102 | 103 | 3. Run `report.m` 104 | 105 | **Note:** 106 | 107 | 1. Make sure `VOCinit.m` is at `/tf_fcn-master/data/VOCdevkit/VOCcode/` 108 | 109 | 2. Make sure the segmentation results are stored in `/tf_fcn-master/data/VOCdevkit/results/VOC2011/Segmentation/%s_val_cls/`, where `%s` identifies your results (the folder must be named `%s_val_cls`) 110 | 111 | 3. Make sure the second input `id` of `Evaluation(VOCopts,id)` is the string `%s` above. 112 | 113 | 114 | 115 | ## Models 116 | 117 | Padding to (640, 640): 118 | 119 | - FCN32_adam_20000: [ckpt](https://drive.google.com/file/d/0B3vJudZqxciYbTRuY21WZXREV0E/view?usp=sharing), [npy](https://drive.google.com/file/d/0B2SnTpv8L4iLNEVFd2RHcUZOX00/view?usp=sharing) 120 | 121 | - FCN16_adam_5000: [ckpt](https://drive.google.com/file/d/0B2SnTpv8L4iLT2VuREZwUHg4cjg/view?usp=sharing) 122 | 123 | - FCN8_adam_10000: [ckpt](https://drive.google.com/file/d/0B2SnTpv8L4iLRExqQTVONWxTX0U/view?usp=sharing) 124 | 125 | 126 | Padding to (500, 500): 127 | 128 | - FCN32_adam_35000: [ckpt](https://drive.google.com/file/d/0B3vJudZqxciYVWZfbXdybzFhWDA/view?usp=sharing) (you can extract an npy file with the `extract` method defined in `Model.py`) 129 | 130 | - FCN8_adam_30000: [ckpt](https://drive.google.com/file/d/0B3vJudZqxciYVWZfbXdybzFhWDA/view?usp=sharing) 131 | 132 | 133 | **Note:** When you train a shortcut model (FCN16 or FCN8), you will need an FCN32 model npy file as initialization, instead of the ImageNet pre-trained npy file.
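For example, to produce that FCN32 npy file from a trained checkpoint, you can restore it and call `extract`. A minimal sketch, following the commented-out snippet in `src/demo.py` (the checkpoint path is an example; use the one you actually have):

```python
# Sketch: dump a trained FCN32 ckpt to the npy file that train.py
# loads as `model_path` when initializing FCN16/FCN8.
import tensorflow as tf
from Model import FCN32_test

config = {'batch_num': 5, 'iter': 100000, 'num_classes': 21,
          'max_size': (640, 640), 'weight_decay': 0.0005,
          'base_lr': 0.0001, 'momentum': 0.9}

model = FCN32_test(config)
saver = tf.train.Saver()
ckpt = '../model/FCN32_adam_iter_20000.ckpt'

with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as session:
    # Restores the ckpt and saves '../model/FCN32_adam_iter_20000.npy'.
    # extract() pauses at an ipdb.set_trace() before returning; type `c`
    # to continue.
    npy_path = model.extract(ckpt, session, saver)
```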
-------------------------------------------------------------------------------- /src/util.py: -------------------------------------------------------------------------------- 1 | # Define some util functions 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-02-19 5 | 6 | import numpy as np 7 | import cv2 8 | import ipdb 9 | 10 | # BGR mean pixel value 11 | MEAN_PIXEL = np.array([103.939, 116.779, 123.68]) 12 | 13 | """Padding image and segmentation ground truth to (640, 640)""" 14 | def prep_im_for_blob(im_name, seg_name, rgb_to_gray, max_size=(640,640)): 15 | im = cv2.imread(im_name) # OpenCV color map default BGR 16 | im = im - MEAN_PIXEL 17 | seg = cv2.imread(seg_name)[:,:,::-1] 18 | 19 | row, col, _ = im.shape 20 | im_blob = np.zeros((max_size[0], max_size[1], 3)) 21 | im_blob[0:row,0:col,:] = im 22 | 23 | seg_blob = np.zeros((max_size[0], max_size[1], 1)) 24 | mask = np.zeros_like(seg_blob) 25 | for i in xrange(row): 26 | for j in xrange(col): 27 | seg_blob[i,j] = rgb_to_gray[tuple(seg[i,j,:])] 28 | # Discard 255 edge class 29 | if seg_blob[i,j] != 255: 30 | mask[i,j] = 1 31 | else: 32 | seg_blob[i,j] = 0 33 | 34 | return {'im_blob':im_blob, 'seg_blob':seg_blob, 'mask':mask, 'original_size':(row,col)} 35 | 36 | """Minus mean pixel value""" 37 | def prep_im(im_name): 38 | im = cv2.imread(im_name) # OpenCV color map default BGR 39 | im = np.array([im - MEAN_PIXEL]) 40 | return im 41 | 42 | """For multi-processing dataloader""" 43 | def prep_run_wrapper(args): 44 | return prep_im_for_blob(*args) 45 | 46 | """Resize input image to (256, 256)""" 47 | def prep_small_im_for_blob(im_name, seg_name, rgb_to_gray, max_size=(256,256)): 48 | im = cv2.imread(im_name) # OpenCV color map default BGR 49 | seg = cv2.imread(seg_name)[:,:,::-1] 50 | 51 | row, col, _ = im.shape 52 | im_blob = np.zeros((max_size[0], max_size[1], 3)) 53 | im_blob = cv2.resize(im, max_size, interpolation=cv2.INTER_NEAREST) 54 | im_blob = im_blob - MEAN_PIXEL 55 | 56 | 57 | seg_gray = np.zeros((row, col)) 58 | seg_blob = np.zeros((max_size[0], max_size[1])) 59 | mask = np.zeros_like(seg_blob) 60 | 61 | for i in xrange(row): 62 | for j in xrange(col): 63 | seg_gray[i,j] = rgb_to_gray[tuple(seg[i,j])] 64 | seg_blob = cv2.resize(seg_gray, max_size, interpolation=cv2.INTER_NEAREST) 65 | 66 | for i in xrange(max_size[0]): 67 | for j in xrange(max_size[1]): 68 | if seg_blob[i,j] != 255: 69 | mask[i, j] = 1 70 | else: 71 | seg_blob[i,j] = 0 72 | seg_blob = np.array([seg_blob]).transpose((1,2,0)) 73 | mask = np.array([mask]).transpose((1,2,0)) 74 | 75 | return {'im_blob':im_blob, 'seg_blob':seg_blob, 'mask':mask, 'original_size':(row,col)} 76 | 77 | """For multi-processing dataloader""" 78 | def prep_small_run_wrapper(args): 79 | return prep_small_im_for_blob(*args) 80 | 81 | 82 | """Create color mappings, check VOClabelcolormap.m for reference""" 83 | def colormap(N=256): 84 | # Create double side mappings 85 | gray_to_rgb = {} 86 | rgb_to_gray = {} 87 | 88 | for i in range(N): 89 | temp = i 90 | r = 0 91 | g = 0 92 | b = 0 93 | for j in range(8): 94 | r = r | ((temp & 1) << (7-j)) 95 | g = g | (((temp >> 1) & 1) << (7-j)) 96 | b = b | (((temp >> 2) & 1) << (7-j)) 97 | temp = temp >> 3 98 | gray_to_rgb[i] = (r,g,b) 99 | 100 | for key, val in gray_to_rgb.iteritems(): 101 | rgb_to_gray[val] = key 102 | 103 | return gray_to_rgb, rgb_to_gray 104 | 105 | """Get original size""" 106 | def get_original_size(mask, max_size=(640,640)): 107 | row = None 108 | col = None 109 | for i in range(max_size[0]-1, -1, -1): 110 | if 
mask[i,0,0] == 1: 111 | row = i + 1 112 | break 113 | 114 | for i in range(max_size[1]-1, -1, -1): 115 | if mask[0,i,0] == 1: 116 | col = i + 1 117 | break 118 | 119 | if row is None or col is None: 120 | ipdb.set_trace() 121 | return row, col 122 | 123 | """Transform gray scale segmentation result to rgb format""" 124 | def seg_gray_to_rgb(seg, gray_to_rgb): 125 | row, col = seg.shape 126 | rgb = np.zeros((row, col, 3)) 127 | 128 | for i in range(row): 129 | for j in range(col): 130 | r, g, b = gray_to_rgb[seg[i, j]] 131 | rgb[i, j, 0] = r 132 | rgb[i, j, 1] = g 133 | rgb[i, j, 2] = b 134 | 135 | return rgb 136 | 137 | 138 | """ 139 | Helper functions for bilinear upsampling 140 | credit: http://warmspringwinds.github.io/tensorflow/tf-slim/2016/11/22/upsampling-and-image-segmentation-with-tensorflow-and-tf-slim/ 141 | """ 142 | def get_kernel_size(factor): 143 | """ 144 | Find the kernel size given the desired factor of upsampling. 145 | """ 146 | return 2 * factor - factor % 2 147 | 148 | def upsample_filt(size): 149 | """ 150 | Make a 2D bilinear kernel suitable for upsampling of the given (h, w) size. 151 | """ 152 | factor = (size + 1) // 2 153 | if size % 2 == 1: 154 | center = factor - 1 155 | else: 156 | center = factor - 0.5 157 | og = np.ogrid[:size, :size] 158 | return (1 - abs(og[0] - center) / factor) * \ 159 | (1 - abs(og[1] - center) / factor) 160 | 161 | def bilinear_upsample_weights(factor, number_of_classes): 162 | """ 163 | Create weights matrix for transposed convolution with bilinear filter 164 | initialization. 165 | """ 166 | filter_size = get_kernel_size(factor) 167 | 168 | weights = np.zeros((filter_size, 169 | filter_size, 170 | number_of_classes, 171 | number_of_classes), dtype=np.float32) 172 | 173 | upsample_kernel = upsample_filt(filter_size) 174 | 175 | for i in xrange(number_of_classes): 176 | weights[:, :, i, i] = upsample_kernel 177 | 178 | return weights 179 | 180 | 181 | 182 | if __name__ == '__main__': 183 | root = '../data/VOCdevkit/VOC2011/' 184 | im_name = root + 'JPEGImages/2007_000033.jpg' 185 | seg_name = root + 'SegmentationClass/2007_000033.png' 186 | _, rgb_to_gray = colormap() 187 | 188 | # im_blob, seg_blob = prep_im_for_blob(im_name, seg_name, rgb_to_gray) 189 | data = prep_small_im_for_blob(im_name, seg_name, rgb_to_gray) 190 | import matplotlib.pyplot as plt 191 | im_blob = data['im_blob'] 192 | seg_blob = data['seg_blob'] 193 | plt.imshow(im_blob) 194 | plt.show() 195 | plt.imshow(seg_blob[:,:,0]) 196 | plt.show() 197 | ipdb.set_trace() 198 | -------------------------------------------------------------------------------- /src/Dataloader.py: -------------------------------------------------------------------------------- 1 | # Define the data loader for segmentation task 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-02-14 5 | 6 | import ipdb 7 | import numpy as np 8 | from os.path import join 9 | from util import colormap, prep_im_for_blob, prep_run_wrapper, prep_small_run_wrapper 10 | import multiprocessing 11 | 12 | """ 13 | The Dataloader for VOC2011 to load and preprocess input image and segmentation 14 | ground truth. 
15 | """ 16 | class Dataloader(object): 17 | def __init__(self, split, config): 18 | # Validate split input 19 | if split != 'train' and split != 'val' and split != 'trainval' and split != 'test': 20 | raise Exception('Please enter a valid split variable!') 21 | 22 | root = '../data/VOCdevkit/VOC2011/' 23 | self.img_path = join(root, 'JPEGImages/') 24 | self.seg_path = join(root, 'SegmentationClass/') 25 | self.split = split 26 | img_set = join(root, 'ImageSets/Segmentation/' + split + '.txt') 27 | with open(img_set) as f: 28 | self.img_list = f.read().rstrip().split('\n') 29 | 30 | self.num_images = len(self.img_list) 31 | self.temp_pointer = 0 # First idx of the current batch 32 | self._shuffle() 33 | 34 | self.batch_num = config['batch_num'] 35 | self.max_size = config['max_size'] 36 | 37 | # Create two-way mappings between gray labels and RGB colors 38 | self.gray_to_rgb, self.rgb_to_gray = colormap() 39 | 40 | 41 | def _shuffle(self): 42 | self.img_list = np.random.permutation(self.img_list) 43 | 44 | def _img_at(self, i): 45 | return self.img_path + self.img_list[i] + '.jpg' 46 | 47 | def _seg_at(self, i): 48 | return self.seg_path + self.img_list[i] + '.png' 49 | 50 | """Use padding to get same shapes""" 51 | def get_next_minibatch(self): 52 | img_blobs = [] 53 | seg_blobs = [] 54 | mask_blobs = [] 55 | ori_sizes = [] 56 | 57 | process_size = self.batch_num 58 | # Load the minibatch in parallel, one worker process per sample 59 | # (process_size == batch_num here, so this loop body runs once) 60 | for _ in xrange(self.batch_num/process_size): 61 | # Reshuffle once we run past the end of the image list 62 | 63 | if self.temp_pointer+process_size > self.num_images: 64 | self.temp_pointer = 0 65 | self._shuffle() 66 | 67 | temp_range = range(self.temp_pointer, self.temp_pointer+process_size, 1) 68 | temp_imName = [self._img_at(x) for x in temp_range] 69 | temp_segName = [self._seg_at(x) for x in temp_range] 70 | temp_map = [self.rgb_to_gray,]*process_size 71 | temp_size = [self.max_size,]*process_size 72 | 73 | p = multiprocessing.Pool(process_size) 74 | 75 | temp_result = p.map(prep_run_wrapper, zip(temp_imName, temp_segName, temp_map, temp_size)) 76 | p.close() 77 | p.join() 78 | 79 | for x in temp_result: 80 | img_blobs.append(x['im_blob']) 81 | seg_blobs.append(x['seg_blob']) 82 | mask_blobs.append(x['mask']) 83 | ori_sizes.append(x['original_size']) 84 | 85 | self.temp_pointer += process_size 86 | 87 | 88 | return [img_blobs, seg_blobs, mask_blobs, ori_sizes] 89 | 90 | """No shuffle, batch size fixed to 1""" 91 | class Dataloader_test(Dataloader): 92 | def __init__(self, split, config): 93 | # Validate split input 94 | if split != 'train' and split != 'val' and split != 'trainval' and split != 'test': 95 | raise Exception('Please enter a valid split variable!') 96 | 97 | root = '../data/VOCdevkit/VOC2011/' 98 | self.img_path = join(root, 'JPEGImages/') 99 | self.seg_path = join(root, 'SegmentationClass/') 100 | self.split = split 101 | img_set = join(root, 'ImageSets/Segmentation/' + split + '.txt') 102 | with open(img_set) as f: 103 | self.img_list = f.read().rstrip().split('\n') 104 | 105 | self.num_images = len(self.img_list) 106 | self.temp_pointer = 0 # First idx of the current batch 107 | 108 | self.batch_num = 1 109 | self.max_size = config['max_size'] 110 | 111 | # Create two-way mappings between gray labels and RGB colors 112 | self.gray_to_rgb, self.rgb_to_gray = colormap() 113 | 114 | """Get minibatch by index""" 115 | def get_minibatch_at(self, i): 116 | img_name = self._img_at(i) 117 | seg_name = self._seg_at(i) 118 | data = prep_im_for_blob(img_name, seg_name, self.rgb_to_gray, self.max_size) 119 | img_blob = data['im_blob'] 120 | # seg_blob = data['seg_blob'] 121 | mask = data['mask'] 122 | ori_size = data['original_size'] 123 | 124 | img_blobs = np.array([img_blob]) 125 | # seg_blobs = np.array([seg_blob]) 126 | mask_blobs = np.array([mask]) 127 | seg_blobs = None 128 | # mask_blobs = None 129 | 130 | return [img_blobs, seg_blobs, mask_blobs, ori_size] 131 | 132 | 133 | """Small size dataloader""" 134 | class Dataloader_small(Dataloader): 135 | def __init__(self, split, config): 136 | Dataloader.__init__(self, split, config) 137 | 138 | """Override""" 139 | def get_next_minibatch(self): 140 | img_blobs = [] 141 | seg_blobs = [] 142 | mask_blobs = [] 143 | ori_sizes = [] 144 | 145 | process_size = 5 146 | # Load the minibatch with 5 worker processes; this requires 147 | # batch_num to be a multiple of 5 148 | for _ in xrange(self.batch_num/process_size): 149 | # Reshuffle once we run past the end of the image list 150 | 151 | if self.temp_pointer+process_size > self.num_images: 152 | self.temp_pointer = 0 153 | self._shuffle() 154 | 155 | temp_range = range(self.temp_pointer, self.temp_pointer+process_size, 1) 156 | temp_imName = [self._img_at(x) for x in temp_range] 157 | temp_segName = [self._seg_at(x) for x in temp_range] 158 | temp_map = [self.rgb_to_gray,]*process_size 159 | 160 | p = multiprocessing.Pool(process_size) 161 | 162 | # Use prep_small_run_wrapper instead! 163 | temp_result = p.map(prep_small_run_wrapper, zip(temp_imName, temp_segName, temp_map)) 164 | p.close() 165 | p.join() 166 | 167 | for x in temp_result: 168 | img_blobs.append(x['im_blob']) 169 | seg_blobs.append(x['seg_blob']) 170 | mask_blobs.append(x['mask']) 171 | ori_sizes.append(x['original_size']) 172 | 173 | self.temp_pointer += process_size 174 | 175 | 176 | return [img_blobs, seg_blobs, mask_blobs, ori_sizes] 177 | 178 | if __name__ == '__main__': 179 | config = { 180 | 'batch_num':1, 181 | 'iter':100000, 182 | 'num_classes':21, 183 | 'max_size':(640,640), 184 | 'weight_decay': 0.0005, 185 | 'base_lr': 0.001, 186 | 'momentum': 0.9 187 | } 188 | 189 | # dataloader = Dataloader('train', 10) 190 | # minibatch = dataloader.get_next_minibatch() 191 | dataloader = Dataloader('val', config) 192 | minibatch = dataloader.get_next_minibatch() 193 | 194 | 195 | ipdb.set_trace() 196 | -------------------------------------------------------------------------------- /src/Model.py: -------------------------------------------------------------------------------- 1 | # Define the vgg16 style model 2 | # Author: Yuliang Zou 3 | # ylzou@umich.edu 4 | # Date: 2017-02-19 5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | from util import bilinear_upsample_weights 9 | import ipdb 10 | 11 | """Define a base class, containing some useful layer functions""" 12 | class Network(object): 13 | def __init__(self, inputs): 14 | self.inputs = [] 15 | self.layers = {} 16 | self.outputs = {} 17 | 18 | """Extract parameters from ckpt file to npy file""" 19 | def extract(self, data_path, session, saver): 20 | raise NotImplementedError('Must be subclassed.') 21 | 22 | """Load pre-trained model from numpy data_dict""" 23 | def load(self, data_dict, session, ignore_missing=True): 24 | fc_shapes = {'fc6':(7,7,512,4096), 'fc7':(1,1,4096,4096)} 25 | fc_scopes = {'fc6':'conv6', 'fc7':'conv7'} 26 | for key in data_dict: 27 | # Special cases: fc6 and fc7 28 | if key == 'fc6' or key == 'fc7': 29 | w = np.reshape(data_dict[key]['weights'], fc_shapes[key]) 30 | b = data_dict[key]['biases'] 31 | with
tf.variable_scope(fc_scopes[key], reuse=True): 32 | var1 = tf.get_variable('weights') 33 | session.run(var1.assign(w)) 34 | print "Assign pretrain model weights to " + fc_scopes[key] 35 | var2 = tf.get_variable('biases') 36 | session.run(var2.assign(b)) 37 | print "Assign pretrain model biases to " + fc_scopes[key] 38 | continue 39 | 40 | with tf.variable_scope(key, reuse=True): 41 | for subkey in data_dict[key]: 42 | try: 43 | var = tf.get_variable(subkey) 44 | session.run(var.assign(data_dict[key][subkey])) 45 | print "Assign pretrain model " + subkey + " to " + key 46 | except ValueError: 47 | print "Ignore " + key 48 | if not ignore_missing: 49 | raise 50 | 51 | """Get outputs given key names""" 52 | def get_output(self, key): 53 | if key not in self.outputs: 54 | raise KeyError 55 | return self.outputs[key] 56 | 57 | """Get parameters given key names""" 58 | def get_param(self, key): 59 | if key not in self.layers: 60 | raise KeyError 61 | return self.layers[key]['weights'], self.layers[key]['biases'] 62 | 63 | """Add conv part of vgg16""" 64 | def add_conv(self, inputs, num_classes, stage='TRAIN'): 65 | # Dropout is different for training and testing 66 | if stage == 'TRAIN': 67 | keep_prob = 0.5 68 | elif stage == 'TEST': 69 | keep_prob = 1 70 | else: 71 | raise ValueError 72 | 73 | # Conv1 74 | with tf.variable_scope('conv1_1') as scope: 75 | w_conv1_1 = tf.get_variable('weights', [3, 3, 3, 64], 76 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 77 | b_conv1_1 = tf.get_variable('biases', [64], 78 | initializer=tf.constant_initializer(0)) 79 | z_conv1_1 = tf.nn.conv2d(inputs, w_conv1_1, strides=[1, 1, 1, 1], 80 | padding='SAME') + b_conv1_1 81 | a_conv1_1 = tf.nn.relu(z_conv1_1) 82 | 83 | with tf.variable_scope('conv1_2') as scope: 84 | w_conv1_2 = tf.get_variable('weights', [3, 3, 64, 64], 85 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 86 | b_conv1_2 = tf.get_variable('biases', [64], 87 | initializer=tf.constant_initializer(0)) 88 | z_conv1_2 = tf.nn.conv2d(a_conv1_1, w_conv1_2, strides=[1, 1, 1, 1], 89 | padding='SAME') + b_conv1_2 90 | a_conv1_2 = tf.nn.relu(z_conv1_2) 91 | 92 | pool1 = tf.nn.max_pool(a_conv1_2, ksize=[1,2,2,1], strides=[1,2,2,1], 93 | padding='SAME', name='pool1') 94 | 95 | # Conv2 96 | with tf.variable_scope('conv2_1') as scope: 97 | w_conv2_1 = tf.get_variable('weights', [3, 3, 64, 128], 98 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 99 | b_conv2_1 = tf.get_variable('biases', [128], 100 | initializer=tf.constant_initializer(0)) 101 | z_conv2_1 = tf.nn.conv2d(pool1, w_conv2_1, strides=[1, 1, 1, 1], 102 | padding='SAME') + b_conv2_1 103 | a_conv2_1 = tf.nn.relu(z_conv2_1) 104 | 105 | with tf.variable_scope('conv2_2') as scope: 106 | w_conv2_2 = tf.get_variable('weights', [3, 3, 128, 128], 107 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 108 | b_conv2_2 = tf.get_variable('biases', [128], 109 | initializer=tf.constant_initializer(0)) 110 | z_conv2_2 = tf.nn.conv2d(a_conv2_1, w_conv2_2, strides=[1, 1, 1, 1], 111 | padding='SAME') + b_conv2_2 112 | a_conv2_2 = tf.nn.relu(z_conv2_2) 113 | 114 | pool2 = tf.nn.max_pool(a_conv2_2, ksize=[1,2,2,1], strides=[1,2,2,1], 115 | padding='SAME', name='pool2') 116 | 117 | # Conv3 118 | with tf.variable_scope('conv3_1') as scope: 119 | w_conv3_1 = tf.get_variable('weights', [3, 3, 128, 256], 120 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 121 | b_conv3_1 = tf.get_variable('biases', [256], 122 | 
initializer=tf.constant_initializer(0)) 123 | z_conv3_1 = tf.nn.conv2d(pool2, w_conv3_1, strides= [1, 1, 1, 1], 124 | padding='SAME') + b_conv3_1 125 | a_conv3_1 = tf.nn.relu(z_conv3_1) 126 | 127 | with tf.variable_scope('conv3_2') as scope: 128 | w_conv3_2 = tf.get_variable('weights', [3, 3, 256, 256], 129 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 130 | b_conv3_2 = tf.get_variable('biases', [256], 131 | initializer=tf.constant_initializer(0)) 132 | z_conv3_2 = tf.nn.conv2d(a_conv3_1, w_conv3_2, strides= [1, 1, 1, 1], 133 | padding='SAME') + b_conv3_2 134 | a_conv3_2 = tf.nn.relu(z_conv3_2) 135 | 136 | with tf.variable_scope('conv3_3') as scope: 137 | w_conv3_3 = tf.get_variable('weights', [3, 3, 256, 256], 138 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 139 | b_conv3_3 = tf.get_variable('biases', [256], 140 | initializer=tf.constant_initializer(0)) 141 | z_conv3_3 = tf.nn.conv2d(a_conv3_2, w_conv3_3, strides= [1, 1, 1, 1], 142 | padding='SAME') + b_conv3_3 143 | a_conv3_3 = tf.nn.relu(z_conv3_3) 144 | 145 | pool3 = tf.nn.max_pool(a_conv3_3, ksize=[1,2,2,1], strides=[1,2,2,1], 146 | padding='SAME', name='pool3') 147 | 148 | # Conv4 149 | with tf.variable_scope('conv4_1') as scope: 150 | w_conv4_1 = tf.get_variable('weights', [3, 3, 256, 512], 151 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 152 | b_conv4_1 = tf.get_variable('biases', [512], 153 | initializer=tf.constant_initializer(0)) 154 | z_conv4_1 = tf.nn.conv2d(pool3, w_conv4_1, strides= [1, 1, 1, 1], 155 | padding='SAME') + b_conv4_1 156 | a_conv4_1 = tf.nn.relu(z_conv4_1) 157 | 158 | with tf.variable_scope('conv4_2') as scope: 159 | w_conv4_2 = tf.get_variable('weights', [3, 3, 512, 512], 160 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 161 | b_conv4_2 = tf.get_variable('biases', [512], 162 | initializer=tf.constant_initializer(0)) 163 | z_conv4_2 = tf.nn.conv2d(a_conv4_1, w_conv4_2, strides= [1, 1, 1, 1], 164 | padding='SAME') + b_conv4_2 165 | a_conv4_2 = tf.nn.relu(z_conv4_2) 166 | 167 | with tf.variable_scope('conv4_3') as scope: 168 | w_conv4_3 = tf.get_variable('weights', [3, 3, 512, 512], 169 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 170 | b_conv4_3 = tf.get_variable('biases', [512], 171 | initializer=tf.constant_initializer(0)) 172 | z_conv4_3 = tf.nn.conv2d(a_conv4_2, w_conv4_3, strides= [1, 1, 1, 1], 173 | padding='SAME') + b_conv4_3 174 | a_conv4_3 = tf.nn.relu(z_conv4_3) 175 | 176 | pool4 = tf.nn.max_pool(a_conv4_3, ksize=[1,2,2,1], strides=[1,2,2,1], 177 | padding='SAME', name='pool4') 178 | 179 | # Conv5 180 | with tf.variable_scope('conv5_1') as scope: 181 | w_conv5_1 = tf.get_variable('weights', [3, 3, 512, 512], 182 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 183 | b_conv5_1 = tf.get_variable('biases', [512], 184 | initializer=tf.constant_initializer(0)) 185 | z_conv5_1 = tf.nn.conv2d(pool4, w_conv5_1, strides= [1, 1, 1, 1], 186 | padding='SAME') + b_conv5_1 187 | a_conv5_1 = tf.nn.relu(z_conv5_1) 188 | 189 | with tf.variable_scope('conv5_2') as scope: 190 | w_conv5_2 = tf.get_variable('weights', [3, 3, 512, 512], 191 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 192 | b_conv5_2 = tf.get_variable('biases', [512], 193 | initializer=tf.constant_initializer(0)) 194 | z_conv5_2 = tf.nn.conv2d(a_conv5_1, w_conv5_2, strides= [1, 1, 1, 1], 195 | padding='SAME') + b_conv5_2 196 | a_conv5_2 = tf.nn.relu(z_conv5_2) 197 | 198 | with tf.variable_scope('conv5_3') as scope: 199 | 
w_conv5_3 = tf.get_variable('weights', [3, 3, 512, 512], 200 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 201 | b_conv5_3 = tf.get_variable('biases', [512], 202 | initializer=tf.constant_initializer(0)) 203 | z_conv5_3 = tf.nn.conv2d(a_conv5_2, w_conv5_3, strides= [1, 1, 1, 1], 204 | padding='SAME') + b_conv5_3 205 | a_conv5_3 = tf.nn.relu(z_conv5_3) 206 | 207 | pool5 = tf.nn.max_pool(a_conv5_3, ksize=[1,2,2,1], strides=[1,2,2,1], 208 | padding='SAME', name='pool5') 209 | 210 | # Transform fully-connected layers to convolutional layers 211 | with tf.variable_scope('conv6') as scope: 212 | w_conv6 = tf.get_variable('weights', [7, 7, 512, 4096], 213 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 214 | b_conv6 = tf.get_variable('biases', [4096], 215 | initializer=tf.constant_initializer(0)) 216 | z_conv6 = tf.nn.conv2d(pool5, w_conv6, strides= [1, 1, 1, 1], 217 | padding='SAME') + b_conv6 218 | a_conv6 = tf.nn.relu(z_conv6) 219 | d_conv6 = tf.nn.dropout(a_conv6, keep_prob) 220 | 221 | with tf.variable_scope('conv7') as scope: 222 | w_conv7 = tf.get_variable('weights', [1, 1, 4096, 4096], 223 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 224 | b_conv7 = tf.get_variable('biases', [4096], 225 | initializer=tf.constant_initializer(0)) 226 | z_conv7 = tf.nn.conv2d(d_conv6, w_conv7, strides= [1, 1, 1, 1], 227 | padding='SAME') + b_conv7 228 | a_conv7 = tf.nn.relu(z_conv7) 229 | d_conv7 = tf.nn.dropout(a_conv7, keep_prob) 230 | 231 | # Replace the original classifier layer 232 | with tf.variable_scope('conv8') as scope: 233 | w_conv8 = tf.get_variable('weights', [1, 1, 4096, num_classes], 234 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 235 | b_conv8 = tf.get_variable('biases', [num_classes], 236 | initializer=tf.constant_initializer(0)) 237 | z_conv8 = tf.nn.conv2d(d_conv7, w_conv8, strides= [1, 1, 1, 1], 238 | padding='SAME') + b_conv8 239 | 240 | # Add to store dicts 241 | self.outputs['conv1_1'] = a_conv1_1 242 | self.outputs['conv1_2'] = a_conv1_2 243 | self.outputs['pool1'] = pool1 244 | self.outputs['conv2_1'] = a_conv2_1 245 | self.outputs['conv2_2'] = a_conv2_2 246 | self.outputs['pool2'] = pool2 247 | self.outputs['conv3_1'] = a_conv3_1 248 | self.outputs['conv3_2'] = a_conv3_2 249 | self.outputs['conv3_3'] = a_conv3_3 250 | self.outputs['pool3'] = pool3 251 | self.outputs['conv4_1'] = a_conv4_1 252 | self.outputs['conv4_2'] = a_conv4_2 253 | self.outputs['conv4_3'] = a_conv4_3 254 | self.outputs['pool4'] = pool4 255 | self.outputs['conv5_1'] = a_conv5_1 256 | self.outputs['conv5_2'] = a_conv5_2 257 | self.outputs['conv5_3'] = a_conv5_3 258 | self.outputs['pool5'] = pool5 259 | self.outputs['conv6'] = d_conv6 260 | self.outputs['conv7'] = d_conv7 261 | self.outputs['conv8'] = z_conv8 262 | 263 | self.layers['conv1_1'] = {'weights':w_conv1_1, 'biases':b_conv1_1} 264 | self.layers['conv1_2'] = {'weights':w_conv1_2, 'biases':b_conv1_2} 265 | self.layers['conv2_1'] = {'weights':w_conv2_1, 'biases':b_conv2_1} 266 | self.layers['conv2_2'] = {'weights':w_conv2_2, 'biases':b_conv2_2} 267 | self.layers['conv3_1'] = {'weights':w_conv3_1, 'biases':b_conv3_1} 268 | self.layers['conv3_2'] = {'weights':w_conv3_2, 'biases':b_conv3_2} 269 | self.layers['conv3_3'] = {'weights':w_conv3_3, 'biases':b_conv3_3} 270 | self.layers['conv4_1'] = {'weights':w_conv4_1, 'biases':b_conv4_1} 271 | self.layers['conv4_2'] = {'weights':w_conv4_2, 'biases':b_conv4_2} 272 | self.layers['conv4_3'] = {'weights':w_conv4_3, 'biases':b_conv4_3} 273 | 
self.layers['conv5_1'] = {'weights':w_conv5_1, 'biases':b_conv5_1} 274 | self.layers['conv5_2'] = {'weights':w_conv5_2, 'biases':b_conv5_2} 275 | self.layers['conv5_3'] = {'weights':w_conv5_3, 'biases':b_conv5_3} 276 | self.layers['conv6'] = {'weights':w_conv6, 'biases':b_conv6} 277 | self.layers['conv7'] = {'weights':w_conv7, 'biases':b_conv7} 278 | self.layers['conv8'] = {'weights':w_conv8, 'biases':b_conv8} 279 | 280 | 281 | """Baseline model""" 282 | class FCN32(Network): 283 | def __init__(self, config): 284 | self.num_classes = config['num_classes'] 285 | self.batch_num = config['batch_num'] 286 | self.max_size = config['max_size'] 287 | self.weight_decay = config['weight_decay'] 288 | self.base_lr = config['base_lr'] 289 | self.momentum = config['momentum'] 290 | 291 | self.img = tf.placeholder(tf.float32, 292 | [self.batch_num, self.max_size[0], self.max_size[1], 3]) 293 | self.seg = tf.placeholder(tf.int32, 294 | [self.batch_num, self.max_size[0], self.max_size[1], 1]) 295 | self.mask = tf.placeholder(tf.float32, 296 | [self.batch_num, self.max_size[0], self.max_size[1], 1]) 297 | 298 | self.layers = {} 299 | self.outputs = {} 300 | self.set_up() 301 | 302 | def set_up(self): 303 | self.add_conv(self.img, self.num_classes) 304 | self.add_deconv(bilinear=False) 305 | self.add_loss_op() 306 | self.add_weight_decay() 307 | self.add_train_op() 308 | 309 | """Extract parameters from ckpt file to npy file""" 310 | def extract(self, data_path, session, saver): 311 | saver.restore(session, data_path) 312 | scopes = ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 313 | 'conv3_2', 'conv3_3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 314 | 'conv5_2', 'conv5_3', 'conv6', 'conv7', 'conv8'] 315 | data_dict = {} 316 | for scope in scopes: 317 | [w, b] = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope) 318 | data_dict[scope] = {'weights':w.eval(), 'biases':b.eval()} 319 | file_name = data_path[0:-5] 320 | np.save(file_name, data_dict) 321 | ipdb.set_trace() 322 | return file_name + '.npy' 323 | 324 | 325 | """Add the deconv(upsampling) layer to get dense prediction""" 326 | def add_deconv(self, bilinear=False): 327 | conv8 = self.get_output('conv8') 328 | 329 | with tf.variable_scope('deconv') as scope: 330 | # Learn from scratch 331 | if not bilinear: 332 | w_deconv = tf.get_variable('weights', [64, 64, self.num_classes, self.num_classes], 333 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 334 | # Using fiexed bilinearing upsampling filter 335 | else: 336 | w_deconv = tf.get_variable('weights', trainable=True, 337 | initializer=bilinear_upsample_weights(32, self.num_classes)) 338 | 339 | b_deconv = tf.get_variable('biases', [self.num_classes], 340 | initializer=tf.constant_initializer(0)) 341 | z_deconv = tf.nn.conv2d_transpose(conv8, w_deconv, 342 | [self.batch_num, self.max_size[0], self.max_size[1], self.num_classes], 343 | strides=[1,32,32,1], padding='SAME', name='z') + b_deconv 344 | 345 | # Add to store dicts 346 | self.outputs['deconv'] = z_deconv 347 | self.layers['deconv'] = {'weights':w_deconv, 'biases':b_deconv} 348 | 349 | """Add pixelwise softmax loss""" 350 | def add_loss_op(self): 351 | pred = self.get_output('deconv') 352 | pred_reshape = tf.reshape(pred, [-1, self.num_classes]) 353 | gt_reshape = tf.reshape(self.seg, [-1]) 354 | 355 | loss_reshape = tf.nn.sparse_softmax_cross_entropy_with_logits(pred_reshape, gt_reshape) 356 | loss = tf.reshape(loss_reshape, [self.batch_num, self.max_size[0], self.max_size[1], 1]) 357 | loss_valid = 
tf.reduce_sum(loss * self.mask, (1,2,3)) 358 | 359 | valid_pixels = tf.reduce_sum(self.mask, (1,2,3)) 360 | loss_avg = tf.reduce_mean(loss_valid / valid_pixels) 361 | 362 | self.loss = loss_avg 363 | 364 | """Add weight decay""" 365 | def add_weight_decay(self): 366 | for key in self.layers: 367 | w = self.layers[key]['weights'] 368 | self.loss += self.weight_decay * tf.nn.l2_loss(w) 369 | 370 | """Set up training optimization""" 371 | def add_train_op(self): 372 | # self.train_op = tf.train.MomentumOptimizer(self.base_lr, 373 | # self.momentum).minimize(self.loss) 374 | self.train_op = tf.train.AdamOptimizer(self.base_lr).minimize(self.loss) 375 | 376 | 377 | """A better model""" 378 | class FCN16(FCN32): 379 | def __init__(self, config): 380 | FCN32.__init__(self, config) 381 | 382 | def set_up(self): 383 | self.add_conv(self.img, self.num_classes) 384 | self.add_shortcut(bilinear=True) 385 | self.add_deconv(bilinear=False) 386 | self.add_loss_op() 387 | self.add_weight_decay() 388 | self.add_train_op() 389 | 390 | """Extract parameters from ckpt file to npy file""" 391 | def extract(self, data_path, session, saver): 392 | saver.restore(session, data_path) 393 | scopes = ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 394 | 'conv3_2', 'conv3_3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 395 | 'conv5_2', 'conv5_3', 'conv6', 'conv7', 'conv8', '2x_conv8', 396 | 'pool4_1x1'] 397 | data_dict = {} 398 | for scope in scopes: 399 | [w, b] = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope) 400 | data_dict[scope] = {'weights':w.eval(), 'biases':b.eval()} 401 | file_name = data_path[0:-5] 402 | np.save(file_name, data_dict) 403 | ipdb.set_trace() 404 | return file_name + '.npy' 405 | 406 | def add_shortcut(self, bilinear=True): 407 | conv8 = self.get_output('conv8') 408 | pool4 = self.get_output('pool4') 409 | 410 | target_size = int(pool4.get_shape()[1]) 411 | 412 | with tf.variable_scope('2x_conv8') as scope: 413 | # Learn from scratch 414 | if not bilinear: 415 | w_deconv = tf.get_variable('weights', [4, 4, self.num_classes, self.num_classes], 416 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 417 | # Using fiexed bilinearing upsampling filter 418 | else: 419 | w_deconv = tf.get_variable('weights', trainable=True, 420 | initializer=bilinear_upsample_weights(2, self.num_classes)) 421 | 422 | b_deconv = tf.get_variable('biases', [self.num_classes], 423 | initializer=tf.constant_initializer(0)) 424 | z_deconv = tf.nn.conv2d_transpose(conv8, w_deconv, 425 | [self.batch_num, target_size, target_size, self.num_classes], 426 | strides=[1,2,2,1], padding='SAME', name='z') + b_deconv 427 | 428 | with tf.variable_scope('pool4_1x1') as scope: 429 | w_pool4 = tf.get_variable('weights', [1, 1, 512, self.num_classes], 430 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 431 | b_pool4 = tf.get_variable('biases', [self.num_classes], 432 | initializer=tf.constant_initializer(0)) 433 | z_pool4 = tf.nn.conv2d(pool4, w_pool4, strides= [1, 1, 1, 1], 434 | padding='SAME') + b_pool4 435 | 436 | # Element-wise sum 437 | fusion = z_deconv + z_pool4 438 | 439 | # Add to store dicts 440 | self.outputs['2x_conv8'] = z_deconv 441 | self.outputs['pool4_1x1'] = z_pool4 442 | self.outputs['fusion'] = fusion 443 | self.layers['2x_conv8'] = {'weights':w_deconv, 'biases':b_deconv} 444 | self.layers['pool4_1x1'] = {'weights':w_pool4, 'biases':b_pool4} 445 | 446 | 447 | """Add the deconv(upsampling) layer to get dense prediction""" 448 | def add_deconv(self, 
bilinear=False): 449 | fusion = self.get_output('fusion') 450 | 451 | with tf.variable_scope('deconv') as scope: 452 | # Learn from scratch 453 | if not bilinear: 454 | w_deconv = tf.get_variable('weights', [32, 32, self.num_classes, self.num_classes], 455 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 456 | # Using fiexed bilinearing upsampling filter 457 | else: 458 | w_deconv = tf.get_variable('weights', trainable=True, 459 | initializer=bilinear_upsample_weights(16, self.num_classes)) 460 | 461 | b_deconv = tf.get_variable('biases', [self.num_classes], 462 | initializer=tf.constant_initializer(0)) 463 | z_deconv = tf.nn.conv2d_transpose(fusion, w_deconv, 464 | [self.batch_num, self.max_size[0], self.max_size[1], self.num_classes], 465 | strides=[1,16,16,1], padding='SAME', name='z') + b_deconv 466 | 467 | # Add to store dicts 468 | self.outputs['deconv'] = z_deconv 469 | self.layers['deconv'] = {'weights':w_deconv, 'biases':b_deconv} 470 | 471 | 472 | """The best model""" 473 | class FCN8(FCN16): 474 | def __init__(self, config): 475 | FCN16.__init__(self, config) 476 | 477 | def add_shortcut(self, bilinear=True): 478 | conv8 = self.get_output('conv8') 479 | pool4 = self.get_output('pool4') 480 | 481 | target_size = int(pool4.get_shape()[1]) 482 | 483 | with tf.variable_scope('2x_conv8') as scope: 484 | # Learn from scratch 485 | if not bilinear: 486 | w_deconv = tf.get_variable('weights', [4, 4, self.num_classes, self.num_classes], 487 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 488 | # Using fiexed bilinearing upsampling filter 489 | else: 490 | w_deconv = tf.get_variable('weights', trainable=True, 491 | initializer=bilinear_upsample_weights(2, self.num_classes)) 492 | 493 | b_deconv = tf.get_variable('biases', [self.num_classes], 494 | initializer=tf.constant_initializer(0)) 495 | z_deconv = tf.nn.conv2d_transpose(conv8, w_deconv, 496 | [self.batch_num, target_size, target_size, self.num_classes], 497 | strides=[1,2,2,1], padding='SAME', name='z') + b_deconv 498 | 499 | 500 | with tf.variable_scope('pool4_1x1') as scope: 501 | w_pool4 = tf.get_variable('weights', [1, 1, 512, self.num_classes], 502 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 503 | b_pool4 = tf.get_variable('biases', [self.num_classes], 504 | initializer=tf.constant_initializer(0)) 505 | z_pool4 = tf.nn.conv2d(pool4, w_pool4, strides= [1, 1, 1, 1], 506 | padding='SAME') + b_pool4 507 | 508 | # Element-wise sum 509 | fusion1 = z_deconv + z_pool4 510 | 511 | ## Second fusion stage 512 | pool3 = self.get_output('pool3') 513 | 514 | with tf.variable_scope('pool3_1x1') as scope: 515 | w_pool3 = tf.get_variable('weights', [1, 1, 256, self.num_classes], 516 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 517 | b_pool3 = tf.get_variable('biases', [self.num_classes], 518 | initializer=tf.constant_initializer(0)) 519 | z_pool3 = tf.nn.conv2d(pool3, w_pool3, strides= [1, 1, 1, 1], 520 | padding='SAME') + b_pool3 521 | 522 | target_size = int(pool3.get_shape()[1]) 523 | 524 | with tf.variable_scope('2x_fusion') as scope: 525 | # Learn from scratch 526 | if not bilinear: 527 | w_deconv2 = tf.get_variable('weights', [4, 4, self.num_classes, self.num_classes], 528 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 529 | # Using fiexed bilinearing upsampling filter 530 | else: 531 | w_deconv2 = tf.get_variable('weights', trainable=True, 532 | initializer=bilinear_upsample_weights(2, self.num_classes)) 533 | 534 | b_deconv2 = 
tf.get_variable('biases', [self.num_classes], 535 | initializer=tf.constant_initializer(0)) 536 | z_deconv2 = tf.nn.conv2d_transpose(fusion1, w_deconv2, 537 | [self.batch_num, target_size, target_size, self.num_classes], 538 | strides=[1,2,2,1], padding='SAME', name='z') + b_deconv2 539 | 540 | fusion2 = z_pool3 + z_deconv2 541 | 542 | # Add to store dicts 543 | self.outputs['2x_conv8'] = z_deconv 544 | self.outputs['pool4_1x1'] = z_pool4 545 | self.outputs['pool3_1x1'] = z_pool3 546 | self.outputs['2x_fusion'] = z_deconv2 547 | self.outputs['fusion'] = fusion2 548 | self.layers['2x_conv8'] = {'weights':w_deconv, 'biases':b_deconv} 549 | self.layers['pool4_1x1'] = {'weights':w_pool4, 'biases':b_pool4} 550 | self.layers['pool3_1x1'] = {'weights':w_pool3, 'biases':b_pool3} 551 | self.layers['2x_fusion'] = {'weights':w_deconv2, 'biases':b_deconv2} 552 | 553 | 554 | """Add the deconv(upsampling) layer to get dense prediction""" 555 | def add_deconv(self, bilinear=False): 556 | fusion = self.get_output('fusion') 557 | 558 | with tf.variable_scope('deconv') as scope: 559 | # Learn from scratch 560 | if not bilinear: 561 | w_deconv = tf.get_variable('weights', [16, 16, self.num_classes, self.num_classes], 562 | initializer=tf.truncated_normal_initializer(0.0, stddev=0.01)) 563 | # Using fiexed bilinearing upsampling filter 564 | else: 565 | w_deconv = tf.get_variable('weights', trainable=True, 566 | initializer=bilinear_upsample_weights(16, self.num_classes)) 567 | 568 | b_deconv = tf.get_variable('biases', [self.num_classes], 569 | initializer=tf.constant_initializer(0)) 570 | z_deconv = tf.nn.conv2d_transpose(fusion, w_deconv, 571 | [self.batch_num, self.max_size[0], self.max_size[1], self.num_classes], 572 | strides=[1,8,8,1], padding='SAME', name='z') + b_deconv 573 | 574 | # Add to store dicts 575 | self.outputs['deconv'] = z_deconv 576 | self.layers['deconv'] = {'weights':w_deconv, 'biases':b_deconv} 577 | 578 | 579 | class FCN32_test(FCN32): 580 | def __init__(self, config): 581 | FCN32.__init__(self, config) 582 | 583 | def set_up(self): 584 | self.add_conv(self.img, self.num_classes, 'TEST') 585 | self.add_deconv(bilinear=False) 586 | 587 | 588 | class FCN16_test(FCN16): 589 | def __init__(self, config): 590 | FCN16.__init__(self, config) 591 | 592 | def set_up(self): 593 | self.add_conv(self.img, self.num_classes, 'TEST') 594 | self.add_shortcut(bilinear=True) 595 | self.add_deconv(bilinear=False) 596 | 597 | 598 | class FCN8_test(FCN8): 599 | def __init__(self, config): 600 | FCN8.__init__(self, config) 601 | 602 | def set_up(self): 603 | self.add_conv(self.img, self.num_classes, 'TEST') 604 | self.add_shortcut(bilinear=True) 605 | self.add_deconv(bilinear=False) 606 | 607 | 608 | if __name__ == '__main__': 609 | config = { 610 | 'batch_num':5, 611 | 'iter':100000, 612 | 'num_classes':21, 613 | 'max_size':(500,500), 614 | 'weight_decay': 0.0005, 615 | 'base_lr': 0.0001, 616 | 'momentum': 0.9 617 | } 618 | 619 | model = FCN32(config) 620 | #model = FCN16(config) 621 | # model = FCN8(config) 622 | 623 | --------------------------------------------------------------------------------
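A quick way to see what the `bilinear=True` branches in `Model.py` initialize: the helpers in `src/util.py` build one fixed bilinear interpolation kernel per class, with no cross-class mixing. A minimal sanity-check sketch, assuming the VOC setting of 21 classes (run from `src/` so `util` is importable):

```python
import numpy as np
from util import get_kernel_size, upsample_filt, bilinear_upsample_weights

# Factor-2 upsampling (the '2x_conv8' shortcut) uses a 2*2 - 2%2 = 4x4 kernel.
assert get_kernel_size(2) == 4

# One bilinear kernel sits on each (i, i) slice of the weight tensor.
w = bilinear_upsample_weights(2, 21)
assert w.shape == (4, 4, 21, 21)
assert np.allclose(w[:, :, 0, 0], upsample_filt(4))  # same kernel for every class
assert np.count_nonzero(w[:, :, 0, 1]) == 0          # no cross-class mixing
```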