├── CMPC_model.py
├── CMPC_video
│   ├── CMPC_video_mm_tgraph_allvec.py
│   ├── build_A2D_batches.py
│   ├── train_a2d_new.sh
│   └── trainval_video.py
├── LICENSE
├── README.md
├── build_batches.py
├── data
│   ├── referit_query_test.json
│   ├── referit_query_trainval.json
│   ├── vocabulary_Gref.txt
│   └── vocabulary_referit.txt
├── external
│   └── tensorflow-deeplab-resnet
│       ├── LICENSE
│       ├── README.md
│       ├── convert.py
│       ├── dataset
│       │   ├── debug.txt
│       │   ├── test.txt
│       │   ├── train.txt
│       │   ├── val.txt
│       │   └── val_reduced.txt
│       ├── deeplab_resnet
│       │   ├── __init__.py
│       │   ├── image_reader.py
│       │   ├── model.py
│       │   └── utils.py
│       ├── evaluate.py
│       ├── evaluate_msc.py
│       ├── fine_tune.py
│       ├── images
│       │   ├── colour_scheme.png
│       │   ├── mask.png
│       │   └── summary.png
│       ├── inference.py
│       ├── kaffe
│       │   ├── __init__.py
│       │   ├── caffe
│       │   │   ├── __init__.py
│       │   │   ├── caffepb.py
│       │   │   └── resolver.py
│       │   ├── errors.py
│       │   ├── graph.py
│       │   ├── layers.py
│       │   ├── shapes.py
│       │   ├── tensorflow
│       │   │   ├── __init__.py
│       │   │   ├── network.py
│       │   │   └── transformer.py
│       │   └── transformers.py
│       ├── misc
│       │   ├── 2007_000129.jpg
│       │   ├── 2007_000129.png
│       │   └── deploy.prototxt
│       ├── npy2ckpt.py
│       ├── requirements.txt
│       ├── train.py
│       └── train_msc.py
├── get_model.py
├── motivation.png
├── trainval.sh
├── trainval_model.py
└── util
    ├── __init__.py
    ├── cell.py
    ├── data_reader.py
    ├── data_reader_ignore.py
    ├── eval_tools.py
    ├── functions.py
    ├── h5_reader.py
    ├── im_processing.py
    ├── io.py
    ├── loss.py
    ├── nms.pyx
    ├── processing_tools.py
    ├── text_processing.py
    └── vgg16_fcn.py
/CMPC_video/build_A2D_batches.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import skimage
4 | import skimage.io
5 | import csv
6 | import glob
7 | import h5py
8 | import re
9 |
10 | from tqdm import tqdm
11 | from util import im_processing, text_processing
12 |
13 | debug = False
14 | # root directory
15 | root_dir = os.getcwd()
16 | # data directory
17 | a2d_dir = '/mnt/lustre/share/huitianrui/DATASET/A2D-Sentences'
18 |
19 |
20 | def build_a2d_batches(T, input_H, input_W, video=False):
21 | """
22 | Build data batches of A2D Sentence dataset
23 |
24 | Args:
25 | T: limit of number of words
26 | input_H: height of input frame of I3D backbone
27 | input_W: width of input frame of I3D backbone
28 | video: select consecutive frames or standalone frame
29 | """
30 |
31 | query_file = os.path.join(a2d_dir, 'a2d_annotation.txt')
32 | frame_dir = os.path.join(a2d_dir, 'Release/frames')
33 | vocab_file = os.path.join(root_dir, 'data/vocabulary_Gref.txt')
34 |
35 | dataset_name = 'a2d_sent_new'
36 | out_dataset_dir = os.path.join(root_dir, dataset_name)
37 | if not os.path.exists(out_dataset_dir):
38 | os.mkdir(out_dataset_dir)
39 | test_batch = os.path.join(out_dataset_dir, 'test_batch')
40 | train_batch = os.path.join(out_dataset_dir, 'train_batch')
41 | if not os.path.exists(test_batch):
42 | os.mkdir(test_batch)
43 | if not os.path.exists(train_batch):
44 | os.mkdir(train_batch)
45 |
46 | vocab_dict = text_processing.load_vocab_dict_from_file(vocab_file)
47 | test_prefix_list = list()
48 | train_prefix_list = list()
49 | split_dict = gen_split_dict()
50 | SENTENCE_SPLIT_REGEX = re.compile(r'(\W+)')
51 |
52 | with open(query_file, 'r') as f:
53 | reader = csv.reader(f)
54 | next(reader)
55 | total_count = 0
56 | test_count = 0
57 | train_count = 0
58 | all_zero_mask_count = 0
59 | for row in tqdm(reader):
60 | # each video belongs to test or train
61 | video_id = row[0]
62 | data_prefix = video_id
63 | if split_dict[data_prefix] == 1:
64 | save_dir = test_batch
65 | test_prefix_list.append(data_prefix)
66 | test = True
67 | else:
68 | save_dir = train_batch
69 | train_prefix_list.append(data_prefix)
70 | test = False
71 | # load sentence
72 | instance_id = int(row[1])
73 | sent = row[2].lower()
74 | words = SENTENCE_SPLIT_REGEX.split(sent.strip())
75 | words = [w for w in words if len(w.strip()) > 0]
76 | # remove punctuation and restrict sentence within 20 words
77 | if words[-1] == '.':
78 | words = words[:-1]
79 | if len(words) > T:
80 | words = words[:T]
81 | n_sent = ""
82 | for w in words:
83 | n_sent = n_sent + w + ' '
84 | n_sent = n_sent.strip()
85 | n_sent = n_sent.encode('utf-8').decode("utf-8")
86 | text = text_processing.preprocess_sentence(n_sent, vocab_dict, T)
87 |
88 | image_paths = list()
89 | # for each video, get all the gt masks of a certain instance
90 | masks, frame_ids = get_masks(video_id, instance_id)
91 |
92 | for frame_id in frame_ids:
93 | image_path = os.path.join(frame_dir, video_id, '{:0>5d}.png'.format(frame_id))
94 | image_paths.append(image_path)
95 |
96 | for frame_id, image_path, mask in zip(frame_ids, image_paths, masks):
97 | # abandon all zero mask batch
98 | if np.sum(mask) == 0:
99 | print("all zeros mask caught")
100 | all_zero_mask_count += 1
101 | continue
102 | if video:
103 | # obtain 16 consecutive frames centered at the gt frame
104 | frame_paths = frame_range(frame_id=frame_id, frame_dir=os.path.join(frame_dir, video_id))
105 | else:
106 | # only use the gt frame
107 | frame_paths = list()
108 | frames = list()
109 | if test:
110 | count = test_count
111 | test_count = test_count + 1
112 | prefix = 'test_'
113 | image = skimage.io.imread(image_path)
114 | for frame_path in frame_paths:
115 | frames.append(skimage.io.imread(frame_path))
116 | else:
117 | prefix = 'train_'
118 | count = train_count
119 | train_count = train_count + 1
120 | image = skimage.io.imread(image_path)
121 | image = skimage.img_as_ubyte(im_processing.resize_and_pad(image, input_H, input_W))
122 | mask = im_processing.resize_and_pad(mask, input_H, input_W)
123 | for frame_path in frame_paths:
124 | frame = skimage.io.imread(frame_path)
125 | frame = skimage.img_as_ubyte(im_processing.resize_and_pad(frame, input_H, input_W))
126 | frames.append(frame)
127 |
128 | if debug:
129 | m0 = mask[:, :, np.newaxis]
130 | m0 = (m0 > 0).astype(np.uint8)
131 | m0 = np.concatenate([m0, m0, m0], axis=2)
132 | debug_image = image * m0
133 | skimage.io.imsave('./debug/{}_{}_{}.png'.format(data_prefix, frame_id,
134 | sent.replace(' ', '_')), debug_image)
135 |
136 | # save batches
137 | np.savez(file=os.path.join(save_dir, dataset_name + '_' + prefix + str(count)),
138 | text_batch=text,
139 | mask_batch=(mask > 0),
140 | sent_batch=[sent],
141 | im_batch=image,
142 | frame_id=frame_id,
143 | frames=frames)
144 | total_count = total_count + 1
145 |
146 | print()
147 | print("num of all zeros masks is: {}".format(all_zero_mask_count))
148 |
149 |
150 | def frame_range(frame_id, frame_dir):
151 | frame_paths = os.listdir(frame_dir)
152 | frame_paths.sort()
153 | biggest = frame_paths[-1]
154 | frame_num = int(biggest[:-4])
155 | start = frame_id - 8
156 | end = frame_id + 8
157 | result = list()
158 | for i in range(start, end):
159 | if i < 1:
160 | frame_id = 1
161 | elif i > frame_num:
162 | frame_id = frame_num
163 | else:
164 | frame_id = i
165 | result.append(os.path.join(frame_dir, '{:0>5d}.png'.format(frame_id)))
166 | assert len(result) == 16
167 | return result
168 |
169 |
170 | def gen_split_dict():
171 | split_file = os.path.join(a2d_dir, 'Release/videoset.csv')
172 | result = dict()
173 | result.setdefault(0)
174 | with open(split_file, 'r') as f:
175 | reader = csv.reader(f)
176 | for line in reader:
177 | video_id = line[0]
178 | split_code = line[-1]
179 | result[video_id] = int(split_code)
180 | return result
181 |
182 |
183 | def get_masks(video_id, instance_id):
184 | anno_dir = os.path.join(a2d_dir, 'a2d_annotation_with_instances')
185 | masks_path = os.path.join(anno_dir, video_id, '*')
186 | mask_files = glob.glob(masks_path)
187 | mask_files.sort()
188 | masks = list()
189 | frame_ids = list()
190 |
191 | for mask_file in mask_files:
192 | f = h5py.File(mask_file, 'r')
193 | instance_ids = f['instance'][:]
194 | if instance_ids.shape[0] == 1:
195 | mask = f['reMask'][:].T
196 | else:
197 | index = np.argwhere(instance_ids == instance_id)
198 | index = np.squeeze(index)
199 | mask = f['reMask'][index].T
200 | mask = np.squeeze(mask)
201 | if index.size != 1:
202 | mask = np.sum(mask, axis=2)
203 |
204 | masks.append(mask)
205 | base_name = os.path.basename(mask_file)
206 | frame_id = int(base_name[:-3])
207 | frame_ids.append(frame_id)
208 | f.close()
209 | return masks, frame_ids
210 |
211 |
212 | if __name__ == "__main__":
213 | T = 20
214 | input_H = 320
215 | input_W = 320
216 | build_a2d_batches(T=T, input_H=input_H, input_W=input_W, video=True)
217 |
--------------------------------------------------------------------------------
/CMPC_video/train_a2d_new.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | LOG=logs/a2d_sent_new/deeplab_cmpc_video_mm_tgraph_allvec
4 | mkdir -p ${LOG}
5 | now=$(date +"%Y%m%d_%H%M%S")
6 |
7 | python -u trainval_video.py \
8 | -m train \
9 | -d a2d_sent_new \
10 | -t train \
11 | -n CMPC_video_mm_tgraph_allvec \
12 | -i 400000 \
13 | -s 20000 \
14 | -st 380000 \
15 | -lrd 400000 \
16 | -emb \
17 | -g 2 \
18 | -f ckpts/a2d_sent_new/deeplab_cmpc_video_mm_tgraph_allvec 2>&1 | tee ${LOG}/train_$now.txt
19 |
20 | python -u trainval_video.py \
21 | -m test \
22 | -d a2d_sent_new \
23 | -t test \
24 | -n CMPC_video_mm_tgraph_allvec \
25 | -i 360000 \
26 | -c \
27 | -emb \
28 | -g 2 \
29 | -f ckpts/a2d_sent_new/deeplab_cmpc_video_mm_tgraph_allvec 2>&1 | tee ${LOG}/test_$now.txt
30 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 spyflying
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CMPC-Refseg
2 | Code of our CVPR 2020 paper [*Referring Image Segmentation via Cross-Modal Progressive Comprehension*](https://openaccess.thecvf.com/content_CVPR_2020/papers/Huang_Referring_Image_Segmentation_via_Cross-Modal_Progressive_Comprehension_CVPR_2020_paper.pdf).
3 |
4 | Shaofei Huang*, Tianrui Hui*, Si Liu, Guanbin Li, Yunchao Wei, Jizhong Han, Luoqi Liu, Bo Li (* Equal contribution)
5 |
6 | ## Interpretation of CMPC
7 |
8 | * (a) Input referring expression and image.
9 |
10 | * (b) The model first perceives all the entities described in the expression based on entity words and attribute words, e.g., “man” and “white frisbee” (orange masks and blue outline).
11 |
12 | * (c) After finding all the candidate entities that may match the input expression, the relational word “holding” can be further exploited to highlight the entity involved in the relationship (green arrow) and suppress the others that are not involved.
13 |
14 | * (d) Benefiting from the relation-aware reasoning process, the referred entity is found as the final prediction (purple mask).
15 | 
16 |
17 | ## Experimental Results
18 |
19 | We modify the way features are concatenated at the end of the CMPC module and achieve higher performance than the results reported in our paper.
20 | New experimental results are summarized in the table below.
21 | You can download our trained checkpoints to test on the four datasets. The link to the checkpoints is:
22 | [Baidu Drive](https://pan.baidu.com/s/1Vm7JqqCJ6Gl3Rp4P2M-obA), password: jjsf.
23 |
24 | | Method | UNC val | UNC testA | UNC testB | UNC+ val | UNC+ testA | UNC+ testB | G-Ref val | ReferIt test |
25 | | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: |
26 | | STEP-ICCV19 \[1\] | 60.04 | 63.46 | 57.97 | 48.19 | 52.33 | 40.41| 46.40 | 64.13 |
27 | | Ours-CVPR20 | 61.36 | 64.53 | 59.64 | 49.56 | 53.44 | 43.23 | 49.05 | 65.53 |
28 | |Ours-Updated | **62.47** | **65.08** | **60.82** | **50.25** | **54.04** | **43.47** | **49.89** | **65.58** |
29 |
30 | ## Setup
31 |
32 | We recommend the following dependencies.
33 |
34 | * Python 2.7
35 | * TensorFlow 1.5
36 | * Numpy
37 | * pydensecrf
38 |
39 | This code is derived from [RRN](https://github.com/liruiyu/referseg_rrn) \[2\]. Please refer to it for more setup details.
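
For reference, a minimal environment sketch under these assumptions (exact builds for your platform may differ):

```bash
pip install tensorflow-gpu==1.5.0   # TF 1.5, as recommended above
pip install numpy pydensecrf
```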
40 |
41 | ## Data Preparation
42 | * Dataset Preprocessing
43 |
44 | We conduct experiments on four referring image segmentation datasets: `UNC`, `UNC+`, `Gref` and `ReferIt`. After downloading these datasets, you can run the following commands for data preparation:
45 | ```
46 | python build_batches.py -d Gref -t train
47 | python build_batches.py -d Gref -t val
48 | python build_batches.py -d unc -t train
49 | python build_batches.py -d unc -t val
50 | python build_batches.py -d unc -t testA
51 | python build_batches.py -d unc -t testB
52 | python build_batches.py -d unc+ -t train
53 | python build_batches.py -d unc+ -t val
54 | python build_batches.py -d unc+ -t testA
55 | python build_batches.py -d unc+ -t testB
56 | python build_batches.py -d referit -t trainval
57 | python build_batches.py -d referit -t test
58 | ```
59 |
60 | * GloVe Embedding
61 |
62 | Download `Gref_emb.npy` and `referit_emb.npy` and put them in `data/`. We provide a download link for the GloVe embeddings here:
63 | [Baidu Drive](https://pan.baidu.com/s/19f8CxT3lc_UyjCIIE_74FA), password: 2m28.
64 |
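A quick sanity check after downloading (a minimal sketch; the exact shape is an assumption — one embedding row per vocabulary word):

```python
import numpy as np

# Each .npy file stores one embedding vector per word in the corresponding vocabulary.
emb = np.load('data/Gref_emb.npy')
print(emb.shape)  # expected: (vocabulary size, embedding dimension)
```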
65 |
66 | ## Training
67 | Train on UNC training set with:
68 | ```
69 | python -u trainval_model.py -m train -d unc -t train -n CMPC_model -emb -f ckpts/unc/cmpc_model
70 | ```
71 |
72 | ## Testing
73 | Test on UNC validation set with:
74 | ```
75 | python -u trainval_model.py -m test -d unc -t val -n CMPC_model -i 700000 -c -emb -f ckpts/unc/cmpc_model
76 | ```
77 |
78 | ## CMPC for video referring segmentation
79 | We release the video version of CMPC for the A2D dataset under `CMPC_video/`.
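Batch building and training mirror the image pipeline; a usage sketch (the working directory is an assumption — `build_A2D_batches.py` resolves `data/vocabulary_Gref.txt` and `util/` relative to the cwd):

```bash
python CMPC_video/build_A2D_batches.py   # builds the a2d_sent_new train/test batches
bash CMPC_video/train_a2d_new.sh         # trains, then tests, on A2D
```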
80 |
81 | ## Reference
82 | \[1\] Chen, Ding-Jie, et al. "See-through-text grouping for referring image segmentation." Proceedings of the IEEE International Conference on Computer Vision. 2019.
83 |
84 | \[2\] Li, Ruiyu, et al. "Referring image segmentation via recurrent refinement networks." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2018.
85 |
86 | ## Citation
87 | If our CMPC is useful to your research, please consider citing:
88 | ```
89 | @inproceedings{huang2020referring,
90 | title={Referring Image Segmentation via Cross-Modal Progressive Comprehension},
91 | author={Huang, Shaofei and Hui, Tianrui and Liu, Si and Li, Guanbin and Wei, Yunchao and Han, Jizhong and Liu, Luoqi and Li, Bo},
92 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
93 | pages={10488--10497},
94 | year={2020}
95 | }
96 | ```
97 |
--------------------------------------------------------------------------------
/build_batches.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./external/coco/PythonAPI')
3 | import os
4 | import argparse
5 | import numpy as np
6 | import json
7 | import skimage
8 | import skimage.io
9 |
10 | from util import im_processing, text_processing
11 | from util.io import load_referit_gt_mask as load_gt_mask
12 | from refer import REFER
13 | from pycocotools import mask as cocomask
14 |
15 |
16 | def build_referit_batches(setname, T, input_H, input_W):
17 | # data directory
18 | im_dir = './data/referit/images/'
19 | mask_dir = './data/referit/mask/'
20 | query_file = './data/referit_query_' + setname + '.json'
21 | vocab_file = './data/vocabulary_referit.txt'
22 |
23 | # saving directory
24 | data_folder = './referit/' + setname + '_batch/'
25 | data_prefix = 'referit_' + setname
26 | if not os.path.isdir(data_folder):
27 | os.makedirs(data_folder)
28 |
29 | # load annotations
30 | query_dict = json.load(open(query_file))
31 | im_list = query_dict.keys()
32 | vocab_dict = text_processing.load_vocab_dict_from_file(vocab_file)
33 |
34 | # collect training samples
35 | samples = []
36 | for n_im, name in enumerate(im_list):
37 | im_name = name.split('_', 1)[0] + '.jpg'
38 | mask_name = name + '.mat'
39 | for sent in query_dict[name]:
40 | samples.append((im_name, mask_name, sent))
41 |
42 | # save batches to disk
43 | num_batch = len(samples)
44 | for n_batch in range(num_batch):
45 | print('saving batch %d / %d' % (n_batch + 1, num_batch))
46 | im_name, mask_name, sent = samples[n_batch]
47 | im = skimage.io.imread(im_dir + im_name)
48 | mask = load_gt_mask(mask_dir + mask_name).astype(np.float32)
49 |
50 | if 'train' in setname:
51 | im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, input_H, input_W))
52 | mask = im_processing.resize_and_pad(mask, input_H, input_W)
53 | if im.ndim == 2:
54 | im = np.tile(im[:, :, np.newaxis], (1, 1, 3))
55 |
56 | text = text_processing.preprocess_sentence(sent, vocab_dict, T)
57 |
58 | np.savez(file = data_folder + data_prefix + '_' + str(n_batch) + '.npz',
59 | text_batch = text,
60 | im_batch = im,
61 | mask_batch = (mask > 0),
62 | sent_batch = [sent])
63 |
64 |
65 | def build_coco_batches(dataset, setname, T, input_H, input_W):
66 | im_dir = './data/coco/images'
67 | im_type = 'train2014'
68 | vocab_file = './data/vocabulary_Gref.txt'
69 |
70 | data_folder = './' + dataset + '/' + setname + '_batch/'
71 | data_prefix = dataset + '_' + setname
72 | if not os.path.isdir(data_folder):
73 | os.makedirs(data_folder)
74 |
75 | if dataset == 'Gref':
76 | refer = REFER('./external/refer/data', dataset = 'refcocog', splitBy = 'google')
77 | elif dataset == 'unc':
78 | refer = REFER('./external/refer/data', dataset = 'refcoco', splitBy = 'unc')
79 | elif dataset == 'unc+':
80 | refer = REFER('./external/refer/data', dataset = 'refcoco+', splitBy = 'unc')
81 | else:
82 | raise ValueError('Unknown dataset %s' % dataset)
83 | refs = [refer.Refs[ref_id] for ref_id in refer.Refs if refer.Refs[ref_id]['split'] == setname]
84 | vocab_dict = text_processing.load_vocab_dict_from_file(vocab_file)
85 |
86 | n_batch = 0
87 | for ref in refs:
88 | im_name = 'COCO_' + im_type + '_' + str(ref['image_id']).zfill(12)
89 | im = skimage.io.imread('%s/%s/%s.jpg' % (im_dir, im_type, im_name))
90 | seg = refer.Anns[ref['ann_id']]['segmentation']
91 | rle = cocomask.frPyObjects(seg, im.shape[0], im.shape[1])
92 | mask = np.max(cocomask.decode(rle), axis = 2).astype(np.float32)
93 |
94 | if 'train' in setname:
95 | im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, input_H, input_W))
96 | mask = im_processing.resize_and_pad(mask, input_H, input_W)
97 | if im.ndim == 2:
98 | im = np.tile(im[:, :, np.newaxis], (1, 1, 3))
99 |
100 | for sentence in ref['sentences']:
101 | print('saving batch %d' % (n_batch + 1))
102 | sent = sentence['sent']
103 | text = text_processing.preprocess_sentence(sent, vocab_dict, T)
104 |
105 | np.savez(file = data_folder + data_prefix + '_' + str(n_batch) + '.npz',
106 | text_batch = text,
107 | im_batch = im,
108 | mask_batch = (mask > 0),
109 | sent_batch = [sent])
110 | n_batch += 1
111 |
112 |
113 | if __name__ == "__main__":
114 | parser = argparse.ArgumentParser()
115 | parser.add_argument('-d', type = str, default = 'referit') # 'unc', 'unc+', 'Gref'
116 | parser.add_argument('-t', type = str, default = 'trainval') # 'test', 'val', 'testA', 'testB'
117 |
118 | args = parser.parse_args()
119 | T = 20
120 | input_H = 320
121 | input_W = 320
122 | if args.d == 'referit':
123 | build_referit_batches(setname = args.t,
124 | T = T, input_H = input_H, input_W = input_W)
125 | else:
126 | build_coco_batches(dataset = args.d, setname = args.t,
127 | T = T, input_H = input_H, input_W = input_W)
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Vladimir Nekrasov
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/README.md:
--------------------------------------------------------------------------------
1 | # DeepLab-ResNet-TensorFlow
2 |
3 | [](https://travis-ci.org/DrSleep/tensorflow-deeplab-resnet)
4 |
5 | This is a (re-)implementation of [DeepLab-ResNet](http://liangchiehchen.com/projects/DeepLabv2_resnet.html) in TensorFlow for semantic image segmentation on the [PASCAL VOC dataset](http://host.robots.ox.ac.uk/pascal/VOC/).
6 |
7 | ## Updates
8 |
9 | **29 Jan, 2017**:
10 | * Fixed the implementation of the batch normalisation layer: it now supports both the training and inference steps. If the flag `--is-training` is provided, the running means and variances will be updated; otherwise, they will be kept intact. The `.ckpt` files have been updated accordingly - to download please refer to the new link provided below.
11 | * Image summaries during the training process can now be seen using TensorBoard.
12 | * Fixed the evaluation procedure: the 'void' label (255) is now correctly ignored. As a result, the performance score on the validation set has increased to 80.1%.
13 |
14 | **11 Feb, 2017**:
15 | * The training script `train.py` has been re-written following the original optimisation setup: SGD with momentum, weight decay, learning rate with polynomial decay, different learning rates for different layers, ignoring the 'void' label (255).
16 | * The training script with multi-scale inputs `train_msc.py` has been added: the input is resized to 0.5 and 0.75 of the original resolution, and 4 losses are aggregated: loss on the original resolution, on the 0.75 resolution, on the 0.5 resolution, and loss on all the fused outputs.
17 | * Evaluation of a single-scale converted pre-trained model on the PASCAL VOC validation dataset (using ['SegmentationClassAug'](https://www.dropbox.com/s/oeu149j8qtbs1x0/SegmentationClassAug.zip?dl=0)) leads to 86.9% mIoU. This is confirmed by [the official PASCAL VOC server](http://host.robots.ox.ac.uk/anonymous/FIQPRH.html). The score on the test dataset is [75.8%](http://host.robots.ox.ac.uk/anonymous/EPBIGU.html).
18 |
19 | **22 Feb, 2017**:
20 | * The training script with multi-scale inputs `train_msc.py` now supports gradients accumulation: the relevant parameter `--grad-update-every` effectively mimics the behaviour of `iter_size` in Caffe. This allows using bigger batch sizes with less GPU memory consumed. (Thanks to @arslan-chaudhry for this contribution!)
21 | * The random mirror and random crop options have been added. (Again big thanks to @arslan-chaudhry !)
22 |
23 | **23 Apr, 2017**:
24 | * TensorFlow 1.1.0 is now supported.
25 | * Three new flags `--num-classes`, `--ignore-label` and `--not-restore-last` are added to ease the usability of the scripts on new datasets. Check out [these instructions](https://github.com/DrSleep/tensorflow-deeplab-resnet#using-your-dataset) on how to set up the training process on your dataset.
26 |
27 | ## Model Description
28 |
29 | The DeepLab-ResNet is built on a fully convolutional variant of [ResNet-101](https://github.com/KaimingHe/deep-residual-networks) with [atrous (dilated) convolutions](https://github.com/fyu/dilation), atrous spatial pyramid pooling, and multi-scale inputs (not implemented here).
30 |
31 | The model is trained on a mini-batch of images and corresponding ground truth masks with the softmax classifier at the top. During training, the masks are downsampled to match the size of the output from the network; during inference, to acquire the output of the same size as the input, bilinear upsampling is applied. The final segmentation mask is computed using argmax over the logits.
32 | Optionally, a fully-connected probabilistic graphical model, namely, CRF, can be applied to refine the final predictions.
33 | On the test set of PASCAL VOC, the model achieves 79.7% of mean intersection-over-union.
34 |
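In code, this inference head is only a few lines; the sketch below mirrors the pattern used in `evaluate.py` in this repository (`net` is the constructed `DeepLabResNetModel`, `image_batch` the input tensor):

```python
import tensorflow as tf

# Logits from the final classification layer, at reduced spatial resolution.
raw_output = net.layers['fc1_voc12']
# Bilinearly upsample the logits to the input size, then take the per-pixel argmax.
raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3])
raw_output = tf.argmax(raw_output, dimension=3)
pred = tf.expand_dims(raw_output, dim=3)  # 4-D tensor: [batch, h, w, 1]
```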
35 | For more details on the underlying model please refer to the following paper:
36 |
37 |
38 | @article{CP2016Deeplab,
39 | title={DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs},
40 | author={Liang-Chieh Chen and George Papandreou and Iasonas Kokkinos and Kevin Murphy and Alan L Yuille},
41 | journal={arXiv:1606.00915},
42 | year={2016}
43 | }
44 |
45 |
46 |
47 | ## Requirements
48 |
49 | TensorFlow needs to be installed before running the scripts.
50 | TensorFlow v1.1.0 is supported; for TensorFlow v0.12 please refer to this [branch](https://github.com/DrSleep/tensorflow-deeplab-resnet/tree/tf-0.12); for TensorFlow v0.11 please refer to this [branch](https://github.com/DrSleep/tensorflow-deeplab-resnet/tree/tf-0.11). Note that those branches may not have the same functionality as the current master.
51 |
52 | To install the required python packages (except TensorFlow), run
53 | ```bash
54 | pip install -r requirements.txt
55 | ```
56 | or for a local installation
57 | ```bash
58 | pip install --user -r requirements.txt
59 | ```
60 |
61 | ## Caffe to TensorFlow conversion
62 |
63 | To imitate the structure of the model, we have used `.caffemodel` files provided by the [authors](http://liangchiehchen.com/projects/DeepLabv2_resnet.html). The conversion has been performed using [Caffe to TensorFlow](https://github.com/ethereon/caffe-tensorflow) with an additional configuration for atrous convolution and batch normalisation (since the batch normalisation provided by Caffe-tensorflow only supports inference).
64 | There is no need to perform the conversion yourself as you can download the already converted models - `deeplab_resnet.ckpt` (pre-trained) and `deeplab_resnet_init.ckpt` (the last layers are randomly initialised) - [here](https://drive.google.com/open?id=0B_rootXHuswsZ0E4Mjh1ZU5xZVU).
65 |
66 | Nevertheless, it is easy to perform the conversion manually, given that the appropriate `.caffemodel` file has been downloaded, and [Caffe to TensorFlow](https://github.com/ethereon/caffe-tensorflow) dependencies have been installed. The Caffe model definition is provided in `misc/deploy.prototxt`.
67 | To extract weights from `.caffemodel`, run the following:
68 | ```bash
69 | python convert.py /path/to/deploy/prototxt --caffemodel /path/to/caffemodel --data-output-path /where/to/save/numpy/weights
70 | ```
71 | As a result of running the command above, the model weights will be stored in `/where/to/save/numpy/weights`. To convert them to the native TensorFlow format (`.ckpt`), simply execute:
72 | ```bash
73 | python npy2ckpt.py /where/to/save/numpy/weights --save-dir=/where/to/save/ckpt/weights
74 | ```
75 |
76 | ## Dataset and Training
77 |
78 | To train the network, one can use the augmented PASCAL VOC 2012 dataset with 10582 images for training and 1449 images for validation.
79 |
80 | The training script allows one to monitor the optimisation progress using TensorBoard's image summary. Besides that, one can also exploit random scaling and mirroring of the inputs during training as a means of data augmentation. For example, to train the model from scratch with random scale and mirroring turned on, simply run:
81 | ```bash
82 | python train.py --random-mirror --random-scale
83 | ```
84 |
85 |
86 |
87 | To see the documentation on each of the training settings run the following:
88 |
89 | ```bash
90 | python train.py --help
91 | ```
92 |
93 | An additional script, `fine_tune.py`, demonstrates how to train only the last layers of the network. The script `train_msc.py` with multi-scale inputs fully resembles the training setup of the original model.
94 |
95 |
96 | ## Evaluation
97 |
98 | The single-scale model shows 86.9% mIoU on the Pascal VOC 2012 validation dataset (['SegmentationClassAug'](https://www.dropbox.com/s/oeu149j8qtbs1x0/SegmentationClassAug.zip?dl=0)). No post-processing step with CRF is applied.
99 |
100 | The following command provides the description of each of the evaluation settings:
101 | ```bash
102 | python evaluate.py --help
103 | ```
104 |
105 | ## Inference
106 |
107 | To perform inference over your own images, use the following command:
108 | ```bash
109 | python inference.py /path/to/your/image /path/to/ckpt/file
110 | ```
111 | This will run the forward pass and save the resulting mask with this colour map:
112 | ![colour scheme](images/colour_scheme.png)
113 |
114 |
115 | ## Using your dataset
116 |
117 | In order to apply the same scripts to your own dataset, you will need to follow these steps:
118 |
119 | 0. Make sure that your segmentation masks are in the same format as the ones in the DeepLab setup (i.e., without a colour map). This means that if your segmentation masks are RGB images, you would need to convert each 3-D RGB vector into a 1-D label. For example, take a look [here](https://gist.github.com/DrSleep/4bce37254c5900545e6b65f6a0858b9c);
120 | 1. Create a file with instances of your dataset in the same format as in files [here](https://github.com/DrSleep/tensorflow-deeplab-resnet/tree/master/dataset) (see the one-line example below);
121 | 2. Change the flags `data-dir` and `data-list` accordingly in the script file that you will be using (e.g., `python train.py --data-dir /my/data/dir --data-list /my/data/list`);
122 | 3. Change the `IMG_MEAN` vector accordingly in the script file that you will be using;
123 | 4. For visualisation purposes, you will also need to change the colour map [here](https://github.com/DrSleep/tensorflow-deeplab-resnet/blob/master/deeplab_resnet/utils.py);
124 | 5. Change the flags `num-classes` and `ignore-label` accordingly in the script that you will be using (e.g., `python train.py --ignore-label 255 --num-classes 21`).
125 | 6. If restoring weights from the `PASCAL` models for your dataset with a different number of classes, you will also need to pass the `--not-restore-last` flag, which will prevent the last layers of size 21 from being restored.
126 |
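For step 1, each line of the list file pairs an image path with its mask path; `dataset/debug.txt` in this repository is a one-line example:

```
misc/2007_000129.jpg misc/2007_000129.png
```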
127 |
128 | ## Missing features
129 |
130 | The post-processing step with CRF is currently being implemented [here](https://github.com/DrSleep/tensorflow-deeplab-resnet/tree/crf).
131 |
132 |
133 | ## Other implementations
134 | * [DeepLab-LargeFOV in TensorFlow](https://github.com/DrSleep/tensorflow-deeplab-lfov)
135 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/convert.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This script belongs to https://github.com/ethereon/caffe-tensorflow
4 | import os
5 | import sys
6 | import numpy as np
7 | import argparse
8 | from kaffe import KaffeError, print_stderr
9 | from kaffe.tensorflow import TensorFlowTransformer
10 |
11 |
12 | def fatal_error(msg):
13 | print_stderr(msg)
14 | exit(-1)
15 |
16 |
17 | def validate_arguments(args):
18 | if (args.data_output_path is not None) and (args.caffemodel is None):
19 | fatal_error('No input data path provided.')
20 | if (args.caffemodel is not None) and (args.data_output_path is None):
21 | fatal_error('No output data path provided.')
22 | if (args.code_output_path is None) and (args.data_output_path is None):
23 | fatal_error('No output path specified.')
24 |
25 |
26 | def convert(def_path, caffemodel_path, data_output_path, code_output_path, phase):
27 | try:
28 | transformer = TensorFlowTransformer(def_path, caffemodel_path, phase=phase)
29 | print_stderr('Converting data...')
30 | if caffemodel_path is not None:
31 | data = transformer.transform_data()
32 | print_stderr('Saving data...')
33 | with open(data_output_path, 'wb') as data_out:
34 | np.save(data_out, data)
35 | if code_output_path:
36 | print_stderr('Saving source...')
37 | with open(code_output_path, 'wb') as src_out:
38 | src_out.write(transformer.transform_source())
39 | print_stderr('Done.')
40 | except KaffeError as err:
41 | fatal_error('Error encountered: {}'.format(err))
42 |
43 |
44 | def main():
45 | parser = argparse.ArgumentParser()
46 | parser.add_argument('def_path', help='Model definition (.prototxt) path')
47 | parser.add_argument('--caffemodel', help='Model data (.caffemodel) path')
48 | parser.add_argument('--data-output-path', help='Converted data output path')
49 | parser.add_argument('--code-output-path', help='Save generated source to this path')
50 | parser.add_argument('-p',
51 | '--phase',
52 | default='test',
53 | help='The phase to convert: test (default) or train')
54 | args = parser.parse_args()
55 | validate_arguments(args)
56 | convert(args.def_path, args.caffemodel, args.data_output_path, args.code_output_path,
57 | args.phase)
58 |
59 |
60 | if __name__ == '__main__':
61 | main()
62 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/dataset/debug.txt:
--------------------------------------------------------------------------------
1 | misc/2007_000129.jpg misc/2007_000129.png
2 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/deeplab_resnet/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import DeepLabResNetModel
2 | from .image_reader import ImageReader
3 | from .utils import decode_labels, inv_preprocess, prepare_label
4 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/deeplab_resnet/image_reader.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import tensorflow as tf
5 |
6 | def image_scaling(img, label):
7 | """
8 | Randomly scales the images between 0.5 to 1.5 times the original size.
9 |
10 | Args:
11 | img: Training image to scale.
12 | label: Segmentation mask to scale.
13 | """
14 |
15 | scale = tf.random_uniform([1], minval=0.5, maxval=1.5, dtype=tf.float32, seed=None)
16 | h_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[0]), scale))
17 | w_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[1]), scale))
18 | new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1])
19 | img = tf.image.resize_images(img, new_shape)
20 | label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape)
21 | label = tf.squeeze(label, squeeze_dims=[0])
22 |
23 | return img, label
24 |
25 | def image_mirroring(img, label):
26 | """
27 | Randomly mirrors the images.
28 |
29 | Args:
30 | img: Training image to mirror.
31 | label: Segmentation mask to mirror.
32 | """
33 |
34 | distort_left_right_random = tf.random_uniform([1], 0, 1.0, dtype=tf.float32)[0]
35 | mirror = tf.less(tf.stack([1.0, distort_left_right_random, 1.0]), 0.5)  # only the width axis (index 1) can test < 0.5
36 | mirror = tf.boolean_mask([0, 1, 2], mirror)  # axes to reverse: [1] with probability 0.5, otherwise []
37 | img = tf.reverse(img, mirror)
38 | label = tf.reverse(label, mirror)
39 | return img, label
40 |
41 | def random_crop_and_pad_image_and_labels(image, label, crop_h, crop_w, ignore_label=255):
42 | """
43 | Randomly crop and pads the input images.
44 |
45 | Args:
46 | image: Training image to crop/ pad.
47 | label: Segmentation mask to crop/ pad.
48 | crop_h: Height of cropped segment.
49 | crop_w: Width of cropped segment.
50 | ignore_label: Label to ignore during the training.
51 | """
52 |
53 | label = tf.cast(label, dtype=tf.float32)
54 | label = label - ignore_label # Needs to be subtracted and later added due to 0 padding.
55 | combined = tf.concat(axis=2, values=[image, label])
56 | image_shape = tf.shape(image)
57 | combined_pad = tf.image.pad_to_bounding_box(combined, 0, 0, tf.maximum(crop_h, image_shape[0]), tf.maximum(crop_w, image_shape[1]))
58 |
59 | last_image_dim = tf.shape(image)[-1]
60 | last_label_dim = tf.shape(label)[-1]
61 | combined_crop = tf.random_crop(combined_pad, [crop_h,crop_w,4])
62 | img_crop = combined_crop[:, :, :last_image_dim]
63 | label_crop = combined_crop[:, :, last_image_dim:]
64 | label_crop = label_crop + ignore_label
65 | label_crop = tf.cast(label_crop, dtype=tf.uint8)
66 |
67 | # Set static shape so that tensorflow knows shape at compile time.
68 | img_crop.set_shape((crop_h, crop_w, 3))
69 | label_crop.set_shape((crop_h,crop_w, 1))
70 | return img_crop, label_crop
71 |
72 | def read_labeled_image_list(data_dir, data_list):
73 | """Reads txt file containing paths to images and ground truth masks.
74 |
75 | Args:
76 | data_dir: path to the directory with images and masks.
77 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'.
78 |
79 | Returns:
80 | Two lists with all file names for images and masks, respectively.
81 | """
82 | f = open(data_list, 'r')
83 | images = []
84 | masks = []
85 | for line in f:
86 | try:
87 | image, mask = line.strip("\n").split(' ')
88 | except ValueError: # Adhoc for test.
89 | image = mask = line.strip("\n")
90 | images.append(data_dir + image)
91 | masks.append(data_dir + mask)
92 | return images, masks
93 |
94 | def read_images_from_disk(input_queue, input_size, random_scale, random_mirror, ignore_label, img_mean): # optional pre-processing arguments
95 | """Read one image and its corresponding mask with optional pre-processing.
96 |
97 | Args:
98 | input_queue: tf queue with paths to the image and its mask.
99 | input_size: a tuple with (height, width) values.
100 | If not given, return images of original size.
101 | random_scale: whether to randomly scale the images prior
102 | to random crop.
103 | random_mirror: whether to randomly mirror the images prior
104 | to random crop.
105 | ignore_label: index of label to ignore during the training.
106 | img_mean: vector of mean colour values.
107 |
108 | Returns:
109 | Two tensors: the decoded image and its mask.
110 | """
111 |
112 | img_contents = tf.read_file(input_queue[0])
113 | label_contents = tf.read_file(input_queue[1])
114 |
115 | img = tf.image.decode_jpeg(img_contents, channels=3)
116 | img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img)
117 | img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]), dtype=tf.float32)
118 | # Extract mean.
119 | img -= img_mean
120 |
121 | label = tf.image.decode_png(label_contents, channels=1)
122 |
123 | if input_size is not None:
124 | h, w = input_size
125 |
126 | # Randomly scale the images and labels.
127 | if random_scale:
128 | img, label = image_scaling(img, label)
129 |
130 | # Randomly mirror the images and labels.
131 | if random_mirror:
132 | img, label = image_mirroring(img, label)
133 |
134 | # Randomly crops the images and labels.
135 | img, label = random_crop_and_pad_image_and_labels(img, label, h, w, ignore_label)
136 |
137 | return img, label
138 |
139 | class ImageReader(object):
140 | '''Generic ImageReader which reads images and corresponding segmentation
141 | masks from the disk, and enqueues them into a TensorFlow queue.
142 | '''
143 |
144 | def __init__(self, data_dir, data_list, input_size,
145 | random_scale, random_mirror, ignore_label, img_mean, coord):
146 | '''Initialise an ImageReader.
147 |
148 | Args:
149 | data_dir: path to the directory with images and masks.
150 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'.
151 | input_size: a tuple with (height, width) values, to which all the images will be resized.
152 | random_scale: whether to randomly scale the images prior to random crop.
153 | random_mirror: whether to randomly mirror the images prior to random crop.
154 | ignore_label: index of label to ignore during the training.
155 | img_mean: vector of mean colour values.
156 | coord: TensorFlow queue coordinator.
157 | '''
158 | self.data_dir = data_dir
159 | self.data_list = data_list
160 | self.input_size = input_size
161 | self.coord = coord
162 |
163 | self.image_list, self.label_list = read_labeled_image_list(self.data_dir, self.data_list)
164 | self.images = tf.convert_to_tensor(self.image_list, dtype=tf.string)
165 | self.labels = tf.convert_to_tensor(self.label_list, dtype=tf.string)
166 | self.queue = tf.train.slice_input_producer([self.images, self.labels],
167 | shuffle=input_size is not None) # not shuffling if it is val
168 | self.image, self.label = read_images_from_disk(self.queue, self.input_size, random_scale, random_mirror, ignore_label, img_mean)
169 |
170 | def dequeue(self, num_elements):
171 | '''Pack images and labels into a batch.
172 |
173 | Args:
174 | num_elements: the batch size.
175 |
176 | Returns:
177 | Two tensors of size (batch_size, h, w, {3, 1}) for images and masks.'''
178 | image_batch, label_batch = tf.train.batch([self.image, self.label],
179 | num_elements)
180 | return image_batch, label_batch
181 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/deeplab_resnet/utils.py:
--------------------------------------------------------------------------------
1 | from PIL import Image
2 | import numpy as np
3 | import tensorflow as tf
4 |
5 | # colour map
6 | label_colours = [(0,0,0)
7 | # 0=background
8 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128)
9 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
10 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0)
11 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
12 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128)
13 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person
14 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)]
15 | # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
16 |
17 | def decode_labels(mask, num_images=1, num_classes=21):
18 | """Decode batch of segmentation masks.
19 |
20 | Args:
21 | mask: result of inference after taking argmax.
22 | num_images: number of images to decode from the batch.
23 | num_classes: number of classes to predict (including background).
24 |
25 | Returns:
26 | A batch with num_images RGB images of the same size as the input.
27 | """
28 | n, h, w, c = mask.shape
29 | assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' % (n, num_images)
30 | outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8)
31 | for i in range(num_images):
32 | img = Image.new('RGB', (len(mask[i, 0]), len(mask[i])))
33 | pixels = img.load()
34 | for j_, j in enumerate(mask[i, :, :, 0]):
35 | for k_, k in enumerate(j):
36 | if k < num_classes:
37 | pixels[k_,j_] = label_colours[k]
38 | outputs[i] = np.array(img)
39 | return outputs
40 |
41 | def prepare_label(input_batch, new_size, num_classes, one_hot=True):
42 | """Resize masks and perform one-hot encoding.
43 |
44 | Args:
45 | input_batch: input tensor of shape [batch_size H W 1].
46 | new_size: a tensor with new height and width.
47 | num_classes: number of classes to predict (including background).
48 | one_hot: whether perform one-hot encoding.
49 |
50 | Returns:
51 | Outputs a tensor of shape [batch_size h w 21]
52 | with last dimension comprised of 0's and 1's only.
53 | """
54 | with tf.name_scope('label_encode'):
55 | input_batch = tf.image.resize_nearest_neighbor(input_batch, new_size) # as labels are integer numbers, need to use NN interp.
56 | input_batch = tf.squeeze(input_batch, squeeze_dims=[3]) # reducing the channel dimension.
57 | if one_hot:
58 | input_batch = tf.one_hot(input_batch, depth=num_classes)
59 | return input_batch
60 |
61 | def inv_preprocess(imgs, num_images, img_mean):
62 | """Inverse preprocessing of the batch of images.
63 | Add the mean vector and convert from BGR to RGB.
64 |
65 | Args:
66 | imgs: batch of input images.
67 | num_images: number of images to apply the inverse transformations on.
68 | img_mean: vector of mean colour values.
69 |
70 | Returns:
71 | The batch of the size num_images with the same spatial dimensions as the input.
72 | """
73 | n, h, w, c = imgs.shape
74 | assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' % (n, num_images)
75 | outputs = np.zeros((num_images, h, w, c), dtype=np.uint8)
76 | for i in range(num_images):
77 | outputs[i] = (imgs[i] + img_mean)[:, :, ::-1].astype(np.uint8)
78 | return outputs
79 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/evaluate.py:
--------------------------------------------------------------------------------
1 | """Evaluation script for the DeepLab-ResNet network on the validation subset
2 | of PASCAL VOC dataset.
3 |
4 | This script evaluates the model on 1449 validation images.
5 | """
6 |
7 | from __future__ import print_function
8 |
9 | import argparse
10 | from datetime import datetime
11 | import os
12 | import sys
13 | import time
14 |
15 | import tensorflow as tf
16 | import numpy as np
17 |
18 | from deeplab_resnet import DeepLabResNetModel, ImageReader, prepare_label
19 |
20 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
21 |
22 | DATA_DIRECTORY = '/home/VOCdevkit'
23 | DATA_LIST_PATH = './dataset/val.txt'
24 | IGNORE_LABEL = 255
25 | NUM_CLASSES = 21
26 | NUM_STEPS = 1449 # Number of images in the validation set.
27 | RESTORE_FROM = './deeplab_resnet.ckpt'
28 |
29 | def get_arguments():
30 | """Parse all the arguments provided from the CLI.
31 |
32 | Returns:
33 | A list of parsed arguments.
34 | """
35 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network")
36 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY,
37 | help="Path to the directory containing the PASCAL VOC dataset.")
38 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH,
39 | help="Path to the file listing the images in the dataset.")
40 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL,
41 | help="The index of the label to ignore during the training.")
42 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES,
43 | help="Number of classes to predict (including background).")
44 | parser.add_argument("--num-steps", type=int, default=NUM_STEPS,
45 | help="Number of images in the validation set.")
46 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM,
47 | help="Where restore model parameters from.")
48 | return parser.parse_args()
49 |
50 | def load(saver, sess, ckpt_path):
51 | '''Load trained weights.
52 |
53 | Args:
54 | saver: TensorFlow saver object.
55 | sess: TensorFlow session.
56 | ckpt_path: path to checkpoint file with parameters.
57 | '''
58 | saver.restore(sess, ckpt_path)
59 | print("Restored model parameters from {}".format(ckpt_path))
60 |
61 | def main():
62 | """Create the model and start the evaluation process."""
63 | args = get_arguments()
64 |
65 | # Create queue coordinator.
66 | coord = tf.train.Coordinator()
67 |
68 | # Load reader.
69 | with tf.name_scope("create_inputs"):
70 | reader = ImageReader(
71 | args.data_dir,
72 | args.data_list,
73 | None, # No defined input size.
74 | False, # No random scale.
75 | False, # No random mirror.
76 | args.ignore_label,
77 | IMG_MEAN,
78 | coord)
79 | image, label = reader.image, reader.label
80 | image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension.
81 |
82 | # Create network.
83 | net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes)
84 |
85 | # Which variables to load.
86 | restore_var = tf.global_variables()
87 |
88 | # Predictions.
89 | raw_output = net.layers['fc1_voc12']
90 | raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
91 | raw_output = tf.argmax(raw_output, dimension=3)
92 | pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor.
93 |
94 | # mIoU
95 | pred = tf.reshape(pred, [-1,])
96 | gt = tf.reshape(label_batch, [-1,])
97 | weights = tf.cast(tf.less_equal(gt, args.num_classes - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes.
98 | mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=args.num_classes, weights=weights)
99 |
100 | # Set up tf session and initialize variables.
101 | config = tf.ConfigProto()
102 | config.gpu_options.allow_growth = True
103 | sess = tf.Session(config=config)
104 | init = tf.global_variables_initializer()
105 |
106 | sess.run(init)
107 | sess.run(tf.local_variables_initializer())
108 |
109 | # Load weights.
110 | loader = tf.train.Saver(var_list=restore_var)
111 | if args.restore_from is not None:
112 | load(loader, sess, args.restore_from)
113 |
114 | # Start queue threads.
115 | threads = tf.train.start_queue_runners(coord=coord, sess=sess)
116 |
117 | # Iterate over training steps.
118 | for step in range(args.num_steps):
119 | preds, _ = sess.run([pred, update_op])
120 | if step % 100 == 0:
121 | print('step {:d}'.format(step))
122 | print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess)))
123 | coord.request_stop()
124 | coord.join(threads)
125 |
126 | if __name__ == '__main__':
127 | main()
128 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/evaluate_msc.py:
--------------------------------------------------------------------------------
1 | """Evaluation script for the DeepLab-ResNet network on the validation subset
2 | of PASCAL VOC dataset.
3 |
4 | This script evaluates the model on 1449 validation images.
5 | """
6 |
7 | from __future__ import print_function
8 |
9 | import argparse
10 | from datetime import datetime
11 | import os
12 | import sys
13 | import time
14 |
15 | import tensorflow as tf
16 | import numpy as np
17 |
18 | from deeplab_resnet import DeepLabResNetModel, ImageReader, prepare_label
19 |
20 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
21 |
22 | DATA_DIRECTORY = '/home/VOCdevkit'
23 | DATA_LIST_PATH = './dataset/val.txt'
24 | IGNORE_LABEL = 255
25 | NUM_CLASSES = 21
26 | NUM_STEPS = 1449 # Number of images in the validation set.
27 | RESTORE_FROM = './deeplab_resnet.ckpt'
28 |
29 | def get_arguments():
30 | """Parse all the arguments provided from the CLI.
31 |
32 | Returns:
33 | A list of parsed arguments.
34 | """
35 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network")
36 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY,
37 | help="Path to the directory containing the PASCAL VOC dataset.")
38 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH,
39 | help="Path to the file listing the images in the dataset.")
40 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL,
41 | help="The index of the label to ignore during the training.")
42 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES,
43 | help="Number of classes to predict (including background).")
44 | parser.add_argument("--num-steps", type=int, default=NUM_STEPS,
45 | help="Number of images in the validation set.")
46 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM,
47 | help="Where restore model parameters from.")
48 | return parser.parse_args()
49 |
50 | def load(saver, sess, ckpt_path):
51 | '''Load trained weights.
52 |
53 | Args:
54 | saver: TensorFlow saver object.
55 | sess: TensorFlow session.
56 | ckpt_path: path to checkpoint file with parameters.
57 | '''
58 | saver.restore(sess, ckpt_path)
59 | print("Restored model parameters from {}".format(ckpt_path))
60 |
61 | def main():
62 | """Create the model and start the evaluation process."""
63 | args = get_arguments()
64 |
65 | # Create queue coordinator.
66 | coord = tf.train.Coordinator()
67 |
68 | # Load reader.
69 | with tf.name_scope("create_inputs"):
70 | reader = ImageReader(
71 | args.data_dir,
72 | args.data_list,
73 | None, # No defined input size.
74 | False, # No random scale.
75 | False, # No random mirror.
76 | args.ignore_label,
77 | IMG_MEAN,
78 | coord)
79 | image, label = reader.image, reader.label
80 |
81 | image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension.
82 | h_orig, w_orig = tf.to_float(tf.shape(image_batch)[1]), tf.to_float(tf.shape(image_batch)[2])
83 | image_batch075 = tf.image.resize_images(image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.75)), tf.to_int32(tf.multiply(w_orig, 0.75))]))
84 | image_batch05 = tf.image.resize_images(image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.5)), tf.to_int32(tf.multiply(w_orig, 0.5))]))
85 |
86 | # Create network.
87 | with tf.variable_scope('', reuse=False):
88 | net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes)
89 | with tf.variable_scope('', reuse=True):
90 | net075 = DeepLabResNetModel({'data': image_batch075}, is_training=False, num_classes=args.num_classes)
91 | with tf.variable_scope('', reuse=True):
92 | net05 = DeepLabResNetModel({'data': image_batch05}, is_training=False, num_classes=args.num_classes)
93 |
94 | # Which variables to load.
95 | restore_var = tf.global_variables()
96 |
97 | # Predictions.
98 | raw_output100 = net.layers['fc1_voc12']
99 | raw_output075 = tf.image.resize_images(net075.layers['fc1_voc12'], tf.shape(raw_output100)[1:3,])
100 | raw_output05 = tf.image.resize_images(net05.layers['fc1_voc12'], tf.shape(raw_output100)[1:3,])
101 |
102 | raw_output = tf.reduce_max(tf.stack([raw_output100, raw_output075, raw_output05]), axis=0)
103 | raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
104 | raw_output = tf.argmax(raw_output, dimension=3)
105 | pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor.
106 |
107 | # mIoU
108 | pred = tf.reshape(pred, [-1,])
109 | gt = tf.reshape(label_batch, [-1,])
110 | weights = tf.cast(tf.less_equal(gt, args.num_classes - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes.
111 | mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=args.num_classes, weights=weights)
112 |
113 | # Set up tf session and initialize variables.
114 | config = tf.ConfigProto()
115 | config.gpu_options.allow_growth = True
116 | sess = tf.Session(config=config)
117 | init = tf.global_variables_initializer()
118 |
119 | sess.run(init)
120 | sess.run(tf.local_variables_initializer())
121 |
122 | # Load weights.
123 | loader = tf.train.Saver(var_list=restore_var)
124 | if args.restore_from is not None:
125 | load(loader, sess, args.restore_from)
126 |
127 | # Start queue threads.
128 | threads = tf.train.start_queue_runners(coord=coord, sess=sess)
129 |
130 | # Iterate over training steps.
131 | for step in range(args.num_steps):
132 | preds, _ = sess.run([pred, update_op])
133 | if step % 100 == 0:
134 | print('step {:d}'.format(step))
135 | print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess)))
136 | coord.request_stop()
137 | coord.join(threads)
138 |
139 | if __name__ == '__main__':
140 | main()
141 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/fine_tune.py:
--------------------------------------------------------------------------------
1 | """Training script for the DeepLab-ResNet network on the PASCAL VOC dataset
2 | for semantic image segmentation.
3 |
4 | This script fine-tunes the model using augmented PASCAL VOC,
5 | which contains approximately 10000 images for training and 1500 images for validation.
6 | Only the last 'fc1_voc12' layers are being trained.
7 | """
8 |
9 | from __future__ import print_function
10 |
11 | import argparse
12 | from datetime import datetime
13 | import os
14 | import sys
15 | import time
16 |
17 | import tensorflow as tf
18 | import numpy as np
19 |
20 | from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, inv_preprocess, prepare_label
21 |
22 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
23 |
24 | BATCH_SIZE = 4
25 | DATA_DIRECTORY = '/home/VOCdevkit'
26 | DATA_LIST_PATH = './dataset/train.txt'
27 | IGNORE_LABEL = 255
28 | INPUT_SIZE = '321,321'
29 | LEARNING_RATE = 1e-4
30 | NUM_CLASSES = 21
31 | NUM_STEPS = 20000
32 | RANDOM_SEED = 1234
33 | RESTORE_FROM = './deeplab_resnet.ckpt'
34 | SAVE_NUM_IMAGES = 2
35 | SAVE_PRED_EVERY = 100
36 | SNAPSHOT_DIR = './snapshots_finetune/'
37 |
38 | def get_arguments():
39 | """Parse all the arguments provided from the CLI.
40 |
41 | Returns:
42 | A list of parsed arguments.
43 | """
44 | parser = argparse.ArgumentParser(description="DeepLab-ResNet Network")
45 | parser.add_argument("--batch-size", type=int, default=BATCH_SIZE,
46 | help="Number of images sent to the network in one step.")
47 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY,
48 | help="Path to the directory containing the PASCAL VOC dataset.")
49 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH,
50 | help="Path to the file listing the images in the dataset.")
51 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL,
52 | help="The index of the label to ignore during the training.")
53 | parser.add_argument("--input-size", type=str, default=INPUT_SIZE,
54 | help="Comma-separated string with height and width of images.")
55 | parser.add_argument("--is-training", action="store_true",
56 |                         help="Whether to update the running means and variances during training.")
57 | parser.add_argument("--learning-rate", type=float, default=LEARNING_RATE,
58 | help="Learning rate for training.")
59 | parser.add_argument("--not-restore-last", action="store_true",
60 |                         help="Whether to skip restoring the last (FC) layers.")
61 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES,
62 | help="Number of classes to predict (including background).")
63 | parser.add_argument("--num-steps", type=int, default=NUM_STEPS,
64 | help="Number of training steps.")
65 | parser.add_argument("--random-mirror", action="store_true",
66 | help="Whether to randomly mirror the inputs during the training.")
67 | parser.add_argument("--random-scale", action="store_true",
68 | help="Whether to randomly scale the inputs during the training.")
69 | parser.add_argument("--random-seed", type=int, default=RANDOM_SEED,
70 | help="Random seed to have reproducible results.")
71 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM,
72 |                         help="Where to restore model parameters from.")
73 | parser.add_argument("--save-num-images", type=int, default=SAVE_NUM_IMAGES,
74 | help="How many images to save.")
75 | parser.add_argument("--save-pred-every", type=int, default=SAVE_PRED_EVERY,
76 |                         help="Save summaries and a checkpoint every so many steps.")
77 | parser.add_argument("--snapshot-dir", type=str, default=SNAPSHOT_DIR,
78 | help="Where to save snapshots of the model.")
79 | return parser.parse_args()
80 |
81 | def save(saver, sess, logdir, step):
82 | model_name = 'model.ckpt'
83 | checkpoint_path = os.path.join(logdir, model_name)
84 |
85 | if not os.path.exists(logdir):
86 | os.makedirs(logdir)
87 |
88 | saver.save(sess, checkpoint_path, global_step=step)
89 | print('The checkpoint has been created.')
90 |
91 | def load(saver, sess, ckpt_path):
92 | '''Load trained weights.
93 |
94 | Args:
95 | saver: TensorFlow saver object.
96 | sess: TensorFlow session.
97 | ckpt_path: path to checkpoint file with parameters.
98 | '''
99 | saver.restore(sess, ckpt_path)
100 | print("Restored model parameters from {}".format(ckpt_path))
101 |
102 | def main():
103 | """Create the model and start the training."""
104 | args = get_arguments()
105 |
106 | h, w = map(int, args.input_size.split(','))
107 | input_size = (h, w)
108 |
109 | tf.set_random_seed(args.random_seed)
110 |
111 | # Create queue coordinator.
112 | coord = tf.train.Coordinator()
113 |
114 | # Load reader.
115 | with tf.name_scope("create_inputs"):
116 | reader = ImageReader(
117 | args.data_dir,
118 | args.data_list,
119 | input_size,
120 | args.random_scale,
121 | args.random_mirror,
122 | args.ignore_label,
123 | IMG_MEAN,
124 | coord)
125 | image_batch, label_batch = reader.dequeue(args.batch_size)
126 |
127 | # Create network.
128 | net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes)
129 | # For a small batch size, it is better to keep
130 | # the statistics of the BN layers (running means and variances)
131 | # frozen, and to not update the values provided by the pre-trained model.
132 | # If is_training=True, the statistics will be updated during the training.
133 | # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
134 |     # if they are present in the var_list of the optimiser definition.
135 |
136 | # Predictions.
137 | raw_output = net.layers['fc1_voc12']
138 | # Which variables to load. Running means and variances are not trainable,
139 | # thus all_variables() should be restored.
140 | # Restore all variables, or all except the last ones.
141 | restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last]
142 | trainable = [v for v in tf.trainable_variables() if 'fc1_voc12' in v.name] # Fine-tune only the last layers.
143 |
144 | prediction = tf.reshape(raw_output, [-1, args.num_classes])
145 | label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes)
146 | gt = tf.reshape(label_proc, [-1, args.num_classes])
147 |
148 | # Pixel-wise softmax loss.
149 | loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
150 | reduced_loss = tf.reduce_mean(loss)
151 |
152 | # Processed predictions.
153 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
154 | raw_output_up = tf.argmax(raw_output_up, dimension=3)
155 | pred = tf.expand_dims(raw_output_up, dim=3)
156 |
157 | # Image summary.
158 | images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
159 | labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)
160 | preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)
161 |
162 | total_summary = tf.summary.image('images',
163 | tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
164 | max_outputs=args.save_num_images) # Concatenate row-wise.
165 | summary_writer = tf.summary.FileWriter(args.snapshot_dir,
166 | graph=tf.get_default_graph())
167 |
168 | # Define loss and optimisation parameters.
169 | optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
170 | optim = optimiser.minimize(reduced_loss, var_list=trainable)
171 |
172 | # Set up tf session and initialize variables.
173 | config = tf.ConfigProto()
174 | config.gpu_options.allow_growth = True
175 | sess = tf.Session(config=config)
176 | init = tf.global_variables_initializer()
177 |
178 | sess.run(init)
179 |
180 | # Saver for storing checkpoints of the model.
181 | saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=40)
182 |
183 | # Load variables if the checkpoint is provided.
184 | if args.restore_from is not None:
185 | loader = tf.train.Saver(var_list=restore_var)
186 | load(loader, sess, args.restore_from)
187 |
188 | # Start queue threads.
189 | threads = tf.train.start_queue_runners(coord=coord, sess=sess)
190 |
191 | # Iterate over training steps.
192 | for step in range(args.num_steps):
193 | start_time = time.time()
194 |
195 | if step % args.save_pred_every == 0:
196 | loss_value, images, labels, preds, summary, _ = sess.run([reduced_loss, image_batch, label_batch, pred, total_summary, optim])
197 | summary_writer.add_summary(summary, step)
198 | save(saver, sess, args.snapshot_dir, step)
199 | else:
200 | loss_value, _ = sess.run([reduced_loss, optim])
201 | duration = time.time() - start_time
202 | print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
203 | coord.request_stop()
204 | coord.join(threads)
205 |
206 | if __name__ == '__main__':
207 | main()
208 |
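209 | # Note: a possible invocation, assuming the defaults above (paths are
210 | # placeholders, not verified against this repo):
211 | #
212 | #   python fine_tune.py --data-dir /path/to/VOCdevkit \
213 | #       --data-list ./dataset/train.txt \
214 | #       --restore-from ./deeplab_resnet.ckpt
215 | #
216 | # Omitting --is-training keeps the BN statistics frozen, which the comment
217 | # in main() recommends for small batch sizes.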
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/images/colour_scheme.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/spyflying/CMPC-Refseg/094639b8bf00cc169ea7b49cdf9c87fdfc70d963/external/tensorflow-deeplab-resnet/images/colour_scheme.png
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/images/mask.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/spyflying/CMPC-Refseg/094639b8bf00cc169ea7b49cdf9c87fdfc70d963/external/tensorflow-deeplab-resnet/images/mask.png
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/images/summary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/spyflying/CMPC-Refseg/094639b8bf00cc169ea7b49cdf9c87fdfc70d963/external/tensorflow-deeplab-resnet/images/summary.png
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/inference.py:
--------------------------------------------------------------------------------
1 | """Run DeepLab-ResNet on a given image.
2 |
3 | This script computes a segmentation mask for a given image.
4 | """
5 |
6 | from __future__ import print_function
7 |
8 | import argparse
9 | from datetime import datetime
10 | import os
11 | import sys
12 | import time
13 |
14 | from PIL import Image
15 |
16 | import tensorflow as tf
17 | import numpy as np
18 |
19 | from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, prepare_label
20 |
21 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
22 |
23 | NUM_CLASSES = 21
24 | SAVE_DIR = './output/'
25 |
26 | def get_arguments():
27 | """Parse all the arguments provided from the CLI.
28 |
29 | Returns:
30 | A list of parsed arguments.
31 | """
32 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network Inference.")
33 | parser.add_argument("img_path", type=str,
34 | help="Path to the RGB image file.")
35 | parser.add_argument("model_weights", type=str,
36 | help="Path to the file with model weights.")
37 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES,
38 | help="Number of classes to predict (including background).")
39 | parser.add_argument("--save-dir", type=str, default=SAVE_DIR,
40 | help="Where to save predicted mask.")
41 | return parser.parse_args()
42 |
43 | def load(saver, sess, ckpt_path):
44 | '''Load trained weights.
45 |
46 | Args:
47 | saver: TensorFlow saver object.
48 | sess: TensorFlow session.
49 | ckpt_path: path to checkpoint file with parameters.
50 | '''
51 | saver.restore(sess, ckpt_path)
52 | print("Restored model parameters from {}".format(ckpt_path))
53 |
54 | def main():
55 | """Create the model and start the evaluation process."""
56 | args = get_arguments()
57 |
58 | # Prepare image.
59 | img = tf.image.decode_jpeg(tf.read_file(args.img_path), channels=3)
60 | # Convert RGB to BGR.
61 | img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img)
62 | img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]), dtype=tf.float32)
63 | # Extract mean.
64 | img -= IMG_MEAN
65 |
66 | # Create network.
67 | net = DeepLabResNetModel({'data': tf.expand_dims(img, dim=0)}, is_training=False, num_classes=args.num_classes)
68 |
69 | # Which variables to load.
70 | restore_var = tf.global_variables()
71 |
72 | # Predictions.
73 | raw_output = net.layers['fc1_voc12']
74 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(img)[0:2,])
75 | raw_output_up = tf.argmax(raw_output_up, dimension=3)
76 | pred = tf.expand_dims(raw_output_up, dim=3)
77 |
78 |
79 | # Set up TF session and initialize variables.
80 | config = tf.ConfigProto()
81 | config.gpu_options.allow_growth = True
82 | sess = tf.Session(config=config)
83 | init = tf.global_variables_initializer()
84 |
85 | sess.run(init)
86 |
87 | # Load weights.
88 | loader = tf.train.Saver(var_list=restore_var)
89 | load(loader, sess, args.model_weights)
90 |
91 | # Perform inference.
92 | preds = sess.run(pred)
93 |
94 | msk = decode_labels(preds, num_classes=args.num_classes)
95 | im = Image.fromarray(msk[0])
96 | if not os.path.exists(args.save_dir):
97 | os.makedirs(args.save_dir)
98 | im.save(args.save_dir + 'mask.png')
99 |
100 | print('The output file has been saved to {}'.format(args.save_dir + 'mask.png'))
101 |
102 |
103 | if __name__ == '__main__':
104 | main()
105 |
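106 | # Note: a possible invocation (paths are placeholders):
107 | #
108 | #   python inference.py /path/to/image.jpg ./deeplab_resnet.ckpt --save-dir ./output/
109 | #
110 | # Since save_dir is joined to 'mask.png' by string concatenation above, it
111 | # should end with a trailing slash.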
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/kaffe/__init__.py:
--------------------------------------------------------------------------------
1 | from .graph import GraphBuilder, NodeMapper
2 | from .errors import KaffeError, print_stderr
3 |
4 | from . import tensorflow
5 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/kaffe/caffe/__init__.py:
--------------------------------------------------------------------------------
1 | from .resolver import get_caffe_resolver, has_pycaffe
2 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/kaffe/caffe/resolver.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | SHARED_CAFFE_RESOLVER = None
4 |
5 | class CaffeResolver(object):
6 | def __init__(self):
7 | self.import_caffe()
8 |
9 | def import_caffe(self):
10 | self.caffe = None
11 | try:
12 | # Try to import PyCaffe first
13 | import caffe
14 | self.caffe = caffe
15 | except ImportError:
16 | # Fall back to the protobuf implementation
17 | from . import caffepb
18 | self.caffepb = caffepb
19 | show_fallback_warning()
20 | if self.caffe:
21 | # Use the protobuf code from the imported distribution.
22 | # This way, Caffe variants with custom layers will work.
23 | self.caffepb = self.caffe.proto.caffe_pb2
24 | self.NetParameter = self.caffepb.NetParameter
25 |
26 | def has_pycaffe(self):
27 | return self.caffe is not None
28 |
29 | def get_caffe_resolver():
30 | global SHARED_CAFFE_RESOLVER
31 | if SHARED_CAFFE_RESOLVER is None:
32 | SHARED_CAFFE_RESOLVER = CaffeResolver()
33 | return SHARED_CAFFE_RESOLVER
34 |
35 | def has_pycaffe():
36 | return get_caffe_resolver().has_pycaffe()
37 |
38 | def show_fallback_warning():
39 | msg = '''
40 | ------------------------------------------------------------
41 | WARNING: PyCaffe not found!
42 | Falling back to a pure protocol buffer implementation.
43 | * Conversions will be drastically slower.
44 | * This backend is UNTESTED!
45 | ------------------------------------------------------------
46 |
47 | '''
48 | sys.stderr.write(msg)
49 |
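50 | # Note: a minimal usage sketch of the resolver defined above:
51 | #
52 | #   resolver = get_caffe_resolver()      # cached singleton
53 | #   net = resolver.NetParameter()        # protobuf message for a network
54 | #   if not has_pycaffe():
55 | #       pass  # pure-protobuf fallback is active (see warning above)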
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/kaffe/errors.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | class KaffeError(Exception):
4 | pass
5 |
6 | def print_stderr(msg):
7 | sys.stderr.write('%s\n' % msg)
8 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/kaffe/graph.py:
--------------------------------------------------------------------------------
1 | from google.protobuf import text_format
2 |
3 | from .caffe import get_caffe_resolver
4 | from .errors import KaffeError, print_stderr
5 | from .layers import LayerAdapter, LayerType, NodeKind, NodeDispatch
6 | from .shapes import TensorShape
7 |
8 | class Node(object):
9 |
10 | def __init__(self, name, kind, layer=None):
11 | self.name = name
12 | self.kind = kind
13 | self.layer = LayerAdapter(layer, kind) if layer else None
14 | self.parents = []
15 | self.children = []
16 | self.data = None
17 | self.output_shape = None
18 | self.metadata = {}
19 |
20 | def add_parent(self, parent_node):
21 | assert parent_node not in self.parents
22 | self.parents.append(parent_node)
23 | if self not in parent_node.children:
24 | parent_node.children.append(self)
25 |
26 | def add_child(self, child_node):
27 | assert child_node not in self.children
28 | self.children.append(child_node)
29 | if self not in child_node.parents:
30 | child_node.parents.append(self)
31 |
32 | def get_only_parent(self):
33 | if len(self.parents) != 1:
34 | raise KaffeError('Node (%s) expected to have 1 parent. Found %s.' %
35 | (self, len(self.parents)))
36 | return self.parents[0]
37 |
38 | @property
39 | def parameters(self):
40 | if self.layer is not None:
41 | return self.layer.parameters
42 | return None
43 |
44 | def __str__(self):
45 | return '[%s] %s' % (self.kind, self.name)
46 |
47 | def __repr__(self):
48 | return '%s (0x%x)' % (self.name, id(self))
49 |
50 |
51 | class Graph(object):
52 |
53 | def __init__(self, nodes=None, name=None):
54 | self.nodes = nodes or []
55 | self.node_lut = {node.name: node for node in self.nodes}
56 | self.name = name
57 |
58 | def add_node(self, node):
59 | self.nodes.append(node)
60 | self.node_lut[node.name] = node
61 |
62 | def get_node(self, name):
63 | try:
64 | return self.node_lut[name]
65 | except KeyError:
66 | raise KaffeError('Layer not found: %s' % name)
67 |
68 | def get_input_nodes(self):
69 | return [node for node in self.nodes if len(node.parents) == 0]
70 |
71 | def get_output_nodes(self):
72 | return [node for node in self.nodes if len(node.children) == 0]
73 |
74 | def topologically_sorted(self):
75 | sorted_nodes = []
76 | unsorted_nodes = list(self.nodes)
77 | temp_marked = set()
78 | perm_marked = set()
79 |
80 | def visit(node):
81 | if node in temp_marked:
82 | raise KaffeError('Graph is not a DAG.')
83 | if node in perm_marked:
84 | return
85 | temp_marked.add(node)
86 | for child in node.children:
87 | visit(child)
88 | perm_marked.add(node)
89 | temp_marked.remove(node)
90 | sorted_nodes.insert(0, node)
91 |
92 | while len(unsorted_nodes):
93 | visit(unsorted_nodes.pop())
94 | return sorted_nodes
95 |
96 | def compute_output_shapes(self):
97 | sorted_nodes = self.topologically_sorted()
98 | for node in sorted_nodes:
99 | node.output_shape = TensorShape(*NodeKind.compute_output_shape(node))
100 |
101 | def replaced(self, new_nodes):
102 | return Graph(nodes=new_nodes, name=self.name)
103 |
104 | def transformed(self, transformers):
105 | graph = self
106 | for transformer in transformers:
107 | graph = transformer(graph)
108 | if graph is None:
109 | raise KaffeError('Transformer failed: {}'.format(transformer))
110 | assert isinstance(graph, Graph)
111 | return graph
112 |
113 | def __contains__(self, key):
114 | return key in self.node_lut
115 |
116 | def __str__(self):
117 | hdr = '{:<20} {:<30} {:>20} {:>20}'.format('Type', 'Name', 'Param', 'Output')
118 | s = [hdr, '-' * 94]
119 | for node in self.topologically_sorted():
120 | # If the node has learned parameters, display the first one's shape.
121 | # In case of convolutions, this corresponds to the weights.
122 | data_shape = node.data[0].shape if node.data else '--'
123 | out_shape = node.output_shape or '--'
124 | s.append('{:<20} {:<30} {:>20} {:>20}'.format(node.kind, node.name, data_shape,
125 | tuple(out_shape)))
126 | return '\n'.join(s)
127 |
128 |
129 | class GraphBuilder(object):
130 | '''Constructs a model graph from a Caffe protocol buffer definition.'''
131 |
132 | def __init__(self, def_path, phase='test'):
133 | '''
134 | def_path: Path to the model definition (.prototxt)
135 |         phase: Either 'test' or 'train'. Used for filtering
136 |             phase-specific nodes.
137 | '''
138 | self.def_path = def_path
139 | self.phase = phase
140 | self.load()
141 |
142 | def load(self):
143 | '''Load the layer definitions from the prototxt.'''
144 | self.params = get_caffe_resolver().NetParameter()
145 | with open(self.def_path, 'rb') as def_file:
146 | text_format.Merge(def_file.read(), self.params)
147 |
148 | def filter_layers(self, layers):
149 | '''Filter out layers based on the current phase.'''
150 | phase_map = {0: 'train', 1: 'test'}
151 | filtered_layer_names = set()
152 | filtered_layers = []
153 | for layer in layers:
154 | phase = self.phase
155 | if len(layer.include):
156 | phase = phase_map[layer.include[0].phase]
157 | if len(layer.exclude):
158 |                 phase = phase_map[1 - layer.exclude[0].phase]
159 | exclude = (phase != self.phase)
160 | # Dropout layers appear in a fair number of Caffe
161 | # test-time networks. These are just ignored. We'll
162 | # filter them out here.
163 | if (not exclude) and (phase == 'test'):
164 | exclude = (layer.type == LayerType.Dropout)
165 | if not exclude:
166 | filtered_layers.append(layer)
167 | # Guard against dupes.
168 | assert layer.name not in filtered_layer_names
169 | filtered_layer_names.add(layer.name)
170 | return filtered_layers
171 |
172 | def make_node(self, layer):
173 | '''Create a graph node for the given layer.'''
174 | kind = NodeKind.map_raw_kind(layer.type)
175 | if kind is None:
176 | raise KaffeError('Unknown layer type encountered: %s' % layer.type)
177 | # We want to use the layer's top names (the "output" names), rather than the
178 |         # name attribute, which is more of a readability thing than a functional one.
179 | # Other layers will refer to a node by its "top name".
180 | return Node(layer.name, kind, layer=layer)
181 |
182 | def make_input_nodes(self):
183 | '''
184 | Create data input nodes.
185 |
186 | This method is for old-style inputs, where the input specification
187 | was not treated as a first-class layer in the prototext.
188 | Newer models use the "Input layer" type.
189 | '''
190 | nodes = [Node(name, NodeKind.Data) for name in self.params.input]
191 | if len(nodes):
192 | input_dim = map(int, self.params.input_dim)
193 | if not input_dim:
194 | if len(self.params.input_shape) > 0:
195 | input_dim = map(int, self.params.input_shape[0].dim)
196 | else:
197 | raise KaffeError('Dimensions for input not specified.')
198 | for node in nodes:
199 | node.output_shape = tuple(input_dim)
200 | return nodes
201 |
202 | def build(self):
203 | '''
204 | Builds the graph from the Caffe layer definitions.
205 | '''
206 | # Get the layers
207 | layers = self.params.layers or self.params.layer
208 | # Filter out phase-excluded layers
209 | layers = self.filter_layers(layers)
210 | # Get any separately-specified input layers
211 | nodes = self.make_input_nodes()
212 | nodes += [self.make_node(layer) for layer in layers]
213 | # Initialize the graph
214 | graph = Graph(nodes=nodes, name=self.params.name)
215 | # Connect the nodes
216 | #
217 | # A note on layers and outputs:
218 | # In Caffe, each layer can produce multiple outputs ("tops") from a set of inputs
219 | # ("bottoms"). The bottoms refer to other layers' tops. The top can rewrite a bottom
220 | # (in case of in-place operations). Note that the layer's name is not used for establishing
221 | # any connectivity. It's only used for data association. By convention, a layer with a
222 | # single top will often use the same name (although this is not required).
223 | #
224 | # The current implementation only supports single-output nodes (note that a node can still
225 | # have multiple children, since multiple child nodes can refer to the single top's name).
226 | node_outputs = {}
227 | for layer in layers:
228 | node = graph.get_node(layer.name)
229 | for input_name in layer.bottom:
230 | assert input_name != layer.name
231 | parent_node = node_outputs.get(input_name)
232 | if (parent_node is None) or (parent_node == node):
233 | parent_node = graph.get_node(input_name)
234 | node.add_parent(parent_node)
235 | if len(layer.top)>1:
236 | raise KaffeError('Multiple top nodes are not supported.')
237 | for output_name in layer.top:
238 | if output_name == layer.name:
239 | # Output is named the same as the node. No further action required.
240 | continue
241 | # There are two possibilities here:
242 | #
243 | # Case 1: output_name refers to another node in the graph.
244 | # This is an "in-place operation" that overwrites an existing node.
245 | # This would create a cycle in the graph. We'll undo the in-placing
246 | # by substituting this node wherever the overwritten node is referenced.
247 | #
248 | # Case 2: output_name violates the convention layer.name == output_name.
249 |             # Since we are working in the single-output regime, we can rename it to
250 |             # match the layer name.
251 |             #
252 |             # In both cases, future references to this top are re-routed to this node.
253 | node_outputs[output_name] = node
254 |
255 | graph.compute_output_shapes()
256 | return graph
257 |
258 |
259 | class NodeMapper(NodeDispatch):
260 |
261 | def __init__(self, graph):
262 | self.graph = graph
263 |
264 | def map(self):
265 | nodes = self.graph.topologically_sorted()
266 | # Remove input nodes - we'll handle them separately.
267 | input_nodes = self.graph.get_input_nodes()
268 | nodes = [t for t in nodes if t not in input_nodes]
269 | # Decompose DAG into chains.
270 | chains = []
271 | for node in nodes:
272 | attach_to_chain = None
273 | if len(node.parents) == 1:
274 | parent = node.get_only_parent()
275 | for chain in chains:
276 | if chain[-1] == parent:
277 | # Node is part of an existing chain.
278 | attach_to_chain = chain
279 | break
280 | if attach_to_chain is None:
281 | # Start a new chain for this node.
282 | attach_to_chain = []
283 | chains.append(attach_to_chain)
284 | attach_to_chain.append(node)
285 | # Map each chain.
286 | mapped_chains = []
287 | for chain in chains:
288 | mapped_chains.append(self.map_chain(chain))
289 | return self.commit(mapped_chains)
290 |
291 | def map_chain(self, chain):
292 | return [self.map_node(node) for node in chain]
293 |
294 | def map_node(self, node):
295 | map_func = self.get_handler(node.kind, 'map')
296 | mapped_node = map_func(node)
297 | assert mapped_node is not None
298 | mapped_node.node = node
299 | return mapped_node
300 |
301 | def commit(self, mapped_chains):
302 | raise NotImplementedError('Must be implemented by subclass.')
303 |
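304 | # Note: an illustrative prototxt fragment for the "in-place" case handled in
305 | # GraphBuilder.build() (hypothetical layer names):
306 | #
307 | #   layer { name: "conv1" type: "Convolution" bottom: "data"  top: "conv1" }
308 | #   layer { name: "relu1" type: "ReLU"        bottom: "conv1" top: "conv1" }
309 | #
310 | # relu1 rewrites "conv1" in place (Case 1 above); the builder undoes the
311 | # in-placing via the node_outputs map, so later bottoms named "conv1" are
312 | # routed to the relu1 node and the graph stays acyclic.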
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/kaffe/layers.py:
--------------------------------------------------------------------------------
1 | import re
2 | import numbers
3 | from collections import namedtuple
4 |
5 | from .shapes import *
6 |
7 | LAYER_DESCRIPTORS = {
8 |
9 | # Caffe Types
10 | 'AbsVal': shape_identity,
11 | 'Accuracy': shape_scalar,
12 | 'ArgMax': shape_not_implemented,
13 | 'BatchNorm': shape_identity,
14 | 'BNLL': shape_not_implemented,
15 | 'Concat': shape_concat,
16 | 'ContrastiveLoss': shape_scalar,
17 | 'Convolution': shape_convolution,
18 | 'Deconvolution': shape_not_implemented,
19 | 'Data': shape_data,
20 | 'Dropout': shape_identity,
21 | 'DummyData': shape_data,
22 | 'EuclideanLoss': shape_scalar,
23 | 'Eltwise': shape_identity,
24 | 'Exp': shape_identity,
25 | 'Flatten': shape_not_implemented,
26 | 'HDF5Data': shape_data,
27 | 'HDF5Output': shape_identity,
28 | 'HingeLoss': shape_scalar,
29 | 'Im2col': shape_not_implemented,
30 | 'ImageData': shape_data,
31 | 'InfogainLoss': shape_scalar,
32 | 'InnerProduct': shape_inner_product,
33 | 'Input': shape_data,
34 | 'LRN': shape_identity,
35 | 'MemoryData': shape_mem_data,
36 | 'MultinomialLogisticLoss': shape_scalar,
37 | 'MVN': shape_not_implemented,
38 | 'Pooling': shape_pool,
39 | 'Power': shape_identity,
40 | 'ReLU': shape_identity,
41 | 'Scale': shape_identity,
42 | 'Sigmoid': shape_identity,
43 | 'SigmoidCrossEntropyLoss': shape_scalar,
44 | 'Silence': shape_not_implemented,
45 | 'Softmax': shape_identity,
46 | 'SoftmaxWithLoss': shape_scalar,
47 | 'Split': shape_not_implemented,
48 | 'Slice': shape_not_implemented,
49 | 'TanH': shape_identity,
50 | 'WindowData': shape_not_implemented,
51 | 'Threshold': shape_identity,
52 | }
53 |
54 | LAYER_TYPES = LAYER_DESCRIPTORS.keys()
55 |
56 | LayerType = type('LayerType', (), {t: t for t in LAYER_TYPES})
57 |
58 | class NodeKind(LayerType):
59 |
60 | @staticmethod
61 | def map_raw_kind(kind):
62 | if kind in LAYER_TYPES:
63 | return kind
64 | return None
65 |
66 | @staticmethod
67 | def compute_output_shape(node):
68 | try:
69 | val = LAYER_DESCRIPTORS[node.kind](node)
70 | return val
71 | except NotImplementedError:
72 | raise KaffeError('Output shape computation not implemented for type: %s' % node.kind)
73 |
74 |
75 | class NodeDispatchError(KaffeError):
76 |
77 | pass
78 |
79 |
80 | class NodeDispatch(object):
81 |
82 | @staticmethod
83 | def get_handler_name(node_kind):
84 | if len(node_kind) <= 4:
85 | # A catch-all for things like ReLU and tanh
86 | return node_kind.lower()
87 | # Convert from CamelCase to under_scored
88 | name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', node_kind)
89 | return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower()
90 |
91 | def get_handler(self, node_kind, prefix):
92 | name = self.get_handler_name(node_kind)
93 | name = '_'.join((prefix, name))
94 | try:
95 | return getattr(self, name)
96 | except AttributeError:
97 | raise NodeDispatchError('No handler found for node kind: %s (expected: %s)' %
98 | (node_kind, name))
99 |
100 |
101 | class LayerAdapter(object):
102 |
103 | def __init__(self, layer, kind):
104 | self.layer = layer
105 | self.kind = kind
106 |
107 | @property
108 | def parameters(self):
109 | name = NodeDispatch.get_handler_name(self.kind)
110 | name = '_'.join((name, 'param'))
111 | try:
112 | return getattr(self.layer, name)
113 | except AttributeError:
114 | raise NodeDispatchError('Caffe parameters not found for layer kind: %s' % (self.kind))
115 |
116 | @staticmethod
117 | def get_kernel_value(scalar, repeated, idx, default=None):
118 | if scalar:
119 | return scalar
120 | if repeated:
121 | if isinstance(repeated, numbers.Number):
122 | return repeated
123 | if len(repeated) == 1:
124 | # Same value applies to all spatial dimensions
125 | return int(repeated[0])
126 | assert idx < len(repeated)
127 | # Extract the value for the given spatial dimension
128 | return repeated[idx]
129 | if default is None:
130 | raise ValueError('Unable to determine kernel parameter!')
131 | return default
132 |
133 | @property
134 | def kernel_parameters(self):
135 | assert self.kind in (NodeKind.Convolution, NodeKind.Pooling)
136 | params = self.parameters
137 | k_h = self.get_kernel_value(params.kernel_h, params.kernel_size, 0)
138 | k_w = self.get_kernel_value(params.kernel_w, params.kernel_size, 1)
139 | s_h = self.get_kernel_value(params.stride_h, params.stride, 0, default=1)
140 | s_w = self.get_kernel_value(params.stride_w, params.stride, 1, default=1)
141 | p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0)
142 |         p_w = self.get_kernel_value(params.pad_w, params.pad, 1, default=0)
143 | return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w)
144 |
145 |
146 | KernelParameters = namedtuple('KernelParameters', ['kernel_h', 'kernel_w', 'stride_h', 'stride_w',
147 | 'pad_h', 'pad_w'])
148 |
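149 | # Note: worked examples of NodeDispatch.get_handler_name (regexes above):
150 | #
151 | #   'ReLU'         -> 'relu'           (<= 4 chars: catch-all lower-casing)
152 | #   'InnerProduct' -> 'inner_product'  (CamelCase -> under_scored)
153 | #   'BatchNorm'    -> 'batch_norm'
154 | #
155 | # A NodeMapper is thus expected to provide map_relu, map_inner_product,
156 | # map_batch_norm, etc., as kaffe/tensorflow/transformer.py does.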
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/kaffe/shapes.py:
--------------------------------------------------------------------------------
1 | import math
2 | from collections import namedtuple
3 |
4 | from .errors import KaffeError
5 |
6 | TensorShape = namedtuple('TensorShape', ['batch_size', 'channels', 'height', 'width'])
7 |
8 |
9 | def get_filter_output_shape(i_h, i_w, params, round_func):
10 | o_h = (i_h + 2 * params.pad_h - params.kernel_h) / float(params.stride_h) + 1
11 | o_w = (i_w + 2 * params.pad_w - params.kernel_w) / float(params.stride_w) + 1
12 | return (int(round_func(o_h)), int(round_func(o_w)))
13 |
14 |
15 | def get_strided_kernel_output_shape(node, round_func):
16 | assert node.layer is not None
17 | input_shape = node.get_only_parent().output_shape
18 | o_h, o_w = get_filter_output_shape(input_shape.height, input_shape.width,
19 | node.layer.kernel_parameters, round_func)
20 | params = node.layer.parameters
21 | has_c_o = hasattr(params, 'num_output')
22 | c = params.num_output if has_c_o else input_shape.channels
23 | return TensorShape(input_shape.batch_size, c, o_h, o_w)
24 |
25 |
26 | def shape_not_implemented(node):
27 | raise NotImplementedError
28 |
29 |
30 | def shape_identity(node):
31 | assert len(node.parents) > 0
32 | return node.parents[0].output_shape
33 |
34 |
35 | def shape_scalar(node):
36 | return TensorShape(1, 1, 1, 1)
37 |
38 |
39 | def shape_data(node):
40 | if node.output_shape:
41 | # Old-style input specification
42 | return node.output_shape
43 | try:
44 | # New-style input specification
45 | return map(int, node.parameters.shape[0].dim)
46 | except:
47 | # We most likely have a data layer on our hands. The problem is,
48 | # Caffe infers the dimensions of the data from the source (eg: LMDB).
49 | # We want to avoid reading datasets here. Fail for now.
50 | # This can be temporarily fixed by transforming the data layer to
51 | # Caffe's "input" layer (as is usually used in the "deploy" version).
52 | # TODO: Find a better solution for this.
53 | raise KaffeError('Cannot determine dimensions of data layer.\n'
54 | 'See comments in function shape_data for more info.')
55 |
56 |
57 | def shape_mem_data(node):
58 | params = node.parameters
59 | return TensorShape(params.batch_size, params.channels, params.height, params.width)
60 |
61 |
62 | def shape_concat(node):
63 | axis = node.layer.parameters.axis
64 | output_shape = None
65 | for parent in node.parents:
66 | if output_shape is None:
67 | output_shape = list(parent.output_shape)
68 | else:
69 | output_shape[axis] += parent.output_shape[axis]
70 | return tuple(output_shape)
71 |
72 |
73 | def shape_convolution(node):
74 | return get_strided_kernel_output_shape(node, math.floor)
75 |
76 |
77 | def shape_pool(node):
78 | return get_strided_kernel_output_shape(node, math.ceil)
79 |
80 |
81 | def shape_inner_product(node):
82 | input_shape = node.get_only_parent().output_shape
83 | return TensorShape(input_shape.batch_size, node.layer.parameters.num_output, 1, 1)
84 |
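85 | # Note: a worked example of the floor/ceil split above (Caffe's convention:
86 | # convolutions round down, pooling rounds up). For a 3x3 kernel with stride 2
87 | # and pad 1 on a 224x224 input:
88 | #
89 | #   o = (224 + 2*1 - 3) / 2.0 + 1 = 112.5
90 | #   shape_convolution -> floor(112.5) = 112
91 | #   shape_pool        -> ceil(112.5)  = 113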
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/kaffe/tensorflow/__init__.py:
--------------------------------------------------------------------------------
1 | from .transformer import TensorFlowTransformer
2 | from .network import Network
3 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/kaffe/tensorflow/network.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | slim = tf.contrib.slim
4 |
5 | DEFAULT_PADDING = 'SAME'
6 |
7 |
8 | def layer(op):
9 | '''Decorator for composable network layers.'''
10 |
11 | def layer_decorated(self, *args, **kwargs):
12 | # Automatically set a name if not provided.
13 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
14 | # Figure out the layer inputs.
15 | if len(self.terminals) == 0:
16 | raise RuntimeError('No input variables found for layer %s.' % name)
17 | elif len(self.terminals) == 1:
18 | layer_input = self.terminals[0]
19 | else:
20 | layer_input = list(self.terminals)
21 | # Perform the operation and get the output.
22 | layer_output = op(self, layer_input, *args, **kwargs)
23 | # Add to layer LUT.
24 | self.layers[name] = layer_output
25 | # This output is now the input for the next layer.
26 | self.feed(layer_output)
27 | # Return self for chained calls.
28 | return self
29 |
30 | return layer_decorated
31 |
32 |
33 | class Network(object):
34 |
35 | def __init__(self, inputs, trainable=True, is_training=False, num_classes=21):
36 | # The input nodes for this network
37 | self.inputs = inputs
38 | # The current list of terminal nodes
39 | self.terminals = []
40 | # Mapping from layer names to layers
41 | self.layers = dict(inputs)
42 | # If true, the resulting variables are set as trainable
43 | self.trainable = trainable
44 | # Switch variable for dropout
45 | self.use_dropout = tf.placeholder_with_default(tf.constant(1.0),
46 | shape=[],
47 | name='use_dropout')
48 | self.setup(is_training, num_classes)
49 |
50 |     def setup(self, is_training, num_classes):
51 | '''Construct the network. '''
52 | raise NotImplementedError('Must be implemented by the subclass.')
53 |
54 | def load(self, data_path, session, ignore_missing=False):
55 | '''Load network weights.
56 | data_path: The path to the numpy-serialized network weights
57 | session: The current TensorFlow session
58 | ignore_missing: If true, serialized weights for missing layers are ignored.
59 | '''
60 | data_dict = np.load(data_path).item()
61 | for op_name in data_dict:
62 | with tf.variable_scope(op_name, reuse=True):
63 | for param_name, data in data_dict[op_name].iteritems():
64 | try:
65 | var = tf.get_variable(param_name)
66 | session.run(var.assign(data))
67 | except ValueError:
68 | if not ignore_missing:
69 | raise
70 |
71 | def feed(self, *args):
72 | '''Set the input(s) for the next operation by replacing the terminal nodes.
73 | The arguments can be either layer names or the actual layers.
74 | '''
75 | assert len(args) != 0
76 | self.terminals = []
77 | for fed_layer in args:
78 | if isinstance(fed_layer, basestring):
79 | try:
80 | fed_layer = self.layers[fed_layer]
81 | except KeyError:
82 | raise KeyError('Unknown layer name fed: %s' % fed_layer)
83 | self.terminals.append(fed_layer)
84 | return self
85 |
86 | def get_output(self):
87 | '''Returns the current network output.'''
88 | return self.terminals[-1]
89 |
90 | def get_unique_name(self, prefix):
91 | '''Returns an index-suffixed unique name for the given prefix.
92 | This is used for auto-generating layer names based on the type-prefix.
93 | '''
94 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
95 | return '%s_%d' % (prefix, ident)
96 |
97 | def make_var(self, name, shape):
98 | '''Creates a new TensorFlow variable.'''
99 | return tf.get_variable(name, shape, trainable=self.trainable)
100 |
101 | def validate_padding(self, padding):
102 | '''Verifies that the padding is one of the supported ones.'''
103 | assert padding in ('SAME', 'VALID')
104 |
105 | @layer
106 | def conv(self,
107 | input,
108 | k_h,
109 | k_w,
110 | c_o,
111 | s_h,
112 | s_w,
113 | name,
114 | relu=True,
115 | padding=DEFAULT_PADDING,
116 | group=1,
117 | biased=True):
118 | # Verify that the padding is acceptable
119 | self.validate_padding(padding)
120 | # Get the number of channels in the input
121 | c_i = input.get_shape()[-1]
122 | # Verify that the grouping parameter is valid
123 | assert c_i % group == 0
124 | assert c_o % group == 0
125 | # Convolution for a given input and kernel
126 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
127 | with tf.variable_scope(name) as scope:
128 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o])
129 | if group == 1:
130 | # This is the common-case. Convolve the input without any further complications.
131 | output = convolve(input, kernel)
132 | else:
133 | # Split the input into groups and then convolve each of them independently
134 | input_groups = tf.split(3, group, input)
135 | kernel_groups = tf.split(3, group, kernel)
136 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
137 | # Concatenate the groups
138 | output = tf.concat(3, output_groups)
139 | # Add the biases
140 | if biased:
141 | biases = self.make_var('biases', [c_o])
142 | output = tf.nn.bias_add(output, biases)
143 | if relu:
144 | # ReLU non-linearity
145 | output = tf.nn.relu(output, name=scope.name)
146 | return output
147 |
148 | @layer
149 | def atrous_conv(self,
150 | input,
151 | k_h,
152 | k_w,
153 | c_o,
154 | dilation,
155 | name,
156 | relu=True,
157 | padding=DEFAULT_PADDING,
158 | group=1,
159 | biased=True):
160 | # Verify that the padding is acceptable
161 | self.validate_padding(padding)
162 | # Get the number of channels in the input
163 | c_i = input.get_shape()[-1]
164 | # Verify that the grouping parameter is valid
165 | assert c_i % group == 0
166 | assert c_o % group == 0
167 | # Convolution for a given input and kernel
168 | convolve = lambda i, k: tf.nn.atrous_conv2d(i, k, dilation, padding=padding)
169 | with tf.variable_scope(name) as scope:
170 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o])
171 | if group == 1:
172 | # This is the common-case. Convolve the input without any further complications.
173 | output = convolve(input, kernel)
174 | else:
175 | # Split the input into groups and then convolve each of them independently
176 | input_groups = tf.split(3, group, input)
177 | kernel_groups = tf.split(3, group, kernel)
178 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
179 | # Concatenate the groups
180 | output = tf.concat(3, output_groups)
181 | # Add the biases
182 | if biased:
183 | biases = self.make_var('biases', [c_o])
184 | output = tf.nn.bias_add(output, biases)
185 | if relu:
186 | # ReLU non-linearity
187 | output = tf.nn.relu(output, name=scope.name)
188 | return output
189 |
190 | @layer
191 | def relu(self, input, name):
192 | return tf.nn.relu(input, name=name)
193 |
194 | @layer
195 | def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
196 | self.validate_padding(padding)
197 | return tf.nn.max_pool(input,
198 | ksize=[1, k_h, k_w, 1],
199 | strides=[1, s_h, s_w, 1],
200 | padding=padding,
201 | name=name)
202 |
203 | @layer
204 | def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
205 | self.validate_padding(padding)
206 | return tf.nn.avg_pool(input,
207 | ksize=[1, k_h, k_w, 1],
208 | strides=[1, s_h, s_w, 1],
209 | padding=padding,
210 | name=name)
211 |
212 | @layer
213 | def lrn(self, input, radius, alpha, beta, name, bias=1.0):
214 | return tf.nn.local_response_normalization(input,
215 | depth_radius=radius,
216 | alpha=alpha,
217 | beta=beta,
218 | bias=bias,
219 | name=name)
220 |
221 | @layer
222 | def concat(self, inputs, axis, name):
223 | return tf.concat(concat_dim=axis, values=inputs, name=name)
224 |
225 | @layer
226 | def add(self, inputs, name):
227 | return tf.add_n(inputs, name=name)
228 |
229 | @layer
230 | def fc(self, input, num_out, name, relu=True):
231 | with tf.variable_scope(name) as scope:
232 | input_shape = input.get_shape()
233 | if input_shape.ndims == 4:
234 | # The input is spatial. Vectorize it first.
235 | dim = 1
236 | for d in input_shape[1:].as_list():
237 | dim *= d
238 | feed_in = tf.reshape(input, [-1, dim])
239 | else:
240 | feed_in, dim = (input, input_shape[-1].value)
241 | weights = self.make_var('weights', shape=[dim, num_out])
242 | biases = self.make_var('biases', [num_out])
243 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
244 | fc = op(feed_in, weights, biases, name=scope.name)
245 | return fc
246 |
247 | @layer
248 | def softmax(self, input, name):
249 | input_shape = map(lambda v: v.value, input.get_shape())
250 | if len(input_shape) > 2:
251 | # For certain models (like NiN), the singleton spatial dimensions
252 | # need to be explicitly squeezed, since they're not broadcast-able
253 | # in TensorFlow's NHWC ordering (unlike Caffe's NCHW).
254 | if input_shape[1] == 1 and input_shape[2] == 1:
255 | input = tf.squeeze(input, squeeze_dims=[1, 2])
256 | else:
257 | raise ValueError('Rank 2 tensor input expected for softmax!')
258 | return tf.nn.softmax(input, name)
259 |
260 | @layer
261 | def batch_normalization(self, input, name, is_training, activation_fn=None, scale=True):
262 | with tf.variable_scope(name) as scope:
263 | output = slim.batch_norm(
264 | input,
265 | activation_fn=activation_fn,
266 | is_training=is_training,
267 | updates_collections=None,
268 | scale=scale,
269 | scope=scope)
270 | return output
271 |
272 | @layer
273 | def dropout(self, input, keep_prob, name):
274 | keep = 1 - self.use_dropout + (self.use_dropout * keep_prob)
275 | return tf.nn.dropout(input, keep, name=name)
276 |
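277 | # Note: a minimal sketch of a Network subclass using the chained-call API
278 | # defined above (a hypothetical toy model, not one from this repo):
279 | #
280 | #   class TinyNet(Network):
281 | #       def setup(self, is_training, num_classes):
282 | #           (self.feed('data')
283 | #                .conv(3, 3, 64, 1, 1, name='conv1')  # k_h, k_w, c_o, s_h, s_w
284 | #                .max_pool(2, 2, 2, 2, name='pool1')
285 | #                .fc(num_classes, name='logits', relu=False))
286 | #
287 | #   net = TinyNet({'data': images})
288 | #   logits = net.layers['logits']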
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/kaffe/tensorflow/transformer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from ..errors import KaffeError, print_stderr
4 | from ..graph import GraphBuilder, NodeMapper
5 | from ..layers import NodeKind
6 | from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser,
7 | BatchNormScaleBiasFuser, BatchNormPreprocessor, ParameterNamer)
8 |
9 | from . import network
10 |
11 |
12 | def get_padding_type(kernel_params, input_shape, output_shape):
13 | '''Translates Caffe's numeric padding to one of ('SAME', 'VALID').
14 | Caffe supports arbitrary padding values, while TensorFlow only
15 | supports 'SAME' and 'VALID' modes. So, not all Caffe paddings
16 | can be translated to TensorFlow. There are some subtleties to
17 | how the padding edge-cases are handled. These are described here:
18 | https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto
19 | '''
20 | k_h, k_w, s_h, s_w, p_h, p_w = kernel_params
21 | s_o_h = np.ceil(input_shape.height / float(s_h))
22 | s_o_w = np.ceil(input_shape.width / float(s_w))
23 | if (output_shape.height == s_o_h) and (output_shape.width == s_o_w):
24 | return 'SAME'
25 | v_o_h = np.ceil((input_shape.height - k_h + 1.0) / float(s_h))
26 | v_o_w = np.ceil((input_shape.width - k_w + 1.0) / float(s_w))
27 | if (output_shape.height == v_o_h) and (output_shape.width == v_o_w):
28 | return 'VALID'
29 | return None
30 |
31 |
32 | class TensorFlowNode(object):
33 | '''An intermediate representation for TensorFlow operations.'''
34 |
35 | def __init__(self, op, *args, **kwargs):
36 | # A string corresponding to the TensorFlow operation
37 | self.op = op
38 | # Positional arguments for the operation
39 | self.args = args
40 | # Keyword arguments for the operation
41 | self.kwargs = list(kwargs.items())
42 | # The source Caffe node
43 | self.node = None
44 |
45 | def format(self, arg):
46 | '''Returns a string representation for the given value.'''
47 | return "'%s'" % arg if isinstance(arg, basestring) else str(arg)
48 |
49 | def pair(self, key, value):
50 | '''Returns key=formatted(value).'''
51 | return '%s=%s' % (key, self.format(value))
52 |
53 | def emit(self):
54 | '''Emits the Python source for this node.'''
55 | # Format positional arguments
56 | args = map(self.format, self.args)
57 | # Format any keyword arguments
58 | if self.kwargs:
59 | args += [self.pair(k, v) for k, v in self.kwargs]
60 | # Set the node name
61 | args.append(self.pair('name', self.node.name))
62 | args = ', '.join(args)
63 | return '%s(%s)' % (self.op, args)
64 |
65 |
66 | class MaybeActivated(object):
67 |
68 | def __init__(self, node, default=True):
69 | self.inject_kwargs = {}
70 | if node.metadata.get('relu', False) != default:
71 | self.inject_kwargs['relu'] = not default
72 |
73 | def __call__(self, *args, **kwargs):
74 | kwargs.update(self.inject_kwargs)
75 | return TensorFlowNode(*args, **kwargs)
76 |
77 |
78 | class TensorFlowMapper(NodeMapper):
79 |
80 | def get_kernel_params(self, node):
81 | kernel_params = node.layer.kernel_parameters
82 | input_shape = node.get_only_parent().output_shape
83 | padding = get_padding_type(kernel_params, input_shape, node.output_shape)
84 | # Only emit the padding if it's not the default value.
85 | padding = {'padding': padding} if padding != network.DEFAULT_PADDING else {}
86 | return (kernel_params, padding)
87 |
88 | def map_convolution(self, node):
89 | (kernel_params, kwargs) = self.get_kernel_params(node)
90 | h = kernel_params.kernel_h
91 | w = kernel_params.kernel_w
92 | c_o = node.output_shape[1]
93 | c_i = node.parents[0].output_shape[1]
94 | group = node.parameters.group
95 | if group != 1:
96 | kwargs['group'] = group
97 | if not node.parameters.bias_term:
98 | kwargs['biased'] = False
99 | assert kernel_params.kernel_h == h
100 | assert kernel_params.kernel_w == w
101 | return MaybeActivated(node)('conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o,
102 | kernel_params.stride_h, kernel_params.stride_w, **kwargs)
103 |
104 | def map_relu(self, node):
105 | return TensorFlowNode('relu')
106 |
107 | def map_pooling(self, node):
108 | pool_type = node.parameters.pool
109 | if pool_type == 0:
110 | pool_op = 'max_pool'
111 | elif pool_type == 1:
112 | pool_op = 'avg_pool'
113 | else:
114 | # Stochastic pooling, for instance.
115 | raise KaffeError('Unsupported pooling type.')
116 | (kernel_params, padding) = self.get_kernel_params(node)
117 | return TensorFlowNode(pool_op, kernel_params.kernel_h, kernel_params.kernel_w,
118 | kernel_params.stride_h, kernel_params.stride_w, **padding)
119 |
120 | def map_inner_product(self, node):
121 | #TODO: Axis
122 | assert node.parameters.axis == 1
123 | #TODO: Unbiased
124 |         assert node.parameters.bias_term
125 | return MaybeActivated(node)('fc', node.parameters.num_output)
126 |
127 | def map_softmax(self, node):
128 | return TensorFlowNode('softmax')
129 |
130 | def map_lrn(self, node):
131 | params = node.parameters
132 | # The window size must be an odd value. For a window
133 | # size of (2*n+1), TensorFlow defines depth_radius = n.
134 | assert params.local_size % 2 == 1
135 | # Caffe scales by (alpha/(2*n+1)), whereas TensorFlow
136 | # just scales by alpha (as does Krizhevsky's paper).
137 | # We'll account for that here.
138 | alpha = params.alpha / float(params.local_size)
139 | return TensorFlowNode('lrn', int(params.local_size / 2), alpha, params.beta)
140 |
141 | def map_concat(self, node):
142 | axis = (2, 3, 1, 0)[node.parameters.axis]
143 | return TensorFlowNode('concat', axis)
144 |
145 | def map_dropout(self, node):
146 | return TensorFlowNode('dropout', node.parameters.dropout_ratio)
147 |
148 | def map_batch_norm(self, node):
149 | scale_offset = len(node.data) == 4
150 | kwargs = {'is_training': True} if scale_offset else {'is_training': True, 'scale': False}
151 | return MaybeActivated(node, default=False)('batch_normalization', **kwargs)
152 |
153 | def map_eltwise(self, node):
154 | operations = {0: 'multiply', 1: 'add', 2: 'max'}
155 | op_code = node.parameters.operation
156 | try:
157 | return TensorFlowNode(operations[op_code])
158 | except KeyError:
159 | raise KaffeError('Unknown elementwise operation: {}'.format(op_code))
160 |
161 | def commit(self, chains):
162 | return chains
163 |
164 |
165 | class TensorFlowEmitter(object):
166 |
167 | def __init__(self, tab=None):
168 | self.tab = tab or ' ' * 4
169 | self.prefix = ''
170 |
171 | def indent(self):
172 | self.prefix += self.tab
173 |
174 | def outdent(self):
175 | self.prefix = self.prefix[:-len(self.tab)]
176 |
177 | def statement(self, s):
178 | return self.prefix + s + '\n'
179 |
180 | def emit_imports(self):
181 | return self.statement('from kaffe.tensorflow import Network\n')
182 |
183 | def emit_class_def(self, name):
184 | return self.statement('class %s(Network):' % (name))
185 |
186 | def emit_setup_def(self):
187 | return self.statement('def setup(self):')
188 |
189 | def emit_parents(self, chain):
190 | assert len(chain)
191 | s = '(self.feed('
192 | sep = ', \n' + self.prefix + (' ' * len(s))
193 | s += sep.join(["'%s'" % parent.name for parent in chain[0].node.parents])
194 | return self.statement(s + ')')
195 |
196 | def emit_node(self, node):
197 | return self.statement(' ' * 5 + '.' + node.emit())
198 |
199 | def emit(self, name, chains):
200 | s = self.emit_imports()
201 | s += self.emit_class_def(name)
202 | self.indent()
203 | s += self.emit_setup_def()
204 | self.indent()
205 | blocks = []
206 | for chain in chains:
207 | b = ''
208 | b += self.emit_parents(chain)
209 | for node in chain:
210 | b += self.emit_node(node)
211 | blocks.append(b[:-1] + ')')
212 | s = s + '\n\n'.join(blocks)
213 | return s
214 |
215 |
216 | class TensorFlowTransformer(object):
217 |
218 | def __init__(self, def_path, data_path, verbose=True, phase='test'):
219 | self.verbose = verbose
220 | self.phase = phase
221 | self.load(def_path, data_path, phase)
222 | self.params = None
223 | self.source = None
224 |
225 | def load(self, def_path, data_path, phase):
226 | # Build the graph
227 | graph = GraphBuilder(def_path, phase).build()
228 |
229 | if data_path is not None:
230 | # Load and associate learned parameters
231 | graph = DataInjector(def_path, data_path)(graph)
232 |
233 | # Transform the graph
234 | transformers = [
235 | # Fuse split batch normalization layers
236 | BatchNormScaleBiasFuser(),
237 |
238 | # Fuse ReLUs
239 | # TODO: Move non-linearity application to layer wrapper, allowing
240 | # any arbitrary operation to be optionally activated.
241 | ReLUFuser(allowed_parent_types=[NodeKind.Convolution, NodeKind.InnerProduct,
242 | NodeKind.BatchNorm]),
243 |
244 | # Rename nodes
245 | # Slashes are used for scoping in TensorFlow. Replace slashes
246 | # in node names with underscores.
247 | # (Caffe's GoogLeNet implementation uses slashes)
248 | NodeRenamer(lambda node: node.name.replace('/', '_'))
249 | ]
250 | self.graph = graph.transformed(transformers)
251 |
252 | # Display the graph
253 | if self.verbose:
254 | print_stderr(self.graph)
255 |
256 | def transform_data(self):
257 | if self.params is None:
258 | transformers = [
259 |
260 | # Reshape the parameters to TensorFlow's ordering
261 | DataReshaper({
262 | # (c_o, c_i, h, w) -> (h, w, c_i, c_o)
263 | NodeKind.Convolution: (2, 3, 1, 0),
264 |
265 | # (c_o, c_i) -> (c_i, c_o)
266 | NodeKind.InnerProduct: (1, 0)
267 | }),
268 |
269 | # Pre-process batch normalization data
270 | BatchNormPreprocessor(),
271 |
272 | # Convert parameters to dictionaries
273 | ParameterNamer(),
274 | ]
275 | self.graph = self.graph.transformed(transformers)
276 | self.params = {node.name: node.data for node in self.graph.nodes if node.data}
277 | return self.params
278 |
279 | def transform_source(self):
280 | if self.source is None:
281 | mapper = TensorFlowMapper(self.graph)
282 | chains = mapper.map()
283 | emitter = TensorFlowEmitter()
284 | self.source = emitter.emit(self.graph.name, chains)
285 | return self.source
286 |
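287 | # Note: a minimal usage sketch of the transformer (how a converter script
288 | # might drive it; paths are placeholders):
289 | #
290 | #   import numpy as np
291 | #   transformer = TensorFlowTransformer('deploy.prototxt', 'net.caffemodel')
292 | #   params = transformer.transform_data()    # {node name: numpy arrays}
293 | #   source = transformer.transform_source()  # generated Network subclass
294 | #   np.save('net.npy', params)
295 | #   open('net.py', 'w').write(source)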
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/kaffe/transformers.py:
--------------------------------------------------------------------------------
1 | '''
2 | A collection of graph transforms.
3 |
4 | A transformer is a callable that accepts a graph and returns a transformed version.
5 | '''
6 |
7 | import numpy as np
8 |
9 | from .caffe import get_caffe_resolver, has_pycaffe
10 | from .errors import KaffeError, print_stderr
11 | from .layers import NodeKind
12 |
13 |
14 | class DataInjector(object):
15 | '''
16 | Associates parameters loaded from a .caffemodel file with their corresponding nodes.
17 | '''
18 |
19 | def __init__(self, def_path, data_path):
20 | # The .prototxt file defining the graph
21 | self.def_path = def_path
22 | # The .caffemodel file containing the learned parameters
23 | self.data_path = data_path
24 | # Set to true if the fallback protocol-buffer based backend was used
25 | self.did_use_pb = False
26 | # A list containing (layer name, parameters) tuples
27 | self.params = None
28 | # Load the parameters
29 | self.load()
30 |
31 | def load(self):
32 | if has_pycaffe():
33 | self.load_using_caffe()
34 | else:
35 | self.load_using_pb()
36 |
37 | def load_using_caffe(self):
38 | caffe = get_caffe_resolver().caffe
39 | net = caffe.Net(self.def_path, self.data_path, caffe.TEST)
40 | data = lambda blob: blob.data
41 | self.params = [(k, map(data, v)) for k, v in net.params.items()]
42 |
43 | def load_using_pb(self):
44 | data = get_caffe_resolver().NetParameter()
45 | data.MergeFromString(open(self.data_path, 'rb').read())
46 | pair = lambda layer: (layer.name, self.normalize_pb_data(layer))
47 | layers = data.layers or data.layer
48 | self.params = [pair(layer) for layer in layers if layer.blobs]
49 | self.did_use_pb = True
50 |
51 | def normalize_pb_data(self, layer):
52 | transformed = []
53 | for blob in layer.blobs:
54 | if len(blob.shape.dim):
55 | dims = blob.shape.dim
56 | c_o, c_i, h, w = map(int, [1] * (4 - len(dims)) + list(dims))
57 | else:
58 | c_o = blob.num
59 | c_i = blob.channels
60 | h = blob.height
61 | w = blob.width
62 | data = np.array(blob.data, dtype=np.float32).reshape(c_o, c_i, h, w)
63 | transformed.append(data)
64 | return transformed
65 |
66 | def adjust_parameters(self, node, data):
67 | if not self.did_use_pb:
68 | return data
69 | # When using the protobuf-backend, each parameter initially has four dimensions.
70 | # In certain cases (like FC layers), we want to eliminate the singleton dimensions.
71 | # This implementation takes care of the common cases. However, it does leave the
72 | # potential for future issues.
73 | # The Caffe-backend does not suffer from this problem.
74 | data = list(data)
75 | squeeze_indices = [1] # Squeeze biases.
76 | if node.kind == NodeKind.InnerProduct:
77 | squeeze_indices.append(0) # Squeeze FC.
78 | for idx in squeeze_indices:
79 | data[idx] = np.squeeze(data[idx])
80 | return data
81 |
82 | def __call__(self, graph):
83 | for layer_name, data in self.params:
84 | if layer_name in graph:
85 | node = graph.get_node(layer_name)
86 | node.data = self.adjust_parameters(node, data)
87 | else:
88 | print_stderr('Ignoring parameters for non-existent layer: %s' % layer_name)
89 | return graph
90 |
91 |
92 | class DataReshaper(object):
93 |
94 | def __init__(self, mapping, replace=True):
95 | # A dictionary mapping NodeKind to the transposed order.
96 | self.mapping = mapping
97 | # The node kinds eligible for reshaping
98 | self.reshaped_node_types = self.mapping.keys()
99 | # If true, the reshaped data will replace the old one.
100 | # Otherwise, it's set to the reshaped_data attribute.
101 | self.replace = replace
102 |
103 | def has_spatial_parent(self, node):
104 | try:
105 | parent = node.get_only_parent()
106 | s = parent.output_shape
107 | return s.height > 1 or s.width > 1
108 | except KaffeError:
109 | return False
110 |
111 | def map(self, node_kind):
112 | try:
113 | return self.mapping[node_kind]
114 | except KeyError:
115 | raise KaffeError('Ordering not found for node kind: {}'.format(node_kind))
116 |
117 | def __call__(self, graph):
118 | for node in graph.nodes:
119 | if node.data is None:
120 | continue
121 | if node.kind not in self.reshaped_node_types:
122 | # Check for 2+ dimensional data
123 | if any(len(tensor.shape) > 1 for tensor in node.data):
124 |                     print_stderr('Warning: parameters not reshaped for node: {}'.format(node))
125 | continue
126 | transpose_order = self.map(node.kind)
127 | weights = node.data[0]
128 | if (node.kind == NodeKind.InnerProduct) and self.has_spatial_parent(node):
129 | # The FC layer connected to the spatial layer needs to be
130 | # re-wired to match the new spatial ordering.
131 | in_shape = node.get_only_parent().output_shape
132 | fc_shape = weights.shape
133 | output_channels = fc_shape[0]
134 | weights = weights.reshape((output_channels, in_shape.channels, in_shape.height,
135 | in_shape.width))
136 | weights = weights.transpose(self.map(NodeKind.Convolution))
137 | node.reshaped_data = weights.reshape(fc_shape[transpose_order[0]],
138 | fc_shape[transpose_order[1]])
139 | else:
140 | node.reshaped_data = weights.transpose(transpose_order)
141 |
142 | if self.replace:
143 | for node in graph.nodes:
144 | if hasattr(node, 'reshaped_data'):
145 | # Set the weights
146 | node.data[0] = node.reshaped_data
147 | del node.reshaped_data
148 | return graph
149 |
150 |
151 | class SubNodeFuser(object):
152 | '''
153 | An abstract helper for merging a single-child with its single-parent.
154 | '''
155 |
156 | def __call__(self, graph):
157 | nodes = graph.nodes
158 | fused_nodes = []
159 | for node in nodes:
160 | if len(node.parents) != 1:
161 | # We're only fusing nodes with single parents
162 | continue
163 | parent = node.get_only_parent()
164 | if len(parent.children) != 1:
165 | # We can only fuse a node if its parent's
166 | # value isn't used by any other node.
167 | continue
168 | if not self.is_eligible_pair(parent, node):
169 | continue
170 | # Rewrite the fused node's children to its parent.
171 | for child in node.children:
172 | child.parents.remove(node)
173 | parent.add_child(child)
174 | # Disconnect the fused node from the graph.
175 | parent.children.remove(node)
176 | fused_nodes.append(node)
177 | # Let the sub-class merge the fused node in any arbitrary way.
178 | self.merge(parent, node)
179 | transformed_nodes = [node for node in nodes if node not in fused_nodes]
180 | return graph.replaced(transformed_nodes)
181 |
182 | def is_eligible_pair(self, parent, child):
183 | '''Returns true if this parent/child pair is eligible for fusion.'''
184 | raise NotImplementedError('Must be implemented by subclass.')
185 |
186 | def merge(self, parent, child):
187 | '''Merge the child node into the parent.'''
188 | raise NotImplementedError('Must be implemented by subclass')
189 |
190 |
191 | class ReLUFuser(SubNodeFuser):
192 | '''
193 | Fuses rectified linear units with their parent nodes.
194 | '''
195 |
196 | def __init__(self, allowed_parent_types=None):
197 | # Fuse ReLUs when the parent node is one of the given types.
198 | # If None, all node types are eligible.
199 | self.allowed_parent_types = allowed_parent_types
200 |
201 | def is_eligible_pair(self, parent, child):
202 | return ((self.allowed_parent_types is None or parent.kind in self.allowed_parent_types) and
203 | child.kind == NodeKind.ReLU)
204 |
205 | def merge(self, parent, _):
206 | parent.metadata['relu'] = True
207 |
208 |
209 | class BatchNormScaleBiasFuser(SubNodeFuser):
210 | '''
211 | The original batch normalization paper includes two learned
212 | parameters: a scaling factor \gamma and a bias \beta.
213 | Caffe's implementation does not include these two. However, it is commonly
214 |     replicated by adding a scaling+bias layer immediately after the batch norm.
215 |
216 | This fuser merges the scaling+bias layer with the batch norm.
217 | '''
218 |
219 | def is_eligible_pair(self, parent, child):
220 | return (parent.kind == NodeKind.BatchNorm and child.kind == NodeKind.Scale and
221 | child.parameters.axis == 1 and child.parameters.bias_term == True)
222 |
223 | def merge(self, parent, child):
224 | parent.scale_bias_node = child
225 |
226 |
227 | class BatchNormPreprocessor(object):
228 | '''
229 | Prescale batch normalization parameters.
230 | Concatenate gamma (scale) and beta (bias) terms if set.
231 | '''
232 |
233 | def __call__(self, graph):
234 | for node in graph.nodes:
235 | if node.kind != NodeKind.BatchNorm:
236 | continue
237 | assert node.data is not None
238 | assert len(node.data) == 3
239 | mean, variance, scale = node.data
240 | # Prescale the stats
241 | scaling_factor = 1.0 / scale if scale != 0 else 0
242 | mean *= scaling_factor
243 | variance *= scaling_factor
244 | # Replace with the updated values
245 | node.data = [mean, variance]
246 | if hasattr(node, 'scale_bias_node'):
247 | # Include the scale and bias terms
248 | gamma, beta = node.scale_bias_node.data
249 | node.data += [gamma, beta]
250 | return graph
251 |
252 |
253 | class NodeRenamer(object):
254 | '''
255 | Renames nodes in the graph using a given unary function that
256 | accepts a node and returns its new name.
257 | '''
258 |
259 | def __init__(self, renamer):
260 | self.renamer = renamer
261 |
262 | def __call__(self, graph):
263 | for node in graph.nodes:
264 | node.name = self.renamer(node)
265 | return graph
266 |
267 |
268 | class ParameterNamer(object):
269 | '''
270 | Convert layer data arrays to a dictionary mapping parameter names to their values.
271 | '''
272 |
273 | def __call__(self, graph):
274 | for node in graph.nodes:
275 | if node.data is None:
276 | continue
277 | if node.kind in (NodeKind.Convolution, NodeKind.InnerProduct):
278 | names = ('weights',)
279 | if node.parameters.bias_term:
280 | names += ('biases',)
281 | elif node.kind == NodeKind.BatchNorm:
282 | names = ('moving_mean', 'moving_variance')
283 | if len(node.data) == 4:
284 | names += ('gamma', 'beta')
285 | else:
286 | print_stderr('WARNING: Unhandled parameters: {}'.format(node.kind))
287 | continue
288 | assert len(names) == len(node.data)
289 | node.data = dict(zip(names, node.data))
290 | return graph
291 |
--------------------------------------------------------------------------------
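A note on BatchNormPreprocessor above: Caffe's BatchNorm layer stores three blobs, an unnormalised running mean, an unnormalised running variance, and a moving-average scale factor, so the usable statistics are recovered by dividing the first two blobs by that factor; BatchNormScaleBiasFuser then appends gamma and beta from the fused Scale layer. A toy numpy sketch of the prescaling step (values are illustrative):

    import numpy as np

    mean_blob = np.array([4.0, 8.0])  # running mean, still scaled by the factor
    var_blob = np.array([2.0, 6.0])   # running variance, still scaled
    scale = 2.0                       # moving-average factor
    factor = 1.0 / scale if scale != 0 else 0
    moving_mean = mean_blob * factor      # -> [2.0, 4.0]
    moving_variance = var_blob * factor   # -> [1.0, 3.0]
--------------------------------------------------------------------------------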
/external/tensorflow-deeplab-resnet/misc/2007_000129.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/spyflying/CMPC-Refseg/094639b8bf00cc169ea7b49cdf9c87fdfc70d963/external/tensorflow-deeplab-resnet/misc/2007_000129.jpg
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/misc/2007_000129.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/spyflying/CMPC-Refseg/094639b8bf00cc169ea7b49cdf9c87fdfc70d963/external/tensorflow-deeplab-resnet/misc/2007_000129.png
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/npy2ckpt.py:
--------------------------------------------------------------------------------
1 | """Conversion of the .npy weights into the .ckpt ones.
2 |
3 | This script converts the weights of the DeepLab-ResNet model
4 | from the numpy format into the TensorFlow one.
5 | """
6 |
7 | from __future__ import print_function
8 |
9 | import argparse
10 | import os
11 |
12 | import tensorflow as tf
13 | import numpy as np
14 |
15 | from deeplab_resnet import DeepLabResNetModel
16 |
17 | SAVE_DIR = './'
18 |
19 | def get_arguments():
20 | """Parse all the arguments provided from the CLI.
21 |
22 | Returns:
23 |       The parsed arguments as an argparse.Namespace.
24 | """
25 | parser = argparse.ArgumentParser(description="NPY to CKPT converter.")
26 | parser.add_argument("npy_path", type=str,
27 | help="Path to the .npy file, which contains the weights.")
28 | parser.add_argument("--save-dir", type=str, default=SAVE_DIR,
29 | help="Where to save the converted .ckpt file.")
30 | return parser.parse_args()
31 |
32 | def save(saver, sess, logdir):
33 | model_name = 'model.ckpt'
34 | checkpoint_path = os.path.join(logdir, model_name)
35 |
36 | if not os.path.exists(logdir):
37 | os.makedirs(logdir)
38 |
39 | saver.save(sess, checkpoint_path, write_meta_graph=False)
40 | print('The weights have been converted to {}.'.format(checkpoint_path))
41 |
42 |
43 | def main():
44 |     """Create the model and convert the weights."""
45 | args = get_arguments()
46 |
47 | # Default image.
48 | image_batch = tf.constant(0, tf.float32, shape=[1, 321, 321, 3])
49 | # Create network.
50 | net = DeepLabResNetModel({'data': image_batch})
51 | var_list = tf.global_variables()
52 |
53 | # Set up tf session and initialize variables.
54 | config = tf.ConfigProto()
55 | config.gpu_options.allow_growth = True
56 |
57 | with tf.Session(config=config) as sess:
58 | init = tf.global_variables_initializer()
59 | sess.run(init)
60 |
61 | # Loading .npy weights.
62 | net.load(args.npy_path, sess)
63 |
64 | # Saver for converting the loaded weights into .ckpt.
65 | saver = tf.train.Saver(var_list=var_list, write_version=1)
66 | save(saver, sess, args.save_dir)
67 |
68 | if __name__ == '__main__':
69 | main()
70 |
--------------------------------------------------------------------------------
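Typical usage of npy2ckpt.py above (the weights file name is illustrative; any .npy file produced by the Caffe converter works):

    python npy2ckpt.py deeplab_resnet.npy --save-dir ./snapshots/
--------------------------------------------------------------------------------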
/external/tensorflow-deeplab-resnet/requirements.txt:
--------------------------------------------------------------------------------
1 | Cython>=0.19.2
2 | numpy>=1.7.1
3 | matplotlib>=1.3.1
4 | Pillow>=2.3.0
5 | six>=1.1.0
6 |
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/train.py:
--------------------------------------------------------------------------------
1 | """Training script for the DeepLab-ResNet network on the PASCAL VOC dataset
2 | for semantic image segmentation.
3 |
4 | This script trains the model using augmented PASCAL VOC,
5 | which contains approximately 10000 images for training and 1500 images for validation.
6 | """
7 |
8 | from __future__ import print_function
9 |
10 | import argparse
11 | from datetime import datetime
12 | import os
13 | import sys
14 | import time
15 |
16 | import tensorflow as tf
17 | import numpy as np
18 |
19 | from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, inv_preprocess, prepare_label
20 |
21 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
22 |
23 | BATCH_SIZE = 10
24 | DATA_DIRECTORY = '/home/VOCdevkit'
25 | DATA_LIST_PATH = './dataset/train.txt'
26 | IGNORE_LABEL = 255
27 | INPUT_SIZE = '321,321'
28 | LEARNING_RATE = 2.5e-4
29 | MOMENTUM = 0.9
30 | NUM_CLASSES = 21
31 | NUM_STEPS = 20001
32 | POWER = 0.9
33 | RANDOM_SEED = 1234
34 | RESTORE_FROM = './deeplab_resnet.ckpt'
35 | SAVE_NUM_IMAGES = 2
36 | SAVE_PRED_EVERY = 1000
37 | SNAPSHOT_DIR = './snapshots/'
38 | WEIGHT_DECAY = 0.0005
39 |
40 |
41 | def get_arguments():
42 | """Parse all the arguments provided from the CLI.
43 |
44 | Returns:
45 |       The parsed arguments as an argparse.Namespace.
46 | """
47 | parser = argparse.ArgumentParser(description="DeepLab-ResNet Network")
48 | parser.add_argument("--batch-size", type=int, default=BATCH_SIZE,
49 | help="Number of images sent to the network in one step.")
50 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY,
51 | help="Path to the directory containing the PASCAL VOC dataset.")
52 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH,
53 | help="Path to the file listing the images in the dataset.")
54 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL,
55 | help="The index of the label to ignore during the training.")
56 | parser.add_argument("--input-size", type=str, default=INPUT_SIZE,
57 | help="Comma-separated string with height and width of images.")
58 | parser.add_argument("--is-training", action="store_true",
59 |                         help="Whether to update the running means and variances during the training.")
60 | parser.add_argument("--learning-rate", type=float, default=LEARNING_RATE,
61 | help="Base learning rate for training with polynomial decay.")
62 | parser.add_argument("--momentum", type=float, default=MOMENTUM,
63 | help="Momentum component of the optimiser.")
64 | parser.add_argument("--not-restore-last", action="store_true",
65 | help="Whether to not restore last (FC) layers.")
66 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES,
67 | help="Number of classes to predict (including background).")
68 | parser.add_argument("--num-steps", type=int, default=NUM_STEPS,
69 | help="Number of training steps.")
70 | parser.add_argument("--power", type=float, default=POWER,
71 | help="Decay parameter to compute the learning rate.")
72 | parser.add_argument("--random-mirror", action="store_true",
73 | help="Whether to randomly mirror the inputs during the training.")
74 | parser.add_argument("--random-scale", action="store_true",
75 | help="Whether to randomly scale the inputs during the training.")
76 | parser.add_argument("--random-seed", type=int, default=RANDOM_SEED,
77 | help="Random seed to have reproducible results.")
78 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM,
79 |                         help="Where to restore model parameters from.")
80 | parser.add_argument("--save-num-images", type=int, default=SAVE_NUM_IMAGES,
81 | help="How many images to save.")
82 | parser.add_argument("--save-pred-every", type=int, default=SAVE_PRED_EVERY,
83 |                         help="Number of training steps between saving summaries and checkpoints.")
84 | parser.add_argument("--snapshot-dir", type=str, default=SNAPSHOT_DIR,
85 | help="Where to save snapshots of the model.")
86 | parser.add_argument("--weight-decay", type=float, default=WEIGHT_DECAY,
87 | help="Regularisation parameter for L2-loss.")
88 | return parser.parse_args()
89 |
90 | def save(saver, sess, logdir, step):
91 | '''Save weights.
92 |
93 | Args:
94 | saver: TensorFlow Saver object.
95 | sess: TensorFlow session.
96 | logdir: path to the snapshots directory.
97 | step: current training step.
98 | '''
99 | model_name = 'model.ckpt'
100 | checkpoint_path = os.path.join(logdir, model_name)
101 |
102 | if not os.path.exists(logdir):
103 | os.makedirs(logdir)
104 | saver.save(sess, checkpoint_path, global_step=step)
105 | print('The checkpoint has been created.')
106 |
107 | def load(saver, sess, ckpt_path):
108 | '''Load trained weights.
109 |
110 | Args:
111 | saver: TensorFlow Saver object.
112 | sess: TensorFlow session.
113 | ckpt_path: path to checkpoint file with parameters.
114 | '''
115 | saver.restore(sess, ckpt_path)
116 | print("Restored model parameters from {}".format(ckpt_path))
117 |
118 | def main():
119 | """Create the model and start the training."""
120 | args = get_arguments()
121 |
122 | h, w = map(int, args.input_size.split(','))
123 | input_size = (h, w)
124 |
125 | tf.set_random_seed(args.random_seed)
126 |
127 | # Create queue coordinator.
128 | coord = tf.train.Coordinator()
129 |
130 | # Load reader.
131 | with tf.name_scope("create_inputs"):
132 | reader = ImageReader(
133 | args.data_dir,
134 | args.data_list,
135 | input_size,
136 | args.random_scale,
137 | args.random_mirror,
138 | args.ignore_label,
139 | IMG_MEAN,
140 | coord)
141 | image_batch, label_batch = reader.dequeue(args.batch_size)
142 |
143 | # Create network.
144 | net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes)
145 | # For a small batch size, it is better to keep
146 | # the statistics of the BN layers (running means and variances)
147 | # frozen, and to not update the values provided by the pre-trained model.
148 | # If is_training=True, the statistics will be updated during the training.
149 | # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
150 | # if they are presented in var_list of the optimiser definition.
151 |
152 | # Predictions.
153 | raw_output = net.layers['fc1_voc12']
154 | # Which variables to load. Running means and variances are not trainable,
155 |     # thus all global variables should be restored.
156 | restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last]
157 | all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name]
158 | fc_trainable = [v for v in all_trainable if 'fc' in v.name]
159 | conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0
160 | fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0
161 | fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0
162 | assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable))
163 | assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))
164 |
165 |
166 |     # Predictions: ignoring all predictions with labels greater than or equal to n_classes
167 | raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
168 | label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w]
169 | raw_gt = tf.reshape(label_proc, [-1,])
170 | indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
171 | gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
172 | prediction = tf.gather(raw_prediction, indices)
173 |
174 |
175 | # Pixel-wise softmax loss.
176 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
177 | l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
178 | reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)
179 |
180 | # Processed predictions: for visualisation.
181 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
182 | raw_output_up = tf.argmax(raw_output_up, dimension=3)
183 | pred = tf.expand_dims(raw_output_up, dim=3)
184 |
185 | # Image summary.
186 | images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
187 | labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)
188 | preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)
189 |
190 | total_summary = tf.summary.image('images',
191 | tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
192 | max_outputs=args.save_num_images) # Concatenate row-wise.
193 | summary_writer = tf.summary.FileWriter(args.snapshot_dir,
194 | graph=tf.get_default_graph())
195 |
196 | # Define loss and optimisation parameters.
197 | base_lr = tf.constant(args.learning_rate)
198 | step_ph = tf.placeholder(dtype=tf.float32, shape=())
199 | learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
200 |
201 | opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
202 | opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
203 | opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)
204 |
205 | grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
206 | grads_conv = grads[:len(conv_trainable)]
207 | grads_fc_w = grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))]
208 | grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]
209 |
210 | train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
211 | train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
212 | train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
213 |
214 | train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
215 |
216 |
217 | # Set up tf session and initialize variables.
218 | config = tf.ConfigProto()
219 | config.gpu_options.allow_growth = True
220 | sess = tf.Session(config=config)
221 | init = tf.global_variables_initializer()
222 |
223 | sess.run(init)
224 |
225 | # Saver for storing checkpoints of the model.
226 | saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)
227 |
228 | # Load variables if the checkpoint is provided.
229 | if args.restore_from is not None:
230 | loader = tf.train.Saver(var_list=restore_var)
231 | load(loader, sess, args.restore_from)
232 |
233 | # Start queue threads.
234 | threads = tf.train.start_queue_runners(coord=coord, sess=sess)
235 |
236 | # Iterate over training steps.
237 | for step in range(args.num_steps):
238 | start_time = time.time()
239 | feed_dict = { step_ph : step }
240 |
241 | if step % args.save_pred_every == 0:
242 | loss_value, images, labels, preds, summary, _ = sess.run([reduced_loss, image_batch, label_batch, pred, total_summary, train_op], feed_dict=feed_dict)
243 | summary_writer.add_summary(summary, step)
244 | save(saver, sess, args.snapshot_dir, step)
245 | else:
246 | loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)
247 | duration = time.time() - start_time
248 | print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
249 | coord.request_stop()
250 | coord.join(threads)
251 |
252 | if __name__ == '__main__':
253 | main()
254 |
--------------------------------------------------------------------------------
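The schedule built in train.py above is polynomial decay, lr(step) = base_lr * (1 - step / num_steps) ** power, applied through three MomentumOptimizers so that FC weights train at 10x and FC biases at 20x the convolutional learning rate. A self-contained sketch of the schedule with the script's defaults (printed values are approximate):

    base_lr, num_steps, power = 2.5e-4, 20001, 0.9

    def poly_lr(step):
        return base_lr * (1.0 - float(step) / num_steps) ** power

    print(poly_lr(0))      # 2.5e-04 at the first step
    print(poly_lr(10000))  # ~1.34e-04 half-way through
    print(poly_lr(20000))  # ~3.4e-08, effectively zero by the end
--------------------------------------------------------------------------------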
/external/tensorflow-deeplab-resnet/train_msc.py:
--------------------------------------------------------------------------------
1 | """Training script with multi-scale inputs for the DeepLab-ResNet network on the PASCAL VOC dataset
2 | for semantic image segmentation.
3 |
4 | This script trains the model using augmented PASCAL VOC,
5 | which contains approximately 10000 images for training and 1500 images for validation.
6 | """
7 |
8 | from __future__ import print_function
9 |
10 | import argparse
11 | from datetime import datetime
12 | import os
13 | import sys
14 | import time
15 |
16 | import tensorflow as tf
17 | import numpy as np
18 |
19 | from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, inv_preprocess, prepare_label
20 |
21 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
22 |
23 | BATCH_SIZE = 1
24 | DATA_DIRECTORY = '/home/VOCdevkit'
25 | DATA_LIST_PATH = './dataset/train.txt'
26 | GRAD_UPDATE_EVERY = 10
27 | IGNORE_LABEL = 255
28 | INPUT_SIZE = '321,321'
29 | LEARNING_RATE = 2.5e-4
30 | MOMENTUM = 0.9
31 | NUM_CLASSES = 21
32 | NUM_STEPS = 20001
33 | POWER = 0.9
34 | RANDOM_SEED = 1234
35 | RESTORE_FROM = './deeplab_resnet.ckpt'
36 | SAVE_NUM_IMAGES = 1
37 | SAVE_PRED_EVERY = 1000
38 | SNAPSHOT_DIR = './snapshots/'
39 | WEIGHT_DECAY = 0.0005
40 |
41 |
42 | def get_arguments():
43 | """Parse all the arguments provided from the CLI.
44 |
45 | Returns:
46 |       The parsed arguments as an argparse.Namespace.
47 | """
48 | parser = argparse.ArgumentParser(description="DeepLab-ResNet Network")
49 | parser.add_argument("--batch-size", type=int, default=BATCH_SIZE,
50 | help="Number of images sent to the network in one step.")
51 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY,
52 | help="Path to the directory containing the PASCAL VOC dataset.")
53 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH,
54 | help="Path to the file listing the images in the dataset.")
55 | parser.add_argument("--grad-update-every", type=int, default=GRAD_UPDATE_EVERY,
56 | help="Number of steps after which gradient update is applied.")
57 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL,
58 | help="The index of the label to ignore during the training.")
59 | parser.add_argument("--input-size", type=str, default=INPUT_SIZE,
60 | help="Comma-separated string with height and width of images.")
61 | parser.add_argument("--is-training", action="store_true",
62 | help="Whether to update the running means and variances during the training.")
63 | parser.add_argument("--learning-rate", type=float, default=LEARNING_RATE,
64 | help="Base learning rate for training with polynomial decay.")
65 | parser.add_argument("--momentum", type=float, default=MOMENTUM,
66 | help="Momentum component of the optimiser.")
67 | parser.add_argument("--not-restore-last", action="store_true",
68 | help="Whether to not restore last (FC) layers.")
69 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES,
70 | help="Number of classes to predict (including background).")
71 | parser.add_argument("--num-steps", type=int, default=NUM_STEPS,
72 | help="Number of training steps.")
73 | parser.add_argument("--power", type=float, default=POWER,
74 | help="Decay parameter to compute the learning rate.")
75 | parser.add_argument("--random-mirror", action="store_true",
76 | help="Whether to randomly mirror the inputs during the training.")
77 | parser.add_argument("--random-scale", action="store_true",
78 | help="Whether to randomly scale the inputs during the training.")
79 | parser.add_argument("--random-seed", type=int, default=RANDOM_SEED,
80 | help="Random seed to have reproducible results.")
81 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM,
82 |                         help="Where to restore model parameters from.")
83 | parser.add_argument("--save-num-images", type=int, default=SAVE_NUM_IMAGES,
84 | help="How many images to save.")
85 | parser.add_argument("--save-pred-every", type=int, default=SAVE_PRED_EVERY,
86 |                         help="Number of training steps between saving summaries and checkpoints.")
87 | parser.add_argument("--snapshot-dir", type=str, default=SNAPSHOT_DIR,
88 | help="Where to save snapshots of the model.")
89 | parser.add_argument("--weight-decay", type=float, default=WEIGHT_DECAY,
90 | help="Regularisation parameter for L2-loss.")
91 | return parser.parse_args()
92 |
93 | def save(saver, sess, logdir, step):
94 | '''Save weights.
95 |
96 | Args:
97 | saver: TensorFlow Saver object.
98 | sess: TensorFlow session.
99 | logdir: path to the snapshots directory.
100 | step: current training step.
101 | '''
102 | model_name = 'model.ckpt'
103 | checkpoint_path = os.path.join(logdir, model_name)
104 |
105 | if not os.path.exists(logdir):
106 | os.makedirs(logdir)
107 | saver.save(sess, checkpoint_path, global_step=step)
108 | print('The checkpoint has been created.')
109 |
110 | def load(saver, sess, ckpt_path):
111 | '''Load trained weights.
112 |
113 | Args:
114 | saver: TensorFlow Saver object.
115 | sess: TensorFlow session.
116 | ckpt_path: path to checkpoint file with parameters.
117 | '''
118 | saver.restore(sess, ckpt_path)
119 | print("Restored model parameters from {}".format(ckpt_path))
120 |
121 | def main():
122 | """Create the model and start the training."""
123 | args = get_arguments()
124 |
125 | h, w = map(int, args.input_size.split(','))
126 | input_size = (h, w)
127 |
128 | tf.set_random_seed(args.random_seed)
129 |
130 | # Create queue coordinator.
131 | coord = tf.train.Coordinator()
132 |
133 | # Load reader.
134 | with tf.name_scope("create_inputs"):
135 | reader = ImageReader(
136 | args.data_dir,
137 | args.data_list,
138 | input_size,
139 | args.random_scale,
140 | args.random_mirror,
141 | args.ignore_label,
142 | IMG_MEAN,
143 | coord)
144 | image_batch, label_batch = reader.dequeue(args.batch_size)
145 | image_batch075 = tf.image.resize_images(image_batch, [int(h * 0.75), int(w * 0.75)])
146 | image_batch05 = tf.image.resize_images(image_batch, [int(h * 0.5), int(w * 0.5)])
147 |
148 | # Create network.
149 | with tf.variable_scope('', reuse=False):
150 | net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes)
151 | with tf.variable_scope('', reuse=True):
152 | net075 = DeepLabResNetModel({'data': image_batch075}, is_training=args.is_training, num_classes=args.num_classes)
153 | with tf.variable_scope('', reuse=True):
154 | net05 = DeepLabResNetModel({'data': image_batch05}, is_training=args.is_training, num_classes=args.num_classes)
155 | # For a small batch size, it is better to keep
156 | # the statistics of the BN layers (running means and variances)
157 | # frozen, and to not update the values provided by the pre-trained model.
158 | # If is_training=True, the statistics will be updated during the training.
159 | # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
160 | # if they are presented in var_list of the optimiser definition.
161 |
162 | # Predictions.
163 | raw_output100 = net.layers['fc1_voc12']
164 | raw_output075 = net075.layers['fc1_voc12']
165 | raw_output05 = net05.layers['fc1_voc12']
166 | raw_output = tf.reduce_max(tf.stack([raw_output100,
167 | tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3,]),
168 | tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3,])]), axis=0)
169 | # Which variables to load. Running means and variances are not trainable,
170 |     # thus all global variables should be restored.
171 | restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last]
172 | all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name]
173 | fc_trainable = [v for v in all_trainable if 'fc' in v.name]
174 | conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0
175 | fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0
176 | fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0
177 | assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable))
178 | assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))
179 |
180 |
181 |     # Predictions: ignoring all predictions with labels greater than or equal to n_classes
182 | raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
183 | raw_prediction100 = tf.reshape(raw_output100, [-1, args.num_classes])
184 | raw_prediction075 = tf.reshape(raw_output075, [-1, args.num_classes])
185 | raw_prediction05 = tf.reshape(raw_output05, [-1, args.num_classes])
186 |
187 | label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w]
188 | label_proc075 = prepare_label(label_batch, tf.stack(raw_output075.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False)
189 | label_proc05 = prepare_label(label_batch, tf.stack(raw_output05.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False)
190 |
191 | raw_gt = tf.reshape(label_proc, [-1,])
192 | raw_gt075 = tf.reshape(label_proc075, [-1,])
193 | raw_gt05 = tf.reshape(label_proc05, [-1,])
194 |
195 | indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
196 | indices075 = tf.squeeze(tf.where(tf.less_equal(raw_gt075, args.num_classes - 1)), 1)
197 | indices05 = tf.squeeze(tf.where(tf.less_equal(raw_gt05, args.num_classes - 1)), 1)
198 |
199 | gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
200 | gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
201 | gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)
202 |
203 | prediction = tf.gather(raw_prediction, indices)
204 | prediction100 = tf.gather(raw_prediction100, indices)
205 | prediction075 = tf.gather(raw_prediction075, indices075)
206 | prediction05 = tf.gather(raw_prediction05, indices05)
207 |
208 |
209 | # Pixel-wise softmax loss.
210 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
211 | loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction100, labels=gt)
212 | loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction075, labels=gt075)
213 | loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction05, labels=gt05)
214 | l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
215 | reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(loss05) + tf.add_n(l2_losses)
216 |
217 | # Processed predictions: for visualisation.
218 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
219 | raw_output_up = tf.argmax(raw_output_up, dimension=3)
220 | pred = tf.expand_dims(raw_output_up, dim=3)
221 |
222 | # Image summary.
223 | images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
224 | labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)
225 | preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)
226 |
227 | total_summary = tf.summary.image('images',
228 | tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
229 | max_outputs=args.save_num_images) # Concatenate row-wise.
230 | summary_writer = tf.summary.FileWriter(args.snapshot_dir,
231 | graph=tf.get_default_graph())
232 |
233 | # Define loss and optimisation parameters.
234 | base_lr = tf.constant(args.learning_rate)
235 | step_ph = tf.placeholder(dtype=tf.float32, shape=())
236 | learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
237 |
238 | opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
239 | opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
240 | opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)
241 |
242 | # Define a variable to accumulate gradients.
243 | accum_grads = [tf.Variable(tf.zeros_like(v.initialized_value()),
244 | trainable=False) for v in conv_trainable + fc_w_trainable + fc_b_trainable]
245 |
246 | # Define an operation to clear the accumulated gradients for next batch.
247 | zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads]
248 |
249 | # Compute gradients.
250 | grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
251 |
252 | # Accumulate and normalise the gradients.
253 | accum_grads_op = [accum_grads[i].assign_add(grad / args.grad_update_every) for i, grad in
254 | enumerate(grads)]
255 |
256 | grads_conv = accum_grads[:len(conv_trainable)]
257 | grads_fc_w = accum_grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))]
258 | grads_fc_b = accum_grads[(len(conv_trainable) + len(fc_w_trainable)):]
259 |
260 | # Apply the gradients.
261 | train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
262 | train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
263 | train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
264 |
265 | train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
266 |
267 |
268 | # Set up tf session and initialize variables.
269 | config = tf.ConfigProto()
270 | config.gpu_options.allow_growth = True
271 | sess = tf.Session(config=config)
272 | init = tf.global_variables_initializer()
273 |
274 | sess.run(init)
275 |
276 | # Saver for storing checkpoints of the model.
277 | saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)
278 |
279 | # Load variables if the checkpoint is provided.
280 | if args.restore_from is not None:
281 | loader = tf.train.Saver(var_list=restore_var)
282 | load(loader, sess, args.restore_from)
283 |
284 | # Start queue threads.
285 | threads = tf.train.start_queue_runners(coord=coord, sess=sess)
286 |
287 | # Iterate over training steps.
288 | for step in range(args.num_steps):
289 | start_time = time.time()
290 | feed_dict = { step_ph : step }
291 | loss_value = 0
292 |
293 | # Clear the accumulated gradients.
294 | sess.run(zero_op, feed_dict=feed_dict)
295 |
296 | # Accumulate gradients.
297 | for i in range(args.grad_update_every):
298 | _, l_val = sess.run([accum_grads_op, reduced_loss], feed_dict=feed_dict)
299 | loss_value += l_val
300 |
301 | # Normalise the loss.
302 | loss_value /= args.grad_update_every
303 |
304 | # Apply gradients.
305 | if step % args.save_pred_every == 0:
306 | images, labels, summary, _ = sess.run([image_batch, label_batch, total_summary, train_op], feed_dict=feed_dict)
307 | summary_writer.add_summary(summary, step)
308 | save(saver, sess, args.snapshot_dir, step)
309 | else:
310 | sess.run(train_op, feed_dict=feed_dict)
311 |
312 | duration = time.time() - start_time
313 | print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
314 | coord.request_stop()
315 | coord.join(threads)
316 |
317 | if __name__ == '__main__':
318 | main()
319 |
--------------------------------------------------------------------------------
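train_msc.py above differs from train.py in two ways: the network is run at three scales (1.0, 0.75, 0.5) whose logits are fused with an element-wise maximum, and gradients are accumulated over --grad-update-every sub-batches before a single optimiser step, emulating a larger batch at batch size 1. A self-contained toy (plain Python, not the TF graph) mirroring the accumulation scheme:

    grad_update_every = 10
    lr = 0.1
    w = 5.0                                # toy parameter; loss = 0.5 * w**2
    accum = 0.0                            # zero_op: clear the accumulator
    for _ in range(grad_update_every):     # one backward pass per sub-batch
        grad = w                           # d(0.5 * w**2)/dw
        accum += grad / grad_update_every  # accum_grads_op: running mean
    w -= lr * accum                        # apply_gradients: one step
    print(w)                               # 4.5, one SGD step on the mean grad
--------------------------------------------------------------------------------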
/get_model.py:
--------------------------------------------------------------------------------
1 | import CMPC_model
2 |
3 |
4 | def get_segmentation_model(name, **kwargs):
5 | model = eval(name).LSTM_model(**kwargs)
6 | return model
7 |
--------------------------------------------------------------------------------
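Because get_segmentation_model above resolves name with eval(), only modules imported at the top of the file (here CMPC_model) can be selected, and each must expose an LSTM_model class; a dictionary of registered model modules would be a safer alternative. Illustrative usage mirroring the training call in trainval_model.py:

    model = get_segmentation_model('CMPC_model', mode='train',
                                   vocab_size=12112, start_lr=2.5e-4,
                                   batch_size=1, conv5=False, emb_name='Gref')
--------------------------------------------------------------------------------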
/motivation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/spyflying/CMPC-Refseg/094639b8bf00cc169ea7b49cdf9c87fdfc70d963/motivation.png
--------------------------------------------------------------------------------
/trainval.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | LOG=logs/unc/cmpc_model
4 | mkdir -p ${LOG}
5 | now=$(date +"%Y%m%d_%H%M%S")
6 |
7 | python -u trainval_model.py \
8 | -m train \
9 | -d unc \
10 | -t train \
11 | -n CMPC_model \
12 | -emb \
13 | -f ckpts/unc/cmpc_model 2>&1 | tee ${LOG}/train_$now.txt
14 |
15 | python -u trainval_model.py \
16 | -m test \
17 | -d unc \
18 | -t val \
19 | -n CMPC_model \
20 | -i 700000 \
21 | -c \
22 | -emb \
23 | -f ckpts/unc/cmpc_model 2>&1 | tee ${LOG}/test_val_$now.txt
24 |
--------------------------------------------------------------------------------
/trainval_model.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import sys
4 | import os
5 | import argparse
6 | import tensorflow as tf
7 | import skimage
8 | from skimage import io as sio
9 | import time
10 | # import matplotlib.pyplot as plt
11 | from get_model import get_segmentation_model
12 | from pydensecrf import densecrf
13 |
14 | from util import data_reader
15 | from util.processing_tools import *
16 | from util import im_processing, eval_tools, MovingAverage
17 |
18 |
19 | def train(max_iter, snapshot, dataset, setname, mu, lr, bs, tfmodel_folder,
20 | conv5, model_name, stop_iter, pre_emb=False):
21 | iters_per_log = 100
22 | data_folder = './' + dataset + '/' + setname + '_batch/'
23 | data_prefix = dataset + '_' + setname
24 | snapshot_file = os.path.join(tfmodel_folder, dataset + '_iter_%d.tfmodel')
25 | if not os.path.isdir(tfmodel_folder):
26 | os.makedirs(tfmodel_folder)
27 |
28 | cls_loss_avg = 0
29 | avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
30 | decay = 0.99
31 | vocab_size = 8803 if dataset == 'referit' else 12112
32 | emb_name = 'referit' if dataset == 'referit' else 'Gref'
33 |
34 | if pre_emb:
35 | print("Use pretrained Embeddings.")
36 | model = get_segmentation_model(model_name, mode='train',
37 | vocab_size=vocab_size, start_lr=lr,
38 | batch_size=bs, conv5=conv5, emb_name=emb_name)
39 | else:
40 | model = get_segmentation_model(model_name, mode='train',
41 | vocab_size=vocab_size, start_lr=lr,
42 | batch_size=bs, conv5=conv5)
43 |
44 | weights = './data/weights/deeplab_resnet_init.ckpt'
45 | print("Loading pretrained weights from {}".format(weights))
46 | load_var = {var.op.name: var for var in tf.global_variables()
47 | if var.name.startswith('res') or var.name.startswith('bn') or var.name.startswith('conv1')}
48 |
49 | snapshot_loader = tf.train.Saver(load_var)
50 | snapshot_saver = tf.train.Saver(max_to_keep=4)
51 |
52 | config = tf.ConfigProto()
53 | config.gpu_options.allow_growth = True
54 | sess = tf.Session(config=config)
55 | sess.run(tf.global_variables_initializer())
56 | snapshot_loader.restore(sess, weights)
57 |
58 | im_h, im_w, num_steps = model.H, model.W, model.num_steps
59 | text_batch = np.zeros((bs, num_steps), dtype=np.float32)
60 | image_batch = np.zeros((bs, im_h, im_w, 3), dtype=np.float32)
61 | mask_batch = np.zeros((bs, im_h, im_w, 1), dtype=np.float32)
62 | valid_idx_batch = np.zeros((bs, 1), dtype=np.int32)
63 |
64 | reader = data_reader.DataReader(data_folder, data_prefix)
65 |
66 | # for time calculate
67 | last_time = time.time()
68 | time_avg = MovingAverage()
69 | for n_iter in range(max_iter):
70 |
71 | for n_batch in range(bs):
72 | batch = reader.read_batch(is_log=(n_batch == 0 and n_iter % iters_per_log == 0))
73 | text = batch['text_batch']
74 | im = batch['im_batch'].astype(np.float32)
75 | mask = np.expand_dims(batch['mask_batch'].astype(np.float32), axis=2)
76 |
77 | im = im[:, :, ::-1]
78 | im -= mu
79 |
80 | text_batch[n_batch, ...] = text
81 | image_batch[n_batch, ...] = im
82 | mask_batch[n_batch, ...] = mask
83 |
84 | for idx in range(text.shape[0]):
85 | if text[idx] != 0:
86 | valid_idx_batch[n_batch, :] = idx
87 | break
88 |
89 | _, cls_loss_val, lr_val, scores_val, label_val = sess.run([model.train_step,
90 | model.cls_loss,
91 | model.learning_rate,
92 | model.pred,
93 | model.target],
94 | feed_dict={
95 | model.words: text_batch,
96 | # np.expand_dims(text, axis=0),
97 | model.im: image_batch,
98 | # np.expand_dims(im, axis=0),
99 | model.target_fine: mask_batch,
100 | # np.expand_dims(mask, axis=0)
101 | model.valid_idx: valid_idx_batch
102 | })
103 | cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val
104 |
105 | # Accuracy
106 | accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(scores_val, label_val)
107 | avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
108 | avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
109 | avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg
110 |
111 | # timing
112 | cur_time = time.time()
113 | elapsed = cur_time - last_time
114 | last_time = cur_time
115 |
116 | if n_iter % iters_per_log == 0:
117 | print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f'
118 | % (n_iter, cls_loss_val, cls_loss_avg, lr_val))
119 | print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
120 | % (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
121 | print('iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
122 | % (n_iter, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))
123 | time_avg.add(elapsed)
124 | print('iter = %d, cur time = %.5f, avg time = %.5f, model_name: %s' % (n_iter, elapsed, time_avg.get_avg(), model_name))
125 |
126 | # Save snapshot
127 | if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter:
128 | snapshot_saver.save(sess, snapshot_file % (n_iter + 1))
129 | print('snapshot saved to ' + snapshot_file % (n_iter + 1))
130 | if (n_iter + 1) >= stop_iter:
131 | print('stop training at iter ' + str(stop_iter))
132 | break
133 |
134 | print('Optimization done.')
135 |
136 |
137 | def test(iter, dataset, visualize, setname, dcrf, mu, tfmodel_folder, model_name, pre_emb=False):
138 | data_folder = './' + dataset + '/' + setname + '_batch/'
139 | data_prefix = dataset + '_' + setname
140 | if visualize:
141 | save_dir = './' + dataset + '/visualization/' + str(iter) + '/'
142 | if not os.path.isdir(save_dir):
143 | os.makedirs(save_dir)
144 | weights = os.path.join(tfmodel_folder, dataset + '_iter_' + str(iter) + '.tfmodel')
145 | print("Loading trained weights from {}".format(weights))
146 |
147 | score_thresh = 1e-9
148 | eval_seg_iou_list = [.5, .6, .7, .8, .9]
149 | cum_I, cum_U = 0, 0
150 | mean_IoU, mean_dcrf_IoU = 0, 0
151 | seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
152 | if dcrf:
153 | cum_I_dcrf, cum_U_dcrf = 0, 0
154 | seg_correct_dcrf = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
155 | seg_total = 0.
156 | H, W = 320, 320
157 | vocab_size = 8803 if dataset == 'referit' else 12112
158 | emb_name = 'referit' if dataset == 'referit' else 'Gref'
159 |
160 | IU_result = list()
161 |
162 | if pre_emb:
163 |         # use pretrained embedding
164 | print("Use pretrained Embeddings.")
165 | model = get_segmentation_model(model_name, H=H, W=W,
166 | mode='eval', vocab_size=vocab_size, emb_name=emb_name)
167 | else:
168 | model = get_segmentation_model(model_name, H=H, W=W,
169 | mode='eval', vocab_size=vocab_size)
170 |
171 | # Load pretrained model
172 | snapshot_restorer = tf.train.Saver()
173 | config = tf.ConfigProto()
174 | config.gpu_options.allow_growth = True
175 | sess = tf.Session(config=config)
176 | sess.run(tf.global_variables_initializer())
177 | snapshot_restorer.restore(sess, weights)
178 | reader = data_reader.DataReader(data_folder, data_prefix, shuffle=False)
179 |
180 | NN = reader.num_batch
181 | for n_iter in range(reader.num_batch):
182 |
183 | if n_iter % (NN // 50) == 0:
184 | if n_iter / (NN // 50) % 5 == 0:
185 | sys.stdout.write(str(n_iter / (NN // 50) // 5))
186 | else:
187 | sys.stdout.write('.')
188 | sys.stdout.flush()
189 |
190 | batch = reader.read_batch(is_log=False)
191 | text = batch['text_batch']
192 | im = batch['im_batch']
193 | mask = batch['mask_batch'].astype(np.float32)
194 | valid_idx = np.zeros([1], dtype=np.int32)
195 | for idx in range(text.shape[0]):
196 | if text[idx] != 0:
197 | valid_idx[0] = idx
198 | break
199 |
200 | proc_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, H, W))
201 | proc_im_ = proc_im.astype(np.float32)
202 | proc_im_ = proc_im_[:, :, ::-1]
203 | proc_im_ -= mu
204 |
205 | scores_val, up_val, sigm_val = sess.run([model.pred, model.up, model.sigm],
206 | feed_dict={
207 | model.words: np.expand_dims(text, axis=0),
208 | model.im: np.expand_dims(proc_im_, axis=0),
209 | model.valid_idx: np.expand_dims(valid_idx, axis=0)
210 | })
211 |
212 | # scores_val = np.squeeze(scores_val)
213 | # pred_raw = (scores_val >= score_thresh).astype(np.float32)
214 | up_val = np.squeeze(up_val)
215 | pred_raw = (up_val >= score_thresh).astype(np.float32)
216 | predicts = im_processing.resize_and_crop(pred_raw, mask.shape[0], mask.shape[1])
217 | if dcrf:
218 | # Dense CRF post-processing
219 | sigm_val = np.squeeze(sigm_val)
220 | d = densecrf.DenseCRF2D(W, H, 2)
221 | U = np.expand_dims(-np.log(sigm_val), axis=0)
222 | U_ = np.expand_dims(-np.log(1 - sigm_val), axis=0)
223 | unary = np.concatenate((U_, U), axis=0)
224 | unary = unary.reshape((2, -1))
225 | d.setUnaryEnergy(unary)
226 | d.addPairwiseGaussian(sxy=3, compat=3)
227 | d.addPairwiseBilateral(sxy=20, srgb=3, rgbim=proc_im, compat=10)
228 | Q = d.inference(5)
229 | pred_raw_dcrf = np.argmax(Q, axis=0).reshape((H, W)).astype(np.float32)
230 | predicts_dcrf = im_processing.resize_and_crop(pred_raw_dcrf, mask.shape[0], mask.shape[1])
231 |
232 | if visualize:
233 | sent = batch['sent_batch'][0]
234 | visualize_seg(im, mask, predicts, sent)
235 | if dcrf:
236 | visualize_seg(im, mask, predicts_dcrf, sent)
237 |
238 | I, U = eval_tools.compute_mask_IU(predicts, mask)
239 | IU_result.append({'batch_no': n_iter, 'I': I, 'U': U})
240 | mean_IoU += float(I) / U
241 | cum_I += I
242 | cum_U += U
243 | msg = 'cumulative IoU = %f' % (cum_I / cum_U)
244 | for n_eval_iou in range(len(eval_seg_iou_list)):
245 | eval_seg_iou = eval_seg_iou_list[n_eval_iou]
246 | seg_correct[n_eval_iou] += (I / U >= eval_seg_iou)
247 | if dcrf:
248 | I_dcrf, U_dcrf = eval_tools.compute_mask_IU(predicts_dcrf, mask)
249 | mean_dcrf_IoU += float(I_dcrf) / U_dcrf
250 | cum_I_dcrf += I_dcrf
251 | cum_U_dcrf += U_dcrf
252 | msg += '\tcumulative IoU (dcrf) = %f' % (cum_I_dcrf / cum_U_dcrf)
253 | for n_eval_iou in range(len(eval_seg_iou_list)):
254 | eval_seg_iou = eval_seg_iou_list[n_eval_iou]
255 | seg_correct_dcrf[n_eval_iou] += (I_dcrf / U_dcrf >= eval_seg_iou)
256 | # print(msg)
257 | seg_total += 1
258 |
259 | # Print results
260 | print('Segmentation evaluation (without DenseCRF):')
261 | result_str = ''
262 | for n_eval_iou in range(len(eval_seg_iou_list)):
263 | result_str += 'precision@%s = %f\n' % \
264 | (str(eval_seg_iou_list[n_eval_iou]), seg_correct[n_eval_iou] / seg_total)
265 | result_str += 'overall IoU = %f; mean IoU = %f\n' % (cum_I / cum_U, mean_IoU / seg_total)
266 | print(result_str)
267 | if dcrf:
268 | print('Segmentation evaluation (with DenseCRF):')
269 | result_str = ''
270 | for n_eval_iou in range(len(eval_seg_iou_list)):
271 | result_str += 'precision@%s = %f\n' % \
272 | (str(eval_seg_iou_list[n_eval_iou]), seg_correct_dcrf[n_eval_iou] / seg_total)
273 | result_str += 'overall IoU = %f; mean IoU = %f\n' % (cum_I_dcrf / cum_U_dcrf, mean_dcrf_IoU / seg_total)
274 | print(result_str)
275 |
276 |
277 | def visualize_seg(im, mask, predicts, sent):
278 | # print("visualizing")
279 | vis_dir = "./visualize/lgcr_best_c5map/unc/testA"
280 | sent_dir = os.path.join(vis_dir, sent)
281 | if not os.path.exists(sent_dir):
282 | os.makedirs(sent_dir)
283 |
284 | # Ignore sio warnings of low-contrast image.
285 | import warnings
286 | warnings.filterwarnings('ignore')
287 |
288 | sio.imsave(os.path.join(sent_dir, "im.png"), im)
289 |
290 | im_gt = np.zeros_like(im)
291 | im_gt[:, :, 2] = 170
292 | im_gt[:, :, 0] += mask.astype('uint8') * 170
293 | im_gt = im_gt.astype('int16')
294 | im_gt[:, :, 2] += mask.astype('int16') * (-170)
295 | im_gt = im_gt.astype('uint8')
296 | sio.imsave(os.path.join(sent_dir, "gt.png"), im_gt)
297 |
298 | im_seg = im / 2
299 | im_seg[:, :, 0] += predicts.astype('uint8') * 100
300 | im_seg = im_seg.astype('uint8')
301 | sio.imsave(os.path.join(sent_dir, "pred.png"), im_seg)
302 |
303 | # plt.imshow(im_seg.astype('uint8'))
304 | # plt.title(sent)
305 | # plt.show()
306 |
307 |
308 | if __name__ == "__main__":
309 | parser = argparse.ArgumentParser()
310 | parser.add_argument('-g', type=str, default='0')
311 | parser.add_argument('-i', type=int, default=800000)
312 | parser.add_argument('-s', type=int, default=100000)
313 |     parser.add_argument('-st', type=int, default=700000) # stop training after st iterations
314 | parser.add_argument('-m', type=str) # 'train' 'test'
315 | parser.add_argument('-d', type=str, default='referit') # 'Gref' 'unc' 'unc+' 'referit'
316 | parser.add_argument('-t', type=str) # 'train' 'trainval' 'val' 'test' 'testA' 'testB'
317 | parser.add_argument('-f', type=str) # directory to save models
318 | parser.add_argument('-lr', type=float, default=0.00025) # start learning rate
319 | parser.add_argument('-bs', type=int, default=1) # batch size
320 | parser.add_argument('-v', default=False, action='store_true') # visualization
321 |     parser.add_argument('-c', default=False, action='store_true') # whether or not to apply DenseCRF
322 |     parser.add_argument('-emb', default=False, action='store_true') # whether or not to use Pretrained Embeddings
323 | parser.add_argument('-n', type=str, default='') # select model
324 | parser.add_argument('-conv5', default=False, action='store_true') # finetune conv layers
325 |
326 | args = parser.parse_args()
327 | # os.environ['CUDA_VISIBLE_DEVICES'] = args.g
328 | mu = np.array((104.00698793, 116.66876762, 122.67891434))
329 |
330 | if args.m == 'train':
331 | train(max_iter=args.i,
332 | snapshot=args.s,
333 | dataset=args.d,
334 | setname=args.t,
335 | mu=mu,
336 | lr=args.lr,
337 | bs=args.bs,
338 | tfmodel_folder=args.f,
339 | conv5=args.conv5,
340 | model_name=args.n,
341 | stop_iter=args.st,
342 | pre_emb=args.emb)
343 | elif args.m == 'test':
344 | test(iter=args.i,
345 | dataset=args.d,
346 | visualize=args.v,
347 | setname=args.t,
348 | dcrf=args.c,
349 | mu=mu,
350 | tfmodel_folder=args.f,
351 | model_name=args.n,
352 | pre_emb=args.emb)
353 |
--------------------------------------------------------------------------------
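The evaluation loop in test() above accumulates, per sample, the mask intersection I and union U (computed by util/eval_tools.py) and reports overall IoU = sum(I) / sum(U), mean IoU = mean(I / U), and precision@t, the fraction of samples with IoU >= t for t in {0.5, ..., 0.9}. A minimal numpy sketch of the per-sample quantities on toy masks:

    import numpy as np

    pred = np.array([[1, 1], [0, 0]], dtype=bool)  # predicted mask
    gt = np.array([[1, 0], [0, 0]], dtype=bool)    # ground-truth mask
    I = np.logical_and(pred, gt).sum()  # intersection = 1
    U = np.logical_or(pred, gt).sum()   # union = 2
    print(I / float(U))                 # per-sample IoU = 0.5
--------------------------------------------------------------------------------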
/util/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import MovingAverage
--------------------------------------------------------------------------------
/util/cell.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | class ConvLSTMCell(tf.nn.rnn_cell.RNNCell):
4 |   """An LSTM cell with convolutions instead of multiplications.
5 | Reference:
6 | Xingjian, S. H. I., et al. "Convolutional LSTM network: A machine learning approach for precipitation nowcasting." Advances in Neural Information Processing Systems. 2015.
7 | """
8 |
9 | def __init__(self, shape, filters, kernel, forget_bias=1.0, activation=tf.tanh, normalize=True, peephole=True, data_format='channels_last', reuse=None):
10 | super(ConvLSTMCell, self).__init__(_reuse=reuse)
11 | self._kernel = kernel
12 | self._filters = filters
13 | self._forget_bias = forget_bias
14 | self._activation = activation
15 | self._normalize = normalize
16 | self._peephole = peephole
17 | if data_format == 'channels_last':
18 | self._size = tf.TensorShape(shape + [self._filters])
19 | self._feature_axis = self._size.ndims
20 | self._data_format = None
21 | elif data_format == 'channels_first':
22 | self._size = tf.TensorShape([self._filters] + shape)
23 | self._feature_axis = 0
24 | self._data_format = 'NC'
25 | else:
26 | raise ValueError('Unknown data_format')
27 |
28 | @property
29 | def state_size(self):
30 | return tf.nn.rnn_cell.LSTMStateTuple(self._size, self._size)
31 |
32 | @property
33 | def output_size(self):
34 | return self._size
35 |
36 | def call(self, x, state):
37 | c, h = state
38 |
39 | x = tf.concat([x, h], axis=self._feature_axis)
40 | n = x.shape[-1].value
41 | m = 4 * self._filters if self._filters > 1 else 4
42 | W = tf.get_variable('kernel', self._kernel + [n, m])
43 | y = tf.nn.convolution(x, W, 'SAME', data_format=self._data_format)
44 | if not self._normalize:
45 | y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
46 | j, i, f, o = tf.split(y, 4, axis=self._feature_axis)
47 |
48 | if self._peephole:
49 | i += tf.get_variable('W_ci', c.shape[1:]) * c
50 | f += tf.get_variable('W_cf', c.shape[1:]) * c
51 |
52 | if self._normalize:
53 | j = tf.contrib.layers.layer_norm(j)
54 | i = tf.contrib.layers.layer_norm(i)
55 | f = tf.contrib.layers.layer_norm(f)
56 |
57 | f = tf.sigmoid(f + self._forget_bias)
58 | i = tf.sigmoid(i)
59 | c = c * f + i * self._activation(j)
60 |
61 | if self._peephole:
62 | o += tf.get_variable('W_co', c.shape[1:]) * c
63 |
64 | if self._normalize:
65 | o = tf.contrib.layers.layer_norm(o)
66 | c = tf.contrib.layers.layer_norm(c)
67 |
68 | o = tf.sigmoid(o)
69 | h = o * self._activation(c)
70 |
71 | # TODO
72 | #tf.summary.histogram('forget_gate', f)
73 | #tf.summary.histogram('input_gate', i)
74 | #tf.summary.histogram('output_gate', o)
75 | #tf.summary.histogram('cell_state', c)
76 |
77 | state = tf.nn.rnn_cell.LSTMStateTuple(c, h)
78 |
79 | return h, state
80 |
81 |
82 | class ConvGRUCell(tf.nn.rnn_cell.RNNCell):
83 | """A GRU cell with convolutions instead of multiplications."""
84 |
85 | def __init__(self, shape, filters, kernel, activation=tf.tanh, normalize=True, data_format='channels_last', reuse=None):
86 | super(ConvGRUCell, self).__init__(_reuse=reuse)
87 | self._filters = filters
88 | self._kernel = kernel
89 | self._activation = activation
90 | self._normalize = normalize
91 | if data_format == 'channels_last':
92 | self._size = tf.TensorShape(shape + [self._filters])
93 | self._feature_axis = self._size.ndims
94 | self._data_format = None
95 | elif data_format == 'channels_first':
96 | self._size = tf.TensorShape([self._filters] + shape)
97 | self._feature_axis = 0
98 | self._data_format = 'NC'
99 | else:
100 | raise ValueError('Unknown data_format')
101 |
102 | @property
103 | def state_size(self):
104 | return self._size
105 |
106 | @property
107 | def output_size(self):
108 | return self._size
109 |
110 | def call(self, x, h):
111 | channels = x.shape[self._feature_axis].value
112 |
113 | with tf.variable_scope('gates'):
114 | inputs = tf.concat([x, h], axis=self._feature_axis)
115 | n = channels + self._filters
116 | m = 2 * self._filters if self._filters > 1 else 2
117 | W = tf.get_variable('kernel', self._kernel + [n, m])
118 | y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
119 | if self._normalize:
120 | r, u = tf.split(y, 2, axis=self._feature_axis)
121 | r = tf.contrib.layers.layer_norm(r)
122 | u = tf.contrib.layers.layer_norm(u)
123 | else:
124 | y += tf.get_variable('bias', [m], initializer=tf.ones_initializer())
125 | r, u = tf.split(y, 2, axis=self._feature_axis)
126 | r, u = tf.sigmoid(r), tf.sigmoid(u)
127 |
128 | # TODO
129 | #tf.summary.histogram('reset_gate', r)
130 | #tf.summary.histogram('update_gate', u)
131 |
132 | with tf.variable_scope('candidate'):
133 | inputs = tf.concat([x, r * h], axis=self._feature_axis)
134 | n = channels + self._filters
135 | m = self._filters
136 | W = tf.get_variable('kernel', self._kernel + [n, m])
137 | y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
138 | if self._normalize:
139 | y = tf.contrib.layers.layer_norm(y)
140 | else:
141 | y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
142 | h = u * h + (1 - u) * self._activation(y)
143 |
144 |         return h, h
--------------------------------------------------------------------------------
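A minimal usage sketch for the cells above (TF 1.x graph mode; the batch/time/spatial sizes are illustrative). Both cells are standard RNNCells, so they drop into tf.nn.dynamic_rnn with a 5-D [batch, time, H, W, C] input:

    import tensorflow as tf
    from util.cell import ConvLSTMCell

    batch, time, H, W, C = 2, 5, 40, 40, 32          # illustrative sizes
    inputs = tf.placeholder(tf.float32, [batch, time, H, W, C])

    cell = ConvLSTMCell(shape=[H, W], filters=16, kernel=[3, 3])
    outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype)
    # outputs: [batch, time, H, W, 16]
    # state:   LSTMStateTuple of two [batch, H, W, 16] tensors

--------------------------------------------------------------------------------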
/util/data_reader.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import numpy as np
4 | import os
5 | import threading
6 | import Queue as queue
7 |
8 | def run_prefetch(prefetch_queue, folder_name, prefix, num_batch, shuffle):
9 | n_batch_prefetch = 0
10 | fetch_order = np.arange(num_batch)
11 | while True:
12 | # Shuffle the batch order for every epoch
13 | if n_batch_prefetch == 0 and shuffle:
14 | fetch_order = np.random.permutation(num_batch)
15 |
16 | # Load batch from file
17 | batch_id = fetch_order[n_batch_prefetch]
18 | save_file = os.path.join(folder_name, prefix+'_'+str(batch_id)+'.npz')
19 | npz_filemap = np.load(save_file)
20 | batch = dict(npz_filemap)
21 | npz_filemap.close()
22 |
23 |         # add the loaded batch to the prefetching queue
24 | prefetch_queue.put(batch, block=True)
25 |
26 | # Move to next batch
27 | n_batch_prefetch = (n_batch_prefetch + 1) % num_batch
28 |
29 | class DataReader:
30 | def __init__(self, folder_name, prefix, shuffle=True, prefetch_num=8):
31 | self.folder_name = folder_name
32 | self.prefix = prefix
33 | self.shuffle = shuffle
34 | self.prefetch_num = prefetch_num
35 |
36 | self.n_batch = 0
37 | self.n_epoch = 0
38 |
39 |         # Search the folder to count the number of batches
40 | filelist = os.listdir(folder_name)
41 | num_batch = 0
42 | while (prefix + '_' + str(num_batch) + '.npz') in filelist:
43 | num_batch += 1
44 | if num_batch > 0:
45 | print('found %d batches under %s with prefix "%s"' % (num_batch, folder_name, prefix))
46 | else:
47 | raise RuntimeError('no batches under %s with prefix "%s"' % (folder_name, prefix))
48 | self.num_batch = num_batch
49 |
50 | # Start prefetching thread
51 | self.prefetch_queue = queue.Queue(maxsize=prefetch_num)
52 | self.prefetch_thread = threading.Thread(target=run_prefetch,
53 | args=(self.prefetch_queue, self.folder_name, self.prefix,
54 | self.num_batch, self.shuffle))
55 | self.prefetch_thread.daemon = True
56 | self.prefetch_thread.start()
57 |
58 | def read_batch(self, is_log = True):
59 | if is_log:
60 | print('data reader: epoch = %d, batch = %d / %d' % (self.n_epoch, self.n_batch, self.num_batch))
61 |
62 | # Get a batch from the prefetching queue
63 | if self.prefetch_queue.empty():
64 | print('data reader: waiting for file input (IO is slow)...')
65 | batch = self.prefetch_queue.get(block=True)
66 | self.n_batch = (self.n_batch + 1) % self.num_batch
67 | self.n_epoch += (self.n_batch == 0)
68 | return batch
69 |
--------------------------------------------------------------------------------
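A usage sketch for DataReader (the folder and prefix are illustrative): the constructor counts prefix_0.npz, prefix_1.npz, ... in the folder, then starts a daemon thread that keeps up to prefetch_num batches in a bounded queue, so read_batch() only blocks when disk IO falls behind:

    from util.data_reader import DataReader

    reader = DataReader('./Gref/train_batch', 'train', shuffle=True, prefetch_num=8)
    for _ in range(reader.num_batch):
        batch = reader.read_batch(is_log=False)
        # batch is a dict of numpy arrays; the keys depend on how the
        # .npz batches were built (e.g. image, mask and text arrays)

--------------------------------------------------------------------------------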
/util/data_reader_ignore.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import numpy as np
4 | import os
5 | import threading
6 | import Queue as queue
7 |
8 | def run_prefetch(prefetch_queue, folder_name, prefix, num_batch, shuffle, id2name):
9 | n_batch_prefetch = 0
10 | fetch_order = np.arange(num_batch)
11 | while True:
12 | # Shuffle the batch order for every epoch
13 | if n_batch_prefetch == 0 and shuffle:
14 | fetch_order = np.random.permutation(num_batch)
15 |
16 | # Load batch from file
17 | batch_id = fetch_order[n_batch_prefetch]
18 | save_file = os.path.join(folder_name, prefix+'_'+str(batch_id)+'.npz')
19 | npz_filemap = np.load(save_file)
20 | batch = dict(npz_filemap)
21 | if id2name:
22 | batch['img_name'] = id2name[str(batch_id)]
23 | npz_filemap.close()
24 |
25 |         # add the loaded batch to the prefetching queue
26 | prefetch_queue.put(batch, block=True)
27 |
28 | # Move to next batch
29 | n_batch_prefetch = (n_batch_prefetch + 1) % num_batch
30 |
31 | class DataReader:
32 | def __init__(self, folder_name, prefix, shuffle=True, prefetch_num=8, list_name=''):
33 | self.folder_name = folder_name
34 | self.prefix = prefix
35 | self.shuffle = shuffle
36 | self.prefetch_num = prefetch_num
37 |
38 | self.n_batch = 0
39 | self.n_epoch = 0
40 |
41 | self.id2name = None
42 | if list_name != '':
43 | img_list = [line.strip() for line in open(list_name)]
44 | self.id2name = {line.split('\t')[0]:line.split('\t')[-1] for line in img_list}
45 |
46 |
47 |         # Search the folder to count the number of batches
48 | filelist = os.listdir(folder_name)
49 | num_batch = 0
50 | while (prefix + '_' + str(num_batch) + '.npz') in filelist:
51 | num_batch += 1
52 | if num_batch > 0:
53 | print('found %d batches under %s with prefix "%s"' % (num_batch, folder_name, prefix))
54 | else:
55 | raise RuntimeError('no batches under %s with prefix "%s"' % (folder_name, prefix))
56 | self.num_batch = num_batch
57 |
58 | # Start prefetching thread
59 | self.prefetch_queue = queue.Queue(maxsize=prefetch_num)
60 | self.prefetch_thread = threading.Thread(target=run_prefetch,
61 | args=(self.prefetch_queue, self.folder_name, self.prefix,
62 | self.num_batch, self.shuffle, self.id2name))
63 | self.prefetch_thread.daemon = True
64 | self.prefetch_thread.start()
65 |
66 | def read_batch(self, is_log = True):
67 | if is_log:
68 | print('data reader: epoch = %d, batch = %d / %d' % (self.n_epoch, self.n_batch, self.num_batch))
69 |
70 | # Get a batch from the prefetching queue
71 | if self.prefetch_queue.empty():
72 | print('data reader: waiting for file input (IO is slow)...')
73 | batch = self.prefetch_queue.get(block=True)
74 | self.n_batch = (self.n_batch + 1) % self.num_batch
75 | self.n_epoch += (self.n_batch == 0)
76 | return batch
77 |
--------------------------------------------------------------------------------
/util/eval_tools.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import numpy as np
4 | import pyximport; pyximport.install()
5 | # from util.nms import cpu_nms as nms
6 |
7 | # all boxes are [xmin, ymin, xmax, ymax] format, 0-indexed, including xmax and ymax
8 | def compute_bbox_iou(bboxes, target):
9 | if isinstance(bboxes, list):
10 | bboxes = np.array(bboxes)
11 | bboxes = bboxes.reshape((-1, 4))
12 |
13 | if isinstance(target, list):
14 | target = np.array(target)
15 | target = target.reshape((-1, 4))
16 |
17 | A_bboxes = (bboxes[..., 2]-bboxes[..., 0]+1) * (bboxes[..., 3]-bboxes[..., 1]+1)
18 | A_target = (target[..., 2]-target[..., 0]+1) * (target[..., 3]-target[..., 1]+1)
19 | assert(np.all(A_bboxes >= 0))
20 | assert(np.all(A_target >= 0))
21 | I_x1 = np.maximum(bboxes[..., 0], target[..., 0])
22 | I_y1 = np.maximum(bboxes[..., 1], target[..., 1])
23 | I_x2 = np.minimum(bboxes[..., 2], target[..., 2])
24 | I_y2 = np.minimum(bboxes[..., 3], target[..., 3])
25 | A_I = np.maximum(I_x2 - I_x1 + 1, 0) * np.maximum(I_y2 - I_y1 + 1, 0)
26 | IoUs = A_I / (A_bboxes + A_target - A_I)
27 | assert(np.all(0 <= IoUs) and np.all(IoUs <= 1))
28 | return IoUs
29 |
30 | # all masks are [num, height, width] binary arrays
31 | def compute_mask_IU(masks, target):
32 | assert(target.shape[-2:] == masks.shape[-2:])
33 | I = np.sum(np.logical_and(masks, target))
34 | U = np.sum(np.logical_or(masks, target))
35 | return I, U
36 |
--------------------------------------------------------------------------------
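A quick numeric check of compute_bbox_iou under its inclusive-coordinate convention: two 3x3 boxes overlapping in a 2x2 region share 4 pixels, giving IoU = 4 / (9 + 9 - 4) = 2/7:

    from util.eval_tools import compute_bbox_iou

    print(compute_bbox_iou([0, 0, 2, 2], [1, 1, 3, 3]))  # -> [0.2857...]

--------------------------------------------------------------------------------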
/util/functions.py:
--------------------------------------------------------------------------------
1 | import math
2 | from collections import deque
3 |
4 | class MovingAverage():
5 | """ Keeps an average window of the specified number of items. """
6 |
7 | def __init__(self, max_window_size=1000):
8 | self.max_window_size = max_window_size
9 | self.reset()
10 |
11 | def add(self, elem):
12 | """ Adds an element to the window, removing the earliest element if necessary. """
13 | if elem > 999999999999:
14 | print('Warning: Moving average ignored a value of %f' % elem)
15 | return
16 |
17 | self.window.append(elem)
18 | self.sum += elem
19 |
20 | if len(self.window) > self.max_window_size:
21 | self.sum -= self.window.popleft()
22 |
23 | def append(self, elem):
24 | """ Same as add just more pythonic. """
25 | self.add(elem)
26 |
27 | def reset(self):
28 | """ Resets the MovingAverage to its initial state. """
29 | self.window = deque()
30 | self.sum = 0
31 |
32 | def get_avg(self):
33 | """ Returns the average of the elements in the window. """
34 | return self.sum / max(len(self.window), 1)
35 |
36 | def __str__(self):
37 | return str(self.get_avg())
38 |
39 | def __repr__(self):
40 | return repr(self.get_avg())
--------------------------------------------------------------------------------
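A usage sketch for MovingAverage: it smooths a noisy per-iteration metric over the last max_window_size values, which keeps training logs readable (the loss values below are made up):

    from util.functions import MovingAverage

    loss_avg = MovingAverage(max_window_size=100)
    for it, loss in enumerate([0.9, 0.7, 0.8, 0.6]):
        loss_avg.add(loss)
        print('iter %d: loss %.3f (avg %s)' % (it, loss, loss_avg))

--------------------------------------------------------------------------------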
/util/h5_reader.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import numpy as np
4 | import os
5 | import threading
6 | import queue
7 | import h5py
8 |
9 | def run_prefetch(prefetch_queue, h5_file, h5_img, num_batch, shuffle):
10 | n_batch_prefetch = 0
11 | fetch_order = np.arange(num_batch)
12 | img_size=(320, 320)
13 | while True:
14 | # Shuffle the batch order for every epoch
15 | if n_batch_prefetch == 0 and shuffle:
16 | fetch_order = np.random.permutation(num_batch)
17 |
18 | # Load batch from file
19 | batch_id = fetch_order[n_batch_prefetch]
20 | mask = h5_file['answers'][batch_id] # [320, 320]
21 | image_id = h5_file['image_idxs'][batch_id] # int
22 | refexp = h5_file['refexps'][batch_id] # [60]
23 | sent = h5_file['sentence'][batch_id]
24 |
25 | # read images
26 | img = h5_img['images'][image_id] # [320, 320, 3]
27 | batch = {'mask_batch': mask,
28 | 'text_batch': refexp,
29 | 'im_batch': img}
30 |
31 |         # add the loaded batch to the prefetching queue
32 | prefetch_queue.put(batch, block=True)
33 |
34 | # Move to next batch
35 | n_batch_prefetch = (n_batch_prefetch + 1) % num_batch
36 |
37 | class DataReader:
38 | def __init__(self, h5_file_name, h5_image_name, shuffle=True, prefetch_num=8):
39 | # self.img_folder = img_folder
40 | self.h5_file_name = h5_file_name
41 | self.h5_image = h5_image_name
42 | self.shuffle = shuffle
43 | self.prefetch_num = prefetch_num
44 |
45 | self.n_batch = 0
46 | self.n_epoch = 0
47 |
48 |         # Open the h5 files and count the number of batches
49 | self.h5_file = h5py.File(h5_file_name, 'r')
50 | self.h5_image = h5py.File(h5_image_name, 'r')
51 |         num_batch = self.h5_file['image_idxs'].shape[0] # one batch per sample
52 | if num_batch > 0:
53 | print('found %d batches within %s' % (num_batch, h5_file_name))
54 | else:
55 | raise RuntimeError('no batches within %s' % (h5_file_name))
56 |         self.num_batch = num_batch # total number of batches
57 |
58 | # Start prefetching thread
59 | self.prefetch_queue = queue.Queue(maxsize=prefetch_num)
60 |         # a single thread reads the data
61 | self.prefetch_thread = threading.Thread(target=run_prefetch,
62 | args=(self.prefetch_queue, self.h5_file,
63 | self.h5_image, self.num_batch, self.shuffle))
64 | self.prefetch_thread.daemon = True
65 | self.prefetch_thread.start()
66 |
67 | def read_batch(self, is_log = True):
68 | if is_log:
69 | print('data reader: epoch = %d, batch = %d / %d' % (self.n_epoch, self.n_batch, self.num_batch))
70 |
71 | # Get a batch from the prefetching queue
72 | if self.prefetch_queue.empty():
73 | print('data reader: waiting for file input (IO is slow)...')
74 | batch = self.prefetch_queue.get(block=True)
75 | self.n_batch = (self.n_batch + 1) % self.num_batch
76 | self.n_epoch += (self.n_batch == 0)
77 | return batch
78 |
--------------------------------------------------------------------------------
/util/im_processing.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import skimage.transform
4 | import numpy as np
5 |
6 | def resize_and_pad(im, input_h, input_w):
7 | # Resize and pad im to input_h x input_w size
8 | im_h, im_w = im.shape[:2]
9 | scale = min(input_h / im_h, input_w / im_w)
10 | resized_h = int(np.round(im_h * scale))
11 | resized_w = int(np.round(im_w * scale))
12 |     pad_h = int(np.floor((input_h - resized_h) / 2))
13 |     pad_w = int(np.floor((input_w - resized_w) / 2))
14 |
15 | resized_im = skimage.transform.resize(im, [resized_h, resized_w])
16 | if im.ndim > 2:
17 | new_im = np.zeros((input_h, input_w, im.shape[2]), dtype=resized_im.dtype)
18 | else:
19 | new_im = np.zeros((input_h, input_w), dtype=resized_im.dtype)
20 | new_im[pad_h:pad_h+resized_h, pad_w:pad_w+resized_w, ...] = resized_im
21 |
22 | return new_im
23 |
24 | def resize_and_crop(im, input_h, input_w):
25 | # Resize and crop im to input_h x input_w size
26 | im_h, im_w = im.shape[:2]
27 | scale = max(input_h / im_h, input_w / im_w)
28 | resized_h = int(np.round(im_h * scale))
29 | resized_w = int(np.round(im_w * scale))
30 |     crop_h = int(np.floor((resized_h - input_h) / 2))
31 |     crop_w = int(np.floor((resized_w - input_w) / 2))
32 |
33 | resized_im = skimage.transform.resize(im, [resized_h, resized_w])
34 | if im.ndim > 2:
35 | new_im = np.zeros((input_h, input_w, im.shape[2]), dtype=resized_im.dtype)
36 | else:
37 | new_im = np.zeros((input_h, input_w), dtype=resized_im.dtype)
38 | new_im[...] = resized_im[crop_h:crop_h+input_h, crop_w:crop_w+input_w, ...]
39 |
40 | return new_im
41 |
42 | def crop_bboxes_subtract_mean(im, bboxes, crop_size, image_mean):
43 | if isinstance(bboxes, list):
44 | bboxes = np.array(bboxes)
45 | bboxes = bboxes.reshape((-1, 4))
46 |
47 | im = skimage.img_as_ubyte(im)
48 | num_bbox = bboxes.shape[0]
49 | imcrop_batch = np.zeros((num_bbox, crop_size, crop_size, 3), dtype=np.float32)
50 | for n_bbox in range(bboxes.shape[0]):
51 | xmin, ymin, xmax, ymax = bboxes[n_bbox]
52 | # crop and resize
53 | imcrop = im[ymin:ymax+1, xmin:xmax+1, :]
54 | imcrop_batch[n_bbox, ...] = skimage.img_as_ubyte(
55 | skimage.transform.resize(imcrop, [crop_size, crop_size]))
56 | imcrop_batch -= image_mean
57 | return imcrop_batch
58 |
59 | def bboxes_from_masks(masks):
60 | if masks.ndim == 2:
61 | masks = masks[np.newaxis, ...]
62 | num_mask = masks.shape[0]
63 | bboxes = np.zeros((num_mask, 4), dtype=np.int32)
64 | for n_mask in range(num_mask):
65 | idx = np.nonzero(masks[n_mask])
66 | xmin, xmax = np.min(idx[1]), np.max(idx[1])
67 | ymin, ymax = np.min(idx[0]), np.max(idx[0])
68 | bboxes[n_mask, :] = [xmin, ymin, xmax, ymax]
69 | return bboxes
70 |
71 | def crop_masks_subtract_mean(im, masks, crop_size, image_mean):
72 | if masks.ndim == 2:
73 | masks = masks[np.newaxis, ...]
74 | num_mask = masks.shape[0]
75 |
76 | im = skimage.img_as_ubyte(im)
77 | bboxes = bboxes_from_masks(masks)
78 | imcrop_batch = np.zeros((num_mask, crop_size, crop_size, 3), dtype=np.float32)
79 | for n_mask in range(num_mask):
80 | xmin, ymin, xmax, ymax = bboxes[n_mask]
81 |
82 | # crop and resize
83 | im_masked = im.copy()
84 | mask = masks[n_mask, ..., np.newaxis]
85 | im_masked *= mask
86 | im_masked += image_mean.astype(np.uint8) * (1 - mask)
87 | imcrop = im_masked[ymin:ymax+1, xmin:xmax+1, :]
88 |         imcrop_batch[n_mask, ...] = skimage.img_as_ubyte(skimage.transform.resize(imcrop, [crop_size, crop_size]))
89 |
90 | imcrop_batch -= image_mean
91 | return imcrop_batch
92 |
--------------------------------------------------------------------------------
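A sketch of the two resizing strategies above (sizes are illustrative): resize_and_pad letterboxes the image, preserving all content and zero-padding the borders, while resize_and_crop fills the target and center-crops the overflow:

    import numpy as np
    from util import im_processing

    im = np.random.rand(300, 500, 3)
    padded = im_processing.resize_and_pad(im, 320, 320)    # 192x320 content, zero bands top/bottom
    cropped = im_processing.resize_and_crop(im, 320, 320)  # 320x533 resize, center 320x320 kept
    print(padded.shape, cropped.shape)                     # (320, 320, 3) (320, 320, 3)

--------------------------------------------------------------------------------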
/util/io.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import json
4 | import scipy.io as sio
5 |
6 | def load_str_list(filename):
7 | with open(filename, 'r') as f:
8 | str_list = f.readlines()
9 | str_list = [s[:-1] for s in str_list]
10 | return str_list
11 |
12 | def save_str_list(str_list, filename):
13 | str_list = [s+'\n' for s in str_list]
14 | with open(filename, 'w') as f:
15 | f.writelines(str_list)
16 |
17 | def load_json(filename):
18 | with open(filename, 'r') as f:
19 | return json.load(f)
20 |
21 | def save_json(json_obj, filename):
22 | with open(filename, 'w') as f:
23 | json.dump(json_obj, f, separators=(',\n', ':\n'))
24 |
25 | def load_referit_gt_mask(mask_path):
26 | mat = sio.loadmat(mask_path)
27 | mask = (mat['segimg_t'] == 0)
28 | return mask
29 |
30 | def load_proposal_mask(mask_path):
31 | mat = sio.loadmat(mask_path)
32 | mask = mat['mask']
33 | return mask.transpose((2, 0, 1))
34 |
--------------------------------------------------------------------------------
/util/loss.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import tensorflow as tf
4 | import numpy as np
5 |
6 | def weighed_logistic_loss(scores, labels, pos_loss_mult=1.0, neg_loss_mult=1.0):
7 | # Apply different weights to loss of positive samples and negative samples
8 | # positive samples have label 1 while negative samples have label 0
9 | loss_mult = tf.add(tf.multiply(labels, pos_loss_mult-neg_loss_mult), neg_loss_mult)
10 |
11 |     # Classification loss as the average of weighted per-score loss
12 | cls_loss = tf.reduce_mean(tf.reduce_sum(tf.multiply(
13 | tf.nn.sigmoid_cross_entropy_with_logits(logits = scores, labels = labels),
14 | loss_mult), [1, 2, 3]))
15 |
16 | return cls_loss
17 |
18 | def logistic_loss_cond(scores, labels):
19 |     # Classification loss that ignores positions whose label is 0
20 |     cond = tf.where(tf.equal(labels, tf.zeros(tf.shape(labels))),
21 |                     tf.zeros(tf.shape(labels)),
22 |                     tf.nn.sigmoid_cross_entropy_with_logits(logits=scores, labels=labels)
23 |                     )
24 | cls_loss = tf.reduce_mean(tf.reduce_sum(cond, [1, 2, 3]))
25 |
26 | return cls_loss
27 |
28 | def l2_regularization_loss(variables, weight_decay):
29 | l2_losses = [tf.nn.l2_loss(var) for var in variables]
30 | total_l2_loss = weight_decay * tf.add_n(l2_losses)
31 |
32 | return total_l2_loss
33 |
34 | def dsc_loss(scores, labels):
35 | scores = tf.sigmoid(scores)
36 | inter = tf.scalar_mul(2., tf.reduce_sum(tf.multiply(scores, labels), [1, 2, 3]))
37 | union = tf.add(tf.reduce_sum(scores, [1, 2, 3]), tf.reduce_sum(labels, [1, 2, 3]))
38 |     dsc_loss = tf.reduce_mean(tf.subtract(1., tf.divide(inter, union)))
39 |
40 | return dsc_loss
41 |
42 | def iou_loss(scores, labels):
43 | scores = tf.sigmoid(scores)
44 | inter = tf.reduce_sum(tf.multiply(scores, labels), [1, 2, 3])
45 | union = tf.add(tf.reduce_sum(scores, [1, 2, 3]), tf.reduce_sum(labels, [1, 2, 3]))
46 |     union = tf.subtract(union, inter)
47 |     iou_loss = tf.reduce_mean(tf.subtract(1., tf.divide(inter, union)))
48 |
49 | return iou_loss
50 |
51 | def smooth_l1_loss(scores, labels, ld=1.0):
52 | box_diff = scores - labels
53 | abs_box_diff = tf.abs(box_diff)
54 | smooth_l1_sign = tf.stop_gradient(tf.to_float(tf.less(abs_box_diff, 1.)))
55 | loss_box_raw = tf.pow(box_diff, 2) * 0.5 * smooth_l1_sign \
56 | + (abs_box_diff - 0.5) * (1.0 - smooth_l1_sign)
57 | loss_box = ld * tf.reduce_mean(tf.reduce_sum(loss_box_raw, [1]))
58 |
59 | return loss_box
60 |
--------------------------------------------------------------------------------
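A numeric sanity check of what dsc_loss computes (soft Dice on sigmoid scores, redone here in NumPy with made-up values): with per-pixel scores s and labels y, Dice = 2*sum(s*y) / (sum(s) + sum(y)) and the loss is 1 - Dice averaged over the batch; iou_loss differs only in subtracting the intersection from the denominator:

    import numpy as np

    s = np.array([0.9, 0.8, 0.1])   # sigmoid(scores)
    y = np.array([1.0, 1.0, 0.0])   # ground-truth mask
    inter = 2 * np.sum(s * y)       # 3.4
    union = np.sum(s) + np.sum(y)   # 1.8 + 2.0 = 3.8
    print(1 - inter / union)        # ~0.105, the Dice loss for this sample

--------------------------------------------------------------------------------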
/util/nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 | return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 | return a if a <= b else b
16 |
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets,
18 | np.ndarray[np.float32_t, ndim=1] scores, np.float thresh):
19 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
20 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
21 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
22 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
23 |
24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 |
27 | cdef int ndets = dets.shape[0]
28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 | np.zeros((ndets), dtype=np.int)
30 |
31 | # nominal indices
32 | cdef int _i, _j
33 | # sorted indices
34 | cdef int i, j
35 | # temp variables for box i's (the box currently under consideration)
36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 | # variables for computing overlap with box j (lower scoring box)
38 | cdef np.float32_t xx1, yy1, xx2, yy2
39 | cdef np.float32_t w, h
40 | cdef np.float32_t inter, ovr
41 |
42 | keep = []
43 | for _i in range(ndets):
44 | i = order[_i]
45 | if suppressed[i] == 1:
46 | continue
47 | keep.append(i)
48 | ix1 = x1[i]
49 | iy1 = y1[i]
50 | ix2 = x2[i]
51 | iy2 = y2[i]
52 | iarea = areas[i]
53 | for _j in range(_i + 1, ndets):
54 | j = order[_j]
55 | if suppressed[j] == 1:
56 | continue
57 | xx1 = max(ix1, x1[j])
58 | yy1 = max(iy1, y1[j])
59 | xx2 = min(ix2, x2[j])
60 | yy2 = min(iy2, y2[j])
61 | w = max(0.0, xx2 - xx1 + 1)
62 | h = max(0.0, yy2 - yy1 + 1)
63 | inter = w * h
64 | ovr = inter / (iarea + areas[j] - inter)
65 | if ovr >= thresh:
66 | suppressed[j] = 1
67 |
68 | return keep
69 |
--------------------------------------------------------------------------------
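A usage sketch for cpu_nms (compiled on the fly via pyximport, as eval_tools.py does; boxes and scores are illustrative): the highest-scoring box is kept and every remaining box whose IoU with it reaches thresh is suppressed, then the process repeats on what is left:

    import numpy as np
    import pyximport; pyximport.install()
    from util.nms import cpu_nms

    dets = np.array([[ 0,  0, 10, 10],
                     [ 1,  1, 10, 10],    # IoU ~0.83 with the first box
                     [20, 20, 30, 30]], dtype=np.float32)
    scores = np.array([0.9, 0.8, 0.7], dtype=np.float32)
    print(cpu_nms(dets, scores, 0.5))     # -> [0, 2]

--------------------------------------------------------------------------------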
/util/processing_tools.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import numpy as np
4 |
5 | def generate_spatial_batch(N, featmap_H, featmap_W):
6 | spatial_batch_val = np.zeros((N, featmap_H, featmap_W, 8), dtype=np.float32)
7 | for h in range(featmap_H):
8 | for w in range(featmap_W):
9 | xmin = w / featmap_W * 2 - 1
10 | xmax = (w+1) / featmap_W * 2 - 1
11 | xctr = (xmin+xmax) / 2
12 | ymin = h / featmap_H * 2 - 1
13 | ymax = (h+1) / featmap_H * 2 - 1
14 | yctr = (ymin+ymax) / 2
15 | spatial_batch_val[:, h, w, :] = \
16 | [xmin, ymin, xmax, ymax, xctr, yctr, 1/featmap_W, 1/featmap_H]
17 | return spatial_batch_val
18 |
19 | def generate_bilinear_filter(stride):
20 | # Bilinear upsampling filter
21 | f = np.concatenate((np.arange(0, stride), np.arange(stride, 0, -1))) / stride
22 | return np.outer(f, f).astype(np.float32)[:, :, np.newaxis, np.newaxis]
23 |
24 | def compute_accuracy(scores, labels):
25 | is_pos = (labels != 0)
26 | is_neg = np.logical_not(is_pos)
27 | num_pos = np.sum(is_pos)
28 | num_neg = np.sum(is_neg)
29 | num_all = num_pos + num_neg
30 |
31 | is_correct = np.logical_xor(scores < 0, is_pos)
32 | accuracy_all = np.sum(is_correct) / num_all
33 |     accuracy_pos = np.sum(is_correct[is_pos]) / (num_pos + 1)  # +1 avoids division by zero when there are no positives
34 | accuracy_neg = np.sum(is_correct[is_neg]) / num_neg
35 | return accuracy_all, accuracy_pos, accuracy_neg
36 |
37 | def spatial_feature_from_bbox(bboxes, imsize):
38 | if isinstance(bboxes, list):
39 | bboxes = np.array(bboxes)
40 | bboxes = bboxes.reshape((-1, 4))
41 | im_w, im_h = imsize
42 | assert(np.all(bboxes[:, 0] < im_w) and np.all(bboxes[:, 2] < im_w))
43 | assert(np.all(bboxes[:, 1] < im_h) and np.all(bboxes[:, 3] < im_h))
44 |
45 | feats = np.zeros((bboxes.shape[0], 8))
46 | feats[:, 0] = bboxes[:, 0] * 2.0 / im_w - 1 # x1
47 | feats[:, 1] = bboxes[:, 1] * 2.0 / im_h - 1 # y1
48 | feats[:, 2] = bboxes[:, 2] * 2.0 / im_w - 1 # x2
49 | feats[:, 3] = bboxes[:, 3] * 2.0 / im_h - 1 # y2
50 | feats[:, 4] = (feats[:, 0] + feats[:, 2]) / 2 # x0
51 | feats[:, 5] = (feats[:, 1] + feats[:, 3]) / 2 # y0
52 | feats[:, 6] = feats[:, 2] - feats[:, 0] # w
53 | feats[:, 7] = feats[:, 3] - feats[:, 1] # h
54 | return feats
55 |
--------------------------------------------------------------------------------
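What generate_spatial_batch encodes, on a tiny example: every feature-map cell gets an 8-D descriptor [xmin, ymin, xmax, ymax, xctr, yctr, 1/W, 1/H] of its position normalized to [-1, 1], which lets convolutions condition on location:

    from util.processing_tools import generate_spatial_batch

    spatial = generate_spatial_batch(1, 2, 2)  # N=1 on a 2x2 feature map
    print(spatial[0, 0, 0])
    # top-left cell -> [-1. -1.  0.  0. -0.5 -0.5  0.5  0.5]

--------------------------------------------------------------------------------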
/util/text_processing.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import re
4 |
5 | def load_vocab_dict_from_file(dict_file):
6 | with open(dict_file) as f:
7 | words = [w.strip() for w in f.readlines()]
8 | vocab_dict = {words[n]:n for n in range(len(words))}
9 | return vocab_dict
10 |
11 | UNK_IDENTIFIER = '<unk>' # '<unk>' is the word used to identify unknown words
12 | SENTENCE_SPLIT_REGEX = re.compile(r'(\W+)')
13 | def sentence2vocab_indices(sentence, vocab_dict):
14 | words = SENTENCE_SPLIT_REGEX.split(sentence.strip())
15 | words = [w.lower() for w in words if len(w.strip()) > 0]
16 | # remove .
17 | if words[-1] == '.':
18 | words = words[:-1]
19 | vocab_indices = [(vocab_dict[w] if w in vocab_dict else vocab_dict[UNK_IDENTIFIER])
20 | for w in words]
21 | return vocab_indices
22 |
23 | PAD_IDENTIFIER = '<pad>'
24 | EOS_IDENTIFIER = '<eos>'
25 | def preprocess_sentence(sentence, vocab_dict, T):
26 | vocab_indices = sentence2vocab_indices(sentence, vocab_dict)
27 |     # # Append '<eos>' symbol to the end
28 | # vocab_indices.append(vocab_dict[EOS_IDENTIFIER])
29 | # Truncate long sentences
30 | if len(vocab_indices) > T:
31 | vocab_indices = vocab_indices[:T]
32 |     # Pad short sentences at the beginning with the special symbol '<pad>'
33 | if len(vocab_indices) < T:
34 | vocab_indices = [vocab_dict[PAD_IDENTIFIER]] * (T - len(vocab_indices)) + vocab_indices
35 | return vocab_indices
--------------------------------------------------------------------------------
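A usage sketch for the text pipeline above (the vocabulary path and query are illustrative): a referring expression is tokenized, truncated to at most T indices, and left-padded with the '<pad>' index so every example has fixed length T:

    from util import text_processing

    vocab = text_processing.load_vocab_dict_from_file('data/vocabulary_Gref.txt')
    indices = text_processing.preprocess_sentence('the man in a red shirt', vocab, T=20)
    print(len(indices))  # always 20: left-padded, right-truncated

--------------------------------------------------------------------------------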
/util/vgg16_fcn.py:
--------------------------------------------------------------------------------
1 | '''
2 | vgg16 model with atrous & fully convolution layers
3 | '''
4 |
5 | import tensorflow as tf
6 |
7 | class Vgg16:
8 | def __init__(self, data):
9 | self.data = data
10 | '''
11 | build graph
12 | '''
13 | self.conv1_1 = self.conv_relu('conv1_1', self.data, 3, 64)
14 | self.conv1_2 = self.conv_relu('conv1_2', self.conv1_1, 64, 64)
15 | self.pool1 = self.max_pool('pool1', self.conv1_2)
16 |
17 | self.conv2_1 = self.conv_relu('conv2_1', self.pool1, 64, 128)
18 | self.conv2_2 = self.conv_relu('conv2_2', self.conv2_1, 128, 128)
19 | self.pool2 = self.max_pool('pool2', self.conv2_2)
20 |
21 | self.conv3_1 = self.conv_relu('conv3_1', self.pool2, 128, 256)
22 | self.conv3_2 = self.conv_relu('conv3_2', self.conv3_1, 256, 256)
23 | self.conv3_3 = self.conv_relu('conv3_3', self.conv3_2, 256, 256)
24 | self.pool3 = self.max_pool('pool3', self.conv3_3)
25 |
26 | self.conv4_1 = self.conv_relu('conv4_1', self.pool3, 256, 512)
27 | self.conv4_2 = self.conv_relu('conv4_2', self.conv4_1, 512, 512)
28 | self.conv4_3 = self.conv_relu('conv4_3', self.conv4_2, 512, 512)
29 |
30 | self.conv5_1 = self.conv_relu('conv5_1', self.conv4_3, 512, 512)
31 | self.conv5_2 = self.conv_relu('conv5_2', self.conv5_1, 512, 512)
32 | self.conv5_3 = self.conv_relu('conv5_3', self.conv5_2, 512, 512)
33 |
34 | self.fc6 = self.conv_relu('fc6', self.conv5_3, 512, 4096, kernel_size=7)
35 | self.fc7 = self.conv_relu('fc7', self.fc6, 4096, 4096, kernel_size=1)
36 | self.fc8 = self.conv_layer('fc8', self.fc7, 4096, 1000, kernel_size=1)
37 |
38 | def max_pool(self, name, bottom, kernel_size=2, stride=2):
39 | pool = tf.nn.max_pool(bottom, ksize=[1, kernel_size, kernel_size, 1],
40 | strides=[1, stride, stride, 1], padding='SAME', name=name)
41 | return pool
42 |
43 | def conv_layer(self, name, bottom, input_dim, output_dim, kernel_size=3, stride=1):
44 | with tf.variable_scope(name):
45 | w = tf.get_variable('weights', [kernel_size, kernel_size, input_dim, output_dim],
46 | initializer=tf.contrib.layers.xavier_initializer_conv2d())
47 | b = tf.get_variable('biases', output_dim, initializer=tf.constant_initializer(0.))
48 |
49 | conv = tf.nn.conv2d(bottom, w, [1, stride, stride, 1], padding='SAME')
50 | conv = tf.nn.bias_add(conv, b)
51 | return conv
52 |
53 | def conv_relu(self, name, bottom, input_dim, output_dim, kernel_size=3, stride=1):
54 | conv = self.conv_layer(name, bottom, input_dim, output_dim, kernel_size, stride)
55 | return tf.nn.relu(conv)
56 |
57 | def atrous_conv_relu(self, name, bottom, input_dim, output_dim, kernel_size=3, rate=1):
58 | with tf.variable_scope(name):
59 | w = tf.get_variable('weights', [kernel_size, kernel_size, input_dim, output_dim],
60 | initializer=tf.random_normal_initializer(stddev=0.01))
61 | b = tf.get_variable('biases', output_dim, initializer=tf.constant_initializer(0.))
62 |
63 | conv = tf.nn.atrous_conv2d(bottom, w, rate=rate, padding='SAME')
64 | conv = tf.nn.bias_add(conv, b)
65 | relu = tf.nn.relu(conv)
66 | return relu
67 |
--------------------------------------------------------------------------------
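A minimal graph-construction sketch for Vgg16 (TF 1.x; the input size is illustrative): because pool4 and pool5 are dropped and fc6 is a 7x7 convolution while fc7/fc8 are 1x1 convolutions, the network stays fully convolutional at 1/8 of the input resolution:

    import tensorflow as tf
    from util.vgg16_fcn import Vgg16

    images = tf.placeholder(tf.float32, [None, 320, 320, 3])
    net = Vgg16(images)
    print(net.fc8)  # Tensor of shape [None, 40, 40, 1000]

--------------------------------------------------------------------------------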