├── CMPC_model.py ├── CMPC_video ├── CMPC_video_mm_tgraph_allvec.py ├── build_A2D_batches.py ├── train_a2d_new.sh └── trainval_video.py ├── LICENSE ├── README.md ├── build_batches.py ├── data ├── referit_query_test.json ├── referit_query_trainval.json ├── vocabulary_Gref.txt └── vocabulary_referit.txt ├── external └── tensorflow-deeplab-resnet │ ├── LICENSE │ ├── README.md │ ├── convert.py │ ├── dataset │ ├── debug.txt │ ├── test.txt │ ├── train.txt │ ├── val.txt │ └── val_reduced.txt │ ├── deeplab_resnet │ ├── __init__.py │ ├── image_reader.py │ ├── model.py │ └── utils.py │ ├── evaluate.py │ ├── evaluate_msc.py │ ├── fine_tune.py │ ├── images │ ├── colour_scheme.png │ ├── mask.png │ └── summary.png │ ├── inference.py │ ├── kaffe │ ├── __init__.py │ ├── caffe │ │ ├── __init__.py │ │ ├── caffepb.py │ │ └── resolver.py │ ├── errors.py │ ├── graph.py │ ├── layers.py │ ├── shapes.py │ ├── tensorflow │ │ ├── __init__.py │ │ ├── network.py │ │ └── transformer.py │ └── transformers.py │ ├── misc │ ├── 2007_000129.jpg │ ├── 2007_000129.png │ └── deploy.prototxt │ ├── npy2ckpt.py │ ├── requirements.txt │ ├── train.py │ └── train_msc.py ├── get_model.py ├── motivation.png ├── trainval.sh ├── trainval_model.py └── util ├── __init__.py ├── cell.py ├── data_reader.py ├── data_reader_ignore.py ├── eval_tools.py ├── functions.py ├── h5_reader.py ├── im_processing.py ├── io.py ├── loss.py ├── nms.pyx ├── processing_tools.py ├── text_processing.py └── vgg16_fcn.py /CMPC_video/build_A2D_batches.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import skimage 4 | import skimage.io 5 | import csv 6 | import glob 7 | import h5py 8 | import re 9 | 10 | from tqdm import tqdm 11 | from util import im_processing, text_processing 12 | 13 | debug = False 14 | # root directory 15 | root_dir = os.getcwd() 16 | # data directory 17 | a2d_dir = '/mnt/lustre/share/huitianrui/DATASET/A2D-Sentences' 18 | 19 | 20 | def build_a2d_batches(T, input_H, input_W, video=False): 21 | """ 22 | Build data batches of A2D Sentence dataset 23 | 24 | Args: 25 | T: limit of number of words 26 | input_H: height of input frame of I3D backbone 27 | input_W: width of input frame of I3D backbone 28 | video: select consecutive frames or standalone frame 29 | """ 30 | 31 | query_file = os.path.join(a2d_dir, 'a2d_annotation.txt') 32 | frame_dir = os.path.join(a2d_dir, 'Release/frames') 33 | vocab_file = os.path.join(root_dir, 'data/vocabulary_Gref.txt') 34 | 35 | dataset_name = 'a2d_sent_new' 36 | out_dataset_dir = os.path.join(root_dir, dataset_name) 37 | if not os.path.exists(out_dataset_dir): 38 | os.mkdir(out_dataset_dir) 39 | test_batch = os.path.join(out_dataset_dir, 'test_batch') 40 | train_batch = os.path.join(out_dataset_dir, 'train_batch') 41 | if not os.path.exists(test_batch): 42 | os.mkdir(test_batch) 43 | if not os.path.exists(train_batch): 44 | os.mkdir(train_batch) 45 | 46 | vocab_dict = text_processing.load_vocab_dict_from_file(vocab_file) 47 | test_prefix_list = list() 48 | train_prefix_list = list() 49 | split_dict = gen_split_dict() 50 | SENTENCE_SPLIT_REGEX = re.compile(r'(\W+)') 51 | 52 | with open(query_file, 'r') as f: 53 | reader = csv.reader(f) 54 | next(reader) 55 | total_count = 0 56 | test_count = 0 57 | train_count = 0 58 | all_zero_mask_count = 0 59 | for row in tqdm(reader): 60 | # each video belongs to test or train 61 | video_id = row[0] 62 | data_prefix = video_id 63 | if split_dict[data_prefix] == 1: 64 | save_dir = test_batch 65 | 
test_prefix_list.append(data_prefix) 66 | test = True 67 | else: 68 | save_dir = train_batch 69 | train_prefix_list.append(data_prefix) 70 | test = False 71 | # load sentence 72 | instance_id = int(row[1]) 73 | sent = row[2].lower() 74 | words = SENTENCE_SPLIT_REGEX.split(sent.strip()) 75 | words = [w for w in words if len(w.strip()) > 0] 76 | # remove punctuation and restrict sentence within 20 words 77 | if words[-1] == '.': 78 | words = words[:-1] 79 | if len(words) > T: 80 | words = words[:T] 81 | n_sent = "" 82 | for w in words: 83 | n_sent = n_sent + w + ' ' 84 | n_sent = n_sent.strip() 85 | n_sent = n_sent.encode('utf-8').decode("utf-8") 86 | text = text_processing.preprocess_sentence(n_sent, vocab_dict, T) 87 | 88 | image_paths = list() 89 | # for each video, get all the gt masks of a certain instance 90 | masks, frame_ids = get_masks(video_id, instance_id) 91 | 92 | for frame_id in frame_ids: 93 | image_path = os.path.join(frame_dir, video_id, '{:0>5d}.png'.format(frame_id)) 94 | image_paths.append(image_path) 95 | 96 | for frame_id, image_path, mask in zip(frame_ids, image_paths, masks): 97 | # abandon all zero mask batch 98 | if np.sum(mask) == 0: 99 | print("all zeros mask caught") 100 | all_zero_mask_count += 1 101 | continue 102 | if video: 103 | # obtain 16 consecutive frames centered at the gt frame 104 | frame_paths = frame_range(frame_id=frame_id, frame_dir=os.path.join(frame_dir, video_id)) 105 | else: 106 | # only use the gt frame 107 | frame_paths = list() 108 | frames = list() 109 | if test: 110 | count = test_count 111 | test_count = test_count + 1 112 | prefix = 'test_' 113 | image = skimage.io.imread(image_path) 114 | for frame_path in frame_paths: 115 | frames.append(skimage.io.imread(frame_path)) 116 | else: 117 | prefix = 'train_' 118 | count = train_count 119 | train_count = train_count + 1 120 | image = skimage.io.imread(image_path) 121 | image = skimage.img_as_ubyte(im_processing.resize_and_pad(image, input_H, input_W)) 122 | mask = im_processing.resize_and_pad(mask, input_H, input_W) 123 | for frame_path in frame_paths: 124 | frame = skimage.io.imread(frame_path) 125 | frame = skimage.img_as_ubyte(im_processing.resize_and_pad(frame, input_H, input_W)) 126 | frames.append(frame) 127 | 128 | if debug: 129 | m0 = mask[:, :, np.newaxis] 130 | m0 = (m0 > 0).astype(np.uint8) 131 | m0 = np.concatenate([m0, m0, m0], axis=2) 132 | debug_image = image * m0 133 | skimage.io.imsave('./debug/{}_{}_{}.png'.format(data_prefix, frame_id, 134 | sent.replace(' ', '_')), debug_image) 135 | 136 | # save batches 137 | np.savez(file=os.path.join(save_dir, dataset_name + '_' + prefix + str(count)), 138 | text_batch=text, 139 | mask_batch=(mask > 0), 140 | sent_batch=[sent], 141 | im_batch=image, 142 | frame_id=frame_id, 143 | frames=frames) 144 | total_count = total_count + 1 145 | 146 | print() 147 | print("num of all zeros masks is: {}".format(all_zero_mask_count)) 148 | 149 | 150 | def frame_range(frame_id, frame_dir): 151 | frame_paths = os.listdir(frame_dir) 152 | frame_paths.sort() 153 | biggest = frame_paths[-1] 154 | frame_num = int(biggest[:-4]) 155 | start = frame_id - 8 156 | end = frame_id + 8 157 | result = list() 158 | for i in range(start, end): 159 | if i < 1: 160 | frame_id = 1 161 | elif i > frame_num: 162 | frame_id = frame_num 163 | else: 164 | frame_id = i 165 | result.append(os.path.join(frame_dir, '{:0>5d}.png'.format(frame_id))) 166 | assert len(result) == 16 167 | return result 168 | 169 | 170 | def gen_split_dict(): 171 | split_file = 
os.path.join(a2d_dir, 'Release/videoset.csv') 172 | result = dict() 173 | result.setdefault(0) 174 | with open(split_file, 'r') as f: 175 | reader = csv.reader(f) 176 | for line in reader: 177 | video_id = line[0] 178 | split_code = line[-1] 179 | result[video_id] = int(split_code) 180 | return result 181 | 182 | 183 | def get_masks(video_id, instance_id): 184 | anno_dir = os.path.join(a2d_dir, 'a2d_annotation_with_instances') 185 | masks_path = os.path.join(anno_dir, video_id, '*') 186 | mask_files = glob.glob(masks_path) 187 | mask_files.sort() 188 | masks = list() 189 | frame_ids = list() 190 | 191 | for mask_file in mask_files: 192 | f = h5py.File(mask_file, 'r') 193 | instance_ids = f['instance'][:] 194 | if instance_ids.shape[0] == 1: 195 | mask = f['reMask'][:].T 196 | else: 197 | index = np.argwhere(instance_ids == instance_id) 198 | index = np.squeeze(index) 199 | mask = f['reMask'][index].T 200 | mask = np.squeeze(mask) 201 | if index.size != 1: 202 | mask = np.sum(mask, axis=2) 203 | 204 | masks.append(mask) 205 | base_name = os.path.basename(mask_file) 206 | frame_id = int(base_name[:-3]) 207 | frame_ids.append(frame_id) 208 | f.close() 209 | return masks, frame_ids 210 | 211 | 212 | if __name__ == "__main__": 213 | T = 20 214 | input_H = 320 215 | input_W = 320 216 | build_a2d_batches(T=T, input_H=input_H, input_W=input_W, video=True) 217 | -------------------------------------------------------------------------------- /CMPC_video/train_a2d_new.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | LOG=logs/a2d_sent_new/deeplab_cmpc_video_mm_tgraph_allvec 4 | mkdir -p ${LOG} 5 | now=$(date +"%Y%m%d_%H%M%S") 6 | 7 | python -u trainval_video.py \ 8 | -m train \ 9 | -d a2d_sent_new \ 10 | -t train \ 11 | -n CMPC_video_mm_tgraph_allvec \ 12 | -i 400000 \ 13 | -s 20000 \ 14 | -st 380000 \ 15 | -lrd 400000 \ 16 | -emb \ 17 | -g 2 \ 18 | -f ckpts/a2d_sent_new/deeplab_cmpc_video_mm_tgraph_allvec 2>&1 | tee ${LOG}/train_$now.txt 19 | 20 | python -u trainval_video.py \ 21 | -m test \ 22 | -d a2d_sent_new \ 23 | -t test \ 24 | -n CMPC_video_mm_tgraph_allvec \ 25 | -i 360000 \ 26 | -c \ 27 | -emb \ 28 | -g 2 \ 29 | -f ckpts/a2d_sent_new/deeplab_cmpc_video_mm_tgraph_allvec 2>&1 | tee ${LOG}/test_$now.txt 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 spyflying 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CMPC-Refseg 2 | Code of our CVPR 2020 paper [*Referring Image Segmentation via Cross-Modal Progressive Comprehension*](https://openaccess.thecvf.com/content_CVPR_2020/papers/Huang_Referring_Image_Segmentation_via_Cross-Modal_Progressive_Comprehension_CVPR_2020_paper.pdf). 3 | 4 | Shaofei Huang*, Tianrui Hui*, Si Liu, Guanbin Li, Yunchao Wei, Jizhong Han, Luoqi Liu, Bo Li (* Equal contribution) 5 | 6 | ## Interpretation of CMPC 7 | 8 | * (a) Input referring expression and image. 9 | 10 | * (b) The model first perceives all the entities described in the expression based on entity words and attribute words, e.g., “man” and “white frisbee” (orange masks and blue outline). 11 | 12 | * (c) After finding all the candidate entities that may match the input expression, the relational word “holding” can be further exploited to highlight the entity involved in the relationship (green arrow) and suppress the others that are not involved. 13 | 14 | * (d) Benefiting from the relation-aware reasoning process, the referred entity is found as the final prediction (purple mask). 15 | ![interpretation](motivation.png) 16 | 17 | ## Experimental Results 18 | 19 | We modified the feature concatenation at the end of the CMPC module and achieve higher performance than the results reported in our paper. 20 | The new experimental results are summarized in the table below. 21 | You can download our trained checkpoints to test on the four datasets. The link to the checkpoints is: 22 | [Baidu Drive](https://pan.baidu.com/s/1Vm7JqqCJ6Gl3Rp4P2M-obA), password: jjsf. 23 | 24 | | Method | UNC val | UNC testA | UNC testB | UNC+ val | UNC+ testA | UNC+ testB | G-Ref val | ReferIt test | 25 | | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | 26 | | STEP-ICCV19 \[1\] | 60.04 | 63.46 | 57.97 | 48.19 | 52.33 | 40.41 | 46.40 | 64.13 | 27 | | Ours-CVPR20 | 61.36 | 64.53 | 59.64 | 49.56 | 53.44 | 43.23 | 49.05 | 65.53 | 28 | | Ours-Updated | **62.47** | **65.08** | **60.82** | **50.25** | **54.04** | **43.47** | **49.89** | **65.58** | 29 | 30 | ## Setup 31 | 32 | We recommend the following dependencies. 33 | 34 | * Python 2.7 35 | * TensorFlow 1.5 36 | * Numpy 37 | * pydensecrf 38 | 39 | This code is derived from [RRN](https://github.com/liruiyu/referseg_rrn) \[2\]. Please refer to it for more details of the setup. 40 | 41 | ## Data Preparation 42 | * Dataset Preprocessing 43 | 44 | We conduct experiments on four referring image segmentation datasets: `UNC`, `UNC+`, `Gref` and `ReferIt`.
After downloading these datasets, you can run the following commands for data preparation: 45 | ``` 46 | python build_batches.py -d Gref -t train 47 | python build_batches.py -d Gref -t val 48 | python build_batches.py -d unc -t train 49 | python build_batches.py -d unc -t val 50 | python build_batches.py -d unc -t testA 51 | python build_batches.py -d unc -t testB 52 | python build_batches.py -d unc+ -t train 53 | python build_batches.py -d unc+ -t val 54 | python build_batches.py -d unc+ -t testA 55 | python build_batches.py -d unc+ -t testB 56 | python build_batches.py -d referit -t trainval 57 | python build_batches.py -d referit -t test 58 | ``` 59 | 60 | * GloVe Embedding 61 | 62 | Download `Gref_emb.npy` and `referit_emb.npy` and put them in `data/`. We provide a download link for the GloVe embeddings here: 63 | [Baidu Drive](https://pan.baidu.com/s/19f8CxT3lc_UyjCIIE_74FA), password: 2m28. 64 | 65 | 66 | ## Training 67 | Train on the UNC training set with: 68 | ``` 69 | python -u trainval_model.py -m train -d unc -t train -n CMPC_model -emb -f ckpts/unc/cmpc_model 70 | ``` 71 | 72 | ## Testing 73 | Test on the UNC validation set with: 74 | ``` 75 | python -u trainval_model.py -m test -d unc -t val -n CMPC_model -i 700000 -c -emb -f ckpts/unc/cmpc_model 76 | ``` 77 | 78 | ## CMPC for video referring segmentation 79 | We release the video version of CMPC for the A2D dataset under `CMPC_video/`. 80 | 81 | ## Reference 82 | \[1\] Chen, Ding-Jie, et al. "See-through-text grouping for referring image segmentation." Proceedings of the IEEE International Conference on Computer Vision. 2019. 83 | 84 | \[2\] Li, Ruiyu, et al. "Referring image segmentation via recurrent refinement networks." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2018.
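A quick way to sanity-check the data preparation described above is to load one of the `.npz` batches that `build_batches.py` writes out. This is a minimal sketch (the `unc` path and batch index `0` are only examples), based on the keys saved by `np.savez` in that script:

```python
import numpy as np

# One batch = one (image, mask, sentence) triple saved by build_batches.py.
batch = np.load('./unc/train_batch/unc_train_0.npz')
print(batch['sent_batch'][0])     # raw referring expression
print(batch['text_batch'].shape)  # encoded word ids, truncated/padded to T = 20
print(batch['im_batch'].shape)    # training images are resized/padded to 320x320x3
print(batch['mask_batch'].shape)  # boolean ground-truth mask of the same size
```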
85 | 86 | ## Citation 87 | If our CMPC is useful to your research, please consider citing: 88 | ``` 89 | @inproceedings{huang2020referring, 90 | title={Referring Image Segmentation via Cross-Modal Progressive Comprehension}, 91 | author={Huang, Shaofei and Hui, Tianrui and Liu, Si and Li, Guanbin and Wei, Yunchao and Han, Jizhong and Liu, Luoqi and Li, Bo}, 92 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 93 | pages={10488--10497}, 94 | year={2020} 95 | } 96 | ``` 97 | -------------------------------------------------------------------------------- /build_batches.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./external/coco/PythonAPI') 3 | import os 4 | import argparse 5 | import numpy as np 6 | import json 7 | import skimage 8 | import skimage.io 9 | 10 | from util import im_processing, text_processing 11 | from util.io import load_referit_gt_mask as load_gt_mask 12 | from refer import REFER 13 | from pycocotools import mask as cocomask 14 | 15 | 16 | def build_referit_batches(setname, T, input_H, input_W): 17 | # data directory 18 | im_dir = './data/referit/images/' 19 | mask_dir = './data/referit/mask/' 20 | query_file = './data/referit_query_' + setname + '.json' 21 | vocab_file = './data/vocabulary_referit.txt' 22 | 23 | # saving directory 24 | data_folder = './referit/' + setname + '_batch/' 25 | data_prefix = 'referit_' + setname 26 | if not os.path.isdir(data_folder): 27 | os.makedirs(data_folder) 28 | 29 | # load annotations 30 | query_dict = json.load(open(query_file)) 31 | im_list = query_dict.keys() 32 | vocab_dict = text_processing.load_vocab_dict_from_file(vocab_file) 33 | 34 | # collect training samples 35 | samples = [] 36 | for n_im, name in enumerate(im_list): 37 | im_name = name.split('_', 1)[0] + '.jpg' 38 | mask_name = name + '.mat' 39 | for sent in query_dict[name]: 40 | samples.append((im_name, mask_name, sent)) 41 | 42 | # save batches to disk 43 | num_batch = len(samples) 44 | for n_batch in range(num_batch): 45 | print('saving batch %d / %d' % (n_batch + 1, num_batch)) 46 | im_name, mask_name, sent = samples[n_batch] 47 | im = skimage.io.imread(im_dir + im_name) 48 | mask = load_gt_mask(mask_dir + mask_name).astype(np.float32) 49 | 50 | if 'train' in setname: 51 | im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, input_H, input_W)) 52 | mask = im_processing.resize_and_pad(mask, input_H, input_W) 53 | if im.ndim == 2: 54 | im = np.tile(im[:, :, np.newaxis], (1, 1, 3)) 55 | 56 | text = text_processing.preprocess_sentence(sent, vocab_dict, T) 57 | 58 | np.savez(file = data_folder + data_prefix + '_' + str(n_batch) + '.npz', 59 | text_batch = text, 60 | im_batch = im, 61 | mask_batch = (mask > 0), 62 | sent_batch = [sent]) 63 | 64 | 65 | def build_coco_batches(dataset, setname, T, input_H, input_W): 66 | im_dir = './data/coco/images' 67 | im_type = 'train2014' 68 | vocab_file = './data/vocabulary_Gref.txt' 69 | 70 | data_folder = './' + dataset + '/' + setname + '_batch/' 71 | data_prefix = dataset + '_' + setname 72 | if not os.path.isdir(data_folder): 73 | os.makedirs(data_folder) 74 | 75 | if dataset == 'Gref': 76 | refer = REFER('./external/refer/data', dataset = 'refcocog', splitBy = 'google') 77 | elif dataset == 'unc': 78 | refer = REFER('./external/refer/data', dataset = 'refcoco', splitBy = 'unc') 79 | elif dataset == 'unc+': 80 | refer = REFER('./external/refer/data', dataset = 'refcoco+', splitBy = 'unc') 81 | else: 82 | 
raise ValueError('Unknown dataset %s' % dataset) 83 | refs = [refer.Refs[ref_id] for ref_id in refer.Refs if refer.Refs[ref_id]['split'] == setname] 84 | vocab_dict = text_processing.load_vocab_dict_from_file(vocab_file) 85 | 86 | n_batch = 0 87 | for ref in refs: 88 | im_name = 'COCO_' + im_type + '_' + str(ref['image_id']).zfill(12) 89 | im = skimage.io.imread('%s/%s/%s.jpg' % (im_dir, im_type, im_name)) 90 | seg = refer.Anns[ref['ann_id']]['segmentation'] 91 | rle = cocomask.frPyObjects(seg, im.shape[0], im.shape[1]) 92 | mask = np.max(cocomask.decode(rle), axis = 2).astype(np.float32) 93 | 94 | if 'train' in setname: 95 | im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, input_H, input_W)) 96 | mask = im_processing.resize_and_pad(mask, input_H, input_W) 97 | if im.ndim == 2: 98 | im = np.tile(im[:, :, np.newaxis], (1, 1, 3)) 99 | 100 | for sentence in ref['sentences']: 101 | print('saving batch %d' % (n_batch + 1)) 102 | sent = sentence['sent'] 103 | text = text_processing.preprocess_sentence(sent, vocab_dict, T) 104 | 105 | np.savez(file = data_folder + data_prefix + '_' + str(n_batch) + '.npz', 106 | text_batch = text, 107 | im_batch = im, 108 | mask_batch = (mask > 0), 109 | sent_batch = [sent]) 110 | n_batch += 1 111 | 112 | 113 | if __name__ == "__main__": 114 | parser = argparse.ArgumentParser() 115 | parser.add_argument('-d', type = str, default = 'referit') # 'unc', 'unc+', 'Gref' 116 | parser.add_argument('-t', type = str, default = 'trainval') # 'test', val', 'testA', 'testB' 117 | 118 | args = parser.parse_args() 119 | T = 20 120 | input_H = 320 121 | input_W = 320 122 | if args.d == 'referit': 123 | build_referit_batches(setname = args.t, 124 | T = T, input_H = input_H, input_W = input_W) 125 | else: 126 | build_coco_batches(dataset = args.d, setname = args.t, 127 | T = T, input_H = input_H, input_W = input_W) -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Vladimir Nekrasov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/README.md: -------------------------------------------------------------------------------- 1 | # DeepLab-ResNet-TensorFlow 2 | 3 | [![Build Status](https://travis-ci.org/DrSleep/tensorflow-deeplab-resnet.svg?branch=master)](https://travis-ci.org/DrSleep/tensorflow-deeplab-resnet) 4 | 5 | This is a (re-)implementation of [DeepLab-ResNet](http://liangchiehchen.com/projects/DeepLabv2_resnet.html) in TensorFlow for semantic image segmentation on the [PASCAL VOC dataset](http://host.robots.ox.ac.uk/pascal/VOC/). 6 | 7 | ## Updates 8 | 9 | **29 Jan, 2017**: 10 | * Fixed the implementation of the batch normalisation layer: it now supports both the training and inference steps. If the flag `--is-training` is provided, the running means and variances will be updated; otherwise, they will be kept intact. The `.ckpt` files have been updated accordingly - to download please refer to the new link provided below. 11 | * Image summaries during the training process can now be seen using TensorBoard. 12 | * Fixed the evaluation procedure: the 'void' label (255) is now correctly ignored. As a result, the performance score on the validation set has increased to 80.1%. 13 | 14 | **11 Feb, 2017**: 15 | * The training script `train.py` has been re-written following the original optimisation setup: SGD with momentum, weight decay, learning rate with polynomial decay, different learning rates for different layers, ignoring the 'void' label (255). 16 | * The training script with multi-scale inputs `train_msc.py` has been added: the input is resized to 0.5 and 0.75 of the original resolution, and 4 losses are aggregated: loss on the original resolution, on the 0.75 resolution, on the 0.5 resolution, and loss on all the fused outputs. 17 | * Evaluation of a single-scale converted pre-trained model on the PASCAL VOC validation dataset (using ['SegmentationClassAug'](https://www.dropbox.com/s/oeu149j8qtbs1x0/SegmentationClassAug.zip?dl=0)) leads to 86.9% mIoU. This is confirmed by [the official PASCAL VOC server](http://host.robots.ox.ac.uk/anonymous/FIQPRH.html). The score on the test dataset is [75.8%](http://host.robots.ox.ac.uk/anonymous/EPBIGU.html). 18 | 19 | **22 Feb, 2017**: 20 | * The training script with multi-scale inputs `train_msc.py` now supports gradient accumulation: the relevant parameter `--grad-update-every` effectively mimics the behaviour of `iter_size` of Caffe. This allows using larger batch sizes while consuming less GPU memory. (Thanks to @arslan-chaudhry for this contribution!) 21 | * The random mirror and random crop options have been added. (Again big thanks to @arslan-chaudhry !) 22 | 23 | **23 Apr, 2017**: 24 | * TensorFlow 1.1.0 is now supported. 25 | * Three new flags `--num-classes`, `--ignore-label` and `--not-restore-last` are added to ease the usability of the scripts on new datasets. Check out [these instructions](https://github.com/DrSleep/tensorflow-deeplab-resnet#using-your-dataset) on how to set up the training process on your dataset. 26 | 27 | ## Model Description 28 | 29 | DeepLab-ResNet is built on a fully convolutional variant of [ResNet-101](https://github.com/KaimingHe/deep-residual-networks) with [atrous (dilated) convolutions](https://github.com/fyu/dilation), atrous spatial pyramid pooling, and multi-scale inputs (not implemented here).
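Since the scripts in this directory target TensorFlow 1.x, the atrous convolution mentioned above maps directly onto `tf.nn.atrous_conv2d`. Below is a minimal illustrative sketch (the tensor shapes and variable names are made up, not taken from the model code); the actual ASPP head sums several such branches computed at different rates and exposes the result as the `fc1_voc12` layer used throughout these scripts:

```python
import tensorflow as tf

# Hypothetical feature map from the ResNet trunk (output stride 8).
features = tf.placeholder(tf.float32, [1, 41, 41, 2048])
weights = tf.get_variable('w_aspp', [3, 3, 2048, 21])  # 3x3 kernel, 21 classes

# Atrous (dilated) convolution: rate=12 spreads the 3x3 taps 12 pixels apart,
# enlarging the receptive field without reducing spatial resolution.
branch = tf.nn.atrous_conv2d(features, weights, rate=12, padding='SAME')
```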
30 | 31 | The model is trained on a mini-batch of images and corresponding ground truth masks with the softmax classifier at the top. During training, the masks are downsampled to match the size of the output from the network; during inference, to acquire the output of the same size as the input, bilinear upsampling is applied. The final segmentation mask is computed using argmax over the logits. 32 | Optionally, a fully-connected probabilistic graphical model, namely, CRF, can be applied to refine the final predictions. 33 | On the test set of PASCAL VOC, the model achieves 79.7% mean intersection-over-union. 34 | 35 | For more details on the underlying model please refer to the following paper: 36 | 37 | 38 | @article{CP2016Deeplab, 39 | title={DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs}, 40 | author={Liang-Chieh Chen and George Papandreou and Iasonas Kokkinos and Kevin Murphy and Alan L Yuille}, 41 | journal={arXiv:1606.00915}, 42 | year={2016} 43 | } 44 | 45 | 46 | 47 | ## Requirements 48 | 49 | TensorFlow needs to be installed before running the scripts. 50 | TensorFlow v1.1.0 is supported; for TensorFlow v0.12 please refer to this [branch](https://github.com/DrSleep/tensorflow-deeplab-resnet/tree/tf-0.12); for TensorFlow v0.11 please refer to this [branch](https://github.com/DrSleep/tensorflow-deeplab-resnet/tree/tf-0.11). Note that those branches may not have the same functionality as the current master. 51 | 52 | To install the required Python packages (except TensorFlow), run 53 | ```bash 54 | pip install -r requirements.txt 55 | ``` 56 | or for a local installation 57 | ```bash 58 | pip install --user -r requirements.txt 59 | ``` 60 | 61 | ## Caffe to TensorFlow conversion 62 | 63 | To imitate the structure of the model, we have used `.caffemodel` files provided by the [authors](http://liangchiehchen.com/projects/DeepLabv2_resnet.html). The conversion has been performed using [Caffe to TensorFlow](https://github.com/ethereon/caffe-tensorflow) with an additional configuration for atrous convolution and batch normalisation (since the batch normalisation provided by Caffe-tensorflow only supports inference). 64 | There is no need to perform the conversion yourself as you can download the already converted models - `deeplab_resnet.ckpt` (pre-trained) and `deeplab_resnet_init.ckpt` (the last layers are randomly initialised) - [here](https://drive.google.com/open?id=0B_rootXHuswsZ0E4Mjh1ZU5xZVU). 65 | 66 | Nevertheless, it is easy to perform the conversion manually, given that the appropriate `.caffemodel` file has been downloaded, and [Caffe to TensorFlow](https://github.com/ethereon/caffe-tensorflow) dependencies have been installed. The Caffe model definition is provided in `misc/deploy.prototxt`. 67 | To extract weights from `.caffemodel`, run the following: 68 | ```bash 69 | python convert.py /path/to/deploy/prototxt --caffemodel /path/to/caffemodel --data-output-path /where/to/save/numpy/weights 70 | ``` 71 | As a result of running the command above, the model weights will be stored in `/where/to/save/numpy/weights`. To convert them to the native TensorFlow format (`.ckpt`), simply execute: 72 | ```bash 73 | python npy2ckpt.py /where/to/save/numpy/weights --save-dir=/where/to/save/ckpt/weights 74 | ``` 75 | 76 | ## Dataset and Training 77 | 78 | To train the network, one can use the augmented PASCAL VOC 2012 dataset with 10582 images for training and 1449 images for validation.
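The training and validation splits are provided as plain text lists (`dataset/train.txt`, `dataset/val.txt`), with one `image mask` path pair per line that is joined to `--data-dir` by `read_labeled_image_list` in `deeplab_resnet/image_reader.py`. For example, `dataset/debug.txt` contains a single pair:

```
misc/2007_000129.jpg misc/2007_000129.png
```

The two paths are separated by a single space; as a fallback for test-style lists, a line containing only an image path is accepted and the mask path is set equal to the image path.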
79 | 80 | The training script allows monitoring the progress of the optimisation process using TensorBoard's image summary. Besides that, one can also exploit random scaling and mirroring of the inputs during training as a means for data augmentation. For example, to train the model from scratch with random scale and mirroring turned on, simply run: 81 | ```bash 82 | python train.py --random-mirror --random-scale 83 | ``` 84 | 85 | 86 | 87 | To see the documentation on each of the training settings run the following: 88 | 89 | ```bash 90 | python train.py --help 91 | ``` 92 | 93 | An additional script, `fine_tune.py`, demonstrates how to train only the last layers of the network. The script `train_msc.py` with multi-scale inputs fully resembles the training setup of the original model. 94 | 95 | 96 | ## Evaluation 97 | 98 | The single-scale model shows 86.9% mIoU on the Pascal VOC 2012 validation dataset (['SegmentationClassAug'](https://www.dropbox.com/s/oeu149j8qtbs1x0/SegmentationClassAug.zip?dl=0)). No post-processing step with CRF is applied. 99 | 100 | The following command provides the description of each of the evaluation settings: 101 | ```bash 102 | python evaluate.py --help 103 | ``` 104 | 105 | ## Inference 106 | 107 | To perform inference over your own images, use the following command: 108 | ```bash 109 | python inference.py /path/to/your/image /path/to/ckpt/file 110 | ``` 111 | This will run the forward pass and save the resulting mask with this colour map: 112 | 113 | 114 | 115 | ## Using your dataset 116 | 117 | In order to apply the same scripts to your own dataset, you would need to follow these steps: 118 | 119 | 0. Make sure that your segmentation masks are in the same format as the ones in the DeepLab setup (i.e., without a colour map). This means that if your segmentation masks are RGB images, you would need to convert each 3-D RGB vector into a 1-D label. For example, take a look [here](https://gist.github.com/DrSleep/4bce37254c5900545e6b65f6a0858b9c); 120 | 1. Create a file with instances of your dataset in the same format as in files [here](https://github.com/DrSleep/tensorflow-deeplab-resnet/tree/master/dataset); 121 | 2. Change the flags `data-dir` and `data-list` accordingly in the script file that you will be using (e.g., `python train.py --data-dir /my/data/dir --data-list /my/data/list`); 122 | 3. Change the `IMG_MEAN` vector accordingly in the script file that you will be using; 123 | 4. For visualisation purposes, you will also need to change the colour map [here](https://github.com/DrSleep/tensorflow-deeplab-resnet/blob/master/deeplab_resnet/utils.py); 124 | 5. Change the flags `num-classes` and `ignore-label` accordingly in the script that you will be using (e.g., `python train.py --ignore-label 255 --num-classes 21`). 125 | 6. If restoring weights from the `PASCAL` models for your dataset with a different number of classes, you will also need to pass the `--not-restore-last` flag, which will prevent the last layers of size 21 from being restored. 126 | 127 | 128 | ## Missing features 129 | 130 | The post-processing step with CRF is currently being implemented [here](https://github.com/DrSleep/tensorflow-deeplab-resnet/tree/crf).
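While the CRF branch is not part of this snapshot, the post-processing it refers to can be sketched with the `pydensecrf` package (already listed as a dependency in the top-level README). The function below is a minimal sketch, assuming `probs` is a `(num_classes, H, W)` float32 softmax map and `image` the matching RGB `uint8` frame; the pairwise parameters are common DeepLab-style defaults, not values taken from this code base:

```python
import numpy as np
import pydensecrf.densecrf as dcrf
from pydensecrf.utils import unary_from_softmax

def crf_refine(image, probs, num_classes=21, iters=10):
    """Refine per-pixel softmax probabilities with a fully-connected CRF."""
    h, w = image.shape[:2]
    d = dcrf.DenseCRF2D(w, h, num_classes)
    d.setUnaryEnergy(unary_from_softmax(probs))  # unary = -log(p), shape (C, H*W)
    # Smoothness kernel: nearby pixels tend to share a label.
    d.addPairwiseGaussian(sxy=3, compat=3)
    # Appearance kernel: pixels with similar colour tend to share a label.
    d.addPairwiseBilateral(sxy=80, srgb=13, rgbim=np.ascontiguousarray(image), compat=10)
    q = d.inference(iters)
    return np.argmax(q, axis=0).reshape(h, w)  # refined hard label map
```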
131 | 132 | 133 | ## Other implementations 134 | * [DeepLab-LargeFOV in TensorFlow](https://github.com/DrSleep/tensorflow-deeplab-lfov) 135 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/convert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This script belongs to https://github.com/ethereon/caffe-tensorflow 4 | import os 5 | import sys 6 | import numpy as np 7 | import argparse 8 | from kaffe import KaffeError, print_stderr 9 | from kaffe.tensorflow import TensorFlowTransformer 10 | 11 | 12 | def fatal_error(msg): 13 | print_stderr(msg) 14 | exit(-1) 15 | 16 | 17 | def validate_arguments(args): 18 | if (args.data_output_path is not None) and (args.caffemodel is None): 19 | fatal_error('No input data path provided.') 20 | if (args.caffemodel is not None) and (args.data_output_path is None): 21 | fatal_error('No output data path provided.') 22 | if (args.code_output_path is None) and (args.data_output_path is None): 23 | fatal_error('No output path specified.') 24 | 25 | 26 | def convert(def_path, caffemodel_path, data_output_path, code_output_path, phase): 27 | try: 28 | transformer = TensorFlowTransformer(def_path, caffemodel_path, phase=phase) 29 | print_stderr('Converting data...') 30 | if caffemodel_path is not None: 31 | data = transformer.transform_data() 32 | print_stderr('Saving data...') 33 | with open(data_output_path, 'wb') as data_out: 34 | np.save(data_out, data) 35 | if code_output_path: 36 | print_stderr('Saving source...') 37 | with open(code_output_path, 'wb') as src_out: 38 | src_out.write(transformer.transform_source()) 39 | print_stderr('Done.') 40 | except KaffeError as err: 41 | fatal_error('Error encountered: {}'.format(err)) 42 | 43 | 44 | def main(): 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument('def_path', help='Model definition (.prototxt) path') 47 | parser.add_argument('--caffemodel', help='Model data (.caffemodel) path') 48 | parser.add_argument('--data-output-path', help='Converted data output path') 49 | parser.add_argument('--code-output-path', help='Save generated source to this path') 50 | parser.add_argument('-p', 51 | '--phase', 52 | default='test', 53 | help='The phase to convert: test (default) or train') 54 | args = parser.parse_args() 55 | validate_arguments(args) 56 | convert(args.def_path, args.caffemodel, args.data_output_path, args.code_output_path, 57 | args.phase) 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/dataset/debug.txt: -------------------------------------------------------------------------------- 1 | misc/2007_000129.jpg misc/2007_000129.png 2 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/deeplab_resnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import DeepLabResNetModel 2 | from .image_reader import ImageReader 3 | from .utils import decode_labels, inv_preprocess, prepare_label 4 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/deeplab_resnet/image_reader.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | def image_scaling(img, 
label): 7 | """ 8 | Randomly scales the images between 0.5 to 1.5 times the original size. 9 | 10 | Args: 11 | img: Training image to scale. 12 | label: Segmentation mask to scale. 13 | """ 14 | 15 | scale = tf.random_uniform([1], minval=0.5, maxval=1.5, dtype=tf.float32, seed=None) 16 | h_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[0]), scale)) 17 | w_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[1]), scale)) 18 | new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1]) 19 | img = tf.image.resize_images(img, new_shape) 20 | label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape) 21 | label = tf.squeeze(label, squeeze_dims=[0]) 22 | 23 | return img, label 24 | 25 | def image_mirroring(img, label): 26 | """ 27 | Randomly mirrors the images. 28 | 29 | Args: 30 | img: Training image to mirror. 31 | label: Segmentation mask to mirror. 32 | """ 33 | 34 | distort_left_right_random = tf.random_uniform([1], 0, 1.0, dtype=tf.float32)[0] 35 | mirror = tf.less(tf.stack([1.0, distort_left_right_random, 1.0]), 0.5) 36 | mirror = tf.boolean_mask([0, 1, 2], mirror) 37 | img = tf.reverse(img, mirror) 38 | label = tf.reverse(label, mirror) 39 | return img, label 40 | 41 | def random_crop_and_pad_image_and_labels(image, label, crop_h, crop_w, ignore_label=255): 42 | """ 43 | Randomly crop and pads the input images. 44 | 45 | Args: 46 | image: Training image to crop/ pad. 47 | label: Segmentation mask to crop/ pad. 48 | crop_h: Height of cropped segment. 49 | crop_w: Width of cropped segment. 50 | ignore_label: Label to ignore during the training. 51 | """ 52 | 53 | label = tf.cast(label, dtype=tf.float32) 54 | label = label - ignore_label # Needs to be subtracted and later added due to 0 padding. 55 | combined = tf.concat(axis=2, values=[image, label]) 56 | image_shape = tf.shape(image) 57 | combined_pad = tf.image.pad_to_bounding_box(combined, 0, 0, tf.maximum(crop_h, image_shape[0]), tf.maximum(crop_w, image_shape[1])) 58 | 59 | last_image_dim = tf.shape(image)[-1] 60 | last_label_dim = tf.shape(label)[-1] 61 | combined_crop = tf.random_crop(combined_pad, [crop_h,crop_w,4]) 62 | img_crop = combined_crop[:, :, :last_image_dim] 63 | label_crop = combined_crop[:, :, last_image_dim:] 64 | label_crop = label_crop + ignore_label 65 | label_crop = tf.cast(label_crop, dtype=tf.uint8) 66 | 67 | # Set static shape so that tensorflow knows shape at compile time. 68 | img_crop.set_shape((crop_h, crop_w, 3)) 69 | label_crop.set_shape((crop_h,crop_w, 1)) 70 | return img_crop, label_crop 71 | 72 | def read_labeled_image_list(data_dir, data_list): 73 | """Reads txt file containing paths to images and ground truth masks. 74 | 75 | Args: 76 | data_dir: path to the directory with images and masks. 77 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'. 78 | 79 | Returns: 80 | Two lists with all file names for images and masks, respectively. 81 | """ 82 | f = open(data_list, 'r') 83 | images = [] 84 | masks = [] 85 | for line in f: 86 | try: 87 | image, mask = line.strip("\n").split(' ') 88 | except ValueError: # Adhoc for test. 89 | image = mask = line.strip("\n") 90 | images.append(data_dir + image) 91 | masks.append(data_dir + mask) 92 | return images, masks 93 | 94 | def read_images_from_disk(input_queue, input_size, random_scale, random_mirror, ignore_label, img_mean): # optional pre-processing arguments 95 | """Read one image and its corresponding mask with optional pre-processing. 
96 | 97 | Args: 98 | input_queue: tf queue with paths to the image and its mask. 99 | input_size: a tuple with (height, width) values. 100 | If not given, return images of original size. 101 | random_scale: whether to randomly scale the images prior 102 | to random crop. 103 | random_mirror: whether to randomly mirror the images prior 104 | to random crop. 105 | ignore_label: index of label to ignore during the training. 106 | img_mean: vector of mean colour values. 107 | 108 | Returns: 109 | Two tensors: the decoded image and its mask. 110 | """ 111 | 112 | img_contents = tf.read_file(input_queue[0]) 113 | label_contents = tf.read_file(input_queue[1]) 114 | 115 | img = tf.image.decode_jpeg(img_contents, channels=3) 116 | img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img) 117 | img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]), dtype=tf.float32) 118 | # Extract mean. 119 | img -= img_mean 120 | 121 | label = tf.image.decode_png(label_contents, channels=1) 122 | 123 | if input_size is not None: 124 | h, w = input_size 125 | 126 | # Randomly scale the images and labels. 127 | if random_scale: 128 | img, label = image_scaling(img, label) 129 | 130 | # Randomly mirror the images and labels. 131 | if random_mirror: 132 | img, label = image_mirroring(img, label) 133 | 134 | # Randomly crops the images and labels. 135 | img, label = random_crop_and_pad_image_and_labels(img, label, h, w, ignore_label) 136 | 137 | return img, label 138 | 139 | class ImageReader(object): 140 | '''Generic ImageReader which reads images and corresponding segmentation 141 | masks from the disk, and enqueues them into a TensorFlow queue. 142 | ''' 143 | 144 | def __init__(self, data_dir, data_list, input_size, 145 | random_scale, random_mirror, ignore_label, img_mean, coord): 146 | '''Initialise an ImageReader. 147 | 148 | Args: 149 | data_dir: path to the directory with images and masks. 150 | data_list: path to the file with lines of the form '/path/to/image /path/to/mask'. 151 | input_size: a tuple with (height, width) values, to which all the images will be resized. 152 | random_scale: whether to randomly scale the images prior to random crop. 153 | random_mirror: whether to randomly mirror the images prior to random crop. 154 | ignore_label: index of label to ignore during the training. 155 | img_mean: vector of mean colour values. 156 | coord: TensorFlow queue coordinator. 157 | ''' 158 | self.data_dir = data_dir 159 | self.data_list = data_list 160 | self.input_size = input_size 161 | self.coord = coord 162 | 163 | self.image_list, self.label_list = read_labeled_image_list(self.data_dir, self.data_list) 164 | self.images = tf.convert_to_tensor(self.image_list, dtype=tf.string) 165 | self.labels = tf.convert_to_tensor(self.label_list, dtype=tf.string) 166 | self.queue = tf.train.slice_input_producer([self.images, self.labels], 167 | shuffle=input_size is not None) # not shuffling if it is val 168 | self.image, self.label = read_images_from_disk(self.queue, self.input_size, random_scale, random_mirror, ignore_label, img_mean) 169 | 170 | def dequeue(self, num_elements): 171 | '''Pack images and labels into a batch. 172 | 173 | Args: 174 | num_elements: the batch size. 
175 | 176 | Returns: 177 | Two tensors of size (batch_size, h, w, {3, 1}) for images and masks.''' 178 | image_batch, label_batch = tf.train.batch([self.image, self.label], 179 | num_elements) 180 | return image_batch, label_batch 181 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/deeplab_resnet/utils.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | # colour map 6 | label_colours = [(0,0,0) 7 | # 0=background 8 | ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128) 9 | # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle 10 | ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0) 11 | # 6=bus, 7=car, 8=cat, 9=chair, 10=cow 12 | ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128) 13 | # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person 14 | ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)] 15 | # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor 16 | 17 | def decode_labels(mask, num_images=1, num_classes=21): 18 | """Decode batch of segmentation masks. 19 | 20 | Args: 21 | mask: result of inference after taking argmax. 22 | num_images: number of images to decode from the batch. 23 | num_classes: number of classes to predict (including background). 24 | 25 | Returns: 26 | A batch with num_images RGB images of the same size as the input. 27 | """ 28 | n, h, w, c = mask.shape 29 | assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' % (n, num_images) 30 | outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8) 31 | for i in range(num_images): 32 | img = Image.new('RGB', (len(mask[i, 0]), len(mask[i]))) 33 | pixels = img.load() 34 | for j_, j in enumerate(mask[i, :, :, 0]): 35 | for k_, k in enumerate(j): 36 | if k < num_classes: 37 | pixels[k_,j_] = label_colours[k] 38 | outputs[i] = np.array(img) 39 | return outputs 40 | 41 | def prepare_label(input_batch, new_size, num_classes, one_hot=True): 42 | """Resize masks and perform one-hot encoding. 43 | 44 | Args: 45 | input_batch: input tensor of shape [batch_size H W 1]. 46 | new_size: a tensor with new height and width. 47 | num_classes: number of classes to predict (including background). 48 | one_hot: whether perform one-hot encoding. 49 | 50 | Returns: 51 | Outputs a tensor of shape [batch_size h w 21] 52 | with last dimension comprised of 0's and 1's only. 53 | """ 54 | with tf.name_scope('label_encode'): 55 | input_batch = tf.image.resize_nearest_neighbor(input_batch, new_size) # as labels are integer numbers, need to use NN interp. 56 | input_batch = tf.squeeze(input_batch, squeeze_dims=[3]) # reducing the channel dimension. 57 | if one_hot: 58 | input_batch = tf.one_hot(input_batch, depth=num_classes) 59 | return input_batch 60 | 61 | def inv_preprocess(imgs, num_images, img_mean): 62 | """Inverse preprocessing of the batch of images. 63 | Add the mean vector and convert from BGR to RGB. 64 | 65 | Args: 66 | imgs: batch of input images. 67 | num_images: number of images to apply the inverse transformations on. 68 | img_mean: vector of mean colour values. 69 | 70 | Returns: 71 | The batch of the size num_images with the same spatial dimensions as the input. 72 | """ 73 | n, h, w, c = imgs.shape 74 | assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' 
% (n, num_images) 75 | outputs = np.zeros((num_images, h, w, c), dtype=np.uint8) 76 | for i in range(num_images): 77 | outputs[i] = (imgs[i] + img_mean)[:, :, ::-1].astype(np.uint8) 78 | return outputs 79 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/evaluate.py: -------------------------------------------------------------------------------- 1 | """Evaluation script for the DeepLab-ResNet network on the validation subset 2 | of PASCAL VOC dataset. 3 | 4 | This script evaluates the model on 1449 validation images. 5 | """ 6 | 7 | from __future__ import print_function 8 | 9 | import argparse 10 | from datetime import datetime 11 | import os 12 | import sys 13 | import time 14 | 15 | import tensorflow as tf 16 | import numpy as np 17 | 18 | from deeplab_resnet import DeepLabResNetModel, ImageReader, prepare_label 19 | 20 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 21 | 22 | DATA_DIRECTORY = '/home/VOCdevkit' 23 | DATA_LIST_PATH = './dataset/val.txt' 24 | IGNORE_LABEL = 255 25 | NUM_CLASSES = 21 26 | NUM_STEPS = 1449 # Number of images in the validation set. 27 | RESTORE_FROM = './deeplab_resnet.ckpt' 28 | 29 | def get_arguments(): 30 | """Parse all the arguments provided from the CLI. 31 | 32 | Returns: 33 | A list of parsed arguments. 34 | """ 35 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network") 36 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY, 37 | help="Path to the directory containing the PASCAL VOC dataset.") 38 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH, 39 | help="Path to the file listing the images in the dataset.") 40 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL, 41 | help="The index of the label to ignore during the training.") 42 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES, 43 | help="Number of classes to predict (including background).") 44 | parser.add_argument("--num-steps", type=int, default=NUM_STEPS, 45 | help="Number of images in the validation set.") 46 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM, 47 | help="Where restore model parameters from.") 48 | return parser.parse_args() 49 | 50 | def load(saver, sess, ckpt_path): 51 | '''Load trained weights. 52 | 53 | Args: 54 | saver: TensorFlow saver object. 55 | sess: TensorFlow session. 56 | ckpt_path: path to checkpoint file with parameters. 57 | ''' 58 | saver.restore(sess, ckpt_path) 59 | print("Restored model parameters from {}".format(ckpt_path)) 60 | 61 | def main(): 62 | """Create the model and start the evaluation process.""" 63 | args = get_arguments() 64 | 65 | # Create queue coordinator. 66 | coord = tf.train.Coordinator() 67 | 68 | # Load reader. 69 | with tf.name_scope("create_inputs"): 70 | reader = ImageReader( 71 | args.data_dir, 72 | args.data_list, 73 | None, # No defined input size. 74 | False, # No random scale. 75 | False, # No random mirror. 76 | args.ignore_label, 77 | IMG_MEAN, 78 | coord) 79 | image, label = reader.image, reader.label 80 | image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension. 81 | 82 | # Create network. 83 | net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) 84 | 85 | # Which variables to load. 86 | restore_var = tf.global_variables() 87 | 88 | # Predictions. 
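# Note: the `fc1_voc12` logits come out at roughly 1/8 of the input resolution,
# so they are first upsampled to the image size with bilinear interpolation and
# only then reduced to a hard label map with argmax; taking the argmax before
# upsampling would yield blocky masks.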
89 | raw_output = net.layers['fc1_voc12'] 90 | raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,]) 91 | raw_output = tf.argmax(raw_output, dimension=3) 92 | pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor. 93 | 94 | # mIoU 95 | pred = tf.reshape(pred, [-1,]) 96 | gt = tf.reshape(label_batch, [-1,]) 97 | weights = tf.cast(tf.less_equal(gt, args.num_classes - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes. 98 | mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=args.num_classes, weights=weights) 99 | 100 | # Set up tf session and initialize variables. 101 | config = tf.ConfigProto() 102 | config.gpu_options.allow_growth = True 103 | sess = tf.Session(config=config) 104 | init = tf.global_variables_initializer() 105 | 106 | sess.run(init) 107 | sess.run(tf.local_variables_initializer()) 108 | 109 | # Load weights. 110 | loader = tf.train.Saver(var_list=restore_var) 111 | if args.restore_from is not None: 112 | load(loader, sess, args.restore_from) 113 | 114 | # Start queue threads. 115 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 116 | 117 | # Iterate over training steps. 118 | for step in range(args.num_steps): 119 | preds, _ = sess.run([pred, update_op]) 120 | if step % 100 == 0: 121 | print('step {:d}'.format(step)) 122 | print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess))) 123 | coord.request_stop() 124 | coord.join(threads) 125 | 126 | if __name__ == '__main__': 127 | main() 128 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/evaluate_msc.py: -------------------------------------------------------------------------------- 1 | """Evaluation script for the DeepLab-ResNet network on the validation subset 2 | of PASCAL VOC dataset. 3 | 4 | This script evaluates the model on 1449 validation images. 5 | """ 6 | 7 | from __future__ import print_function 8 | 9 | import argparse 10 | from datetime import datetime 11 | import os 12 | import sys 13 | import time 14 | 15 | import tensorflow as tf 16 | import numpy as np 17 | 18 | from deeplab_resnet import DeepLabResNetModel, ImageReader, prepare_label 19 | 20 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 21 | 22 | DATA_DIRECTORY = '/home/VOCdevkit' 23 | DATA_LIST_PATH = './dataset/val.txt' 24 | IGNORE_LABEL = 255 25 | NUM_CLASSES = 21 26 | NUM_STEPS = 1449 # Number of images in the validation set. 27 | RESTORE_FROM = './deeplab_resnet.ckpt' 28 | 29 | def get_arguments(): 30 | """Parse all the arguments provided from the CLI. 31 | 32 | Returns: 33 | A list of parsed arguments. 
34 | """ 35 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network") 36 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY, 37 | help="Path to the directory containing the PASCAL VOC dataset.") 38 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH, 39 | help="Path to the file listing the images in the dataset.") 40 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL, 41 | help="The index of the label to ignore during the training.") 42 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES, 43 | help="Number of classes to predict (including background).") 44 | parser.add_argument("--num-steps", type=int, default=NUM_STEPS, 45 | help="Number of images in the validation set.") 46 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM, 47 | help="Where restore model parameters from.") 48 | return parser.parse_args() 49 | 50 | def load(saver, sess, ckpt_path): 51 | '''Load trained weights. 52 | 53 | Args: 54 | saver: TensorFlow saver object. 55 | sess: TensorFlow session. 56 | ckpt_path: path to checkpoint file with parameters. 57 | ''' 58 | saver.restore(sess, ckpt_path) 59 | print("Restored model parameters from {}".format(ckpt_path)) 60 | 61 | def main(): 62 | """Create the model and start the evaluation process.""" 63 | args = get_arguments() 64 | 65 | # Create queue coordinator. 66 | coord = tf.train.Coordinator() 67 | 68 | # Load reader. 69 | with tf.name_scope("create_inputs"): 70 | reader = ImageReader( 71 | args.data_dir, 72 | args.data_list, 73 | None, # No defined input size. 74 | False, # No random scale. 75 | False, # No random mirror. 76 | args.ignore_label, 77 | IMG_MEAN, 78 | coord) 79 | image, label = reader.image, reader.label 80 | 81 | image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension. 82 | h_orig, w_orig = tf.to_float(tf.shape(image_batch)[1]), tf.to_float(tf.shape(image_batch)[2]) 83 | image_batch075 = tf.image.resize_images(image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.75)), tf.to_int32(tf.multiply(w_orig, 0.75))])) 84 | image_batch05 = tf.image.resize_images(image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.5)), tf.to_int32(tf.multiply(w_orig, 0.5))])) 85 | 86 | # Create network. 87 | with tf.variable_scope('', reuse=False): 88 | net = DeepLabResNetModel({'data': image_batch}, is_training=False, num_classes=args.num_classes) 89 | with tf.variable_scope('', reuse=True): 90 | net075 = DeepLabResNetModel({'data': image_batch075}, is_training=False, num_classes=args.num_classes) 91 | with tf.variable_scope('', reuse=True): 92 | net05 = DeepLabResNetModel({'data': image_batch05}, is_training=False, num_classes=args.num_classes) 93 | 94 | # Which variables to load. 95 | restore_var = tf.global_variables() 96 | 97 | # Predictions. 98 | raw_output100 = net.layers['fc1_voc12'] 99 | raw_output075 = tf.image.resize_images(net075.layers['fc1_voc12'], tf.shape(raw_output100)[1:3,]) 100 | raw_output05 = tf.image.resize_images(net05.layers['fc1_voc12'], tf.shape(raw_output100)[1:3,]) 101 | 102 | raw_output = tf.reduce_max(tf.stack([raw_output100, raw_output075, raw_output05]), axis=0) 103 | raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,]) 104 | raw_output = tf.argmax(raw_output, dimension=3) 105 | pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor. 
106 | 107 | # mIoU 108 | pred = tf.reshape(pred, [-1,]) 109 | gt = tf.reshape(label_batch, [-1,]) 110 | weights = tf.cast(tf.less_equal(gt, args.num_classes - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes. 111 | mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=args.num_classes, weights=weights) 112 | 113 | # Set up tf session and initialize variables. 114 | config = tf.ConfigProto() 115 | config.gpu_options.allow_growth = True 116 | sess = tf.Session(config=config) 117 | init = tf.global_variables_initializer() 118 | 119 | sess.run(init) 120 | sess.run(tf.local_variables_initializer()) 121 | 122 | # Load weights. 123 | loader = tf.train.Saver(var_list=restore_var) 124 | if args.restore_from is not None: 125 | load(loader, sess, args.restore_from) 126 | 127 | # Start queue threads. 128 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 129 | 130 | # Iterate over training steps. 131 | for step in range(args.num_steps): 132 | preds, _ = sess.run([pred, update_op]) 133 | if step % 100 == 0: 134 | print('step {:d}'.format(step)) 135 | print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess))) 136 | coord.request_stop() 137 | coord.join(threads) 138 | 139 | if __name__ == '__main__': 140 | main() 141 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/fine_tune.py: -------------------------------------------------------------------------------- 1 | """Training script for the DeepLab-ResNet network on the PASCAL VOC dataset 2 | for semantic image segmentation. 3 | 4 | This script fine-tunes the model using augmented PASCAL VOC, 5 | which contains approximately 10000 images for training and 1500 images for validation. 6 | Only the last 'fc1_voc12' layers are being trained. 7 | """ 8 | 9 | from __future__ import print_function 10 | 11 | import argparse 12 | from datetime import datetime 13 | import os 14 | import sys 15 | import time 16 | 17 | import tensorflow as tf 18 | import numpy as np 19 | 20 | from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, inv_preprocess, prepare_label 21 | 22 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 23 | 24 | BATCH_SIZE = 4 25 | DATA_DIRECTORY = '/home/VOCdevkit' 26 | DATA_LIST_PATH = './dataset/train.txt' 27 | IGNORE_LABEL = 255 28 | INPUT_SIZE = '321,321' 29 | LEARNING_RATE = 1e-4 30 | NUM_CLASSES = 21 31 | NUM_STEPS = 20000 32 | RANDOM_SEED = 1234 33 | RESTORE_FROM = './deeplab_resnet.ckpt' 34 | SAVE_NUM_IMAGES = 2 35 | SAVE_PRED_EVERY = 100 36 | SNAPSHOT_DIR = './snapshots_finetune/' 37 | 38 | def get_arguments(): 39 | """Parse all the arguments provided from the CLI. 40 | 41 | Returns: 42 | A list of parsed arguments. 
43 | """ 44 | parser = argparse.ArgumentParser(description="DeepLab-ResNet Network") 45 | parser.add_argument("--batch-size", type=int, default=BATCH_SIZE, 46 | help="Number of images sent to the network in one step.") 47 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY, 48 | help="Path to the directory containing the PASCAL VOC dataset.") 49 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH, 50 | help="Path to the file listing the images in the dataset.") 51 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL, 52 | help="The index of the label to ignore during the training.") 53 | parser.add_argument("--input-size", type=str, default=INPUT_SIZE, 54 | help="Comma-separated string with height and width of images.") 55 | parser.add_argument("--is-training", action="store_true", 56 | help="Whether to updates the running means and variances during the training.") 57 | parser.add_argument("--learning-rate", type=float, default=LEARNING_RATE, 58 | help="Learning rate for training.") 59 | parser.add_argument("--not-restore-last", action="store_true", 60 | help="Whether to not restore last (FC) layers.") 61 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES, 62 | help="Number of classes to predict (including background).") 63 | parser.add_argument("--num-steps", type=int, default=NUM_STEPS, 64 | help="Number of training steps.") 65 | parser.add_argument("--random-mirror", action="store_true", 66 | help="Whether to randomly mirror the inputs during the training.") 67 | parser.add_argument("--random-scale", action="store_true", 68 | help="Whether to randomly scale the inputs during the training.") 69 | parser.add_argument("--random-seed", type=int, default=RANDOM_SEED, 70 | help="Random seed to have reproducible results.") 71 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM, 72 | help="Where restore model parameters from.") 73 | parser.add_argument("--save-num-images", type=int, default=SAVE_NUM_IMAGES, 74 | help="How many images to save.") 75 | parser.add_argument("--save-pred-every", type=int, default=SAVE_PRED_EVERY, 76 | help="Save summaries and checkpoint every often.") 77 | parser.add_argument("--snapshot-dir", type=str, default=SNAPSHOT_DIR, 78 | help="Where to save snapshots of the model.") 79 | return parser.parse_args() 80 | 81 | def save(saver, sess, logdir, step): 82 | model_name = 'model.ckpt' 83 | checkpoint_path = os.path.join(logdir, model_name) 84 | 85 | if not os.path.exists(logdir): 86 | os.makedirs(logdir) 87 | 88 | saver.save(sess, checkpoint_path, global_step=step) 89 | print('The checkpoint has been created.') 90 | 91 | def load(saver, sess, ckpt_path): 92 | '''Load trained weights. 93 | 94 | Args: 95 | saver: TensorFlow saver object. 96 | sess: TensorFlow session. 97 | ckpt_path: path to checkpoint file with parameters. 98 | ''' 99 | saver.restore(sess, ckpt_path) 100 | print("Restored model parameters from {}".format(ckpt_path)) 101 | 102 | def main(): 103 | """Create the model and start the training.""" 104 | args = get_arguments() 105 | 106 | h, w = map(int, args.input_size.split(',')) 107 | input_size = (h, w) 108 | 109 | tf.set_random_seed(args.random_seed) 110 | 111 | # Create queue coordinator. 112 | coord = tf.train.Coordinator() 113 | 114 | # Load reader. 
115 | with tf.name_scope("create_inputs"): 116 | reader = ImageReader( 117 | args.data_dir, 118 | args.data_list, 119 | input_size, 120 | args.random_scale, 121 | args.random_mirror, 122 | args.ignore_label, 123 | IMG_MEAN, 124 | coord) 125 | image_batch, label_batch = reader.dequeue(args.batch_size) 126 | 127 | # Create network. 128 | net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes) 129 | # For a small batch size, it is better to keep 130 | # the statistics of the BN layers (running means and variances) 131 | # frozen, and to not update the values provided by the pre-trained model. 132 | # If is_training=True, the statistics will be updated during the training. 133 | # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) 134 | # if they are presented in var_list of the optimiser definition. 135 | 136 | # Predictions. 137 | raw_output = net.layers['fc1_voc12'] 138 | # Which variables to load. Running means and variances are not trainable, 139 | # thus all_variables() should be restored. 140 | # Restore all variables, or all except the last ones. 141 | restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last] 142 | trainable = [v for v in tf.trainable_variables() if 'fc1_voc12' in v.name] # Fine-tune only the last layers. 143 | 144 | prediction = tf.reshape(raw_output, [-1, args.num_classes]) 145 | label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes) 146 | gt = tf.reshape(label_proc, [-1, args.num_classes]) 147 | 148 | # Pixel-wise softmax loss. 149 | loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=gt) 150 | reduced_loss = tf.reduce_mean(loss) 151 | 152 | # Processed predictions. 153 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,]) 154 | raw_output_up = tf.argmax(raw_output_up, dimension=3) 155 | pred = tf.expand_dims(raw_output_up, dim=3) 156 | 157 | # Image summary. 158 | images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8) 159 | labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8) 160 | preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8) 161 | 162 | total_summary = tf.summary.image('images', 163 | tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), 164 | max_outputs=args.save_num_images) # Concatenate row-wise. 165 | summary_writer = tf.summary.FileWriter(args.snapshot_dir, 166 | graph=tf.get_default_graph()) 167 | 168 | # Define loss and optimisation parameters. 169 | optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate) 170 | optim = optimiser.minimize(reduced_loss, var_list=trainable) 171 | 172 | # Set up tf session and initialize variables. 173 | config = tf.ConfigProto() 174 | config.gpu_options.allow_growth = True 175 | sess = tf.Session(config=config) 176 | init = tf.global_variables_initializer() 177 | 178 | sess.run(init) 179 | 180 | # Saver for storing checkpoints of the model. 181 | saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=40) 182 | 183 | # Load variables if the checkpoint is provided. 184 | if args.restore_from is not None: 185 | loader = tf.train.Saver(var_list=restore_var) 186 | load(loader, sess, args.restore_from) 187 | 188 | # Start queue threads. 
189 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 190 | 191 | # Iterate over training steps. 192 | for step in range(args.num_steps): 193 | start_time = time.time() 194 | 195 | if step % args.save_pred_every == 0: 196 | loss_value, images, labels, preds, summary, _ = sess.run([reduced_loss, image_batch, label_batch, pred, total_summary, optim]) 197 | summary_writer.add_summary(summary, step) 198 | save(saver, sess, args.snapshot_dir, step) 199 | else: 200 | loss_value, _ = sess.run([reduced_loss, optim]) 201 | duration = time.time() - start_time 202 | print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration)) 203 | coord.request_stop() 204 | coord.join(threads) 205 | 206 | if __name__ == '__main__': 207 | main() 208 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/images/colour_scheme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spyflying/CMPC-Refseg/094639b8bf00cc169ea7b49cdf9c87fdfc70d963/external/tensorflow-deeplab-resnet/images/colour_scheme.png -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/images/mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spyflying/CMPC-Refseg/094639b8bf00cc169ea7b49cdf9c87fdfc70d963/external/tensorflow-deeplab-resnet/images/mask.png -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/images/summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spyflying/CMPC-Refseg/094639b8bf00cc169ea7b49cdf9c87fdfc70d963/external/tensorflow-deeplab-resnet/images/summary.png -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/inference.py: -------------------------------------------------------------------------------- 1 | """Run DeepLab-ResNet on a given image. 2 | 3 | This script computes a segmentation mask for a given image. 4 | """ 5 | 6 | from __future__ import print_function 7 | 8 | import argparse 9 | from datetime import datetime 10 | import os 11 | import sys 12 | import time 13 | 14 | from PIL import Image 15 | 16 | import tensorflow as tf 17 | import numpy as np 18 | 19 | from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, prepare_label 20 | 21 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 22 | 23 | NUM_CLASSES = 21 24 | SAVE_DIR = './output/' 25 | 26 | def get_arguments(): 27 | """Parse all the arguments provided from the CLI. 28 | 29 | Returns: 30 | A list of parsed arguments. 31 | """ 32 | parser = argparse.ArgumentParser(description="DeepLabLFOV Network Inference.") 33 | parser.add_argument("img_path", type=str, 34 | help="Path to the RGB image file.") 35 | parser.add_argument("model_weights", type=str, 36 | help="Path to the file with model weights.") 37 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES, 38 | help="Number of classes to predict (including background).") 39 | parser.add_argument("--save-dir", type=str, default=SAVE_DIR, 40 | help="Where to save predicted mask.") 41 | return parser.parse_args() 42 | 43 | def load(saver, sess, ckpt_path): 44 | '''Load trained weights. 45 | 46 | Args: 47 | saver: TensorFlow saver object. 
48 | sess: TensorFlow session. 49 | ckpt_path: path to checkpoint file with parameters. 50 | ''' 51 | saver.restore(sess, ckpt_path) 52 | print("Restored model parameters from {}".format(ckpt_path)) 53 | 54 | def main(): 55 | """Create the model and start the evaluation process.""" 56 | args = get_arguments() 57 | 58 | # Prepare image. 59 | img = tf.image.decode_jpeg(tf.read_file(args.img_path), channels=3) 60 | # Convert RGB to BGR. 61 | img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img) 62 | img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]), dtype=tf.float32) 63 | # Extract mean. 64 | img -= IMG_MEAN 65 | 66 | # Create network. 67 | net = DeepLabResNetModel({'data': tf.expand_dims(img, dim=0)}, is_training=False, num_classes=args.num_classes) 68 | 69 | # Which variables to load. 70 | restore_var = tf.global_variables() 71 | 72 | # Predictions. 73 | raw_output = net.layers['fc1_voc12'] 74 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(img)[0:2,]) 75 | raw_output_up = tf.argmax(raw_output_up, dimension=3) 76 | pred = tf.expand_dims(raw_output_up, dim=3) 77 | 78 | 79 | # Set up TF session and initialize variables. 80 | config = tf.ConfigProto() 81 | config.gpu_options.allow_growth = True 82 | sess = tf.Session(config=config) 83 | init = tf.global_variables_initializer() 84 | 85 | sess.run(init) 86 | 87 | # Load weights. 88 | loader = tf.train.Saver(var_list=restore_var) 89 | load(loader, sess, args.model_weights) 90 | 91 | # Perform inference. 92 | preds = sess.run(pred) 93 | 94 | msk = decode_labels(preds, num_classes=args.num_classes) 95 | im = Image.fromarray(msk[0]) 96 | if not os.path.exists(args.save_dir): 97 | os.makedirs(args.save_dir) 98 | im.save(args.save_dir + 'mask.png') 99 | 100 | print('The output file has been saved to {}'.format(args.save_dir + 'mask.png')) 101 | 102 | 103 | if __name__ == '__main__': 104 | main() 105 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/kaffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .graph import GraphBuilder, NodeMapper 2 | from .errors import KaffeError, print_stderr 3 | 4 | from . import tensorflow 5 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/kaffe/caffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .resolver import get_caffe_resolver, has_pycaffe 2 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/kaffe/caffe/resolver.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | SHARED_CAFFE_RESOLVER = None 4 | 5 | class CaffeResolver(object): 6 | def __init__(self): 7 | self.import_caffe() 8 | 9 | def import_caffe(self): 10 | self.caffe = None 11 | try: 12 | # Try to import PyCaffe first 13 | import caffe 14 | self.caffe = caffe 15 | except ImportError: 16 | # Fall back to the protobuf implementation 17 | from . import caffepb 18 | self.caffepb = caffepb 19 | show_fallback_warning() 20 | if self.caffe: 21 | # Use the protobuf code from the imported distribution. 22 | # This way, Caffe variants with custom layers will work. 
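            # When PyCaffe did import, the native protobuf module shipped with it
            # is reused below, so custom layers registered in that Caffe build
            # still parse. Typical use of the resolver elsewhere in kaffe, as a
            # sketch (`def_path`/`data_path` are placeholder paths):
            #
            #   resolver = get_caffe_resolver()
            #   net_params = resolver.NetParameter()  # same API with either backend
            #   if has_pycaffe():
            #       net = resolver.caffe.Net(def_path, data_path, resolver.caffe.TEST)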
23 | self.caffepb = self.caffe.proto.caffe_pb2 24 | self.NetParameter = self.caffepb.NetParameter 25 | 26 | def has_pycaffe(self): 27 | return self.caffe is not None 28 | 29 | def get_caffe_resolver(): 30 | global SHARED_CAFFE_RESOLVER 31 | if SHARED_CAFFE_RESOLVER is None: 32 | SHARED_CAFFE_RESOLVER = CaffeResolver() 33 | return SHARED_CAFFE_RESOLVER 34 | 35 | def has_pycaffe(): 36 | return get_caffe_resolver().has_pycaffe() 37 | 38 | def show_fallback_warning(): 39 | msg = ''' 40 | ------------------------------------------------------------ 41 | WARNING: PyCaffe not found! 42 | Falling back to a pure protocol buffer implementation. 43 | * Conversions will be drastically slower. 44 | * This backend is UNTESTED! 45 | ------------------------------------------------------------ 46 | 47 | ''' 48 | sys.stderr.write(msg) 49 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/kaffe/errors.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class KaffeError(Exception): 4 | pass 5 | 6 | def print_stderr(msg): 7 | sys.stderr.write('%s\n' % msg) 8 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/kaffe/graph.py: -------------------------------------------------------------------------------- 1 | from google.protobuf import text_format 2 | 3 | from .caffe import get_caffe_resolver 4 | from .errors import KaffeError, print_stderr 5 | from .layers import LayerAdapter, LayerType, NodeKind, NodeDispatch 6 | from .shapes import TensorShape 7 | 8 | class Node(object): 9 | 10 | def __init__(self, name, kind, layer=None): 11 | self.name = name 12 | self.kind = kind 13 | self.layer = LayerAdapter(layer, kind) if layer else None 14 | self.parents = [] 15 | self.children = [] 16 | self.data = None 17 | self.output_shape = None 18 | self.metadata = {} 19 | 20 | def add_parent(self, parent_node): 21 | assert parent_node not in self.parents 22 | self.parents.append(parent_node) 23 | if self not in parent_node.children: 24 | parent_node.children.append(self) 25 | 26 | def add_child(self, child_node): 27 | assert child_node not in self.children 28 | self.children.append(child_node) 29 | if self not in child_node.parents: 30 | child_node.parents.append(self) 31 | 32 | def get_only_parent(self): 33 | if len(self.parents) != 1: 34 | raise KaffeError('Node (%s) expected to have 1 parent. Found %s.' 
% 35 | (self, len(self.parents))) 36 | return self.parents[0] 37 | 38 | @property 39 | def parameters(self): 40 | if self.layer is not None: 41 | return self.layer.parameters 42 | return None 43 | 44 | def __str__(self): 45 | return '[%s] %s' % (self.kind, self.name) 46 | 47 | def __repr__(self): 48 | return '%s (0x%x)' % (self.name, id(self)) 49 | 50 | 51 | class Graph(object): 52 | 53 | def __init__(self, nodes=None, name=None): 54 | self.nodes = nodes or [] 55 | self.node_lut = {node.name: node for node in self.nodes} 56 | self.name = name 57 | 58 | def add_node(self, node): 59 | self.nodes.append(node) 60 | self.node_lut[node.name] = node 61 | 62 | def get_node(self, name): 63 | try: 64 | return self.node_lut[name] 65 | except KeyError: 66 | raise KaffeError('Layer not found: %s' % name) 67 | 68 | def get_input_nodes(self): 69 | return [node for node in self.nodes if len(node.parents) == 0] 70 | 71 | def get_output_nodes(self): 72 | return [node for node in self.nodes if len(node.children) == 0] 73 | 74 | def topologically_sorted(self): 75 | sorted_nodes = [] 76 | unsorted_nodes = list(self.nodes) 77 | temp_marked = set() 78 | perm_marked = set() 79 | 80 | def visit(node): 81 | if node in temp_marked: 82 | raise KaffeError('Graph is not a DAG.') 83 | if node in perm_marked: 84 | return 85 | temp_marked.add(node) 86 | for child in node.children: 87 | visit(child) 88 | perm_marked.add(node) 89 | temp_marked.remove(node) 90 | sorted_nodes.insert(0, node) 91 | 92 | while len(unsorted_nodes): 93 | visit(unsorted_nodes.pop()) 94 | return sorted_nodes 95 | 96 | def compute_output_shapes(self): 97 | sorted_nodes = self.topologically_sorted() 98 | for node in sorted_nodes: 99 | node.output_shape = TensorShape(*NodeKind.compute_output_shape(node)) 100 | 101 | def replaced(self, new_nodes): 102 | return Graph(nodes=new_nodes, name=self.name) 103 | 104 | def transformed(self, transformers): 105 | graph = self 106 | for transformer in transformers: 107 | graph = transformer(graph) 108 | if graph is None: 109 | raise KaffeError('Transformer failed: {}'.format(transformer)) 110 | assert isinstance(graph, Graph) 111 | return graph 112 | 113 | def __contains__(self, key): 114 | return key in self.node_lut 115 | 116 | def __str__(self): 117 | hdr = '{:<20} {:<30} {:>20} {:>20}'.format('Type', 'Name', 'Param', 'Output') 118 | s = [hdr, '-' * 94] 119 | for node in self.topologically_sorted(): 120 | # If the node has learned parameters, display the first one's shape. 121 | # In case of convolutions, this corresponds to the weights. 122 | data_shape = node.data[0].shape if node.data else '--' 123 | out_shape = node.output_shape or '--' 124 | s.append('{:<20} {:<30} {:>20} {:>20}'.format(node.kind, node.name, data_shape, 125 | tuple(out_shape))) 126 | return '\n'.join(s) 127 | 128 | 129 | class GraphBuilder(object): 130 | '''Constructs a model graph from a Caffe protocol buffer definition.''' 131 | 132 | def __init__(self, def_path, phase='test'): 133 | ''' 134 | def_path: Path to the model definition (.prototxt) 135 | data_path: Path to the model data (.caffemodel) 136 | phase: Either 'test' or 'train'. Used for filtering phase-specific nodes. 
137 |         '''
138 |         self.def_path = def_path
139 |         self.phase = phase
140 |         self.load()
141 | 
142 |     def load(self):
143 |         '''Load the layer definitions from the prototxt.'''
144 |         self.params = get_caffe_resolver().NetParameter()
145 |         with open(self.def_path, 'rb') as def_file:
146 |             text_format.Merge(def_file.read(), self.params)
147 | 
148 |     def filter_layers(self, layers):
149 |         '''Filter out layers based on the current phase.'''
150 |         phase_map = {0: 'train', 1: 'test'}
151 |         filtered_layer_names = set()
152 |         filtered_layers = []
153 |         for layer in layers:
154 |             phase = self.phase
155 |             if len(layer.include):
156 |                 phase = phase_map[layer.include[0].phase]
157 |             if len(layer.exclude):
158 |                 # An excluded phase means the layer belongs to the other one.
159 |                 # (Indexing layer.exclude here; indexing layer.include would
160 |                 # fail for layers that only specify an exclude rule.)
161 |                 phase = phase_map[1 - layer.exclude[0].phase]
162 |             exclude = (phase != self.phase)
163 |             # Dropout layers appear in a fair number of Caffe
164 |             # test-time networks. These are just ignored. We'll
165 |             # filter them out here.
166 |             if (not exclude) and (phase == 'test'):
167 |                 exclude = (layer.type == LayerType.Dropout)
168 |             if not exclude:
169 |                 filtered_layers.append(layer)
170 |                 # Guard against dupes.
171 |                 assert layer.name not in filtered_layer_names
172 |                 filtered_layer_names.add(layer.name)
173 |         return filtered_layers
174 | 
175 |     def make_node(self, layer):
176 |         '''Create a graph node for the given layer.'''
177 |         kind = NodeKind.map_raw_kind(layer.type)
178 |         if kind is None:
179 |             raise KaffeError('Unknown layer type encountered: %s' % layer.type)
180 |         # We want to use the layer's top names (the "output" names), rather than the
181 |         # name attribute, which is more of a readability thing than a functional one.
182 |         # Other layers will refer to a node by its "top name".
183 |         return Node(layer.name, kind, layer=layer)
184 | 
185 |     def make_input_nodes(self):
186 |         '''
187 |         Create data input nodes.
188 | 
189 |         This method is for old-style inputs, where the input specification
190 |         was not treated as a first-class layer in the prototxt.
191 |         Newer models use the "Input layer" type.
192 |         '''
193 |         nodes = [Node(name, NodeKind.Data) for name in self.params.input]
194 |         if len(nodes):
195 |             input_dim = map(int, self.params.input_dim)
196 |             if not input_dim:
197 |                 if len(self.params.input_shape) > 0:
198 |                     input_dim = map(int, self.params.input_shape[0].dim)
199 |                 else:
200 |                     raise KaffeError('Dimensions for input not specified.')
201 |             for node in nodes:
202 |                 node.output_shape = tuple(input_dim)
203 |         return nodes
204 | 
205 |     def build(self):
206 |         '''
207 |         Builds the graph from the Caffe layer definitions.
208 |         '''
209 |         # Get the layers
210 |         layers = self.params.layers or self.params.layer
211 |         # Filter out phase-excluded layers
212 |         layers = self.filter_layers(layers)
213 |         # Get any separately-specified input layers
214 |         nodes = self.make_input_nodes()
215 |         nodes += [self.make_node(layer) for layer in layers]
216 |         # Initialize the graph
217 |         graph = Graph(nodes=nodes, name=self.params.name)
218 |         # Connect the nodes
219 |         #
220 |         # A note on layers and outputs:
221 |         # In Caffe, each layer can produce multiple outputs ("tops") from a set of inputs
222 |         # ("bottoms"). The bottoms refer to other layers' tops. The top can rewrite a bottom
223 |         # (in case of in-place operations). Note that the layer's name is not used for establishing
224 |         # any connectivity. It's only used for data association. By convention, a layer with a
225 |         # single top will often use the same name (although this is not required).
226 |         #
227 |         # The current implementation only supports single-output nodes (note that a node can still
228 |         # have multiple children, since multiple child nodes can refer to the single top's name).
229 |         node_outputs = {}
230 |         for layer in layers:
231 |             node = graph.get_node(layer.name)
232 |             for input_name in layer.bottom:
233 |                 assert input_name != layer.name
234 |                 parent_node = node_outputs.get(input_name)
235 |                 if (parent_node is None) or (parent_node == node):
236 |                     parent_node = graph.get_node(input_name)
237 |                 node.add_parent(parent_node)
238 |             if len(layer.top) > 1:
239 |                 raise KaffeError('Multiple top nodes are not supported.')
240 |             for output_name in layer.top:
241 |                 if output_name == layer.name:
242 |                     # Output is named the same as the node. No further action required.
243 |                     continue
244 |                 # There are two possibilities here:
245 |                 #
246 |                 # Case 1: output_name refers to another node in the graph.
247 |                 # This is an "in-place operation" that overwrites an existing node.
248 |                 # This would create a cycle in the graph. We'll undo the in-placing
249 |                 # by substituting this node wherever the overwritten node is referenced.
250 |                 #
251 |                 # Case 2: output_name violates the convention layer.name == output_name.
252 |                 # Since we are working in the single-output regime, we can simply rename
253 |                 # it to match the layer name.
254 |                 #
255 |                 # For both cases, future references to this top re-route to this node.
256 |                 node_outputs[output_name] = node
257 | 
258 |         graph.compute_output_shapes()
259 |         return graph
260 | 
261 | 
262 | class NodeMapper(NodeDispatch):
263 | 
264 |     def __init__(self, graph):
265 |         self.graph = graph
266 | 
267 |     def map(self):
268 |         nodes = self.graph.topologically_sorted()
269 |         # Remove input nodes - we'll handle them separately.
270 |         input_nodes = self.graph.get_input_nodes()
271 |         nodes = [t for t in nodes if t not in input_nodes]
272 |         # Decompose DAG into chains.
273 |         chains = []
274 |         for node in nodes:
275 |             attach_to_chain = None
276 |             if len(node.parents) == 1:
277 |                 parent = node.get_only_parent()
278 |                 for chain in chains:
279 |                     if chain[-1] == parent:
280 |                         # Node is part of an existing chain.
281 |                         attach_to_chain = chain
282 |                         break
283 |             if attach_to_chain is None:
284 |                 # Start a new chain for this node.
285 |                 attach_to_chain = []
286 |                 chains.append(attach_to_chain)
287 |             attach_to_chain.append(node)
288 |         # Map each chain.
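        # A chain here is a maximal linear run of single-parent nodes; any branch
        # point (e.g. the identity and residual paths of a ResNet unit) starts a
        # new chain. For a toy DAG a -> b -> c with an extra edge a -> d, the
        # decomposition is [[a, b, c], [d]], and the emitter later renders each
        # chain as one fluent self.feed(...) expression. (Illustrative example,
        # not taken from a specific model.)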
286 | mapped_chains = [] 287 | for chain in chains: 288 | mapped_chains.append(self.map_chain(chain)) 289 | return self.commit(mapped_chains) 290 | 291 | def map_chain(self, chain): 292 | return [self.map_node(node) for node in chain] 293 | 294 | def map_node(self, node): 295 | map_func = self.get_handler(node.kind, 'map') 296 | mapped_node = map_func(node) 297 | assert mapped_node is not None 298 | mapped_node.node = node 299 | return mapped_node 300 | 301 | def commit(self, mapped_chains): 302 | raise NotImplementedError('Must be implemented by subclass.') 303 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/kaffe/layers.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numbers 3 | from collections import namedtuple 4 | 5 | from .shapes import * 6 | 7 | LAYER_DESCRIPTORS = { 8 | 9 | # Caffe Types 10 | 'AbsVal': shape_identity, 11 | 'Accuracy': shape_scalar, 12 | 'ArgMax': shape_not_implemented, 13 | 'BatchNorm': shape_identity, 14 | 'BNLL': shape_not_implemented, 15 | 'Concat': shape_concat, 16 | 'ContrastiveLoss': shape_scalar, 17 | 'Convolution': shape_convolution, 18 | 'Deconvolution': shape_not_implemented, 19 | 'Data': shape_data, 20 | 'Dropout': shape_identity, 21 | 'DummyData': shape_data, 22 | 'EuclideanLoss': shape_scalar, 23 | 'Eltwise': shape_identity, 24 | 'Exp': shape_identity, 25 | 'Flatten': shape_not_implemented, 26 | 'HDF5Data': shape_data, 27 | 'HDF5Output': shape_identity, 28 | 'HingeLoss': shape_scalar, 29 | 'Im2col': shape_not_implemented, 30 | 'ImageData': shape_data, 31 | 'InfogainLoss': shape_scalar, 32 | 'InnerProduct': shape_inner_product, 33 | 'Input': shape_data, 34 | 'LRN': shape_identity, 35 | 'MemoryData': shape_mem_data, 36 | 'MultinomialLogisticLoss': shape_scalar, 37 | 'MVN': shape_not_implemented, 38 | 'Pooling': shape_pool, 39 | 'Power': shape_identity, 40 | 'ReLU': shape_identity, 41 | 'Scale': shape_identity, 42 | 'Sigmoid': shape_identity, 43 | 'SigmoidCrossEntropyLoss': shape_scalar, 44 | 'Silence': shape_not_implemented, 45 | 'Softmax': shape_identity, 46 | 'SoftmaxWithLoss': shape_scalar, 47 | 'Split': shape_not_implemented, 48 | 'Slice': shape_not_implemented, 49 | 'TanH': shape_identity, 50 | 'WindowData': shape_not_implemented, 51 | 'Threshold': shape_identity, 52 | } 53 | 54 | LAYER_TYPES = LAYER_DESCRIPTORS.keys() 55 | 56 | LayerType = type('LayerType', (), {t: t for t in LAYER_TYPES}) 57 | 58 | class NodeKind(LayerType): 59 | 60 | @staticmethod 61 | def map_raw_kind(kind): 62 | if kind in LAYER_TYPES: 63 | return kind 64 | return None 65 | 66 | @staticmethod 67 | def compute_output_shape(node): 68 | try: 69 | val = LAYER_DESCRIPTORS[node.kind](node) 70 | return val 71 | except NotImplementedError: 72 | raise KaffeError('Output shape computation not implemented for type: %s' % node.kind) 73 | 74 | 75 | class NodeDispatchError(KaffeError): 76 | 77 | pass 78 | 79 | 80 | class NodeDispatch(object): 81 | 82 | @staticmethod 83 | def get_handler_name(node_kind): 84 | if len(node_kind) <= 4: 85 | # A catch-all for things like ReLU and tanh 86 | return node_kind.lower() 87 | # Convert from CamelCase to under_scored 88 | name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', node_kind) 89 | return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower() 90 | 91 | def get_handler(self, node_kind, prefix): 92 | name = self.get_handler_name(node_kind) 93 | name = '_'.join((prefix, name)) 94 | try: 95 | return getattr(self, name) 96 | except 
AttributeError:
97 |             raise NodeDispatchError('No handler found for node kind: %s (expected: %s)' %
98 |                                     (node_kind, name))
99 | 
100 | 
101 | class LayerAdapter(object):
102 | 
103 |     def __init__(self, layer, kind):
104 |         self.layer = layer
105 |         self.kind = kind
106 | 
107 |     @property
108 |     def parameters(self):
109 |         name = NodeDispatch.get_handler_name(self.kind)
110 |         name = '_'.join((name, 'param'))
111 |         try:
112 |             return getattr(self.layer, name)
113 |         except AttributeError:
114 |             raise NodeDispatchError('Caffe parameters not found for layer kind: %s' % (self.kind))
115 | 
116 |     @staticmethod
117 |     def get_kernel_value(scalar, repeated, idx, default=None):
118 |         if scalar:
119 |             return scalar
120 |         if repeated:
121 |             if isinstance(repeated, numbers.Number):
122 |                 return repeated
123 |             if len(repeated) == 1:
124 |                 # Same value applies to all spatial dimensions
125 |                 return int(repeated[0])
126 |             assert idx < len(repeated)
127 |             # Extract the value for the given spatial dimension
128 |             return repeated[idx]
129 |         if default is None:
130 |             raise ValueError('Unable to determine kernel parameter!')
131 |         return default
132 | 
133 |     @property
134 |     def kernel_parameters(self):
135 |         assert self.kind in (NodeKind.Convolution, NodeKind.Pooling)
136 |         params = self.parameters
137 |         k_h = self.get_kernel_value(params.kernel_h, params.kernel_size, 0)
138 |         k_w = self.get_kernel_value(params.kernel_w, params.kernel_size, 1)
139 |         s_h = self.get_kernel_value(params.stride_h, params.stride, 0, default=1)
140 |         s_w = self.get_kernel_value(params.stride_w, params.stride, 1, default=1)
141 |         p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0)
142 |         p_w = self.get_kernel_value(params.pad_w, params.pad, 1, default=0)  # was params.pad_h: the width padding must come from pad_w
143 |         return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w)
144 | 
145 | 
146 | KernelParameters = namedtuple('KernelParameters', ['kernel_h', 'kernel_w', 'stride_h', 'stride_w',
147 |                                                    'pad_h', 'pad_w'])
148 | 
--------------------------------------------------------------------------------
/external/tensorflow-deeplab-resnet/kaffe/shapes.py:
--------------------------------------------------------------------------------
1 | import math
2 | from collections import namedtuple
3 | 
4 | from .errors import KaffeError
5 | 
6 | TensorShape = namedtuple('TensorShape', ['batch_size', 'channels', 'height', 'width'])
7 | 
8 | 
9 | def get_filter_output_shape(i_h, i_w, params, round_func):
10 |     o_h = (i_h + 2 * params.pad_h - params.kernel_h) / float(params.stride_h) + 1
11 |     o_w = (i_w + 2 * params.pad_w - params.kernel_w) / float(params.stride_w) + 1
12 |     return (int(round_func(o_h)), int(round_func(o_w)))
13 | 
14 | 
15 | def get_strided_kernel_output_shape(node, round_func):
16 |     assert node.layer is not None
17 |     input_shape = node.get_only_parent().output_shape
18 |     o_h, o_w = get_filter_output_shape(input_shape.height, input_shape.width,
19 |                                        node.layer.kernel_parameters, round_func)
20 |     params = node.layer.parameters
21 |     has_c_o = hasattr(params, 'num_output')
22 |     c = params.num_output if has_c_o else input_shape.channels
23 |     return TensorShape(input_shape.batch_size, c, o_h, o_w)
24 | 
25 | 
26 | def shape_not_implemented(node):
27 |     raise NotImplementedError
28 | 
29 | 
30 | def shape_identity(node):
31 |     assert len(node.parents) > 0
32 |     return node.parents[0].output_shape
33 | 
34 | 
35 | def shape_scalar(node):
36 |     return TensorShape(1, 1, 1, 1)
37 | 
38 | 
39 | def shape_data(node):
40 |     if node.output_shape:
41 |         # Old-style input specification
42 |         return node.output_shape
43 |     try:
44 | # New-style input specification 45 | return map(int, node.parameters.shape[0].dim) 46 | except: 47 | # We most likely have a data layer on our hands. The problem is, 48 | # Caffe infers the dimensions of the data from the source (eg: LMDB). 49 | # We want to avoid reading datasets here. Fail for now. 50 | # This can be temporarily fixed by transforming the data layer to 51 | # Caffe's "input" layer (as is usually used in the "deploy" version). 52 | # TODO: Find a better solution for this. 53 | raise KaffeError('Cannot determine dimensions of data layer.\n' 54 | 'See comments in function shape_data for more info.') 55 | 56 | 57 | def shape_mem_data(node): 58 | params = node.parameters 59 | return TensorShape(params.batch_size, params.channels, params.height, params.width) 60 | 61 | 62 | def shape_concat(node): 63 | axis = node.layer.parameters.axis 64 | output_shape = None 65 | for parent in node.parents: 66 | if output_shape is None: 67 | output_shape = list(parent.output_shape) 68 | else: 69 | output_shape[axis] += parent.output_shape[axis] 70 | return tuple(output_shape) 71 | 72 | 73 | def shape_convolution(node): 74 | return get_strided_kernel_output_shape(node, math.floor) 75 | 76 | 77 | def shape_pool(node): 78 | return get_strided_kernel_output_shape(node, math.ceil) 79 | 80 | 81 | def shape_inner_product(node): 82 | input_shape = node.get_only_parent().output_shape 83 | return TensorShape(input_shape.batch_size, node.layer.parameters.num_output, 1, 1) 84 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/kaffe/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import TensorFlowTransformer 2 | from .network import Network 3 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/kaffe/tensorflow/network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | slim = tf.contrib.slim 4 | 5 | DEFAULT_PADDING = 'SAME' 6 | 7 | 8 | def layer(op): 9 | '''Decorator for composable network layers.''' 10 | 11 | def layer_decorated(self, *args, **kwargs): 12 | # Automatically set a name if not provided. 13 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 14 | # Figure out the layer inputs. 15 | if len(self.terminals) == 0: 16 | raise RuntimeError('No input variables found for layer %s.' % name) 17 | elif len(self.terminals) == 1: 18 | layer_input = self.terminals[0] 19 | else: 20 | layer_input = list(self.terminals) 21 | # Perform the operation and get the output. 22 | layer_output = op(self, layer_input, *args, **kwargs) 23 | # Add to layer LUT. 24 | self.layers[name] = layer_output 25 | # This output is now the input for the next layer. 26 | self.feed(layer_output) 27 | # Return self for chained calls. 
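        # Returning self (next line) is what enables the fluent, chainable graph
        # definitions used by the generated model classes, e.g. (illustrative
        # layer names, not from a specific model):
        #
        #   (self.feed('data')
        #        .conv(7, 7, 64, 2, 2, relu=False, name='conv1')
        #        .max_pool(3, 3, 2, 2, name='pool1'))
        #
        # Each decorated op pops its inputs from self.terminals and re-feeds its
        # own output.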
28 | return self 29 | 30 | return layer_decorated 31 | 32 | 33 | class Network(object): 34 | 35 | def __init__(self, inputs, trainable=True, is_training=False, num_classes=21): 36 | # The input nodes for this network 37 | self.inputs = inputs 38 | # The current list of terminal nodes 39 | self.terminals = [] 40 | # Mapping from layer names to layers 41 | self.layers = dict(inputs) 42 | # If true, the resulting variables are set as trainable 43 | self.trainable = trainable 44 | # Switch variable for dropout 45 | self.use_dropout = tf.placeholder_with_default(tf.constant(1.0), 46 | shape=[], 47 | name='use_dropout') 48 | self.setup(is_training, num_classes) 49 | 50 | def setup(self, is_training): 51 | '''Construct the network. ''' 52 | raise NotImplementedError('Must be implemented by the subclass.') 53 | 54 | def load(self, data_path, session, ignore_missing=False): 55 | '''Load network weights. 56 | data_path: The path to the numpy-serialized network weights 57 | session: The current TensorFlow session 58 | ignore_missing: If true, serialized weights for missing layers are ignored. 59 | ''' 60 | data_dict = np.load(data_path).item() 61 | for op_name in data_dict: 62 | with tf.variable_scope(op_name, reuse=True): 63 | for param_name, data in data_dict[op_name].iteritems(): 64 | try: 65 | var = tf.get_variable(param_name) 66 | session.run(var.assign(data)) 67 | except ValueError: 68 | if not ignore_missing: 69 | raise 70 | 71 | def feed(self, *args): 72 | '''Set the input(s) for the next operation by replacing the terminal nodes. 73 | The arguments can be either layer names or the actual layers. 74 | ''' 75 | assert len(args) != 0 76 | self.terminals = [] 77 | for fed_layer in args: 78 | if isinstance(fed_layer, basestring): 79 | try: 80 | fed_layer = self.layers[fed_layer] 81 | except KeyError: 82 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 83 | self.terminals.append(fed_layer) 84 | return self 85 | 86 | def get_output(self): 87 | '''Returns the current network output.''' 88 | return self.terminals[-1] 89 | 90 | def get_unique_name(self, prefix): 91 | '''Returns an index-suffixed unique name for the given prefix. 92 | This is used for auto-generating layer names based on the type-prefix. 93 | ''' 94 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 95 | return '%s_%d' % (prefix, ident) 96 | 97 | def make_var(self, name, shape): 98 | '''Creates a new TensorFlow variable.''' 99 | return tf.get_variable(name, shape, trainable=self.trainable) 100 | 101 | def validate_padding(self, padding): 102 | '''Verifies that the padding is one of the supported ones.''' 103 | assert padding in ('SAME', 'VALID') 104 | 105 | @layer 106 | def conv(self, 107 | input, 108 | k_h, 109 | k_w, 110 | c_o, 111 | s_h, 112 | s_w, 113 | name, 114 | relu=True, 115 | padding=DEFAULT_PADDING, 116 | group=1, 117 | biased=True): 118 | # Verify that the padding is acceptable 119 | self.validate_padding(padding) 120 | # Get the number of channels in the input 121 | c_i = input.get_shape()[-1] 122 | # Verify that the grouping parameter is valid 123 | assert c_i % group == 0 124 | assert c_o % group == 0 125 | # Convolution for a given input and kernel 126 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 127 | with tf.variable_scope(name) as scope: 128 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o]) 129 | if group == 1: 130 | # This is the common-case. Convolve the input without any further complications. 
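                # With group > 1 (AlexNet-style grouped convolution) the
                # else-branch below splits both the input and the kernel along
                # the channel axis, convolves each pair independently, and
                # concatenates the results: e.g. c_i=256, c_o=256, group=2
                # yields two 128-to-128-channel convolutions.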
131 | output = convolve(input, kernel) 132 | else: 133 | # Split the input into groups and then convolve each of them independently 134 | input_groups = tf.split(3, group, input) 135 | kernel_groups = tf.split(3, group, kernel) 136 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 137 | # Concatenate the groups 138 | output = tf.concat(3, output_groups) 139 | # Add the biases 140 | if biased: 141 | biases = self.make_var('biases', [c_o]) 142 | output = tf.nn.bias_add(output, biases) 143 | if relu: 144 | # ReLU non-linearity 145 | output = tf.nn.relu(output, name=scope.name) 146 | return output 147 | 148 | @layer 149 | def atrous_conv(self, 150 | input, 151 | k_h, 152 | k_w, 153 | c_o, 154 | dilation, 155 | name, 156 | relu=True, 157 | padding=DEFAULT_PADDING, 158 | group=1, 159 | biased=True): 160 | # Verify that the padding is acceptable 161 | self.validate_padding(padding) 162 | # Get the number of channels in the input 163 | c_i = input.get_shape()[-1] 164 | # Verify that the grouping parameter is valid 165 | assert c_i % group == 0 166 | assert c_o % group == 0 167 | # Convolution for a given input and kernel 168 | convolve = lambda i, k: tf.nn.atrous_conv2d(i, k, dilation, padding=padding) 169 | with tf.variable_scope(name) as scope: 170 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o]) 171 | if group == 1: 172 | # This is the common-case. Convolve the input without any further complications. 173 | output = convolve(input, kernel) 174 | else: 175 | # Split the input into groups and then convolve each of them independently 176 | input_groups = tf.split(3, group, input) 177 | kernel_groups = tf.split(3, group, kernel) 178 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 179 | # Concatenate the groups 180 | output = tf.concat(3, output_groups) 181 | # Add the biases 182 | if biased: 183 | biases = self.make_var('biases', [c_o]) 184 | output = tf.nn.bias_add(output, biases) 185 | if relu: 186 | # ReLU non-linearity 187 | output = tf.nn.relu(output, name=scope.name) 188 | return output 189 | 190 | @layer 191 | def relu(self, input, name): 192 | return tf.nn.relu(input, name=name) 193 | 194 | @layer 195 | def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 196 | self.validate_padding(padding) 197 | return tf.nn.max_pool(input, 198 | ksize=[1, k_h, k_w, 1], 199 | strides=[1, s_h, s_w, 1], 200 | padding=padding, 201 | name=name) 202 | 203 | @layer 204 | def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 205 | self.validate_padding(padding) 206 | return tf.nn.avg_pool(input, 207 | ksize=[1, k_h, k_w, 1], 208 | strides=[1, s_h, s_w, 1], 209 | padding=padding, 210 | name=name) 211 | 212 | @layer 213 | def lrn(self, input, radius, alpha, beta, name, bias=1.0): 214 | return tf.nn.local_response_normalization(input, 215 | depth_radius=radius, 216 | alpha=alpha, 217 | beta=beta, 218 | bias=bias, 219 | name=name) 220 | 221 | @layer 222 | def concat(self, inputs, axis, name): 223 | return tf.concat(concat_dim=axis, values=inputs, name=name) 224 | 225 | @layer 226 | def add(self, inputs, name): 227 | return tf.add_n(inputs, name=name) 228 | 229 | @layer 230 | def fc(self, input, num_out, name, relu=True): 231 | with tf.variable_scope(name) as scope: 232 | input_shape = input.get_shape() 233 | if input_shape.ndims == 4: 234 | # The input is spatial. Vectorize it first. 
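                # Example of the flattening below: a [N, 7, 7, 512] spatial
                # tensor becomes [N, 25088] (7 * 7 * 512 = 25088) before the
                # fully-connected matmul. (Illustrative shapes, not tied to this
                # particular model.)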
235 | dim = 1 236 | for d in input_shape[1:].as_list(): 237 | dim *= d 238 | feed_in = tf.reshape(input, [-1, dim]) 239 | else: 240 | feed_in, dim = (input, input_shape[-1].value) 241 | weights = self.make_var('weights', shape=[dim, num_out]) 242 | biases = self.make_var('biases', [num_out]) 243 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 244 | fc = op(feed_in, weights, biases, name=scope.name) 245 | return fc 246 | 247 | @layer 248 | def softmax(self, input, name): 249 | input_shape = map(lambda v: v.value, input.get_shape()) 250 | if len(input_shape) > 2: 251 | # For certain models (like NiN), the singleton spatial dimensions 252 | # need to be explicitly squeezed, since they're not broadcast-able 253 | # in TensorFlow's NHWC ordering (unlike Caffe's NCHW). 254 | if input_shape[1] == 1 and input_shape[2] == 1: 255 | input = tf.squeeze(input, squeeze_dims=[1, 2]) 256 | else: 257 | raise ValueError('Rank 2 tensor input expected for softmax!') 258 | return tf.nn.softmax(input, name) 259 | 260 | @layer 261 | def batch_normalization(self, input, name, is_training, activation_fn=None, scale=True): 262 | with tf.variable_scope(name) as scope: 263 | output = slim.batch_norm( 264 | input, 265 | activation_fn=activation_fn, 266 | is_training=is_training, 267 | updates_collections=None, 268 | scale=scale, 269 | scope=scope) 270 | return output 271 | 272 | @layer 273 | def dropout(self, input, keep_prob, name): 274 | keep = 1 - self.use_dropout + (self.use_dropout * keep_prob) 275 | return tf.nn.dropout(input, keep, name=name) 276 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/kaffe/tensorflow/transformer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..errors import KaffeError, print_stderr 4 | from ..graph import GraphBuilder, NodeMapper 5 | from ..layers import NodeKind 6 | from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser, 7 | BatchNormScaleBiasFuser, BatchNormPreprocessor, ParameterNamer) 8 | 9 | from . import network 10 | 11 | 12 | def get_padding_type(kernel_params, input_shape, output_shape): 13 | '''Translates Caffe's numeric padding to one of ('SAME', 'VALID'). 14 | Caffe supports arbitrary padding values, while TensorFlow only 15 | supports 'SAME' and 'VALID' modes. So, not all Caffe paddings 16 | can be translated to TensorFlow. There are some subtleties to 17 | how the padding edge-cases are handled. 
These are described here: 18 | https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto 19 | ''' 20 | k_h, k_w, s_h, s_w, p_h, p_w = kernel_params 21 | s_o_h = np.ceil(input_shape.height / float(s_h)) 22 | s_o_w = np.ceil(input_shape.width / float(s_w)) 23 | if (output_shape.height == s_o_h) and (output_shape.width == s_o_w): 24 | return 'SAME' 25 | v_o_h = np.ceil((input_shape.height - k_h + 1.0) / float(s_h)) 26 | v_o_w = np.ceil((input_shape.width - k_w + 1.0) / float(s_w)) 27 | if (output_shape.height == v_o_h) and (output_shape.width == v_o_w): 28 | return 'VALID' 29 | return None 30 | 31 | 32 | class TensorFlowNode(object): 33 | '''An intermediate representation for TensorFlow operations.''' 34 | 35 | def __init__(self, op, *args, **kwargs): 36 | # A string corresponding to the TensorFlow operation 37 | self.op = op 38 | # Positional arguments for the operation 39 | self.args = args 40 | # Keyword arguments for the operation 41 | self.kwargs = list(kwargs.items()) 42 | # The source Caffe node 43 | self.node = None 44 | 45 | def format(self, arg): 46 | '''Returns a string representation for the given value.''' 47 | return "'%s'" % arg if isinstance(arg, basestring) else str(arg) 48 | 49 | def pair(self, key, value): 50 | '''Returns key=formatted(value).''' 51 | return '%s=%s' % (key, self.format(value)) 52 | 53 | def emit(self): 54 | '''Emits the Python source for this node.''' 55 | # Format positional arguments 56 | args = map(self.format, self.args) 57 | # Format any keyword arguments 58 | if self.kwargs: 59 | args += [self.pair(k, v) for k, v in self.kwargs] 60 | # Set the node name 61 | args.append(self.pair('name', self.node.name)) 62 | args = ', '.join(args) 63 | return '%s(%s)' % (self.op, args) 64 | 65 | 66 | class MaybeActivated(object): 67 | 68 | def __init__(self, node, default=True): 69 | self.inject_kwargs = {} 70 | if node.metadata.get('relu', False) != default: 71 | self.inject_kwargs['relu'] = not default 72 | 73 | def __call__(self, *args, **kwargs): 74 | kwargs.update(self.inject_kwargs) 75 | return TensorFlowNode(*args, **kwargs) 76 | 77 | 78 | class TensorFlowMapper(NodeMapper): 79 | 80 | def get_kernel_params(self, node): 81 | kernel_params = node.layer.kernel_parameters 82 | input_shape = node.get_only_parent().output_shape 83 | padding = get_padding_type(kernel_params, input_shape, node.output_shape) 84 | # Only emit the padding if it's not the default value. 85 | padding = {'padding': padding} if padding != network.DEFAULT_PADDING else {} 86 | return (kernel_params, padding) 87 | 88 | def map_convolution(self, node): 89 | (kernel_params, kwargs) = self.get_kernel_params(node) 90 | h = kernel_params.kernel_h 91 | w = kernel_params.kernel_w 92 | c_o = node.output_shape[1] 93 | c_i = node.parents[0].output_shape[1] 94 | group = node.parameters.group 95 | if group != 1: 96 | kwargs['group'] = group 97 | if not node.parameters.bias_term: 98 | kwargs['biased'] = False 99 | assert kernel_params.kernel_h == h 100 | assert kernel_params.kernel_w == w 101 | return MaybeActivated(node)('conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o, 102 | kernel_params.stride_h, kernel_params.stride_w, **kwargs) 103 | 104 | def map_relu(self, node): 105 | return TensorFlowNode('relu') 106 | 107 | def map_pooling(self, node): 108 | pool_type = node.parameters.pool 109 | if pool_type == 0: 110 | pool_op = 'max_pool' 111 | elif pool_type == 1: 112 | pool_op = 'avg_pool' 113 | else: 114 | # Stochastic pooling, for instance. 
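            # Caffe's PoolingParameter.PoolMethod enum is MAX = 0, AVE = 1,
            # STOCHASTIC = 2; TensorFlow has no stochastic-pooling op, so
            # anything other than 0 or 1 is rejected below.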
115 | raise KaffeError('Unsupported pooling type.') 116 | (kernel_params, padding) = self.get_kernel_params(node) 117 | return TensorFlowNode(pool_op, kernel_params.kernel_h, kernel_params.kernel_w, 118 | kernel_params.stride_h, kernel_params.stride_w, **padding) 119 | 120 | def map_inner_product(self, node): 121 | #TODO: Axis 122 | assert node.parameters.axis == 1 123 | #TODO: Unbiased 124 | assert node.parameters.bias_term == True 125 | return MaybeActivated(node)('fc', node.parameters.num_output) 126 | 127 | def map_softmax(self, node): 128 | return TensorFlowNode('softmax') 129 | 130 | def map_lrn(self, node): 131 | params = node.parameters 132 | # The window size must be an odd value. For a window 133 | # size of (2*n+1), TensorFlow defines depth_radius = n. 134 | assert params.local_size % 2 == 1 135 | # Caffe scales by (alpha/(2*n+1)), whereas TensorFlow 136 | # just scales by alpha (as does Krizhevsky's paper). 137 | # We'll account for that here. 138 | alpha = params.alpha / float(params.local_size) 139 | return TensorFlowNode('lrn', int(params.local_size / 2), alpha, params.beta) 140 | 141 | def map_concat(self, node): 142 | axis = (2, 3, 1, 0)[node.parameters.axis] 143 | return TensorFlowNode('concat', axis) 144 | 145 | def map_dropout(self, node): 146 | return TensorFlowNode('dropout', node.parameters.dropout_ratio) 147 | 148 | def map_batch_norm(self, node): 149 | scale_offset = len(node.data) == 4 150 | kwargs = {'is_training': True} if scale_offset else {'is_training': True, 'scale': False} 151 | return MaybeActivated(node, default=False)('batch_normalization', **kwargs) 152 | 153 | def map_eltwise(self, node): 154 | operations = {0: 'multiply', 1: 'add', 2: 'max'} 155 | op_code = node.parameters.operation 156 | try: 157 | return TensorFlowNode(operations[op_code]) 158 | except KeyError: 159 | raise KaffeError('Unknown elementwise operation: {}'.format(op_code)) 160 | 161 | def commit(self, chains): 162 | return chains 163 | 164 | 165 | class TensorFlowEmitter(object): 166 | 167 | def __init__(self, tab=None): 168 | self.tab = tab or ' ' * 4 169 | self.prefix = '' 170 | 171 | def indent(self): 172 | self.prefix += self.tab 173 | 174 | def outdent(self): 175 | self.prefix = self.prefix[:-len(self.tab)] 176 | 177 | def statement(self, s): 178 | return self.prefix + s + '\n' 179 | 180 | def emit_imports(self): 181 | return self.statement('from kaffe.tensorflow import Network\n') 182 | 183 | def emit_class_def(self, name): 184 | return self.statement('class %s(Network):' % (name)) 185 | 186 | def emit_setup_def(self): 187 | return self.statement('def setup(self):') 188 | 189 | def emit_parents(self, chain): 190 | assert len(chain) 191 | s = '(self.feed(' 192 | sep = ', \n' + self.prefix + (' ' * len(s)) 193 | s += sep.join(["'%s'" % parent.name for parent in chain[0].node.parents]) 194 | return self.statement(s + ')') 195 | 196 | def emit_node(self, node): 197 | return self.statement(' ' * 5 + '.' 
+ node.emit()) 198 | 199 | def emit(self, name, chains): 200 | s = self.emit_imports() 201 | s += self.emit_class_def(name) 202 | self.indent() 203 | s += self.emit_setup_def() 204 | self.indent() 205 | blocks = [] 206 | for chain in chains: 207 | b = '' 208 | b += self.emit_parents(chain) 209 | for node in chain: 210 | b += self.emit_node(node) 211 | blocks.append(b[:-1] + ')') 212 | s = s + '\n\n'.join(blocks) 213 | return s 214 | 215 | 216 | class TensorFlowTransformer(object): 217 | 218 | def __init__(self, def_path, data_path, verbose=True, phase='test'): 219 | self.verbose = verbose 220 | self.phase = phase 221 | self.load(def_path, data_path, phase) 222 | self.params = None 223 | self.source = None 224 | 225 | def load(self, def_path, data_path, phase): 226 | # Build the graph 227 | graph = GraphBuilder(def_path, phase).build() 228 | 229 | if data_path is not None: 230 | # Load and associate learned parameters 231 | graph = DataInjector(def_path, data_path)(graph) 232 | 233 | # Transform the graph 234 | transformers = [ 235 | # Fuse split batch normalization layers 236 | BatchNormScaleBiasFuser(), 237 | 238 | # Fuse ReLUs 239 | # TODO: Move non-linearity application to layer wrapper, allowing 240 | # any arbitrary operation to be optionally activated. 241 | ReLUFuser(allowed_parent_types=[NodeKind.Convolution, NodeKind.InnerProduct, 242 | NodeKind.BatchNorm]), 243 | 244 | # Rename nodes 245 | # Slashes are used for scoping in TensorFlow. Replace slashes 246 | # in node names with underscores. 247 | # (Caffe's GoogLeNet implementation uses slashes) 248 | NodeRenamer(lambda node: node.name.replace('/', '_')) 249 | ] 250 | self.graph = graph.transformed(transformers) 251 | 252 | # Display the graph 253 | if self.verbose: 254 | print_stderr(self.graph) 255 | 256 | def transform_data(self): 257 | if self.params is None: 258 | transformers = [ 259 | 260 | # Reshape the parameters to TensorFlow's ordering 261 | DataReshaper({ 262 | # (c_o, c_i, h, w) -> (h, w, c_i, c_o) 263 | NodeKind.Convolution: (2, 3, 1, 0), 264 | 265 | # (c_o, c_i) -> (c_i, c_o) 266 | NodeKind.InnerProduct: (1, 0) 267 | }), 268 | 269 | # Pre-process batch normalization data 270 | BatchNormPreprocessor(), 271 | 272 | # Convert parameters to dictionaries 273 | ParameterNamer(), 274 | ] 275 | self.graph = self.graph.transformed(transformers) 276 | self.params = {node.name: node.data for node in self.graph.nodes if node.data} 277 | return self.params 278 | 279 | def transform_source(self): 280 | if self.source is None: 281 | mapper = TensorFlowMapper(self.graph) 282 | chains = mapper.map() 283 | emitter = TensorFlowEmitter() 284 | self.source = emitter.emit(self.graph.name, chains) 285 | return self.source 286 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/kaffe/transformers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A collection of graph transforms. 3 | 4 | A transformer is a callable that accepts a graph and returns a transformed version. 5 | ''' 6 | 7 | import numpy as np 8 | 9 | from .caffe import get_caffe_resolver, has_pycaffe 10 | from .errors import KaffeError, print_stderr 11 | from .layers import NodeKind 12 | 13 | 14 | class DataInjector(object): 15 | ''' 16 | Associates parameters loaded from a .caffemodel file with their corresponding nodes. 
17 | ''' 18 | 19 | def __init__(self, def_path, data_path): 20 | # The .prototxt file defining the graph 21 | self.def_path = def_path 22 | # The .caffemodel file containing the learned parameters 23 | self.data_path = data_path 24 | # Set to true if the fallback protocol-buffer based backend was used 25 | self.did_use_pb = False 26 | # A list containing (layer name, parameters) tuples 27 | self.params = None 28 | # Load the parameters 29 | self.load() 30 | 31 | def load(self): 32 | if has_pycaffe(): 33 | self.load_using_caffe() 34 | else: 35 | self.load_using_pb() 36 | 37 | def load_using_caffe(self): 38 | caffe = get_caffe_resolver().caffe 39 | net = caffe.Net(self.def_path, self.data_path, caffe.TEST) 40 | data = lambda blob: blob.data 41 | self.params = [(k, map(data, v)) for k, v in net.params.items()] 42 | 43 | def load_using_pb(self): 44 | data = get_caffe_resolver().NetParameter() 45 | data.MergeFromString(open(self.data_path, 'rb').read()) 46 | pair = lambda layer: (layer.name, self.normalize_pb_data(layer)) 47 | layers = data.layers or data.layer 48 | self.params = [pair(layer) for layer in layers if layer.blobs] 49 | self.did_use_pb = True 50 | 51 | def normalize_pb_data(self, layer): 52 | transformed = [] 53 | for blob in layer.blobs: 54 | if len(blob.shape.dim): 55 | dims = blob.shape.dim 56 | c_o, c_i, h, w = map(int, [1] * (4 - len(dims)) + list(dims)) 57 | else: 58 | c_o = blob.num 59 | c_i = blob.channels 60 | h = blob.height 61 | w = blob.width 62 | data = np.array(blob.data, dtype=np.float32).reshape(c_o, c_i, h, w) 63 | transformed.append(data) 64 | return transformed 65 | 66 | def adjust_parameters(self, node, data): 67 | if not self.did_use_pb: 68 | return data 69 | # When using the protobuf-backend, each parameter initially has four dimensions. 70 | # In certain cases (like FC layers), we want to eliminate the singleton dimensions. 71 | # This implementation takes care of the common cases. However, it does leave the 72 | # potential for future issues. 73 | # The Caffe-backend does not suffer from this problem. 74 | data = list(data) 75 | squeeze_indices = [1] # Squeeze biases. 76 | if node.kind == NodeKind.InnerProduct: 77 | squeeze_indices.append(0) # Squeeze FC. 78 | for idx in squeeze_indices: 79 | data[idx] = np.squeeze(data[idx]) 80 | return data 81 | 82 | def __call__(self, graph): 83 | for layer_name, data in self.params: 84 | if layer_name in graph: 85 | node = graph.get_node(layer_name) 86 | node.data = self.adjust_parameters(node, data) 87 | else: 88 | print_stderr('Ignoring parameters for non-existent layer: %s' % layer_name) 89 | return graph 90 | 91 | 92 | class DataReshaper(object): 93 | 94 | def __init__(self, mapping, replace=True): 95 | # A dictionary mapping NodeKind to the transposed order. 96 | self.mapping = mapping 97 | # The node kinds eligible for reshaping 98 | self.reshaped_node_types = self.mapping.keys() 99 | # If true, the reshaped data will replace the old one. 100 | # Otherwise, it's set to the reshaped_data attribute. 
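        # In practice the mapping passed in is {Convolution: (2, 3, 1, 0),
        # InnerProduct: (1, 0)} (see tensorflow/transformer.py): Caffe stores
        # conv weights as (c_o, c_i, h, w), TensorFlow as (h, w, c_i, c_o).
        # The core operation, as a standalone sketch with made-up shapes:
        #
        #   import numpy as np
        #   w_caffe = np.random.randn(64, 3, 7, 7).astype(np.float32)  # (c_o, c_i, h, w)
        #   w_tf = w_caffe.transpose((2, 3, 1, 0))                     # (7, 7, 3, 64)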
101 |         self.replace = replace
102 | 
103 |     def has_spatial_parent(self, node):
104 |         try:
105 |             parent = node.get_only_parent()
106 |             s = parent.output_shape
107 |             return s.height > 1 or s.width > 1
108 |         except KaffeError:
109 |             return False
110 | 
111 |     def map(self, node_kind):
112 |         try:
113 |             return self.mapping[node_kind]
114 |         except KeyError:
115 |             raise KaffeError('Ordering not found for node kind: {}'.format(node_kind))
116 | 
117 |     def __call__(self, graph):
118 |         for node in graph.nodes:
119 |             if node.data is None:
120 |                 continue
121 |             if node.kind not in self.reshaped_node_types:
122 |                 # Check for 2+ dimensional data
123 |                 if any(len(tensor.shape) > 1 for tensor in node.data):
124 |                     print_stderr('Warning: parameters not reshaped for node: {}'.format(node))
125 |                 continue
126 |             transpose_order = self.map(node.kind)
127 |             weights = node.data[0]
128 |             if (node.kind == NodeKind.InnerProduct) and self.has_spatial_parent(node):
129 |                 # The FC layer connected to the spatial layer needs to be
130 |                 # re-wired to match the new spatial ordering.
131 |                 in_shape = node.get_only_parent().output_shape
132 |                 fc_shape = weights.shape
133 |                 output_channels = fc_shape[0]
134 |                 weights = weights.reshape((output_channels, in_shape.channels, in_shape.height,
135 |                                            in_shape.width))
136 |                 weights = weights.transpose(self.map(NodeKind.Convolution))
137 |                 node.reshaped_data = weights.reshape(fc_shape[transpose_order[0]],
138 |                                                      fc_shape[transpose_order[1]])
139 |             else:
140 |                 node.reshaped_data = weights.transpose(transpose_order)
141 | 
142 |         if self.replace:
143 |             for node in graph.nodes:
144 |                 if hasattr(node, 'reshaped_data'):
145 |                     # Set the weights
146 |                     node.data[0] = node.reshaped_data
147 |                     del node.reshaped_data
148 |         return graph
149 | 
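A concrete shape walk-through makes the InnerProduct branch above easier to follow. The sketch below repeats the same reshape/transpose/reshape dance with made-up VGG-style dimensions; the shapes are purely illustrative and not taken from this repository.

import numpy as np

# Caffe stores FC weights as (c_o, c_i); assume the parent layer produced
# 512 feature maps of size 7x7, so c_i = 512 * 7 * 7 = 25088 and c_o = 4096.
weights = np.zeros((4096, 25088), dtype=np.float32)

w = weights.reshape((4096, 512, 7, 7))   # recover the implicit (c_o, c_i, h, w)
w = w.transpose((2, 3, 1, 0))            # Convolution ordering: (h, w, c_i, c_o)
w = w.reshape((25088, 4096))             # back to 2-D in InnerProduct order (1, 0)

A plain transpose of the 2-D matrix would give the same shape but the wrong element order, because TensorFlow flattens spatial features as (h, w, c) while Caffe uses (c, h, w); the spatial round-trip above is what re-wires the rows.
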
150 | 
151 | class SubNodeFuser(object):
152 |     '''
153 |     An abstract helper for merging a single-child node with its single parent.
154 |     '''
155 | 
156 |     def __call__(self, graph):
157 |         nodes = graph.nodes
158 |         fused_nodes = []
159 |         for node in nodes:
160 |             if len(node.parents) != 1:
161 |                 # We're only fusing nodes with single parents
162 |                 continue
163 |             parent = node.get_only_parent()
164 |             if len(parent.children) != 1:
165 |                 # We can only fuse a node if its parent's
166 |                 # value isn't used by any other node.
167 |                 continue
168 |             if not self.is_eligible_pair(parent, node):
169 |                 continue
170 |             # Rewrite the fused node's children to its parent.
171 |             for child in node.children:
172 |                 child.parents.remove(node)
173 |                 parent.add_child(child)
174 |             # Disconnect the fused node from the graph.
175 |             parent.children.remove(node)
176 |             fused_nodes.append(node)
177 |             # Let the sub-class merge the fused node in any arbitrary way.
178 |             self.merge(parent, node)
179 |         transformed_nodes = [node for node in nodes if node not in fused_nodes]
180 |         return graph.replaced(transformed_nodes)
181 | 
182 |     def is_eligible_pair(self, parent, child):
183 |         '''Returns true if this parent/child pair is eligible for fusion.'''
184 |         raise NotImplementedError('Must be implemented by subclass.')
185 | 
186 |     def merge(self, parent, child):
187 |         '''Merge the child node into the parent.'''
188 |         raise NotImplementedError('Must be implemented by subclass.')
189 | 
190 | 
191 | class ReLUFuser(SubNodeFuser):
192 |     '''
193 |     Fuses rectified linear units with their parent nodes.
194 |     '''
195 | 
196 |     def __init__(self, allowed_parent_types=None):
197 |         # Fuse ReLUs when the parent node is one of the given types.
198 |         # If None, all node types are eligible.
199 |         self.allowed_parent_types = allowed_parent_types
200 | 
201 |     def is_eligible_pair(self, parent, child):
202 |         return ((self.allowed_parent_types is None or parent.kind in self.allowed_parent_types) and
203 |                 child.kind == NodeKind.ReLU)
204 | 
205 |     def merge(self, parent, _):
206 |         parent.metadata['relu'] = True
207 | 
208 | 
209 | class BatchNormScaleBiasFuser(SubNodeFuser):
210 |     '''
211 |     The original batch normalization paper includes two learned
212 |     parameters: a scaling factor \gamma and a bias \beta.
213 |     Caffe's implementation does not include these two. However, this is commonly
214 |     replicated by adding a scaling+bias layer immediately after the batch norm.
215 | 
216 |     This fuser merges the scaling+bias layer with the batch norm.
217 |     '''
218 | 
219 |     def is_eligible_pair(self, parent, child):
220 |         return (parent.kind == NodeKind.BatchNorm and child.kind == NodeKind.Scale and
221 |                 child.parameters.axis == 1 and child.parameters.bias_term == True)
222 | 
223 |     def merge(self, parent, child):
224 |         parent.scale_bias_node = child
225 | 
226 | 
227 | class BatchNormPreprocessor(object):
228 |     '''
229 |     Prescale batch normalization parameters.
230 |     Concatenate gamma (scale) and beta (bias) terms if set.
231 |     '''
232 | 
233 |     def __call__(self, graph):
234 |         for node in graph.nodes:
235 |             if node.kind != NodeKind.BatchNorm:
236 |                 continue
237 |             assert node.data is not None
238 |             assert len(node.data) == 3
239 |             mean, variance, scale = node.data
240 |             # Prescale the stats
241 |             scaling_factor = 1.0 / scale if scale != 0 else 0
242 |             mean *= scaling_factor
243 |             variance *= scaling_factor
244 |             # Replace with the updated values
245 |             node.data = [mean, variance]
246 |             if hasattr(node, 'scale_bias_node'):
247 |                 # Include the scale and bias terms
248 |                 gamma, beta = node.scale_bias_node.data
249 |                 node.data += [gamma, beta]
250 |         return graph
251 | 
252 | 
253 | class NodeRenamer(object):
254 |     '''
255 |     Renames nodes in the graph using a given unary function that
256 |     accepts a node and returns its new name.
257 |     '''
258 | 
259 |     def __init__(self, renamer):
260 |         self.renamer = renamer
261 | 
262 |     def __call__(self, graph):
263 |         for node in graph.nodes:
264 |             node.name = self.renamer(node)
265 |         return graph
266 | 
267 | 
268 | class ParameterNamer(object):
269 |     '''
270 |     Convert layer data arrays to a dictionary mapping parameter names to their values.
271 | ''' 272 | 273 | def __call__(self, graph): 274 | for node in graph.nodes: 275 | if node.data is None: 276 | continue 277 | if node.kind in (NodeKind.Convolution, NodeKind.InnerProduct): 278 | names = ('weights',) 279 | if node.parameters.bias_term: 280 | names += ('biases',) 281 | elif node.kind == NodeKind.BatchNorm: 282 | names = ('moving_mean', 'moving_variance') 283 | if len(node.data) == 4: 284 | names += ('gamma', 'beta') 285 | else: 286 | print_stderr('WARNING: Unhandled parameters: {}'.format(node.kind)) 287 | continue 288 | assert len(names) == len(node.data) 289 | node.data = dict(zip(names, node.data)) 290 | return graph 291 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/misc/2007_000129.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spyflying/CMPC-Refseg/094639b8bf00cc169ea7b49cdf9c87fdfc70d963/external/tensorflow-deeplab-resnet/misc/2007_000129.jpg -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/misc/2007_000129.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spyflying/CMPC-Refseg/094639b8bf00cc169ea7b49cdf9c87fdfc70d963/external/tensorflow-deeplab-resnet/misc/2007_000129.png -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/npy2ckpt.py: -------------------------------------------------------------------------------- 1 | """Conversion of the .npy weights into the .ckpt ones. 2 | 3 | This script converts the weights of the DeepLab-ResNet model 4 | from the numpy format into the TensorFlow one. 5 | """ 6 | 7 | from __future__ import print_function 8 | 9 | import argparse 10 | import os 11 | 12 | import tensorflow as tf 13 | import numpy as np 14 | 15 | from deeplab_resnet import DeepLabResNetModel 16 | 17 | SAVE_DIR = './' 18 | 19 | def get_arguments(): 20 | """Parse all the arguments provided from the CLI. 21 | 22 | Returns: 23 | A list of parsed arguments. 24 | """ 25 | parser = argparse.ArgumentParser(description="NPY to CKPT converter.") 26 | parser.add_argument("npy_path", type=str, 27 | help="Path to the .npy file, which contains the weights.") 28 | parser.add_argument("--save-dir", type=str, default=SAVE_DIR, 29 | help="Where to save the converted .ckpt file.") 30 | return parser.parse_args() 31 | 32 | def save(saver, sess, logdir): 33 | model_name = 'model.ckpt' 34 | checkpoint_path = os.path.join(logdir, model_name) 35 | 36 | if not os.path.exists(logdir): 37 | os.makedirs(logdir) 38 | 39 | saver.save(sess, checkpoint_path, write_meta_graph=False) 40 | print('The weights have been converted to {}.'.format(checkpoint_path)) 41 | 42 | 43 | def main(): 44 | """Create the model and start the training.""" 45 | args = get_arguments() 46 | 47 | # Default image. 48 | image_batch = tf.constant(0, tf.float32, shape=[1, 321, 321, 3]) 49 | # Create network. 50 | net = DeepLabResNetModel({'data': image_batch}) 51 | var_list = tf.global_variables() 52 | 53 | # Set up tf session and initialize variables. 54 | config = tf.ConfigProto() 55 | config.gpu_options.allow_growth = True 56 | 57 | with tf.Session(config=config) as sess: 58 | init = tf.global_variables_initializer() 59 | sess.run(init) 60 | 61 | # Loading .npy weights. 62 | net.load(args.npy_path, sess) 63 | 64 | # Saver for converting the loaded weights into .ckpt. 
65 | saver = tf.train.Saver(var_list=var_list, write_version=1) 66 | save(saver, sess, args.save_dir) 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython>=0.19.2 2 | numpy>=1.7.1 3 | matplotlib>=1.3.1 4 | Pillow>=2.3.0 5 | six>=1.1.0 6 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/train.py: -------------------------------------------------------------------------------- 1 | """Training script for the DeepLab-ResNet network on the PASCAL VOC dataset 2 | for semantic image segmentation. 3 | 4 | This script trains the model using augmented PASCAL VOC, 5 | which contains approximately 10000 images for training and 1500 images for validation. 6 | """ 7 | 8 | from __future__ import print_function 9 | 10 | import argparse 11 | from datetime import datetime 12 | import os 13 | import sys 14 | import time 15 | 16 | import tensorflow as tf 17 | import numpy as np 18 | 19 | from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, inv_preprocess, prepare_label 20 | 21 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 22 | 23 | BATCH_SIZE = 10 24 | DATA_DIRECTORY = '/home/VOCdevkit' 25 | DATA_LIST_PATH = './dataset/train.txt' 26 | IGNORE_LABEL = 255 27 | INPUT_SIZE = '321,321' 28 | LEARNING_RATE = 2.5e-4 29 | MOMENTUM = 0.9 30 | NUM_CLASSES = 21 31 | NUM_STEPS = 20001 32 | POWER = 0.9 33 | RANDOM_SEED = 1234 34 | RESTORE_FROM = './deeplab_resnet.ckpt' 35 | SAVE_NUM_IMAGES = 2 36 | SAVE_PRED_EVERY = 1000 37 | SNAPSHOT_DIR = './snapshots/' 38 | WEIGHT_DECAY = 0.0005 39 | 40 | 41 | def get_arguments(): 42 | """Parse all the arguments provided from the CLI. 43 | 44 | Returns: 45 | A list of parsed arguments. 
46 | """ 47 | parser = argparse.ArgumentParser(description="DeepLab-ResNet Network") 48 | parser.add_argument("--batch-size", type=int, default=BATCH_SIZE, 49 | help="Number of images sent to the network in one step.") 50 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY, 51 | help="Path to the directory containing the PASCAL VOC dataset.") 52 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH, 53 | help="Path to the file listing the images in the dataset.") 54 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL, 55 | help="The index of the label to ignore during the training.") 56 | parser.add_argument("--input-size", type=str, default=INPUT_SIZE, 57 | help="Comma-separated string with height and width of images.") 58 | parser.add_argument("--is-training", action="store_true", 59 | help="Whether to updates the running means and variances during the training.") 60 | parser.add_argument("--learning-rate", type=float, default=LEARNING_RATE, 61 | help="Base learning rate for training with polynomial decay.") 62 | parser.add_argument("--momentum", type=float, default=MOMENTUM, 63 | help="Momentum component of the optimiser.") 64 | parser.add_argument("--not-restore-last", action="store_true", 65 | help="Whether to not restore last (FC) layers.") 66 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES, 67 | help="Number of classes to predict (including background).") 68 | parser.add_argument("--num-steps", type=int, default=NUM_STEPS, 69 | help="Number of training steps.") 70 | parser.add_argument("--power", type=float, default=POWER, 71 | help="Decay parameter to compute the learning rate.") 72 | parser.add_argument("--random-mirror", action="store_true", 73 | help="Whether to randomly mirror the inputs during the training.") 74 | parser.add_argument("--random-scale", action="store_true", 75 | help="Whether to randomly scale the inputs during the training.") 76 | parser.add_argument("--random-seed", type=int, default=RANDOM_SEED, 77 | help="Random seed to have reproducible results.") 78 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM, 79 | help="Where restore model parameters from.") 80 | parser.add_argument("--save-num-images", type=int, default=SAVE_NUM_IMAGES, 81 | help="How many images to save.") 82 | parser.add_argument("--save-pred-every", type=int, default=SAVE_PRED_EVERY, 83 | help="Save summaries and checkpoint every often.") 84 | parser.add_argument("--snapshot-dir", type=str, default=SNAPSHOT_DIR, 85 | help="Where to save snapshots of the model.") 86 | parser.add_argument("--weight-decay", type=float, default=WEIGHT_DECAY, 87 | help="Regularisation parameter for L2-loss.") 88 | return parser.parse_args() 89 | 90 | def save(saver, sess, logdir, step): 91 | '''Save weights. 92 | 93 | Args: 94 | saver: TensorFlow Saver object. 95 | sess: TensorFlow session. 96 | logdir: path to the snapshots directory. 97 | step: current training step. 98 | ''' 99 | model_name = 'model.ckpt' 100 | checkpoint_path = os.path.join(logdir, model_name) 101 | 102 | if not os.path.exists(logdir): 103 | os.makedirs(logdir) 104 | saver.save(sess, checkpoint_path, global_step=step) 105 | print('The checkpoint has been created.') 106 | 107 | def load(saver, sess, ckpt_path): 108 | '''Load trained weights. 109 | 110 | Args: 111 | saver: TensorFlow Saver object. 112 | sess: TensorFlow session. 113 | ckpt_path: path to checkpoint file with parameters. 
114 | ''' 115 | saver.restore(sess, ckpt_path) 116 | print("Restored model parameters from {}".format(ckpt_path)) 117 | 118 | def main(): 119 | """Create the model and start the training.""" 120 | args = get_arguments() 121 | 122 | h, w = map(int, args.input_size.split(',')) 123 | input_size = (h, w) 124 | 125 | tf.set_random_seed(args.random_seed) 126 | 127 | # Create queue coordinator. 128 | coord = tf.train.Coordinator() 129 | 130 | # Load reader. 131 | with tf.name_scope("create_inputs"): 132 | reader = ImageReader( 133 | args.data_dir, 134 | args.data_list, 135 | input_size, 136 | args.random_scale, 137 | args.random_mirror, 138 | args.ignore_label, 139 | IMG_MEAN, 140 | coord) 141 | image_batch, label_batch = reader.dequeue(args.batch_size) 142 | 143 | # Create network. 144 | net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes) 145 | # For a small batch size, it is better to keep 146 | # the statistics of the BN layers (running means and variances) 147 | # frozen, and to not update the values provided by the pre-trained model. 148 | # If is_training=True, the statistics will be updated during the training. 149 | # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) 150 | # if they are presented in var_list of the optimiser definition. 151 | 152 | # Predictions. 153 | raw_output = net.layers['fc1_voc12'] 154 | # Which variables to load. Running means and variances are not trainable, 155 | # thus all_variables() should be restored. 156 | restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last] 157 | all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name] 158 | fc_trainable = [v for v in all_trainable if 'fc' in v.name] 159 | conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0 160 | fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0 161 | fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0 162 | assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable)) 163 | assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable)) 164 | 165 | 166 | # Predictions: ignoring all predictions with labels greater or equal than n_classes 167 | raw_prediction = tf.reshape(raw_output, [-1, args.num_classes]) 168 | label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w] 169 | raw_gt = tf.reshape(label_proc, [-1,]) 170 | indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1) 171 | gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) 172 | prediction = tf.gather(raw_prediction, indices) 173 | 174 | 175 | # Pixel-wise softmax loss. 176 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt) 177 | l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name] 178 | reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses) 179 | 180 | # Processed predictions: for visualisation. 181 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,]) 182 | raw_output_up = tf.argmax(raw_output_up, dimension=3) 183 | pred = tf.expand_dims(raw_output_up, dim=3) 184 | 185 | # Image summary. 
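The gather-based masking a few lines above is the core trick for honouring --ignore-label: out-of-range pixels are removed from the loss entirely rather than being clamped to a valid class. A toy-sized sketch of the same pattern, with made-up values:

import tensorflow as tf

num_classes = 21
raw_gt = tf.constant([0, 5, 255, 20, 255])   # flattened labels, 255 = ignore
logits = tf.random_normal([5, num_classes])  # matching flattened predictions

keep = tf.squeeze(tf.where(tf.less_equal(raw_gt, num_classes - 1)), 1)
gt = tf.cast(tf.gather(raw_gt, keep), tf.int32)   # -> [0, 5, 20]
pred = tf.gather(logits, keep)                    # rows 0, 1 and 3
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=gt))
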
186 | images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8) 187 | labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8) 188 | preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8) 189 | 190 | total_summary = tf.summary.image('images', 191 | tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), 192 | max_outputs=args.save_num_images) # Concatenate row-wise. 193 | summary_writer = tf.summary.FileWriter(args.snapshot_dir, 194 | graph=tf.get_default_graph()) 195 | 196 | # Define loss and optimisation parameters. 197 | base_lr = tf.constant(args.learning_rate) 198 | step_ph = tf.placeholder(dtype=tf.float32, shape=()) 199 | learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power)) 200 | 201 | opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum) 202 | opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum) 203 | opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum) 204 | 205 | grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable) 206 | grads_conv = grads[:len(conv_trainable)] 207 | grads_fc_w = grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))] 208 | grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):] 209 | 210 | train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable)) 211 | train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable)) 212 | train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable)) 213 | 214 | train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b) 215 | 216 | 217 | # Set up tf session and initialize variables. 218 | config = tf.ConfigProto() 219 | config.gpu_options.allow_growth = True 220 | sess = tf.Session(config=config) 221 | init = tf.global_variables_initializer() 222 | 223 | sess.run(init) 224 | 225 | # Saver for storing checkpoints of the model. 226 | saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10) 227 | 228 | # Load variables if the checkpoint is provided. 229 | if args.restore_from is not None: 230 | loader = tf.train.Saver(var_list=restore_var) 231 | load(loader, sess, args.restore_from) 232 | 233 | # Start queue threads. 234 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 235 | 236 | # Iterate over training steps. 
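One detail of the optimisation setup above is worth pinning down: the polynomial schedule drives the learning rate smoothly to zero at num_steps. A quick sanity check with this file's defaults:

# lr = base_lr * (1 - step / num_steps) ** power, with the defaults above
base_lr, num_steps, power = 2.5e-4, 20001, 0.9

for step in (0, 10000, 19000):
    print(step, base_lr * (1 - step / float(num_steps)) ** power)
# 0 -> 2.5e-4, 10000 -> ~1.34e-4, 19000 -> ~1.69e-5
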
237 | for step in range(args.num_steps): 238 | start_time = time.time() 239 | feed_dict = { step_ph : step } 240 | 241 | if step % args.save_pred_every == 0: 242 | loss_value, images, labels, preds, summary, _ = sess.run([reduced_loss, image_batch, label_batch, pred, total_summary, train_op], feed_dict=feed_dict) 243 | summary_writer.add_summary(summary, step) 244 | save(saver, sess, args.snapshot_dir, step) 245 | else: 246 | loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict) 247 | duration = time.time() - start_time 248 | print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration)) 249 | coord.request_stop() 250 | coord.join(threads) 251 | 252 | if __name__ == '__main__': 253 | main() 254 | -------------------------------------------------------------------------------- /external/tensorflow-deeplab-resnet/train_msc.py: -------------------------------------------------------------------------------- 1 | """Training script with multi-scale inputs for the DeepLab-ResNet network on the PASCAL VOC dataset 2 | for semantic image segmentation. 3 | 4 | This script trains the model using augmented PASCAL VOC, 5 | which contains approximately 10000 images for training and 1500 images for validation. 6 | """ 7 | 8 | from __future__ import print_function 9 | 10 | import argparse 11 | from datetime import datetime 12 | import os 13 | import sys 14 | import time 15 | 16 | import tensorflow as tf 17 | import numpy as np 18 | 19 | from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, inv_preprocess, prepare_label 20 | 21 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 22 | 23 | BATCH_SIZE = 1 24 | DATA_DIRECTORY = '/home/VOCdevkit' 25 | DATA_LIST_PATH = './dataset/train.txt' 26 | GRAD_UPDATE_EVERY = 10 27 | IGNORE_LABEL = 255 28 | INPUT_SIZE = '321,321' 29 | LEARNING_RATE = 2.5e-4 30 | MOMENTUM = 0.9 31 | NUM_CLASSES = 21 32 | NUM_STEPS = 20001 33 | POWER = 0.9 34 | RANDOM_SEED = 1234 35 | RESTORE_FROM = './deeplab_resnet.ckpt' 36 | SAVE_NUM_IMAGES = 1 37 | SAVE_PRED_EVERY = 1000 38 | SNAPSHOT_DIR = './snapshots/' 39 | WEIGHT_DECAY = 0.0005 40 | 41 | 42 | def get_arguments(): 43 | """Parse all the arguments provided from the CLI. 44 | 45 | Returns: 46 | A list of parsed arguments. 
47 | """ 48 | parser = argparse.ArgumentParser(description="DeepLab-ResNet Network") 49 | parser.add_argument("--batch-size", type=int, default=BATCH_SIZE, 50 | help="Number of images sent to the network in one step.") 51 | parser.add_argument("--data-dir", type=str, default=DATA_DIRECTORY, 52 | help="Path to the directory containing the PASCAL VOC dataset.") 53 | parser.add_argument("--data-list", type=str, default=DATA_LIST_PATH, 54 | help="Path to the file listing the images in the dataset.") 55 | parser.add_argument("--grad-update-every", type=int, default=GRAD_UPDATE_EVERY, 56 | help="Number of steps after which gradient update is applied.") 57 | parser.add_argument("--ignore-label", type=int, default=IGNORE_LABEL, 58 | help="The index of the label to ignore during the training.") 59 | parser.add_argument("--input-size", type=str, default=INPUT_SIZE, 60 | help="Comma-separated string with height and width of images.") 61 | parser.add_argument("--is-training", action="store_true", 62 | help="Whether to update the running means and variances during the training.") 63 | parser.add_argument("--learning-rate", type=float, default=LEARNING_RATE, 64 | help="Base learning rate for training with polynomial decay.") 65 | parser.add_argument("--momentum", type=float, default=MOMENTUM, 66 | help="Momentum component of the optimiser.") 67 | parser.add_argument("--not-restore-last", action="store_true", 68 | help="Whether to not restore last (FC) layers.") 69 | parser.add_argument("--num-classes", type=int, default=NUM_CLASSES, 70 | help="Number of classes to predict (including background).") 71 | parser.add_argument("--num-steps", type=int, default=NUM_STEPS, 72 | help="Number of training steps.") 73 | parser.add_argument("--power", type=float, default=POWER, 74 | help="Decay parameter to compute the learning rate.") 75 | parser.add_argument("--random-mirror", action="store_true", 76 | help="Whether to randomly mirror the inputs during the training.") 77 | parser.add_argument("--random-scale", action="store_true", 78 | help="Whether to randomly scale the inputs during the training.") 79 | parser.add_argument("--random-seed", type=int, default=RANDOM_SEED, 80 | help="Random seed to have reproducible results.") 81 | parser.add_argument("--restore-from", type=str, default=RESTORE_FROM, 82 | help="Where restore model parameters from.") 83 | parser.add_argument("--save-num-images", type=int, default=SAVE_NUM_IMAGES, 84 | help="How many images to save.") 85 | parser.add_argument("--save-pred-every", type=int, default=SAVE_PRED_EVERY, 86 | help="Save summaries and checkpoint every often.") 87 | parser.add_argument("--snapshot-dir", type=str, default=SNAPSHOT_DIR, 88 | help="Where to save snapshots of the model.") 89 | parser.add_argument("--weight-decay", type=float, default=WEIGHT_DECAY, 90 | help="Regularisation parameter for L2-loss.") 91 | return parser.parse_args() 92 | 93 | def save(saver, sess, logdir, step): 94 | '''Save weights. 95 | 96 | Args: 97 | saver: TensorFlow Saver object. 98 | sess: TensorFlow session. 99 | logdir: path to the snapshots directory. 100 | step: current training step. 101 | ''' 102 | model_name = 'model.ckpt' 103 | checkpoint_path = os.path.join(logdir, model_name) 104 | 105 | if not os.path.exists(logdir): 106 | os.makedirs(logdir) 107 | saver.save(sess, checkpoint_path, global_step=step) 108 | print('The checkpoint has been created.') 109 | 110 | def load(saver, sess, ckpt_path): 111 | '''Load trained weights. 112 | 113 | Args: 114 | saver: TensorFlow Saver object. 
115 | sess: TensorFlow session. 116 | ckpt_path: path to checkpoint file with parameters. 117 | ''' 118 | saver.restore(sess, ckpt_path) 119 | print("Restored model parameters from {}".format(ckpt_path)) 120 | 121 | def main(): 122 | """Create the model and start the training.""" 123 | args = get_arguments() 124 | 125 | h, w = map(int, args.input_size.split(',')) 126 | input_size = (h, w) 127 | 128 | tf.set_random_seed(args.random_seed) 129 | 130 | # Create queue coordinator. 131 | coord = tf.train.Coordinator() 132 | 133 | # Load reader. 134 | with tf.name_scope("create_inputs"): 135 | reader = ImageReader( 136 | args.data_dir, 137 | args.data_list, 138 | input_size, 139 | args.random_scale, 140 | args.random_mirror, 141 | args.ignore_label, 142 | IMG_MEAN, 143 | coord) 144 | image_batch, label_batch = reader.dequeue(args.batch_size) 145 | image_batch075 = tf.image.resize_images(image_batch, [int(h * 0.75), int(w * 0.75)]) 146 | image_batch05 = tf.image.resize_images(image_batch, [int(h * 0.5), int(w * 0.5)]) 147 | 148 | # Create network. 149 | with tf.variable_scope('', reuse=False): 150 | net = DeepLabResNetModel({'data': image_batch}, is_training=args.is_training, num_classes=args.num_classes) 151 | with tf.variable_scope('', reuse=True): 152 | net075 = DeepLabResNetModel({'data': image_batch075}, is_training=args.is_training, num_classes=args.num_classes) 153 | with tf.variable_scope('', reuse=True): 154 | net05 = DeepLabResNetModel({'data': image_batch05}, is_training=args.is_training, num_classes=args.num_classes) 155 | # For a small batch size, it is better to keep 156 | # the statistics of the BN layers (running means and variances) 157 | # frozen, and to not update the values provided by the pre-trained model. 158 | # If is_training=True, the statistics will be updated during the training. 159 | # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) 160 | # if they are presented in var_list of the optimiser definition. 161 | 162 | # Predictions. 163 | raw_output100 = net.layers['fc1_voc12'] 164 | raw_output075 = net075.layers['fc1_voc12'] 165 | raw_output05 = net05.layers['fc1_voc12'] 166 | raw_output = tf.reduce_max(tf.stack([raw_output100, 167 | tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3,]), 168 | tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3,])]), axis=0) 169 | # Which variables to load. Running means and variances are not trainable, 170 | # thus all_variables() should be restored. 
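The three network instances built above stay in sync because of the reuse flag: inside a scope with reuse=True, tf.get_variable returns the already-created variables instead of allocating new ones, so the 0.75x and 0.5x branches run the same weights at different resolutions. The same mechanism in miniature (a sketch, not repository code):

import tensorflow as tf

def layer(x):
    w = tf.get_variable('w', [1], initializer=tf.ones_initializer())
    return x * w

with tf.variable_scope('net', reuse=False):
    y1 = layer(tf.constant([1.0]))
with tf.variable_scope('net', reuse=True):
    y2 = layer(tf.constant([2.0]))   # reuses the same 'net/w', no new variable

assert len(tf.global_variables()) == 1
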
171 | restore_var = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last] 172 | all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name] 173 | fc_trainable = [v for v in all_trainable if 'fc' in v.name] 174 | conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0 175 | fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0 176 | fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0 177 | assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable)) 178 | assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable)) 179 | 180 | 181 | # Predictions: ignoring all predictions with labels greater or equal than n_classes 182 | raw_prediction = tf.reshape(raw_output, [-1, args.num_classes]) 183 | raw_prediction100 = tf.reshape(raw_output100, [-1, args.num_classes]) 184 | raw_prediction075 = tf.reshape(raw_output075, [-1, args.num_classes]) 185 | raw_prediction05 = tf.reshape(raw_output05, [-1, args.num_classes]) 186 | 187 | label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w] 188 | label_proc075 = prepare_label(label_batch, tf.stack(raw_output075.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) 189 | label_proc05 = prepare_label(label_batch, tf.stack(raw_output05.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) 190 | 191 | raw_gt = tf.reshape(label_proc, [-1,]) 192 | raw_gt075 = tf.reshape(label_proc075, [-1,]) 193 | raw_gt05 = tf.reshape(label_proc05, [-1,]) 194 | 195 | indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1) 196 | indices075 = tf.squeeze(tf.where(tf.less_equal(raw_gt075, args.num_classes - 1)), 1) 197 | indices05 = tf.squeeze(tf.where(tf.less_equal(raw_gt05, args.num_classes - 1)), 1) 198 | 199 | gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) 200 | gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32) 201 | gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32) 202 | 203 | prediction = tf.gather(raw_prediction, indices) 204 | prediction100 = tf.gather(raw_prediction100, indices) 205 | prediction075 = tf.gather(raw_prediction075, indices075) 206 | prediction05 = tf.gather(raw_prediction05, indices05) 207 | 208 | 209 | # Pixel-wise softmax loss. 210 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt) 211 | loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction100, labels=gt) 212 | loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction075, labels=gt075) 213 | loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction05, labels=gt05) 214 | l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name] 215 | reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(loss05) + tf.add_n(l2_losses) 216 | 217 | # Processed predictions: for visualisation. 218 | raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,]) 219 | raw_output_up = tf.argmax(raw_output_up, dimension=3) 220 | pred = tf.expand_dims(raw_output_up, dim=3) 221 | 222 | # Image summary. 
223 | images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8) 224 | labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8) 225 | preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8) 226 | 227 | total_summary = tf.summary.image('images', 228 | tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), 229 | max_outputs=args.save_num_images) # Concatenate row-wise. 230 | summary_writer = tf.summary.FileWriter(args.snapshot_dir, 231 | graph=tf.get_default_graph()) 232 | 233 | # Define loss and optimisation parameters. 234 | base_lr = tf.constant(args.learning_rate) 235 | step_ph = tf.placeholder(dtype=tf.float32, shape=()) 236 | learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power)) 237 | 238 | opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum) 239 | opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum) 240 | opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum) 241 | 242 | # Define a variable to accumulate gradients. 243 | accum_grads = [tf.Variable(tf.zeros_like(v.initialized_value()), 244 | trainable=False) for v in conv_trainable + fc_w_trainable + fc_b_trainable] 245 | 246 | # Define an operation to clear the accumulated gradients for next batch. 247 | zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads] 248 | 249 | # Compute gradients. 250 | grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable) 251 | 252 | # Accumulate and normalise the gradients. 253 | accum_grads_op = [accum_grads[i].assign_add(grad / args.grad_update_every) for i, grad in 254 | enumerate(grads)] 255 | 256 | grads_conv = accum_grads[:len(conv_trainable)] 257 | grads_fc_w = accum_grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))] 258 | grads_fc_b = accum_grads[(len(conv_trainable) + len(fc_w_trainable)):] 259 | 260 | # Apply the gradients. 261 | train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable)) 262 | train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable)) 263 | train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable)) 264 | 265 | train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b) 266 | 267 | 268 | # Set up tf session and initialize variables. 269 | config = tf.ConfigProto() 270 | config.gpu_options.allow_growth = True 271 | sess = tf.Session(config=config) 272 | init = tf.global_variables_initializer() 273 | 274 | sess.run(init) 275 | 276 | # Saver for storing checkpoints of the model. 277 | saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10) 278 | 279 | # Load variables if the checkpoint is provided. 280 | if args.restore_from is not None: 281 | loader = tf.train.Saver(var_list=restore_var) 282 | load(loader, sess, args.restore_from) 283 | 284 | # Start queue threads. 285 | threads = tf.train.start_queue_runners(coord=coord, sess=sess) 286 | 287 | # Iterate over training steps. 288 | for step in range(args.num_steps): 289 | start_time = time.time() 290 | feed_dict = { step_ph : step } 291 | loss_value = 0 292 | 293 | # Clear the accumulated gradients. 294 | sess.run(zero_op, feed_dict=feed_dict) 295 | 296 | # Accumulate gradients. 297 | for i in range(args.grad_update_every): 298 | _, l_val = sess.run([accum_grads_op, reduced_loss], feed_dict=feed_dict) 299 | loss_value += l_val 300 | 301 | # Normalise the loss. 
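# Note on the normalisation below: reduced_loss was summed over
# grad_update_every sub-iterations purely for logging, so dividing here
# reports the mean. The gradients themselves were already divided by
# grad_update_every inside accum_grads_op, so train_op applies the mean
# gradient of the accumulated mini-batches.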
302 | loss_value /= args.grad_update_every 303 | 304 | # Apply gradients. 305 | if step % args.save_pred_every == 0: 306 | images, labels, summary, _ = sess.run([image_batch, label_batch, total_summary, train_op], feed_dict=feed_dict) 307 | summary_writer.add_summary(summary, step) 308 | save(saver, sess, args.snapshot_dir, step) 309 | else: 310 | sess.run(train_op, feed_dict=feed_dict) 311 | 312 | duration = time.time() - start_time 313 | print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration)) 314 | coord.request_stop() 315 | coord.join(threads) 316 | 317 | if __name__ == '__main__': 318 | main() 319 | -------------------------------------------------------------------------------- /get_model.py: -------------------------------------------------------------------------------- 1 | import CMPC_model 2 | 3 | 4 | def get_segmentation_model(name, **kwargs): 5 | model = eval(name).LSTM_model(**kwargs) 6 | return model 7 | -------------------------------------------------------------------------------- /motivation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spyflying/CMPC-Refseg/094639b8bf00cc169ea7b49cdf9c87fdfc70d963/motivation.png -------------------------------------------------------------------------------- /trainval.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | LOG=logs/unc/cmpc_model 4 | mkdir -p ${LOG} 5 | now=$(date +"%Y%m%d_%H%M%S") 6 | 7 | python -u trainval_model.py \ 8 | -m train \ 9 | -d unc \ 10 | -t train \ 11 | -n CMPC_model \ 12 | -emb \ 13 | -f ckpts/unc/cmpc_model 2>&1 | tee ${LOG}/train_$now.txt 14 | 15 | python -u trainval_model.py \ 16 | -m test \ 17 | -d unc \ 18 | -t val \ 19 | -n CMPC_model \ 20 | -i 700000 \ 21 | -c \ 22 | -emb \ 23 | -f ckpts/unc/cmpc_model 2>&1 | tee ${LOG}/test_val_$now.txt 24 | -------------------------------------------------------------------------------- /trainval_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import sys 4 | import os 5 | import argparse 6 | import tensorflow as tf 7 | import skimage 8 | from skimage import io as sio 9 | import time 10 | # import matplotlib.pyplot as plt 11 | from get_model import get_segmentation_model 12 | from pydensecrf import densecrf 13 | 14 | from util import data_reader 15 | from util.processing_tools import * 16 | from util import im_processing, eval_tools, MovingAverage 17 | 18 | 19 | def train(max_iter, snapshot, dataset, setname, mu, lr, bs, tfmodel_folder, 20 | conv5, model_name, stop_iter, pre_emb=False): 21 | iters_per_log = 100 22 | data_folder = './' + dataset + '/' + setname + '_batch/' 23 | data_prefix = dataset + '_' + setname 24 | snapshot_file = os.path.join(tfmodel_folder, dataset + '_iter_%d.tfmodel') 25 | if not os.path.isdir(tfmodel_folder): 26 | os.makedirs(tfmodel_folder) 27 | 28 | cls_loss_avg = 0 29 | avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0 30 | decay = 0.99 31 | vocab_size = 8803 if dataset == 'referit' else 12112 32 | emb_name = 'referit' if dataset == 'referit' else 'Gref' 33 | 34 | if pre_emb: 35 | print("Use pretrained Embeddings.") 36 | model = get_segmentation_model(model_name, mode='train', 37 | vocab_size=vocab_size, start_lr=lr, 38 | batch_size=bs, conv5=conv5, emb_name=emb_name) 39 | else: 40 | model = get_segmentation_model(model_name, mode='train', 41 | vocab_size=vocab_size, 
start_lr=lr, 42 | batch_size=bs, conv5=conv5) 43 | 44 | weights = './data/weights/deeplab_resnet_init.ckpt' 45 | print("Loading pretrained weights from {}".format(weights)) 46 | load_var = {var.op.name: var for var in tf.global_variables() 47 | if var.name.startswith('res') or var.name.startswith('bn') or var.name.startswith('conv1')} 48 | 49 | snapshot_loader = tf.train.Saver(load_var) 50 | snapshot_saver = tf.train.Saver(max_to_keep=4) 51 | 52 | config = tf.ConfigProto() 53 | config.gpu_options.allow_growth = True 54 | sess = tf.Session(config=config) 55 | sess.run(tf.global_variables_initializer()) 56 | snapshot_loader.restore(sess, weights) 57 | 58 | im_h, im_w, num_steps = model.H, model.W, model.num_steps 59 | text_batch = np.zeros((bs, num_steps), dtype=np.float32) 60 | image_batch = np.zeros((bs, im_h, im_w, 3), dtype=np.float32) 61 | mask_batch = np.zeros((bs, im_h, im_w, 1), dtype=np.float32) 62 | valid_idx_batch = np.zeros((bs, 1), dtype=np.int32) 63 | 64 | reader = data_reader.DataReader(data_folder, data_prefix) 65 | 66 | # for time calculate 67 | last_time = time.time() 68 | time_avg = MovingAverage() 69 | for n_iter in range(max_iter): 70 | 71 | for n_batch in range(bs): 72 | batch = reader.read_batch(is_log=(n_batch == 0 and n_iter % iters_per_log == 0)) 73 | text = batch['text_batch'] 74 | im = batch['im_batch'].astype(np.float32) 75 | mask = np.expand_dims(batch['mask_batch'].astype(np.float32), axis=2) 76 | 77 | im = im[:, :, ::-1] 78 | im -= mu 79 | 80 | text_batch[n_batch, ...] = text 81 | image_batch[n_batch, ...] = im 82 | mask_batch[n_batch, ...] = mask 83 | 84 | for idx in range(text.shape[0]): 85 | if text[idx] != 0: 86 | valid_idx_batch[n_batch, :] = idx 87 | break 88 | 89 | _, cls_loss_val, lr_val, scores_val, label_val = sess.run([model.train_step, 90 | model.cls_loss, 91 | model.learning_rate, 92 | model.pred, 93 | model.target], 94 | feed_dict={ 95 | model.words: text_batch, 96 | # np.expand_dims(text, axis=0), 97 | model.im: image_batch, 98 | # np.expand_dims(im, axis=0), 99 | model.target_fine: mask_batch, 100 | # np.expand_dims(mask, axis=0) 101 | model.valid_idx: valid_idx_batch 102 | }) 103 | cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val 104 | 105 | # Accuracy 106 | accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(scores_val, label_val) 107 | avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all 108 | avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos 109 | avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg 110 | 111 | # timing 112 | cur_time = time.time() 113 | elapsed = cur_time - last_time 114 | last_time = cur_time 115 | 116 | if n_iter % iters_per_log == 0: 117 | print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f' 118 | % (n_iter, cls_loss_val, cls_loss_avg, lr_val)) 119 | print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)' 120 | % (n_iter, accuracy_all, accuracy_pos, accuracy_neg)) 121 | print('iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)' 122 | % (n_iter, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg)) 123 | time_avg.add(elapsed) 124 | print('iter = %d, cur time = %.5f, avg time = %.5f, model_name: %s' % (n_iter, elapsed, time_avg.get_avg(), model_name)) 125 | 126 | # Save snapshot 127 | if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter: 128 | snapshot_saver.save(sess, snapshot_file % (n_iter + 1)) 129 | print('snapshot saved to ' + snapshot_file % (n_iter + 1)) 130 | if (n_iter + 1) >= stop_iter: 131 | 
print('stop training at iter ' + str(stop_iter)) 132 | break 133 | 134 | print('Optimization done.') 135 | 136 | 137 | def test(iter, dataset, visualize, setname, dcrf, mu, tfmodel_folder, model_name, pre_emb=False): 138 | data_folder = './' + dataset + '/' + setname + '_batch/' 139 | data_prefix = dataset + '_' + setname 140 | if visualize: 141 | save_dir = './' + dataset + '/visualization/' + str(iter) + '/' 142 | if not os.path.isdir(save_dir): 143 | os.makedirs(save_dir) 144 | weights = os.path.join(tfmodel_folder, dataset + '_iter_' + str(iter) + '.tfmodel') 145 | print("Loading trained weights from {}".format(weights)) 146 | 147 | score_thresh = 1e-9 148 | eval_seg_iou_list = [.5, .6, .7, .8, .9] 149 | cum_I, cum_U = 0, 0 150 | mean_IoU, mean_dcrf_IoU = 0, 0 151 | seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32) 152 | if dcrf: 153 | cum_I_dcrf, cum_U_dcrf = 0, 0 154 | seg_correct_dcrf = np.zeros(len(eval_seg_iou_list), dtype=np.int32) 155 | seg_total = 0. 156 | H, W = 320, 320 157 | vocab_size = 8803 if dataset == 'referit' else 12112 158 | emb_name = 'referit' if dataset == 'referit' else 'Gref' 159 | 160 | IU_result = list() 161 | 162 | if pre_emb: 163 | # use pretrained embbeding 164 | print("Use pretrained Embeddings.") 165 | model = get_segmentation_model(model_name, H=H, W=W, 166 | mode='eval', vocab_size=vocab_size, emb_name=emb_name) 167 | else: 168 | model = get_segmentation_model(model_name, H=H, W=W, 169 | mode='eval', vocab_size=vocab_size) 170 | 171 | # Load pretrained model 172 | snapshot_restorer = tf.train.Saver() 173 | config = tf.ConfigProto() 174 | config.gpu_options.allow_growth = True 175 | sess = tf.Session(config=config) 176 | sess.run(tf.global_variables_initializer()) 177 | snapshot_restorer.restore(sess, weights) 178 | reader = data_reader.DataReader(data_folder, data_prefix, shuffle=False) 179 | 180 | NN = reader.num_batch 181 | for n_iter in range(reader.num_batch): 182 | 183 | if n_iter % (NN // 50) == 0: 184 | if n_iter / (NN // 50) % 5 == 0: 185 | sys.stdout.write(str(n_iter / (NN // 50) // 5)) 186 | else: 187 | sys.stdout.write('.') 188 | sys.stdout.flush() 189 | 190 | batch = reader.read_batch(is_log=False) 191 | text = batch['text_batch'] 192 | im = batch['im_batch'] 193 | mask = batch['mask_batch'].astype(np.float32) 194 | valid_idx = np.zeros([1], dtype=np.int32) 195 | for idx in range(text.shape[0]): 196 | if text[idx] != 0: 197 | valid_idx[0] = idx 198 | break 199 | 200 | proc_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, H, W)) 201 | proc_im_ = proc_im.astype(np.float32) 202 | proc_im_ = proc_im_[:, :, ::-1] 203 | proc_im_ -= mu 204 | 205 | scores_val, up_val, sigm_val = sess.run([model.pred, model.up, model.sigm], 206 | feed_dict={ 207 | model.words: np.expand_dims(text, axis=0), 208 | model.im: np.expand_dims(proc_im_, axis=0), 209 | model.valid_idx: np.expand_dims(valid_idx, axis=0) 210 | }) 211 | 212 | # scores_val = np.squeeze(scores_val) 213 | # pred_raw = (scores_val >= score_thresh).astype(np.float32) 214 | up_val = np.squeeze(up_val) 215 | pred_raw = (up_val >= score_thresh).astype(np.float32) 216 | predicts = im_processing.resize_and_crop(pred_raw, mask.shape[0], mask.shape[1]) 217 | if dcrf: 218 | # Dense CRF post-processing 219 | sigm_val = np.squeeze(sigm_val) 220 | d = densecrf.DenseCRF2D(W, H, 2) 221 | U = np.expand_dims(-np.log(sigm_val), axis=0) 222 | U_ = np.expand_dims(-np.log(1 - sigm_val), axis=0) 223 | unary = np.concatenate((U_, U), axis=0) 224 | unary = unary.reshape((2, -1)) 225 | 
d.setUnaryEnergy(unary) 226 | d.addPairwiseGaussian(sxy=3, compat=3) 227 | d.addPairwiseBilateral(sxy=20, srgb=3, rgbim=proc_im, compat=10) 228 | Q = d.inference(5) 229 | pred_raw_dcrf = np.argmax(Q, axis=0).reshape((H, W)).astype(np.float32) 230 | predicts_dcrf = im_processing.resize_and_crop(pred_raw_dcrf, mask.shape[0], mask.shape[1]) 231 | 232 | if visualize: 233 | sent = batch['sent_batch'][0] 234 | visualize_seg(im, mask, predicts, sent) 235 | if dcrf: 236 | visualize_seg(im, mask, predicts_dcrf, sent) 237 | 238 | I, U = eval_tools.compute_mask_IU(predicts, mask) 239 | IU_result.append({'batch_no': n_iter, 'I': I, 'U': U}) 240 | mean_IoU += float(I) / U 241 | cum_I += I 242 | cum_U += U 243 | msg = 'cumulative IoU = %f' % (cum_I / cum_U) 244 | for n_eval_iou in range(len(eval_seg_iou_list)): 245 | eval_seg_iou = eval_seg_iou_list[n_eval_iou] 246 | seg_correct[n_eval_iou] += (I / U >= eval_seg_iou) 247 | if dcrf: 248 | I_dcrf, U_dcrf = eval_tools.compute_mask_IU(predicts_dcrf, mask) 249 | mean_dcrf_IoU += float(I_dcrf) / U_dcrf 250 | cum_I_dcrf += I_dcrf 251 | cum_U_dcrf += U_dcrf 252 | msg += '\tcumulative IoU (dcrf) = %f' % (cum_I_dcrf / cum_U_dcrf) 253 | for n_eval_iou in range(len(eval_seg_iou_list)): 254 | eval_seg_iou = eval_seg_iou_list[n_eval_iou] 255 | seg_correct_dcrf[n_eval_iou] += (I_dcrf / U_dcrf >= eval_seg_iou) 256 | # print(msg) 257 | seg_total += 1 258 | 259 | # Print results 260 | print('Segmentation evaluation (without DenseCRF):') 261 | result_str = '' 262 | for n_eval_iou in range(len(eval_seg_iou_list)): 263 | result_str += 'precision@%s = %f\n' % \ 264 | (str(eval_seg_iou_list[n_eval_iou]), seg_correct[n_eval_iou] / seg_total) 265 | result_str += 'overall IoU = %f; mean IoU = %f\n' % (cum_I / cum_U, mean_IoU / seg_total) 266 | print(result_str) 267 | if dcrf: 268 | print('Segmentation evaluation (with DenseCRF):') 269 | result_str = '' 270 | for n_eval_iou in range(len(eval_seg_iou_list)): 271 | result_str += 'precision@%s = %f\n' % \ 272 | (str(eval_seg_iou_list[n_eval_iou]), seg_correct_dcrf[n_eval_iou] / seg_total) 273 | result_str += 'overall IoU = %f; mean IoU = %f\n' % (cum_I_dcrf / cum_U_dcrf, mean_dcrf_IoU / seg_total) 274 | print(result_str) 275 | 276 | 277 | def visualize_seg(im, mask, predicts, sent): 278 | # print("visualizing") 279 | vis_dir = "./visualize/lgcr_best_c5map/unc/testA" 280 | sent_dir = os.path.join(vis_dir, sent) 281 | if not os.path.exists(sent_dir): 282 | os.makedirs(sent_dir) 283 | 284 | # Ignore sio warnings of low-contrast image. 
285 | import warnings 286 | warnings.filterwarnings('ignore') 287 | 288 | sio.imsave(os.path.join(sent_dir, "im.png"), im) 289 | 290 | im_gt = np.zeros_like(im) 291 | im_gt[:, :, 2] = 170 292 | im_gt[:, :, 0] += mask.astype('uint8') * 170 293 | im_gt = im_gt.astype('int16') 294 | im_gt[:, :, 2] += mask.astype('int16') * (-170) 295 | im_gt = im_gt.astype('uint8') 296 | sio.imsave(os.path.join(sent_dir, "gt.png"), im_gt) 297 | 298 | im_seg = im / 2 299 | im_seg[:, :, 0] += predicts.astype('uint8') * 100 300 | im_seg = im_seg.astype('uint8') 301 | sio.imsave(os.path.join(sent_dir, "pred.png"), im_seg) 302 | 303 | # plt.imshow(im_seg.astype('uint8')) 304 | # plt.title(sent) 305 | # plt.show() 306 | 307 | 308 | if __name__ == "__main__": 309 | parser = argparse.ArgumentParser() 310 | parser.add_argument('-g', type=str, default='0') 311 | parser.add_argument('-i', type=int, default=800000) 312 | parser.add_argument('-s', type=int, default=100000) 313 | parser.add_argument('-st', type=int, default=700000) # stop training when get st iters 314 | parser.add_argument('-m', type=str) # 'train' 'test' 315 | parser.add_argument('-d', type=str, default='referit') # 'Gref' 'unc' 'unc+' 'referit' 316 | parser.add_argument('-t', type=str) # 'train' 'trainval' 'val' 'test' 'testA' 'testB' 317 | parser.add_argument('-f', type=str) # directory to save models 318 | parser.add_argument('-lr', type=float, default=0.00025) # start learning rate 319 | parser.add_argument('-bs', type=int, default=1) # batch size 320 | parser.add_argument('-v', default=False, action='store_true') # visualization 321 | parser.add_argument('-c', default=False, action='store_true') # whether or not apply DenseCRF 322 | parser.add_argument('-emb', default=False, action='store_true') # whether or not use Pretrained Embeddings 323 | parser.add_argument('-n', type=str, default='') # select model 324 | parser.add_argument('-conv5', default=False, action='store_true') # finetune conv layers 325 | 326 | args = parser.parse_args() 327 | # os.environ['CUDA_VISIBLE_DEVICES'] = args.g 328 | mu = np.array((104.00698793, 116.66876762, 122.67891434)) 329 | 330 | if args.m == 'train': 331 | train(max_iter=args.i, 332 | snapshot=args.s, 333 | dataset=args.d, 334 | setname=args.t, 335 | mu=mu, 336 | lr=args.lr, 337 | bs=args.bs, 338 | tfmodel_folder=args.f, 339 | conv5=args.conv5, 340 | model_name=args.n, 341 | stop_iter=args.st, 342 | pre_emb=args.emb) 343 | elif args.m == 'test': 344 | test(iter=args.i, 345 | dataset=args.d, 346 | visualize=args.v, 347 | setname=args.t, 348 | dcrf=args.c, 349 | mu=mu, 350 | tfmodel_folder=args.f, 351 | model_name=args.n, 352 | pre_emb=args.emb) 353 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import MovingAverage -------------------------------------------------------------------------------- /util/cell.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class ConvLSTMCell(tf.nn.rnn_cell.RNNCell): 4 | """A LSTM cell with convolutions instead of multiplications. 5 | Reference: 6 | Xingjian, S. H. I., et al. "Convolutional LSTM network: A machine learning approach for precipitation nowcasting." Advances in Neural Information Processing Systems. 2015. 
7 | """ 8 | 9 | def __init__(self, shape, filters, kernel, forget_bias=1.0, activation=tf.tanh, normalize=True, peephole=True, data_format='channels_last', reuse=None): 10 | super(ConvLSTMCell, self).__init__(_reuse=reuse) 11 | self._kernel = kernel 12 | self._filters = filters 13 | self._forget_bias = forget_bias 14 | self._activation = activation 15 | self._normalize = normalize 16 | self._peephole = peephole 17 | if data_format == 'channels_last': 18 | self._size = tf.TensorShape(shape + [self._filters]) 19 | self._feature_axis = self._size.ndims 20 | self._data_format = None 21 | elif data_format == 'channels_first': 22 | self._size = tf.TensorShape([self._filters] + shape) 23 | self._feature_axis = 0 24 | self._data_format = 'NC' 25 | else: 26 | raise ValueError('Unknown data_format') 27 | 28 | @property 29 | def state_size(self): 30 | return tf.nn.rnn_cell.LSTMStateTuple(self._size, self._size) 31 | 32 | @property 33 | def output_size(self): 34 | return self._size 35 | 36 | def call(self, x, state): 37 | c, h = state 38 | 39 | x = tf.concat([x, h], axis=self._feature_axis) 40 | n = x.shape[-1].value 41 | m = 4 * self._filters if self._filters > 1 else 4 42 | W = tf.get_variable('kernel', self._kernel + [n, m]) 43 | y = tf.nn.convolution(x, W, 'SAME', data_format=self._data_format) 44 | if not self._normalize: 45 | y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer()) 46 | j, i, f, o = tf.split(y, 4, axis=self._feature_axis) 47 | 48 | if self._peephole: 49 | i += tf.get_variable('W_ci', c.shape[1:]) * c 50 | f += tf.get_variable('W_cf', c.shape[1:]) * c 51 | 52 | if self._normalize: 53 | j = tf.contrib.layers.layer_norm(j) 54 | i = tf.contrib.layers.layer_norm(i) 55 | f = tf.contrib.layers.layer_norm(f) 56 | 57 | f = tf.sigmoid(f + self._forget_bias) 58 | i = tf.sigmoid(i) 59 | c = c * f + i * self._activation(j) 60 | 61 | if self._peephole: 62 | o += tf.get_variable('W_co', c.shape[1:]) * c 63 | 64 | if self._normalize: 65 | o = tf.contrib.layers.layer_norm(o) 66 | c = tf.contrib.layers.layer_norm(c) 67 | 68 | o = tf.sigmoid(o) 69 | h = o * self._activation(c) 70 | 71 | # TODO 72 | #tf.summary.histogram('forget_gate', f) 73 | #tf.summary.histogram('input_gate', i) 74 | #tf.summary.histogram('output_gate', o) 75 | #tf.summary.histogram('cell_state', c) 76 | 77 | state = tf.nn.rnn_cell.LSTMStateTuple(c, h) 78 | 79 | return h, state 80 | 81 | 82 | class ConvGRUCell(tf.nn.rnn_cell.RNNCell): 83 | """A GRU cell with convolutions instead of multiplications.""" 84 | 85 | def __init__(self, shape, filters, kernel, activation=tf.tanh, normalize=True, data_format='channels_last', reuse=None): 86 | super(ConvGRUCell, self).__init__(_reuse=reuse) 87 | self._filters = filters 88 | self._kernel = kernel 89 | self._activation = activation 90 | self._normalize = normalize 91 | if data_format == 'channels_last': 92 | self._size = tf.TensorShape(shape + [self._filters]) 93 | self._feature_axis = self._size.ndims 94 | self._data_format = None 95 | elif data_format == 'channels_first': 96 | self._size = tf.TensorShape([self._filters] + shape) 97 | self._feature_axis = 0 98 | self._data_format = 'NC' 99 | else: 100 | raise ValueError('Unknown data_format') 101 | 102 | @property 103 | def state_size(self): 104 | return self._size 105 | 106 | @property 107 | def output_size(self): 108 | return self._size 109 | 110 | def call(self, x, h): 111 | channels = x.shape[self._feature_axis].value 112 | 113 | with tf.variable_scope('gates'): 114 | inputs = tf.concat([x, h], axis=self._feature_axis) 
115 |             n = channels + self._filters
116 |             m = 2 * self._filters if self._filters > 1 else 2
117 |             W = tf.get_variable('kernel', self._kernel + [n, m])
118 |             y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
119 |             if self._normalize:
120 |                 r, u = tf.split(y, 2, axis=self._feature_axis)
121 |                 r = tf.contrib.layers.layer_norm(r)
122 |                 u = tf.contrib.layers.layer_norm(u)
123 |             else:
124 |                 y += tf.get_variable('bias', [m], initializer=tf.ones_initializer())
125 |                 r, u = tf.split(y, 2, axis=self._feature_axis)
126 |             r, u = tf.sigmoid(r), tf.sigmoid(u)
127 | 
128 |             # TODO
129 |             #tf.summary.histogram('reset_gate', r)
130 |             #tf.summary.histogram('update_gate', u)
131 | 
132 |         with tf.variable_scope('candidate'):
133 |             inputs = tf.concat([x, r * h], axis=self._feature_axis)
134 |             n = channels + self._filters
135 |             m = self._filters
136 |             W = tf.get_variable('kernel', self._kernel + [n, m])
137 |             y = tf.nn.convolution(inputs, W, 'SAME', data_format=self._data_format)
138 |             if self._normalize:
139 |                 y = tf.contrib.layers.layer_norm(y)
140 |             else:
141 |                 y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
142 |             h = u * h + (1 - u) * self._activation(y)
143 | 
144 |         return h, h
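Both cells implement the tf.nn.rnn_cell.RNNCell interface, so they drop directly into the standard TF 1.x recurrent drivers. A minimal usage sketch with purely illustrative shapes:

import tensorflow as tf
from util.cell import ConvGRUCell

# A batch of 4 clips, 16 time steps, 40x40 feature maps with 64 channels.
inputs = tf.placeholder(tf.float32, [4, 16, 40, 40, 64])

cell = ConvGRUCell(shape=[40, 40], filters=32, kernel=[3, 3])
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=inputs.dtype)
# outputs: [4, 16, 40, 40, 32], state: [4, 40, 40, 32]
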
64 | print('data reader: waiting for file input (IO is slow)...')
65 | batch = self.prefetch_queue.get(block=True)
66 | self.n_batch = (self.n_batch + 1) % self.num_batch
67 | self.n_epoch += (self.n_batch == 0)
68 | return batch
69 | 
-------------------------------------------------------------------------------- /util/data_reader_ignore.py: --------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | 
3 | import numpy as np
4 | import os
5 | import threading
6 | import Queue as queue  # Python 2; on Python 3 this becomes "import queue"
7 | 
8 | def run_prefetch(prefetch_queue, folder_name, prefix, num_batch, shuffle, id2name):
9 | n_batch_prefetch = 0
10 | fetch_order = np.arange(num_batch)
11 | while True:
12 | # Shuffle the batch order for every epoch
13 | if n_batch_prefetch == 0 and shuffle:
14 | fetch_order = np.random.permutation(num_batch)
15 | 
16 | # Load batch from file
17 | batch_id = fetch_order[n_batch_prefetch]
18 | save_file = os.path.join(folder_name, prefix+'_'+str(batch_id)+'.npz')
19 | npz_filemap = np.load(save_file)
20 | batch = dict(npz_filemap)
21 | if id2name:
22 | batch['img_name'] = id2name[str(batch_id)]
23 | npz_filemap.close()
24 | 
25 | # add loaded batch to the prefetching queue
26 | prefetch_queue.put(batch, block=True)
27 | 
28 | # Move to next batch
29 | n_batch_prefetch = (n_batch_prefetch + 1) % num_batch
30 | 
31 | class DataReader:
32 | def __init__(self, folder_name, prefix, shuffle=True, prefetch_num=8, list_name=''):
33 | self.folder_name = folder_name
34 | self.prefix = prefix
35 | self.shuffle = shuffle
36 | self.prefetch_num = prefetch_num
37 | 
38 | self.n_batch = 0
39 | self.n_epoch = 0
40 | 
41 | self.id2name = None
42 | if list_name != '':
43 | img_list = [line.strip() for line in open(list_name)]
44 | self.id2name = {line.split('\t')[0]: line.split('\t')[-1] for line in img_list}
45 | 
46 | 
47 | # Search the folder to count the number of batches
48 | filelist = os.listdir(folder_name)
49 | num_batch = 0
50 | while (prefix + '_' + str(num_batch) + '.npz') in filelist:
51 | num_batch += 1
52 | if num_batch > 0:
53 | print('found %d batches under %s with prefix "%s"' % (num_batch, folder_name, prefix))
54 | else:
55 | raise RuntimeError('no batches under %s with prefix "%s"' % (folder_name, prefix))
56 | self.num_batch = num_batch
57 | 
58 | # Start prefetching thread
59 | self.prefetch_queue = queue.Queue(maxsize=prefetch_num)
60 | self.prefetch_thread = threading.Thread(target=run_prefetch,
61 | args=(self.prefetch_queue, self.folder_name, self.prefix,
62 | self.num_batch, self.shuffle, self.id2name))
63 | self.prefetch_thread.daemon = True
64 | self.prefetch_thread.start()
65 | 
66 | def read_batch(self, is_log=True):
67 | if is_log:
68 | print('data reader: epoch = %d, batch = %d / %d' % (self.n_epoch, self.n_batch, self.num_batch))
69 | 
70 | # Get a batch from the prefetching queue
71 | if self.prefetch_queue.empty():
72 | print('data reader: waiting for file input (IO is slow)...')
73 | batch = self.prefetch_queue.get(block=True)
74 | self.n_batch = (self.n_batch + 1) % self.num_batch
75 | self.n_epoch += (self.n_batch == 0)
76 | return batch
77 | 
-------------------------------------------------------------------------------- /util/eval_tools.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | 
3 | import numpy as np
4 | import pyximport; pyximport.install()
5 | # from util.nms import cpu_nms as nms
6 | 
7 | # all boxes are [xmin, ymin, xmax, ymax] format, 0-indexed, including xmax and ymax (hence the +1 in the width/height terms below)
8 | def compute_bbox_iou(bboxes, target):
9 | if isinstance(bboxes, list):
10 | bboxes = np.array(bboxes)
11 | bboxes = bboxes.reshape((-1, 4))
12 | 
13 | if isinstance(target, list):
14 | target = np.array(target)
15 | target = target.reshape((-1, 4))
16 | 
17 | A_bboxes = (bboxes[..., 2]-bboxes[..., 0]+1) * (bboxes[..., 3]-bboxes[..., 1]+1)
18 | A_target = (target[..., 2]-target[..., 0]+1) * (target[..., 3]-target[..., 1]+1)
19 | assert(np.all(A_bboxes >= 0))
20 | assert(np.all(A_target >= 0))
21 | I_x1 = np.maximum(bboxes[..., 0], target[..., 0])
22 | I_y1 = np.maximum(bboxes[..., 1], target[..., 1])
23 | I_x2 = np.minimum(bboxes[..., 2], target[..., 2])
24 | I_y2 = np.minimum(bboxes[..., 3], target[..., 3])
25 | A_I = np.maximum(I_x2 - I_x1 + 1, 0) * np.maximum(I_y2 - I_y1 + 1, 0)
26 | IoUs = A_I / (A_bboxes + A_target - A_I)
27 | assert(np.all(0 <= IoUs) and np.all(IoUs <= 1))
28 | return IoUs
29 | 
30 | # all masks are [num, height, width] binary arrays
31 | def compute_mask_IU(masks, target):
32 | assert(target.shape[-2:] == masks.shape[-2:])
33 | I = np.sum(np.logical_and(masks, target))
34 | U = np.sum(np.logical_or(masks, target))
35 | return I, U
36 | 
-------------------------------------------------------------------------------- /util/functions.py: --------------------------------------------------------------------------------
1 | import math
2 | from collections import deque
3 | 
4 | class MovingAverage():
5 | """ Keeps an average window of the specified number of items. """
6 | 
7 | def __init__(self, max_window_size=1000):
8 | self.max_window_size = max_window_size
9 | self.reset()
10 | 
11 | def add(self, elem):
12 | """ Adds an element to the window, removing the earliest element if necessary. """
13 | if elem > 999999999999:  # ignore absurdly large values
14 | print('Warning: Moving average ignored a value of %f' % elem)
15 | return
16 | 
17 | self.window.append(elem)
18 | self.sum += elem
19 | 
20 | if len(self.window) > self.max_window_size:
21 | self.sum -= self.window.popleft()
22 | 
23 | def append(self, elem):
24 | """ Same as add, just more Pythonic. """
25 | self.add(elem)
26 | 
27 | def reset(self):
28 | """ Resets the MovingAverage to its initial state. """
29 | self.window = deque()
30 | self.sum = 0
31 | 
32 | def get_avg(self):
33 | """ Returns the average of the elements in the window.
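Returns 0 for an empty window (the denominator is clamped to 1).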
""" 34 | return self.sum / max(len(self.window), 1) 35 | 36 | def __str__(self): 37 | return str(self.get_avg()) 38 | 39 | def __repr__(self): 40 | return repr(self.get_avg()) -------------------------------------------------------------------------------- /util/h5_reader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import os 5 | import threading 6 | import queue as queue 7 | import h5py 8 | 9 | def run_prefetch(prefetch_queue, h5_file, h5_img, num_batch, shuffle): 10 | n_batch_prefetch = 0 11 | fetch_order = np.arange(num_batch) 12 | img_size=(320, 320) 13 | while True: 14 | # Shuffle the batch order for every epoch 15 | if n_batch_prefetch == 0 and shuffle: 16 | fetch_order = np.random.permutation(num_batch) 17 | 18 | # Load batch from file 19 | batch_id = fetch_order[n_batch_prefetch] 20 | mask = h5_file['answers'][batch_id] # [320, 320] 21 | image_id = h5_file['image_idxs'][batch_id] # int 22 | refexp = h5_file['refexps'][batch_id] # [60] 23 | sent = h5_file['sentence'][batch_id] 24 | 25 | # read images 26 | img = h5_img['images'][image_id] # [320, 320, 3] 27 | batch = {'mask_batch': mask, 28 | 'text_batch': refexp, 29 | 'im_batch': img} 30 | 31 | # add loaded batch to fetchqing queue 32 | prefetch_queue.put(batch, block=True) 33 | 34 | # Move to next batch 35 | n_batch_prefetch = (n_batch_prefetch + 1) % num_batch 36 | 37 | class DataReader: 38 | def __init__(self, h5_file_name, h5_image_name, shuffle=True, prefetch_num=8): 39 | # self.img_folder = img_folder 40 | self.h5_file_name = h5_file_name 41 | self.h5_image = h5_image_name 42 | self.shuffle = shuffle 43 | self.prefetch_num = prefetch_num 44 | 45 | self.n_batch = 0 46 | self.n_epoch = 0 47 | 48 | # Search the folder to see the number of num_batch 49 | self.h5_file = h5py.File(h5_file_name, 'r') 50 | self.h5_image = h5py.File(h5_image_name, 'r') 51 | num_batch = self.h5_file['image_idxs'].shape[0] # n? 52 | if num_batch > 0: 53 | print('found %d batches within %s' % (num_batch, h5_file_name)) 54 | else: 55 | raise RuntimeError('no batches within %s' % (h5_file_name)) 56 | self.num_batch = num_batch # 一共有多少个batch 57 | 58 | # Start prefetching thread 59 | self.prefetch_queue = queue.Queue(maxsize=prefetch_num) 60 | # 读数据的线程,只有一个? 
61 | self.prefetch_thread = threading.Thread(target=run_prefetch,
62 | args=(self.prefetch_queue, self.h5_file,
63 | self.h5_image, self.num_batch, self.shuffle))
64 | self.prefetch_thread.daemon = True
65 | self.prefetch_thread.start()
66 | 
67 | def read_batch(self, is_log=True):
68 | if is_log:
69 | print('data reader: epoch = %d, batch = %d / %d' % (self.n_epoch, self.n_batch, self.num_batch))
70 | 
71 | # Get a batch from the prefetching queue
72 | if self.prefetch_queue.empty():
73 | print('data reader: waiting for file input (IO is slow)...')
74 | batch = self.prefetch_queue.get(block=True)
75 | self.n_batch = (self.n_batch + 1) % self.num_batch
76 | self.n_epoch += (self.n_batch == 0)
77 | return batch
78 | 
-------------------------------------------------------------------------------- /util/im_processing.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | 
3 | import skimage.transform
4 | import numpy as np
5 | 
6 | def resize_and_pad(im, input_h, input_w):
7 | # Resize and pad im to input_h x input_w size
8 | im_h, im_w = im.shape[:2]
9 | scale = min(input_h / im_h, input_w / im_w)
10 | resized_h = int(np.round(im_h * scale))
11 | resized_w = int(np.round(im_w * scale))
12 | pad_h = int(np.floor((input_h - resized_h) / 2))
13 | pad_w = int(np.floor((input_w - resized_w) / 2))
14 | 
15 | resized_im = skimage.transform.resize(im, [resized_h, resized_w])
16 | if im.ndim > 2:
17 | new_im = np.zeros((input_h, input_w, im.shape[2]), dtype=resized_im.dtype)
18 | else:
19 | new_im = np.zeros((input_h, input_w), dtype=resized_im.dtype)
20 | new_im[pad_h:pad_h+resized_h, pad_w:pad_w+resized_w, ...] = resized_im
21 | 
22 | return new_im
23 | 
24 | def resize_and_crop(im, input_h, input_w):
25 | # Resize and crop im to input_h x input_w size
26 | im_h, im_w = im.shape[:2]
27 | scale = max(input_h / im_h, input_w / im_w)
28 | resized_h = int(np.round(im_h * scale))
29 | resized_w = int(np.round(im_w * scale))
30 | crop_h = int(np.floor((resized_h - input_h) / 2))
31 | crop_w = int(np.floor((resized_w - input_w) / 2))
32 | 
33 | resized_im = skimage.transform.resize(im, [resized_h, resized_w])
34 | if im.ndim > 2:
35 | new_im = np.zeros((input_h, input_w, im.shape[2]), dtype=resized_im.dtype)
36 | else:
37 | new_im = np.zeros((input_h, input_w), dtype=resized_im.dtype)
38 | new_im[...] = resized_im[crop_h:crop_h+input_h, crop_w:crop_w+input_w, ...]
39 | 
40 | return new_im
41 | 
42 | def crop_bboxes_subtract_mean(im, bboxes, crop_size, image_mean):
43 | if isinstance(bboxes, list):
44 | bboxes = np.array(bboxes)
45 | bboxes = bboxes.reshape((-1, 4))
46 | 
47 | im = skimage.img_as_ubyte(im)
48 | num_bbox = bboxes.shape[0]
49 | imcrop_batch = np.zeros((num_bbox, crop_size, crop_size, 3), dtype=np.float32)
50 | for n_bbox in range(bboxes.shape[0]):
51 | xmin, ymin, xmax, ymax = bboxes[n_bbox]
52 | # crop and resize
53 | imcrop = im[ymin:ymax+1, xmin:xmax+1, :]
54 | imcrop_batch[n_bbox, ...] = skimage.img_as_ubyte(
55 | skimage.transform.resize(imcrop, [crop_size, crop_size]))
56 | imcrop_batch -= image_mean
57 | return imcrop_batch
58 | 
59 | def bboxes_from_masks(masks):
60 | if masks.ndim == 2:
61 | masks = masks[np.newaxis, ...]
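# (a single H x W mask is promoted to [1, H, W] above; one tight box per mask is computed from its nonzero extent below)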
62 | num_mask = masks.shape[0]
63 | bboxes = np.zeros((num_mask, 4), dtype=np.int32)
64 | for n_mask in range(num_mask):
65 | idx = np.nonzero(masks[n_mask])
66 | xmin, xmax = np.min(idx[1]), np.max(idx[1])
67 | ymin, ymax = np.min(idx[0]), np.max(idx[0])
68 | bboxes[n_mask, :] = [xmin, ymin, xmax, ymax]
69 | return bboxes
70 | 
71 | def crop_masks_subtract_mean(im, masks, crop_size, image_mean):
72 | if masks.ndim == 2:
73 | masks = masks[np.newaxis, ...]
74 | num_mask = masks.shape[0]
75 | 
76 | im = skimage.img_as_ubyte(im)
77 | bboxes = bboxes_from_masks(masks)
78 | imcrop_batch = np.zeros((num_mask, crop_size, crop_size, 3), dtype=np.float32)
79 | for n_mask in range(num_mask):
80 | xmin, ymin, xmax, ymax = bboxes[n_mask]
81 | 
82 | # crop and resize
83 | im_masked = im.copy()
84 | mask = masks[n_mask, ..., np.newaxis]
85 | im_masked *= mask
86 | im_masked += image_mean.astype(np.uint8) * (1 - mask)
87 | imcrop = im_masked[ymin:ymax+1, xmin:xmax+1, :]
88 | imcrop_batch[n_mask, ...] = skimage.img_as_ubyte(skimage.transform.resize(imcrop, [crop_size, crop_size]))
89 | 
90 | imcrop_batch -= image_mean
91 | return imcrop_batch
92 | 
-------------------------------------------------------------------------------- /util/io.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import json
4 | import scipy.io as sio
5 | 
6 | def load_str_list(filename):
7 | with open(filename, 'r') as f:
8 | str_list = f.readlines()
9 | str_list = [s[:-1] for s in str_list]  # strip the trailing newline
10 | return str_list
11 | 
12 | def save_str_list(str_list, filename):
13 | str_list = [s+'\n' for s in str_list]
14 | with open(filename, 'w') as f:
15 | f.writelines(str_list)
16 | 
17 | def load_json(filename):
18 | with open(filename, 'r') as f:
19 | return json.load(f)
20 | 
21 | def save_json(json_obj, filename):
22 | with open(filename, 'w') as f:
23 | json.dump(json_obj, f, separators=(',\n', ':\n'))
24 | 
25 | def load_referit_gt_mask(mask_path):
26 | mat = sio.loadmat(mask_path)
27 | mask = (mat['segimg_t'] == 0)
28 | return mask
29 | 
30 | def load_proposal_mask(mask_path):
31 | mat = sio.loadmat(mask_path)
32 | mask = mat['mask']
33 | return mask.transpose((2, 0, 1))
34 | 
-------------------------------------------------------------------------------- /util/loss.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | 
3 | import tensorflow as tf
4 | import numpy as np
5 | 
6 | def weighed_logistic_loss(scores, labels, pos_loss_mult=1.0, neg_loss_mult=1.0):
7 | # Apply different weights to loss of positive samples and negative samples
8 | # positive samples have label 1 while negative samples have label 0
9 | loss_mult = tf.add(tf.multiply(labels, pos_loss_mult-neg_loss_mult), neg_loss_mult)
10 | 
11 | # Classification loss as the average of weighted per-score loss
12 | cls_loss = tf.reduce_mean(tf.reduce_sum(tf.multiply(
13 | tf.nn.sigmoid_cross_entropy_with_logits(logits=scores, labels=labels),
14 | loss_mult), [1, 2, 3]))
15 | 
16 | return cls_loss
17 | 
18 | def logistic_loss_cond(scores, labels):
19 | # Classification loss as the average of weighted per-score loss, ignoring zero-label positions
20 | cond = tf.where(tf.equal(labels, tf.zeros(tf.shape(labels))),
21 | tf.zeros(tf.shape(labels)),
22 | tf.nn.sigmoid_cross_entropy_with_logits(logits=scores, labels=labels)
23 | )
24 | cls_loss = tf.reduce_mean(tf.reduce_sum(cond, [1, 2, 3]))
25 | 
26 | return cls_loss
27 | 
28 | def l2_regularization_loss(variables, weight_decay):
29 | l2_losses = [tf.nn.l2_loss(var) for var in variables]
30 | total_l2_loss = weight_decay * tf.add_n(l2_losses)
31 | 
32 | return total_l2_loss
33 | 
34 | def dsc_loss(scores, labels):  # soft Dice loss: 1 - 2*sum(p*g) / (sum(p) + sum(g))
35 | scores = tf.sigmoid(scores)
36 | inter = tf.scalar_mul(2., tf.reduce_sum(tf.multiply(scores, labels), [1, 2, 3]))
37 | union = tf.add(tf.reduce_sum(scores, [1, 2, 3]), tf.reduce_sum(labels, [1, 2, 3]))
38 | dsc_loss = tf.reduce_mean(tf.subtract(1., tf.divide(inter, union)))
39 | 
40 | return dsc_loss
41 | 
42 | def iou_loss(scores, labels):  # soft IoU loss: 1 - sum(p*g) / (sum(p) + sum(g) - sum(p*g))
43 | scores = tf.sigmoid(scores)
44 | inter = tf.reduce_sum(tf.multiply(scores, labels), [1, 2, 3])
45 | union = tf.add(tf.reduce_sum(scores, [1, 2, 3]), tf.reduce_sum(labels, [1, 2, 3]))
46 | union = tf.subtract(union, inter)
47 | iou_loss = tf.reduce_mean(tf.subtract(1., tf.divide(inter, union)))
48 | 
49 | return iou_loss
50 | 
51 | def smooth_l1_loss(scores, labels, ld=1.0):
52 | box_diff = scores - labels
53 | abs_box_diff = tf.abs(box_diff)
54 | smooth_l1_sign = tf.stop_gradient(tf.to_float(tf.less(abs_box_diff, 1.)))
55 | loss_box_raw = tf.pow(box_diff, 2) * 0.5 * smooth_l1_sign \
56 | + (abs_box_diff - 0.5) * (1.0 - smooth_l1_sign)
57 | loss_box = ld * tf.reduce_mean(tf.reduce_sum(loss_box_raw, [1]))
58 | 
59 | return loss_box
60 | 
-------------------------------------------------------------------------------- /util/nms.pyx: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | cimport numpy as np
10 | 
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 | return a if a >= b else b
13 | 
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 | return a if a <= b else b
16 | 
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets,
18 | np.ndarray[np.float32_t, ndim=1] scores, np.float thresh):
19 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
20 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
21 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
22 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
23 | 
24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 | 
27 | cdef int ndets = dets.shape[0]
28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 | np.zeros((ndets), dtype=np.int)
30 | 
31 | # nominal indices
32 | cdef int _i, _j
33 | # sorted indices
34 | cdef int i, j
35 | # temp variables for box i (the box currently under consideration)
36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 | # variables for computing overlap with box j (lower scoring box)
38 | cdef np.float32_t xx1, yy1, xx2, yy2
39 | cdef np.float32_t w, h
40 | cdef np.float32_t inter, ovr
41 | 
42 | keep = []
43 | for _i in range(ndets):
44 | i = order[_i]
45 | if suppressed[i] == 1:
46 | continue
47 | keep.append(i)
48 | ix1 = x1[i]
49 | iy1 = y1[i]
50 | ix2 = x2[i]
51 | iy2 = y2[i]
52 | iarea = areas[i]
53 | for _j in range(_i + 1, ndets):
54 | j = order[_j]
55 | if suppressed[j] == 1:
56 | continue
57 | xx1 = max(ix1, x1[j])
58 | yy1 = max(iy1, y1[j])
59 | xx2 = min(ix2, x2[j])
60 | yy2 = min(iy2, y2[j])
61 | w = max(0.0, xx2 - xx1 + 1)
62 | h = max(0.0, yy2 - yy1 + 1)
63 | inter = w * h
64 | ovr = inter / (iarea + areas[j] - inter)
65 | if ovr >= thresh:
66 | suppressed[j] = 1
67 | 
68 | return keep
69 | 
-------------------------------------------------------------------------------- /util/processing_tools.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | 
3 | import numpy as np
4 | 
5 | def generate_spatial_batch(N, featmap_H, featmap_W):
6 | spatial_batch_val = np.zeros((N, featmap_H, featmap_W, 8), dtype=np.float32)
7 | for h in range(featmap_H):
8 | for w in range(featmap_W):
9 | xmin = w / featmap_W * 2 - 1
10 | xmax = (w+1) / featmap_W * 2 - 1
11 | xctr = (xmin+xmax) / 2
12 | ymin = h / featmap_H * 2 - 1
13 | ymax = (h+1) / featmap_H * 2 - 1
14 | yctr = (ymin+ymax) / 2
15 | spatial_batch_val[:, h, w, :] = \
16 | [xmin, ymin, xmax, ymax, xctr, yctr, 1/featmap_W, 1/featmap_H]
17 | return spatial_batch_val
18 | 
19 | def generate_bilinear_filter(stride):
20 | # Bilinear upsampling filter
21 | f = np.concatenate((np.arange(0, stride), np.arange(stride, 0, -1))) / stride
22 | return np.outer(f, f).astype(np.float32)[:, :, np.newaxis, np.newaxis]
23 | 
24 | def compute_accuracy(scores, labels):
25 | is_pos = (labels != 0)
26 | is_neg = np.logical_not(is_pos)
27 | num_pos = np.sum(is_pos)
28 | num_neg = np.sum(is_neg)
29 | num_all = num_pos + num_neg
30 | 
31 | is_correct = np.logical_xor(scores < 0, is_pos)
32 | accuracy_all = np.sum(is_correct) / num_all
33 | accuracy_pos = np.sum(is_correct[is_pos]) / (num_pos + 1)  # the +1 avoids division by zero when there are no positives
34 | accuracy_neg = np.sum(is_correct[is_neg]) / num_neg
35 | return accuracy_all, accuracy_pos, accuracy_neg
36 | 
37 | def spatial_feature_from_bbox(bboxes, imsize):
38 | if isinstance(bboxes, list):
39 | bboxes = np.array(bboxes)
40 | bboxes = bboxes.reshape((-1, 4))
41 | im_w, im_h = imsize
42 | assert(np.all(bboxes[:, 0] < im_w) and np.all(bboxes[:, 2] < im_w))
43 | assert(np.all(bboxes[:, 1] < im_h) and np.all(bboxes[:, 3] < im_h))
44 | 
45 | feats = np.zeros((bboxes.shape[0], 8))
46 | feats[:, 0] = bboxes[:, 0] * 2.0 / im_w - 1 # x1
47 | feats[:, 1] = bboxes[:, 1] * 2.0 / im_h - 1 # y1
48 | feats[:, 2] = bboxes[:, 2] * 2.0 / im_w - 1 # x2
49 | feats[:, 3] = bboxes[:, 3] * 2.0 / im_h - 1 # y2
50 | feats[:, 4] = (feats[:, 0] + feats[:, 2]) / 2 # x0
51 | feats[:, 5] = (feats[:, 1] + feats[:, 3]) / 2 # y0
52 | feats[:, 6] = feats[:, 2] - feats[:, 0] # w
53 | feats[:, 7] = feats[:, 3] - feats[:, 1] # h
54 | return feats
55 | 
-------------------------------------------------------------------------------- /util/text_processing.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 | 
3 | import re
4 | 
5 | def load_vocab_dict_from_file(dict_file):
6 | with open(dict_file) as f:
7 | words = [w.strip() for w in f.readlines()]
8 | vocab_dict = {words[n]: n for n in range(len(words))}
9 | return vocab_dict
10 | 
11 | UNK_IDENTIFIER = '<unk>' # '<unk>' is the word used to identify unknown words
12 | SENTENCE_SPLIT_REGEX = re.compile(r'(\W+)')
13 | def sentence2vocab_indices(sentence, vocab_dict):
14 | words = SENTENCE_SPLIT_REGEX.split(sentence.strip())
15 | words = [w.lower() for w in words if len(w.strip()) > 0]
16 | # remove trailing '.'
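# (SENTENCE_SPLIT_REGEX keeps punctuation as separate tokens, so a trailing period must be dropped explicitly)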
17 | if words[-1] == '.':
18 | words = words[:-1]
19 | vocab_indices = [(vocab_dict[w] if w in vocab_dict else vocab_dict[UNK_IDENTIFIER])
20 | for w in words]
21 | return vocab_indices
22 | 
23 | PAD_IDENTIFIER = '<pad>'
24 | EOS_IDENTIFIER = '<eos>'
25 | def preprocess_sentence(sentence, vocab_dict, T):
26 | vocab_indices = sentence2vocab_indices(sentence, vocab_dict)
27 | # # Append '<eos>' symbol to the end
28 | # vocab_indices.append(vocab_dict[EOS_IDENTIFIER])
29 | # Truncate long sentences
30 | if len(vocab_indices) > T:
31 | vocab_indices = vocab_indices[:T]
32 | # Pad short sentences at the beginning with the special symbol '<pad>'
33 | if len(vocab_indices) < T:
34 | vocab_indices = [vocab_dict[PAD_IDENTIFIER]] * (T - len(vocab_indices)) + vocab_indices
35 | return vocab_indices
-------------------------------------------------------------------------------- /util/vgg16_fcn.py: --------------------------------------------------------------------------------
1 | '''
2 | vgg16 model with atrous & fully convolutional layers
3 | '''
4 | 
5 | import tensorflow as tf
6 | 
7 | class Vgg16:
8 | def __init__(self, data):
9 | self.data = data
10 | '''
11 | build graph
12 | '''
13 | self.conv1_1 = self.conv_relu('conv1_1', self.data, 3, 64)
14 | self.conv1_2 = self.conv_relu('conv1_2', self.conv1_1, 64, 64)
15 | self.pool1 = self.max_pool('pool1', self.conv1_2)
16 | 
17 | self.conv2_1 = self.conv_relu('conv2_1', self.pool1, 64, 128)
18 | self.conv2_2 = self.conv_relu('conv2_2', self.conv2_1, 128, 128)
19 | self.pool2 = self.max_pool('pool2', self.conv2_2)
20 | 
21 | self.conv3_1 = self.conv_relu('conv3_1', self.pool2, 128, 256)
22 | self.conv3_2 = self.conv_relu('conv3_2', self.conv3_1, 256, 256)
23 | self.conv3_3 = self.conv_relu('conv3_3', self.conv3_2, 256, 256)
24 | self.pool3 = self.max_pool('pool3', self.conv3_3)
25 | 
26 | self.conv4_1 = self.conv_relu('conv4_1', self.pool3, 256, 512)
27 | self.conv4_2 = self.conv_relu('conv4_2', self.conv4_1, 512, 512)
28 | self.conv4_3 = self.conv_relu('conv4_3', self.conv4_2, 512, 512)
29 | 
30 | self.conv5_1 = self.conv_relu('conv5_1', self.conv4_3, 512, 512)  # no pool4/pool5, so the feature stride stays at 8
31 | self.conv5_2 = self.conv_relu('conv5_2', self.conv5_1, 512, 512)
32 | self.conv5_3 = self.conv_relu('conv5_3', self.conv5_2, 512, 512)
33 | 
34 | self.fc6 = self.conv_relu('fc6', self.conv5_3, 512, 4096, kernel_size=7)
35 | self.fc7 = self.conv_relu('fc7', self.fc6, 4096, 4096, kernel_size=1)
36 | self.fc8 = self.conv_layer('fc8', self.fc7, 4096, 1000, kernel_size=1)
37 | 
38 | def max_pool(self, name, bottom, kernel_size=2, stride=2):
39 | pool = tf.nn.max_pool(bottom, ksize=[1, kernel_size, kernel_size, 1],
40 | strides=[1, stride, stride, 1], padding='SAME', name=name)
41 | return pool
42 | 
43 | def conv_layer(self, name, bottom, input_dim, output_dim, kernel_size=3, stride=1):
44 | with tf.variable_scope(name):
45 | w = tf.get_variable('weights', [kernel_size, kernel_size, input_dim, output_dim],
46 | initializer=tf.contrib.layers.xavier_initializer_conv2d())
47 | b = tf.get_variable('biases', output_dim, initializer=tf.constant_initializer(0.))
48 | 
49 | conv = tf.nn.conv2d(bottom, w, [1, stride, stride, 1], padding='SAME')
50 | conv = tf.nn.bias_add(conv, b)
51 | return conv
52 | 
53 | def conv_relu(self, name, bottom, input_dim, output_dim, kernel_size=3, stride=1):
54 | conv = self.conv_layer(name, bottom, input_dim, output_dim, kernel_size, stride)
55 | return tf.nn.relu(conv)
56 | 
57 | def atrous_conv_relu(self, name, bottom, input_dim, output_dim, kernel_size=3, rate=1):
58 | with tf.variable_scope(name):
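# (atrous/dilated convolution: the kernel size is unchanged, but its receptive field grows with the dilation rate at no extra parameter cost)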
59 | w = tf.get_variable('weights', [kernel_size, kernel_size, input_dim, output_dim], 60 | initializer=tf.random_normal_initializer(stddev=0.01)) 61 | b = tf.get_variable('biases', output_dim, initializer=tf.constant_initializer(0.)) 62 | 63 | conv = tf.nn.atrous_conv2d(bottom, w, rate=rate, padding='SAME') 64 | conv = tf.nn.bias_add(conv, b) 65 | relu = tf.nn.relu(conv) 66 | return relu 67 | --------------------------------------------------------------------------------