├── .gitignore ├── human_colormap.mat ├── .gitmodules ├── human └── config │ ├── no-ssl │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt │ └── attention │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt ├── sub.sed ├── show.m ├── test_human.py ├── run_human.sh └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.caffemodel 2 | *.solverstate 3 | *.o 4 | *.pyc 5 | -------------------------------------------------------------------------------- /human_colormap.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Engineering-Course/LIP_SSL/HEAD/human_colormap.mat -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "code"] 2 | path = code 3 | url = https://github.com/Engineering-Course/caffe_ssl.git 4 | -------------------------------------------------------------------------------- /human/config/no-ssl/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "${EXP}/config/${NET_ID}/train_${TRAIN_SET}.prototxt" 2 | 3 | iter_size: 2 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 200000 7 | base_lr: 0.0001 8 | 9 | display: 20 10 | max_iter: 100000 11 | momentum: 0.9 12 | weight_decay: 0.0005 13 | 14 | snapshot: 1000 15 | snapshot_prefix: "${EXP}/model/${NET_ID}/train" 16 | solver_mode: GPU 17 | -------------------------------------------------------------------------------- /human/config/attention/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "${EXP}/config/${NET_ID}/train_${TRAIN_SET}.prototxt" 2 | 3 | iter_size: 2 4 | lr_policy: "step" 5 | gamma: 0.1 6 | stepsize: 200000 7 | base_lr: 0.0001 8 | 9 | display: 20 10 | max_iter: 100000 11 | momentum: 0.9 12 | weight_decay: 0.0005 13 | 14 | snapshot: 1000 15 | snapshot_prefix: "${EXP}/model/${NET_ID}/train" 16 | solver_mode: GPU 17 | -------------------------------------------------------------------------------- /sub.sed: -------------------------------------------------------------------------------- 1 | 's,${DATA_ROOT},'"${DATA_ROOT}"',g;s,${DATA_ROOT1},'"${DATA_ROOT1}"',g;s,${EXP},'"${EXP}"',g;s,${EXP1},'"${EXP1}"',g;s,${TRAIN_SET},'"${TRAIN_SET}"',g;s,${TRAIN_SET1},'"${TRAIN_SET1}"',g;s,${TRAIN_SET1_WEAK},'"${TRAIN_SET1_WEAK}"',g;s,${TRAIN_SET1_STRONG},'"${TRAIN_SET1_STRONG}"',g;s,${TEST_SET},'"${TEST_SET}"',g;s,${NET_ID},'"${NET_ID}"',g;s,${FEATURE_DIR},'"${FEATURE_DIR}"',g;s,${NUM_LABELS},'"${NUM_LABELS}"',g;s,${NUM_LABELS1},'"${NUM_LABELS1}"',g;s,${NUM_LABELS_UNION},'"${NUM_LABELS_UNION}"',g;s,${BG_BIAS},'"${BG_BIAS}"',g;s,${FG_BIAS},'"${FG_BIAS}"',g;s,${TRAIN_SET_STRONG},'"${TRAIN_SET_STRONG}"',g;s,${TRAIN_SET_WEAK},'"${TRAIN_SET_WEAK}"',g;s,${BATCH_SIZE},'"${BATCH_SIZE}"',g;s,${TEST_SET_PREFIX},'"${TEST_SET_PREFIX}"',g;s,${TRAIN_STEP},'"${TRAIN_STEP}"',g' 2 | -------------------------------------------------------------------------------- /show.m: -------------------------------------------------------------------------------- 1 | clear; 2 | close all; 3 | fclose all; 4 | %% 5 | load('human_colormap.mat'); 6 | data_root_folder = './human/data'; 7 | output_mat_folder = fullfile('./human/features/attention/val/fc8_mask'); 8 | save_result_folder = fullfile('./human/features/attention/val/results'); 9 | if ~exist(save_result_folder, 'dir') 10 | mkdir(save_result_folder); 11 | end 12 | output_dir = dir(fullfile(output_mat_folder, '*.mat')); 13 | for i = 1 : numel(output_dir) 14 | if mod(i, 100) == 0 15 | fprintf(1, 'processing %d (%d)...\n', i, numel(output_dir)); 16 | end 17 | data = load(fullfile(output_mat_folder, output_dir(i).name)); 18 | raw_result = data.data; 19 | raw_result = permute(raw_result, [2 1 3]); 20 | 21 | img_fn = output_dir(i).name(1:end-4); 22 | img_fn = strrep(img_fn, '_blob_0', ''); 23 | img = imread(fullfile(data_root_folder, 'images', [img_fn, '.jpg'])); 24 | img_row = size(img, 1); 25 | img_col = size(img, 2); 26 | result = raw_result(1:img_row, 1:img_col); 27 | mask = uint8(result); 28 | imwrite(mask, colormap, fullfile(save_result_folder, [img_fn, '.png'])); 29 | end 30 | 31 | 32 | -------------------------------------------------------------------------------- /test_human.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from PIL import Image 4 | 5 | 6 | def main(): 7 | image_paths, label_paths = init_path() 8 | hist = compute_hist(image_paths, label_paths) 9 | show_result(hist) 10 | 11 | def init_path(): 12 | list_file = './human/list/val_id.txt' 13 | file_names = [] 14 | with open(list_file, 'rb') as f: 15 | for fn in f: 16 | file_names.append(fn.strip()) 17 | 18 | image_dir = './human/features/attention/val/results/' 19 | label_dir = './human/data/labels/' 20 | 21 | image_paths = [] 22 | label_paths = [] 23 | for file_name in file_names: 24 | image_paths.append(os.path.join(image_dir, file_name+'.png')) 25 | label_paths.append(os.path.join(label_dir, file_name+'.png')) 26 | return image_paths, label_paths 27 | 28 | 29 | def fast_hist(a, b, n): 30 | k = (a >= 0) & (a < n) 31 | return np.bincount(n * a[k].astype(int) + b[k], minlength=n**2).reshape(n, n) 32 | 33 | def compute_hist(images, labels): 34 | n_cl = 20 35 | 36 | hist = np.zeros((n_cl, n_cl)) 37 | for img_path, label_path in zip(images, labels): 38 | label = Image.open(label_path) 39 | label_array = np.array(label, dtype=np.int32) 40 | image = Image.open(img_path) 41 | image_array = np.array(image, dtype=np.int32) 42 | 43 | gtsz = label_array.shape 44 | imgsz = image_array.shape 45 | if not gtsz == imgsz: 46 | image = image.resize((gtsz[1], gtsz[0]), Image.ANTIALIAS) 47 | image_array = np.array(image, dtype=np.int32) 48 | 49 | hist += fast_hist(label_array, image_array, n_cl) 50 | 51 | return hist 52 | 53 | def show_result(hist): 54 | 55 | classes = ['background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes', 56 | 'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt', 57 | 'face', 'leftArm', 'rightArm', 'leftLeg', 'rightLeg', 'leftShoe', 58 | 'rightShoe'] 59 | # num of correct pixels 60 | num_cor_pix = np.diag(hist) 61 | # num of gt pixels 62 | num_gt_pix = hist.sum(1) 63 | print '=' * 50 64 | 65 | # @evaluation 1: overall accuracy 66 | acc = num_cor_pix.sum() / hist.sum() 67 | print '>>>', 'overall accuracy', acc 68 | print '-' * 50 69 | 70 | # @evaluation 2: mean accuracy & per-class accuracy 71 | print 'Accuracy for each class (pixel accuracy):' 72 | for i in xrange(20): 73 | print('%-15s: %f' % (classes[i], num_cor_pix[i] / num_gt_pix[i])) 74 | acc = num_cor_pix / num_gt_pix 75 | print '>>>', 'mean accuracy', np.nanmean(acc) 76 | print '-' * 50 77 | 78 | # @evaluation 3: mean IU & per-class IU 79 | union = num_gt_pix + hist.sum(0) - num_cor_pix 80 | for i in xrange(20): 81 | print('%-15s: %f' % (classes[i], num_cor_pix[i] / union[i])) 82 | iu = num_cor_pix / (num_gt_pix + hist.sum(0) - num_cor_pix) 83 | print '>>>', 'mean IU', np.nanmean(iu) 84 | print '-' * 50 85 | 86 | # @evaluation 4: frequency weighted IU 87 | freq = num_gt_pix / hist.sum() 88 | print '>>>', 'fwavacc', (freq[freq > 0] * iu[freq > 0]).sum() 89 | print '=' * 50 90 | 91 | 92 | 93 | if __name__ == '__main__': 94 | main() 95 | -------------------------------------------------------------------------------- /run_human.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ## MODIFY PATH for YOUR SETTING 4 | ROOT_DIR=human 5 | 6 | CAFFE_DIR=code 7 | CAFFE_BIN=${CAFFE_DIR}/.build_release/tools/caffe.bin 8 | 9 | EXP=human 10 | 11 | if [ "${EXP}" = "human" ]; then 12 | NUM_LABELS=20 13 | DATA_ROOT=${ROOT_DIR}/data/ 14 | else 15 | NUM_LABELS=0 16 | echo "Wrong exp name" 17 | fi 18 | 19 | 20 | ## Specify which model to train 21 | ########### voc12 ################ 22 | NET_ID=attention 23 | #NET_ID=vgg16 24 | 25 | ## Variables used for weakly or semi-supervisedly training 26 | TRAIN_SET_SUFFIX= 27 | 28 | TRAIN_SET_STRONG=train 29 | 30 | DEV_ID=0 31 | 32 | ##### 33 | 34 | ## Create dirs 35 | 36 | CONFIG_DIR=${EXP}/config/${NET_ID} 37 | MODEL_DIR=${EXP}/model/${NET_ID} 38 | mkdir -p ${MODEL_DIR} 39 | LOG_DIR=${EXP}/log/${NET_ID} 40 | mkdir -p ${LOG_DIR} 41 | export GLOG_log_dir=${LOG_DIR} 42 | 43 | ## Run 44 | 45 | RUN_TRAIN=1 46 | RUN_TEST=0 47 | 48 | ## Training #1 (on train_aug) 49 | 50 | if [ ${RUN_TRAIN} -eq 1 ]; then 51 | # 52 | LIST_DIR=${EXP}/list 53 | TRAIN_SET=train${TRAIN_SET_SUFFIX} 54 | if [ -z ${TRAIN_SET_WEAK_LEN} ]; then 55 | TRAIN_SET_WEAK=${TRAIN_SET}_diff_${TRAIN_SET_STRONG} 56 | comm -3 ${LIST_DIR}/${TRAIN_SET}.txt ${LIST_DIR}/${TRAIN_SET_STRONG}.txt > ${LIST_DIR}/${TRAIN_SET_WEAK}.txt 57 | else 58 | TRAIN_SET_WEAK=${TRAIN_SET}_diff_${TRAIN_SET_STRONG}_head${TRAIN_SET_WEAK_LEN} 59 | comm -3 ${LIST_DIR}/${TRAIN_SET}.txt ${LIST_DIR}/${TRAIN_SET_STRONG}.txt | head -n ${TRAIN_SET_WEAK_LEN} > ${LIST_DIR}/${TRAIN_SET_WEAK}.txt 60 | fi 61 | # 62 | MODEL=${EXP}/model/${NET_ID}/init.caffemodel 63 | # 64 | echo Training net ${EXP}/${NET_ID} 65 | for pname in train solver; do 66 | sed "$(eval echo $(cat sub.sed))" \ 67 | ${CONFIG_DIR}/${pname}.prototxt > ${CONFIG_DIR}/${pname}_${TRAIN_SET}.prototxt 68 | done 69 | CMD="${CAFFE_BIN} train \ 70 | --solver=${CONFIG_DIR}/solver_${TRAIN_SET}.prototxt \ 71 | --gpu=${DEV_ID}" 72 | if [ -f ${MODEL} ]; then 73 | CMD="${CMD} --weights=${MODEL}" 74 | fi 75 | echo Running ${CMD} && ${CMD} 76 | fi 77 | 78 | ## Test #1 specification (on val or test) 79 | 80 | if [ ${RUN_TEST} -eq 1 ]; then 81 | # 82 | for TEST_SET in val; do 83 | TEST_ITER=`cat ${EXP}/list/${TEST_SET}.txt | wc -l` 84 | MODEL=${EXP}/model/${NET_ID}/test.caffemodel 85 | if [ ! -f ${MODEL} ]; then 86 | MODEL=`ls -t ${EXP}/model/${NET_ID}/train_iter_*.caffemodel | head -n 1` 87 | fi 88 | # 89 | echo Testing net ${EXP}/${NET_ID} 90 | FEATURE_DIR=${EXP}/features/${NET_ID} 91 | mkdir -p ${FEATURE_DIR}/${TEST_SET}/fc8_mask 92 | #mkdir -p ${FEATURE_DIR}/${TEST_SET}/fc9 93 | #mkdir -p ${FEATURE_DIR}/${TEST_SET}/seg_score 94 | sed "$(eval echo $(cat sub.sed))" \ 95 | ${CONFIG_DIR}/test.prototxt > ${CONFIG_DIR}/test_${TEST_SET}.prototxt 96 | CMD="${CAFFE_BIN} test \ 97 | --model=${CONFIG_DIR}/test_${TEST_SET}.prototxt \ 98 | --weights=${MODEL} \ 99 | --gpu=${DEV_ID} \ 100 | --iterations=${TEST_ITER}" 101 | echo Running ${CMD} && ${CMD} 102 | done 103 | fi 104 | 105 | 106 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Self-supervised Structure-sensitive Learning (SSL) 2 | Ke Gong, Xiaodan Liang, Xiaohui Shen, Liang Lin, "Look into Person: Self-supervised Structure-sensitive Learning and A New Benchmark for Human Parsing", CVPR 2017. 3 | 4 | ### Introduction 5 | 6 | SSL is a state-of-the-art deep learning methord for human parsing built on top of [Caffe](http://caffe.berkeleyvision.org). 7 | This novel self-supervised structure-sensitive learning approach can impose human pose structures into parsing results without resorting to extra supervision (i.e., no 8 | need for specifically labeling human joints in model training). The self-supervised learning framework can be injected into any advanced neural networks to help incorporate rich high-level knowledge regarding human joints from a global perspective and improve the parsing results. 9 | 10 | This distribution provides a publicly available implementation for the key model ingredients reported in our latest [paper](http://openaccess.thecvf.com/content_cvpr_2017/papers/Gong_Look_Into_Person_CVPR_2017_paper.pdf) which is accepted by CVPR2017. 11 | 12 | We newly introduce a novel Joint Human Parsing and Pose Estimation Network (**JPPNet**), which is accepted by T-PAMI 2018. ([Paper](https://arxiv.org/pdf/1804.01984.pdf) and [Code](https://github.com/Engineering-Course/LIP_JPPNet)) 13 | 14 | Please consult and consider citing the following papers: 15 | 16 | @InProceedings{Gong_2017_CVPR, 17 | author = {Gong, Ke and Liang, Xiaodan and Zhang, Dongyu and Shen, Xiaohui and Lin, Liang}, 18 | title = {Look Into Person: Self-Supervised Structure-Sensitive Learning and a New Benchmark for Human Parsing}, 19 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 20 | month = {July}, 21 | year = {2017} 22 | } 23 | @article{liang2018look, 24 | title={Look into Person: Joint Body Parsing \& Pose Estimation Network and a New Benchmark}, 25 | author={Liang, Xiaodan and Gong, Ke and Shen, Xiaohui and Lin, Liang}, 26 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 27 | year={2018}, 28 | publisher={IEEE} 29 | } 30 | 31 | ### Look into People (LIP) Dataset 32 | 33 | The SSL is trained and evaluated on our [LIP dataset](https://lip.sysuhcp.com/) for human parsing. Please check it for more model details. The dataset is also available at [google drive](https://drive.google.com/drive/folders/0BzvH3bSnp3E9QjVYZlhWSjltSWM?resourcekey=0-nkS8bDVjPs3bEw3UZW-omA&usp=sharing) and [baidu drive](http://pan.baidu.com/s/1nvqmZBN). 34 | 35 | 36 | ### Pre-trained models 37 | 38 | We have released our trained models with best performance at [google drive](https://drive.google.com/open?id=0BzvH3bSnp3E9eHMyVS1RbUVDems) and [baidu drive](http://pan.baidu.com/s/1dFLCYq9). 39 | 40 | 41 | ### Train and test 42 | 43 | 1. Download LIP dataset or prepare your own data. 44 | 2. Put the images(*.jpg) and segmentations(*.png) into ssl/human/data/images and ssl/human/data/labels 45 | 3. Put the train, val, test lists into ssl/human/list. Each type contains a list for path and a list for id (e.g., train.txt and train_id.txt) 46 | 4. Download the pre-trained model and put it into ssl/human/model/attention/. You can also refer [DeepLab](https://bitbucket.org/aquariusjay/deeplab-public-ver2) for more models. 47 | 5. Set up your init.caffemodel before training and test.caffemodel before testing. You can simply use a soft link. 48 | 6. The prototxt files for network config are saved in ssl/human/config 49 | 7. In run_human.sh, you can set the value of RUN_TRAIN adn RUN_TEST to train or test the model. 50 | 8. After you run TEST, the computed features will be saved in ssl/human/features. You can run the provided MATLAB script, show.m to generate visualizable results. Then you can run the Python script, test_human.py to evaluate the performance. 51 | 52 | 53 | 54 | ## Related work 55 | + Joint Body Parsing & Pose Estimation Network [JPPNet](https://github.com/Engineering-Course/LIP_JPPNet), T-PAMI2018 56 | + Instance-level Human Parsing via Part Grouping Network [PGN](https://github.com/Engineering-Course/CIHP_PGN), ECCV2018 57 | + Graphonomy: Universal Human Parsing via Graph Transfer Learning [Graphonomy](https://github.com/Gaoyiminggithub/Graphonomy), CVPR2019 58 | -------------------------------------------------------------------------------- /human/config/attention/test.prototxt: -------------------------------------------------------------------------------- 1 | ## test_attention_ssl 2 | # 3 | name: "${NET_ID}" 4 | 5 | layer { 6 | name: "data" 7 | type: "ImageSegData" 8 | top: "data" 9 | top: "label" 10 | image_data_param { 11 | root_folder: "${DATA_ROOT}" 12 | source: "${EXP}/list/${TEST_SET}.txt" 13 | batch_size: 1 14 | label_type: NONE 15 | # label_type: PIXEL 16 | } 17 | transform_param { 18 | mean_value: 104.008 19 | mean_value: 116.669 20 | mean_value: 122.675 21 | crop_size: 640 22 | mirror: false 23 | } 24 | include: { phase: TEST } 25 | } 26 | ### shrink data ### 27 | layer { 28 | bottom: "data" 29 | top: "shrink_data05" 30 | name: "shrink_data05" 31 | type: "Interp" 32 | interp_param { 33 | shrink_factor: 2 34 | pad_beg: 0 35 | pad_end: 0 36 | } 37 | } 38 | 39 | layer { 40 | bottom: "data" 41 | top: "shrink_data075" 42 | name: "shrink_data075" 43 | type: "Interp" 44 | interp_param { 45 | shrink_factor: 4 46 | zoom_factor: 3 47 | pad_beg: 0 48 | pad_end: 0 49 | } 50 | } 51 | 52 | ### NETWORK for resolution 1 ### 53 | 54 | layer { 55 | bottom: "data" 56 | top: "conv1_1" 57 | name: "conv1_1" 58 | type: "Convolution" 59 | param { 60 | name: "conv1_1_w" 61 | lr_mult: 1 62 | decay_mult: 1 63 | } 64 | param { 65 | name: "conv1_1_b" 66 | lr_mult: 2 67 | decay_mult: 0 68 | } 69 | convolution_param { 70 | num_output: 64 71 | pad: 1 72 | kernel_size: 3 73 | } 74 | } 75 | layer { 76 | bottom: "conv1_1" 77 | top: "conv1_1" 78 | name: "relu1_1" 79 | type: "ReLU" 80 | } 81 | layer { 82 | bottom: "conv1_1" 83 | top: "conv1_2" 84 | name: "conv1_2" 85 | type: "Convolution" 86 | param { 87 | name: "conv1_2_w" 88 | lr_mult: 1 89 | decay_mult: 1 90 | } 91 | param { 92 | name: "conv1_2_b" 93 | lr_mult: 2 94 | decay_mult: 0 95 | } 96 | convolution_param { 97 | num_output: 64 98 | pad: 1 99 | kernel_size: 3 100 | } 101 | } 102 | layer { 103 | bottom: "conv1_2" 104 | top: "conv1_2" 105 | name: "relu1_2" 106 | type: "ReLU" 107 | } 108 | layer { 109 | bottom: "conv1_2" 110 | top: "pool1" 111 | name: "pool1" 112 | type: "Pooling" 113 | pooling_param { 114 | pool: MAX 115 | kernel_size: 3 116 | stride: 2 117 | pad: 1 118 | } 119 | } 120 | layer { 121 | bottom: "pool1" 122 | top: "conv2_1" 123 | name: "conv2_1" 124 | type: "Convolution" 125 | param { 126 | name: "conv2_1_w" 127 | lr_mult: 1 128 | decay_mult: 1 129 | } 130 | param { 131 | name: "conv2_1_b" 132 | lr_mult: 2 133 | decay_mult: 0 134 | } 135 | convolution_param { 136 | num_output: 128 137 | pad: 1 138 | kernel_size: 3 139 | } 140 | } 141 | layer { 142 | bottom: "conv2_1" 143 | top: "conv2_1" 144 | name: "relu2_1" 145 | type: "ReLU" 146 | } 147 | layer { 148 | bottom: "conv2_1" 149 | top: "conv2_2" 150 | name: "conv2_2" 151 | type: "Convolution" 152 | param { 153 | name: "conv2_2_w" 154 | lr_mult: 1 155 | decay_mult: 1 156 | } 157 | param { 158 | name: "conv2_2_b" 159 | lr_mult: 2 160 | decay_mult: 0 161 | } 162 | convolution_param { 163 | num_output: 128 164 | pad: 1 165 | kernel_size: 3 166 | } 167 | } 168 | layer { 169 | bottom: "conv2_2" 170 | top: "conv2_2" 171 | name: "relu2_2" 172 | type: "ReLU" 173 | } 174 | layer { 175 | bottom: "conv2_2" 176 | top: "pool2" 177 | name: "pool2" 178 | type: "Pooling" 179 | pooling_param { 180 | pool: MAX 181 | kernel_size: 3 182 | stride: 2 183 | pad: 1 184 | } 185 | } 186 | layer { 187 | bottom: "pool2" 188 | top: "conv3_1" 189 | name: "conv3_1" 190 | type: "Convolution" 191 | param { 192 | name: "conv3_1_w" 193 | lr_mult: 1 194 | decay_mult: 1 195 | } 196 | param { 197 | name: "conv3_1_b" 198 | lr_mult: 2 199 | decay_mult: 0 200 | } 201 | convolution_param { 202 | num_output: 256 203 | pad: 1 204 | kernel_size: 3 205 | } 206 | } 207 | layer { 208 | bottom: "conv3_1" 209 | top: "conv3_1" 210 | name: "relu3_1" 211 | type: "ReLU" 212 | } 213 | layer { 214 | bottom: "conv3_1" 215 | top: "conv3_2" 216 | name: "conv3_2" 217 | type: "Convolution" 218 | param { 219 | name: "conv3_2_w" 220 | lr_mult: 1 221 | decay_mult: 1 222 | } 223 | param { 224 | name: "conv3_2_b" 225 | lr_mult: 2 226 | decay_mult: 0 227 | } 228 | convolution_param { 229 | num_output: 256 230 | pad: 1 231 | kernel_size: 3 232 | } 233 | } 234 | layer { 235 | bottom: "conv3_2" 236 | top: "conv3_2" 237 | name: "relu3_2" 238 | type: "ReLU" 239 | } 240 | layer { 241 | bottom: "conv3_2" 242 | top: "conv3_3" 243 | name: "conv3_3" 244 | type: "Convolution" 245 | param { 246 | name: "conv3_3_w" 247 | lr_mult: 1 248 | decay_mult: 1 249 | } 250 | param { 251 | name: "conv3_3_b" 252 | lr_mult: 2 253 | decay_mult: 0 254 | } 255 | convolution_param { 256 | num_output: 256 257 | pad: 1 258 | kernel_size: 3 259 | } 260 | } 261 | layer { 262 | bottom: "conv3_3" 263 | top: "conv3_3" 264 | name: "relu3_3" 265 | type: "ReLU" 266 | } 267 | layer { 268 | bottom: "conv3_3" 269 | top: "pool3" 270 | name: "pool3" 271 | type: "Pooling" 272 | pooling_param { 273 | pool: MAX 274 | kernel_size: 3 275 | stride: 2 276 | pad: 1 277 | } 278 | } 279 | layer { 280 | bottom: "pool3" 281 | top: "conv4_1" 282 | name: "conv4_1" 283 | type: "Convolution" 284 | param { 285 | name: "conv4_1_w" 286 | lr_mult: 1 287 | decay_mult: 1 288 | } 289 | param { 290 | name: "conv4_1_b" 291 | lr_mult: 2 292 | decay_mult: 0 293 | } 294 | convolution_param { 295 | num_output: 512 296 | pad: 1 297 | kernel_size: 3 298 | } 299 | } 300 | layer { 301 | bottom: "conv4_1" 302 | top: "conv4_1" 303 | name: "relu4_1" 304 | type: "ReLU" 305 | } 306 | layer { 307 | bottom: "conv4_1" 308 | top: "conv4_2" 309 | name: "conv4_2" 310 | type: "Convolution" 311 | param { 312 | name: "conv4_2_w" 313 | lr_mult: 1 314 | decay_mult: 1 315 | } 316 | param { 317 | name: "conv4_2_b" 318 | lr_mult: 2 319 | decay_mult: 0 320 | } 321 | convolution_param { 322 | num_output: 512 323 | pad: 1 324 | kernel_size: 3 325 | } 326 | } 327 | layer { 328 | bottom: "conv4_2" 329 | top: "conv4_2" 330 | name: "relu4_2" 331 | type: "ReLU" 332 | } 333 | layer { 334 | bottom: "conv4_2" 335 | top: "conv4_3" 336 | name: "conv4_3" 337 | type: "Convolution" 338 | param { 339 | name: "conv4_3_w" 340 | lr_mult: 1 341 | decay_mult: 1 342 | } 343 | param { 344 | name: "conv4_3_b" 345 | lr_mult: 2 346 | decay_mult: 0 347 | } 348 | convolution_param { 349 | num_output: 512 350 | pad: 1 351 | kernel_size: 3 352 | } 353 | } 354 | layer { 355 | bottom: "conv4_3" 356 | top: "conv4_3" 357 | name: "relu4_3" 358 | type: "ReLU" 359 | } 360 | layer { 361 | bottom: "conv4_3" 362 | top: "pool4" 363 | name: "pool4" 364 | type: "Pooling" 365 | pooling_param { 366 | pool: MAX 367 | kernel_size: 3 368 | pad: 1 369 | stride: 1 370 | } 371 | } 372 | layer { 373 | bottom: "pool4" 374 | top: "conv5_1" 375 | name: "conv5_1" 376 | type: "Convolution" 377 | param { 378 | name: "conv5_1_w" 379 | lr_mult: 1 380 | decay_mult: 1 381 | } 382 | param { 383 | name: "conv5_1_b" 384 | lr_mult: 2 385 | decay_mult: 0 386 | } 387 | convolution_param { 388 | num_output: 512 389 | #pad: 1 390 | pad: 2 391 | dilation: 2 392 | kernel_size: 3 393 | } 394 | } 395 | layer { 396 | bottom: "conv5_1" 397 | top: "conv5_1" 398 | name: "relu5_1" 399 | type: "ReLU" 400 | } 401 | layer { 402 | bottom: "conv5_1" 403 | top: "conv5_2" 404 | name: "conv5_2" 405 | type: "Convolution" 406 | param { 407 | name: "conv5_2_w" 408 | lr_mult: 1 409 | decay_mult: 1 410 | } 411 | param { 412 | name: "conv5_2_b" 413 | lr_mult: 2 414 | decay_mult: 0 415 | } 416 | convolution_param { 417 | num_output: 512 418 | #pad: 1 419 | pad: 2 420 | dilation: 2 421 | kernel_size: 3 422 | } 423 | } 424 | layer { 425 | bottom: "conv5_2" 426 | top: "conv5_2" 427 | name: "relu5_2" 428 | type: "ReLU" 429 | } 430 | layer { 431 | bottom: "conv5_2" 432 | top: "conv5_3" 433 | name: "conv5_3" 434 | type: "Convolution" 435 | param { 436 | name: "conv5_3_w" 437 | lr_mult: 1 438 | decay_mult: 1 439 | } 440 | param { 441 | name: "conv5_3_b" 442 | lr_mult: 2 443 | decay_mult: 0 444 | } 445 | convolution_param { 446 | num_output: 512 447 | #pad: 1 448 | pad: 2 449 | dilation: 2 450 | kernel_size: 3 451 | } 452 | } 453 | layer { 454 | bottom: "conv5_3" 455 | top: "conv5_3" 456 | name: "relu5_3" 457 | type: "ReLU" 458 | } 459 | layer { 460 | bottom: "conv5_3" 461 | top: "pool5" 462 | name: "pool5" 463 | type: "Pooling" 464 | pooling_param { 465 | pool: MAX 466 | #kernel_size: 2 467 | #stride: 2 468 | kernel_size: 3 469 | stride: 1 470 | pad: 1 471 | } 472 | } 473 | 474 | layer { 475 | bottom: "pool5" 476 | top: "fc6" 477 | name: "fc6" 478 | type: "Convolution" 479 | param { 480 | name: "fc6_w" 481 | lr_mult: 1 482 | decay_mult: 1 483 | } 484 | param { 485 | name: "fc6_b" 486 | lr_mult: 2 487 | decay_mult: 0 488 | } 489 | convolution_param { 490 | num_output: 1024 491 | pad: 12 492 | dilation: 12 493 | kernel_size: 3 494 | } 495 | } 496 | layer { 497 | bottom: "fc6" 498 | top: "fc6" 499 | name: "relu6" 500 | type: "ReLU" 501 | } 502 | layer { 503 | bottom: "fc6" 504 | top: "fc6" 505 | name: "drop6" 506 | type: "Dropout" 507 | dropout_param { 508 | dropout_ratio: 0.5 509 | } 510 | } 511 | layer { 512 | bottom: "fc6" 513 | top: "fc7" 514 | name: "fc7" 515 | type: "Convolution" 516 | param { 517 | name: "fc7_w" 518 | lr_mult: 1 519 | decay_mult: 1 520 | } 521 | param { 522 | name: "fc7_b" 523 | lr_mult: 2 524 | decay_mult: 0 525 | } 526 | convolution_param { 527 | num_output: 1024 528 | kernel_size: 1 529 | } 530 | } 531 | layer { 532 | bottom: "fc7" 533 | top: "fc7" 534 | name: "relu7" 535 | type: "ReLU" 536 | } 537 | layer { 538 | bottom: "fc7" 539 | top: "fc7" 540 | name: "drop7" 541 | type: "Dropout" 542 | dropout_param { 543 | dropout_ratio: 0.5 544 | } 545 | } 546 | 547 | ### NETWORK for resolution 1/2 ### 548 | 549 | layer { 550 | bottom: "shrink_data05" 551 | top: "conv1_1_res05" 552 | name: "conv1_1_res05" 553 | type: "Convolution" 554 | param { 555 | name: "conv1_1_w" 556 | lr_mult: 1 557 | decay_mult: 1 558 | } 559 | param { 560 | name: "conv1_1_b" 561 | lr_mult: 2 562 | decay_mult: 0 563 | } 564 | convolution_param { 565 | num_output: 64 566 | pad: 1 567 | kernel_size: 3 568 | } 569 | } 570 | layer { 571 | bottom: "conv1_1_res05" 572 | top: "conv1_1_res05" 573 | name: "relu1_1_res05" 574 | type: "ReLU" 575 | } 576 | layer { 577 | bottom: "conv1_1_res05" 578 | top: "conv1_2_res05" 579 | name: "conv1_2_res05" 580 | type: "Convolution" 581 | param { 582 | name: "conv1_2_w" 583 | lr_mult: 1 584 | decay_mult: 1 585 | } 586 | param { 587 | name: "conv1_2_b" 588 | lr_mult: 2 589 | decay_mult: 0 590 | } 591 | convolution_param { 592 | num_output: 64 593 | pad: 1 594 | kernel_size: 3 595 | } 596 | } 597 | layer { 598 | bottom: "conv1_2_res05" 599 | top: "conv1_2_res05" 600 | name: "relu1_2_res05" 601 | type: "ReLU" 602 | } 603 | layer { 604 | bottom: "conv1_2_res05" 605 | top: "pool1_res05" 606 | name: "pool1_res05" 607 | type: "Pooling" 608 | pooling_param { 609 | pool: MAX 610 | kernel_size: 3 611 | stride: 2 612 | pad: 1 613 | } 614 | } 615 | layer { 616 | bottom: "pool1_res05" 617 | top: "conv2_1_res05" 618 | name: "conv2_1_res05" 619 | type: "Convolution" 620 | param { 621 | name: "conv2_1_w" 622 | lr_mult: 1 623 | decay_mult: 1 624 | } 625 | param { 626 | name: "conv2_1_b" 627 | lr_mult: 2 628 | decay_mult: 0 629 | } 630 | convolution_param { 631 | num_output: 128 632 | pad: 1 633 | kernel_size: 3 634 | } 635 | } 636 | layer { 637 | bottom: "conv2_1_res05" 638 | top: "conv2_1_res05" 639 | name: "relu2_1_res05" 640 | type: "ReLU" 641 | } 642 | layer { 643 | bottom: "conv2_1_res05" 644 | top: "conv2_2_res05" 645 | name: "conv2_2_res05" 646 | type: "Convolution" 647 | param { 648 | name: "conv2_2_w" 649 | lr_mult: 1 650 | decay_mult: 1 651 | } 652 | param { 653 | name: "conv2_2_b" 654 | lr_mult: 2 655 | decay_mult: 0 656 | } 657 | convolution_param { 658 | num_output: 128 659 | pad: 1 660 | kernel_size: 3 661 | } 662 | } 663 | layer { 664 | bottom: "conv2_2_res05" 665 | top: "conv2_2_res05" 666 | name: "relu2_2_res05" 667 | type: "ReLU" 668 | } 669 | layer { 670 | bottom: "conv2_2_res05" 671 | top: "pool2_res05" 672 | name: "pool2_res05" 673 | type: "Pooling" 674 | pooling_param { 675 | pool: MAX 676 | kernel_size: 3 677 | stride: 2 678 | pad: 1 679 | } 680 | } 681 | layer { 682 | bottom: "pool2_res05" 683 | top: "conv3_1_res05" 684 | name: "conv3_1_res05" 685 | type: "Convolution" 686 | param { 687 | name: "conv3_1_w" 688 | lr_mult: 1 689 | decay_mult: 1 690 | } 691 | param { 692 | name: "conv3_1_b" 693 | lr_mult: 2 694 | decay_mult: 0 695 | } 696 | convolution_param { 697 | num_output: 256 698 | pad: 1 699 | kernel_size: 3 700 | } 701 | } 702 | layer { 703 | bottom: "conv3_1_res05" 704 | top: "conv3_1_res05" 705 | name: "relu3_1_res05" 706 | type: "ReLU" 707 | } 708 | layer { 709 | bottom: "conv3_1_res05" 710 | top: "conv3_2_res05" 711 | name: "conv3_2_res05" 712 | type: "Convolution" 713 | param { 714 | name: "conv3_2_w" 715 | lr_mult: 1 716 | decay_mult: 1 717 | } 718 | param { 719 | name: "conv3_2_b" 720 | lr_mult: 2 721 | decay_mult: 0 722 | } 723 | convolution_param { 724 | num_output: 256 725 | pad: 1 726 | kernel_size: 3 727 | } 728 | } 729 | layer { 730 | bottom: "conv3_2_res05" 731 | top: "conv3_2_res05" 732 | name: "relu3_2_res05" 733 | type: "ReLU" 734 | } 735 | layer { 736 | bottom: "conv3_2_res05" 737 | top: "conv3_3_res05" 738 | name: "conv3_3_res05" 739 | type: "Convolution" 740 | param { 741 | name: "conv3_3_w" 742 | lr_mult: 1 743 | decay_mult: 1 744 | } 745 | param { 746 | name: "conv3_3_b" 747 | lr_mult: 2 748 | decay_mult: 0 749 | } 750 | convolution_param { 751 | num_output: 256 752 | pad: 1 753 | kernel_size: 3 754 | } 755 | } 756 | layer { 757 | bottom: "conv3_3_res05" 758 | top: "conv3_3_res05" 759 | name: "relu3_3_res05" 760 | type: "ReLU" 761 | } 762 | layer { 763 | bottom: "conv3_3_res05" 764 | top: "pool3_res05" 765 | name: "pool3_res05" 766 | type: "Pooling" 767 | pooling_param { 768 | pool: MAX 769 | kernel_size: 3 770 | stride: 2 771 | pad: 1 772 | } 773 | } 774 | layer { 775 | bottom: "pool3_res05" 776 | top: "conv4_1_res05" 777 | name: "conv4_1_res05" 778 | type: "Convolution" 779 | param { 780 | name: "conv4_1_w" 781 | lr_mult: 1 782 | decay_mult: 1 783 | } 784 | param { 785 | name: "conv4_1_b" 786 | lr_mult: 2 787 | decay_mult: 0 788 | } 789 | convolution_param { 790 | num_output: 512 791 | pad: 1 792 | kernel_size: 3 793 | } 794 | } 795 | layer { 796 | bottom: "conv4_1_res05" 797 | top: "conv4_1_res05" 798 | name: "relu4_1_res05" 799 | type: "ReLU" 800 | } 801 | layer { 802 | bottom: "conv4_1_res05" 803 | top: "conv4_2_res05" 804 | name: "conv4_2_res05" 805 | type: "Convolution" 806 | param { 807 | name: "conv4_2_w" 808 | lr_mult: 1 809 | decay_mult: 1 810 | } 811 | param { 812 | name: "conv4_2_b" 813 | lr_mult: 2 814 | decay_mult: 0 815 | } 816 | convolution_param { 817 | num_output: 512 818 | pad: 1 819 | kernel_size: 3 820 | } 821 | } 822 | layer { 823 | bottom: "conv4_2_res05" 824 | top: "conv4_2_res05" 825 | name: "relu4_2_res05" 826 | type: "ReLU" 827 | } 828 | layer { 829 | bottom: "conv4_2_res05" 830 | top: "conv4_3_res05" 831 | name: "conv4_3_res05" 832 | type: "Convolution" 833 | param { 834 | name: "conv4_3_w" 835 | lr_mult: 1 836 | decay_mult: 1 837 | } 838 | param { 839 | name: "conv4_3_b" 840 | lr_mult: 2 841 | decay_mult: 0 842 | } 843 | convolution_param { 844 | num_output: 512 845 | pad: 1 846 | kernel_size: 3 847 | } 848 | } 849 | layer { 850 | bottom: "conv4_3_res05" 851 | top: "conv4_3_res05" 852 | name: "relu4_3_res05" 853 | type: "ReLU" 854 | } 855 | layer { 856 | bottom: "conv4_3_res05" 857 | top: "pool4_res05" 858 | name: "pool4_res05" 859 | type: "Pooling" 860 | pooling_param { 861 | pool: MAX 862 | kernel_size: 3 863 | pad: 1 864 | stride: 1 865 | } 866 | } 867 | layer { 868 | bottom: "pool4_res05" 869 | top: "conv5_1_res05" 870 | name: "conv5_1_res05" 871 | type: "Convolution" 872 | param { 873 | name: "conv5_1_w" 874 | lr_mult: 1 875 | decay_mult: 1 876 | } 877 | param { 878 | name: "conv5_1_b" 879 | lr_mult: 2 880 | decay_mult: 0 881 | } 882 | convolution_param { 883 | num_output: 512 884 | #pad: 1 885 | pad: 2 886 | dilation: 2 887 | kernel_size: 3 888 | } 889 | } 890 | layer { 891 | bottom: "conv5_1_res05" 892 | top: "conv5_1_res05" 893 | name: "relu5_1_res05" 894 | type: "ReLU" 895 | } 896 | layer { 897 | bottom: "conv5_1_res05" 898 | top: "conv5_2_res05" 899 | name: "conv5_2_res05" 900 | type: "Convolution" 901 | param { 902 | name: "conv5_2_w" 903 | lr_mult: 1 904 | decay_mult: 1 905 | } 906 | param { 907 | name: "conv5_2_b" 908 | lr_mult: 2 909 | decay_mult: 0 910 | } 911 | convolution_param { 912 | num_output: 512 913 | #pad: 1 914 | pad: 2 915 | dilation: 2 916 | kernel_size: 3 917 | } 918 | } 919 | layer { 920 | bottom: "conv5_2_res05" 921 | top: "conv5_2_res05" 922 | name: "relu5_2_res05" 923 | type: "ReLU" 924 | } 925 | layer { 926 | bottom: "conv5_2_res05" 927 | top: "conv5_3_res05" 928 | name: "conv5_3_res05" 929 | type: "Convolution" 930 | param { 931 | name: "conv5_3_w" 932 | lr_mult: 1 933 | decay_mult: 1 934 | } 935 | param { 936 | name: "conv5_3_b" 937 | lr_mult: 2 938 | decay_mult: 0 939 | } 940 | convolution_param { 941 | num_output: 512 942 | #pad: 1 943 | pad: 2 944 | dilation: 2 945 | kernel_size: 3 946 | } 947 | } 948 | layer { 949 | bottom: "conv5_3_res05" 950 | top: "conv5_3_res05" 951 | name: "relu5_3_res05" 952 | type: "ReLU" 953 | } 954 | layer { 955 | bottom: "conv5_3_res05" 956 | top: "pool5_res05" 957 | name: "pool5_res05" 958 | type: "Pooling" 959 | pooling_param { 960 | pool: MAX 961 | #kernel_size: 2 962 | #stride: 2 963 | kernel_size: 3 964 | stride: 1 965 | pad: 1 966 | } 967 | } 968 | 969 | layer { 970 | bottom: "pool5_res05" 971 | top: "fc6_res05" 972 | name: "fc6_res05" 973 | type: "Convolution" 974 | param { 975 | name: "fc6_w" 976 | lr_mult: 1 977 | decay_mult: 1 978 | } 979 | param { 980 | name: "fc6_b" 981 | lr_mult: 2 982 | decay_mult: 0 983 | } 984 | convolution_param { 985 | num_output: 1024 986 | pad: 12 987 | dilation: 12 988 | kernel_size: 3 989 | } 990 | } 991 | layer { 992 | bottom: "fc6_res05" 993 | top: "fc6_res05" 994 | name: "relu6_res05" 995 | type: "ReLU" 996 | } 997 | layer { 998 | bottom: "fc6_res05" 999 | top: "fc6_res05" 1000 | name: "drop6_res05" 1001 | type: "Dropout" 1002 | dropout_param { 1003 | dropout_ratio: 0.5 1004 | } 1005 | } 1006 | layer { 1007 | bottom: "fc6_res05" 1008 | top: "fc7_res05" 1009 | name: "fc7_res05" 1010 | type: "Convolution" 1011 | param { 1012 | name: "fc7_w" 1013 | lr_mult: 1 1014 | decay_mult: 1 1015 | } 1016 | param { 1017 | name: "fc7_b" 1018 | lr_mult: 2 1019 | decay_mult: 0 1020 | } 1021 | convolution_param { 1022 | num_output: 1024 1023 | kernel_size: 1 1024 | } 1025 | } 1026 | layer { 1027 | bottom: "fc7_res05" 1028 | top: "fc7_res05" 1029 | name: "relu7_res05" 1030 | type: "ReLU" 1031 | } 1032 | layer { 1033 | bottom: "fc7_res05" 1034 | top: "fc7_res05" 1035 | name: "drop7_res05" 1036 | type: "Dropout" 1037 | dropout_param { 1038 | dropout_ratio: 0.5 1039 | } 1040 | } 1041 | 1042 | ### NETWORK for resolution 3/4 ### 1043 | 1044 | layer { 1045 | bottom: "shrink_data075" 1046 | top: "conv1_1_res075" 1047 | name: "conv1_1_res075" 1048 | type: "Convolution" 1049 | param { 1050 | name: "conv1_1_w" 1051 | lr_mult: 1 1052 | decay_mult: 1 1053 | } 1054 | param { 1055 | name: "conv1_1_b" 1056 | lr_mult: 2 1057 | decay_mult: 0 1058 | } 1059 | convolution_param { 1060 | num_output: 64 1061 | pad: 1 1062 | kernel_size: 3 1063 | } 1064 | } 1065 | layer { 1066 | bottom: "conv1_1_res075" 1067 | top: "conv1_1_res075" 1068 | name: "relu1_1_res075" 1069 | type: "ReLU" 1070 | } 1071 | layer { 1072 | bottom: "conv1_1_res075" 1073 | top: "conv1_2_res075" 1074 | name: "conv1_2_res075" 1075 | type: "Convolution" 1076 | param { 1077 | name: "conv1_2_w" 1078 | lr_mult: 1 1079 | decay_mult: 1 1080 | } 1081 | param { 1082 | name: "conv1_2_b" 1083 | lr_mult: 2 1084 | decay_mult: 0 1085 | } 1086 | convolution_param { 1087 | num_output: 64 1088 | pad: 1 1089 | kernel_size: 3 1090 | } 1091 | } 1092 | layer { 1093 | bottom: "conv1_2_res075" 1094 | top: "conv1_2_res075" 1095 | name: "relu1_2_res075" 1096 | type: "ReLU" 1097 | } 1098 | layer { 1099 | bottom: "conv1_2_res075" 1100 | top: "pool1_res075" 1101 | name: "pool1_res075" 1102 | type: "Pooling" 1103 | pooling_param { 1104 | pool: MAX 1105 | kernel_size: 3 1106 | stride: 2 1107 | pad: 1 1108 | } 1109 | } 1110 | layer { 1111 | bottom: "pool1_res075" 1112 | top: "conv2_1_res075" 1113 | name: "conv2_1_res075" 1114 | type: "Convolution" 1115 | param { 1116 | name: "conv2_1_w" 1117 | lr_mult: 1 1118 | decay_mult: 1 1119 | } 1120 | param { 1121 | name: "conv2_1_b" 1122 | lr_mult: 2 1123 | decay_mult: 0 1124 | } 1125 | convolution_param { 1126 | num_output: 128 1127 | pad: 1 1128 | kernel_size: 3 1129 | } 1130 | } 1131 | layer { 1132 | bottom: "conv2_1_res075" 1133 | top: "conv2_1_res075" 1134 | name: "relu2_1_res075" 1135 | type: "ReLU" 1136 | } 1137 | layer { 1138 | bottom: "conv2_1_res075" 1139 | top: "conv2_2_res075" 1140 | name: "conv2_2_res075" 1141 | type: "Convolution" 1142 | param { 1143 | name: "conv2_2_w" 1144 | lr_mult: 1 1145 | decay_mult: 1 1146 | } 1147 | param { 1148 | name: "conv2_2_b" 1149 | lr_mult: 2 1150 | decay_mult: 0 1151 | } 1152 | convolution_param { 1153 | num_output: 128 1154 | pad: 1 1155 | kernel_size: 3 1156 | } 1157 | } 1158 | layer { 1159 | bottom: "conv2_2_res075" 1160 | top: "conv2_2_res075" 1161 | name: "relu2_2_res075" 1162 | type: "ReLU" 1163 | } 1164 | layer { 1165 | bottom: "conv2_2_res075" 1166 | top: "pool2_res075" 1167 | name: "pool2_res075" 1168 | type: "Pooling" 1169 | pooling_param { 1170 | pool: MAX 1171 | kernel_size: 3 1172 | stride: 2 1173 | pad: 1 1174 | } 1175 | } 1176 | layer { 1177 | bottom: "pool2_res075" 1178 | top: "conv3_1_res075" 1179 | name: "conv3_1_res075" 1180 | type: "Convolution" 1181 | param { 1182 | name: "conv3_1_w" 1183 | lr_mult: 1 1184 | decay_mult: 1 1185 | } 1186 | param { 1187 | name: "conv3_1_b" 1188 | lr_mult: 2 1189 | decay_mult: 0 1190 | } 1191 | convolution_param { 1192 | num_output: 256 1193 | pad: 1 1194 | kernel_size: 3 1195 | } 1196 | } 1197 | layer { 1198 | bottom: "conv3_1_res075" 1199 | top: "conv3_1_res075" 1200 | name: "relu3_1_res075" 1201 | type: "ReLU" 1202 | } 1203 | layer { 1204 | bottom: "conv3_1_res075" 1205 | top: "conv3_2_res075" 1206 | name: "conv3_2_res075" 1207 | type: "Convolution" 1208 | param { 1209 | name: "conv3_2_w" 1210 | lr_mult: 1 1211 | decay_mult: 1 1212 | } 1213 | param { 1214 | name: "conv3_2_b" 1215 | lr_mult: 2 1216 | decay_mult: 0 1217 | } 1218 | convolution_param { 1219 | num_output: 256 1220 | pad: 1 1221 | kernel_size: 3 1222 | } 1223 | } 1224 | layer { 1225 | bottom: "conv3_2_res075" 1226 | top: "conv3_2_res075" 1227 | name: "relu3_2_res075" 1228 | type: "ReLU" 1229 | } 1230 | layer { 1231 | bottom: "conv3_2_res075" 1232 | top: "conv3_3_res075" 1233 | name: "conv3_3_res075" 1234 | type: "Convolution" 1235 | param { 1236 | name: "conv3_3_w" 1237 | lr_mult: 1 1238 | decay_mult: 1 1239 | } 1240 | param { 1241 | name: "conv3_3_b" 1242 | lr_mult: 2 1243 | decay_mult: 0 1244 | } 1245 | convolution_param { 1246 | num_output: 256 1247 | pad: 1 1248 | kernel_size: 3 1249 | } 1250 | } 1251 | layer { 1252 | bottom: "conv3_3_res075" 1253 | top: "conv3_3_res075" 1254 | name: "relu3_3_res075" 1255 | type: "ReLU" 1256 | } 1257 | layer { 1258 | bottom: "conv3_3_res075" 1259 | top: "pool3_res075" 1260 | name: "pool3_res075" 1261 | type: "Pooling" 1262 | pooling_param { 1263 | pool: MAX 1264 | kernel_size: 3 1265 | stride: 2 1266 | pad: 1 1267 | } 1268 | } 1269 | layer { 1270 | bottom: "pool3_res075" 1271 | top: "conv4_1_res075" 1272 | name: "conv4_1_res075" 1273 | type: "Convolution" 1274 | param { 1275 | name: "conv4_1_w" 1276 | lr_mult: 1 1277 | decay_mult: 1 1278 | } 1279 | param { 1280 | name: "conv4_1_b" 1281 | lr_mult: 2 1282 | decay_mult: 0 1283 | } 1284 | convolution_param { 1285 | num_output: 512 1286 | pad: 1 1287 | kernel_size: 3 1288 | } 1289 | } 1290 | layer { 1291 | bottom: "conv4_1_res075" 1292 | top: "conv4_1_res075" 1293 | name: "relu4_1_res075" 1294 | type: "ReLU" 1295 | } 1296 | layer { 1297 | bottom: "conv4_1_res075" 1298 | top: "conv4_2_res075" 1299 | name: "conv4_2_res075" 1300 | type: "Convolution" 1301 | param { 1302 | name: "conv4_2_w" 1303 | lr_mult: 1 1304 | decay_mult: 1 1305 | } 1306 | param { 1307 | name: "conv4_2_b" 1308 | lr_mult: 2 1309 | decay_mult: 0 1310 | } 1311 | convolution_param { 1312 | num_output: 512 1313 | pad: 1 1314 | kernel_size: 3 1315 | } 1316 | } 1317 | layer { 1318 | bottom: "conv4_2_res075" 1319 | top: "conv4_2_res075" 1320 | name: "relu4_2_res075" 1321 | type: "ReLU" 1322 | } 1323 | layer { 1324 | bottom: "conv4_2_res075" 1325 | top: "conv4_3_res075" 1326 | name: "conv4_3_res075" 1327 | type: "Convolution" 1328 | param { 1329 | name: "conv4_3_w" 1330 | lr_mult: 1 1331 | decay_mult: 1 1332 | } 1333 | param { 1334 | name: "conv4_3_b" 1335 | lr_mult: 2 1336 | decay_mult: 0 1337 | } 1338 | convolution_param { 1339 | num_output: 512 1340 | pad: 1 1341 | kernel_size: 3 1342 | } 1343 | } 1344 | layer { 1345 | bottom: "conv4_3_res075" 1346 | top: "conv4_3_res075" 1347 | name: "relu4_3_res075" 1348 | type: "ReLU" 1349 | } 1350 | layer { 1351 | bottom: "conv4_3_res075" 1352 | top: "pool4_res075" 1353 | name: "pool4_res075" 1354 | type: "Pooling" 1355 | pooling_param { 1356 | pool: MAX 1357 | kernel_size: 3 1358 | pad: 1 1359 | stride: 1 1360 | } 1361 | } 1362 | layer { 1363 | bottom: "pool4_res075" 1364 | top: "conv5_1_res075" 1365 | name: "conv5_1_res075" 1366 | type: "Convolution" 1367 | param { 1368 | name: "conv5_1_w" 1369 | lr_mult: 1 1370 | decay_mult: 1 1371 | } 1372 | param { 1373 | name: "conv5_1_b" 1374 | lr_mult: 2 1375 | decay_mult: 0 1376 | } 1377 | convolution_param { 1378 | num_output: 512 1379 | #pad: 1 1380 | pad: 2 1381 | dilation: 2 1382 | kernel_size: 3 1383 | } 1384 | } 1385 | layer { 1386 | bottom: "conv5_1_res075" 1387 | top: "conv5_1_res075" 1388 | name: "relu5_1_res075" 1389 | type: "ReLU" 1390 | } 1391 | layer { 1392 | bottom: "conv5_1_res075" 1393 | top: "conv5_2_res075" 1394 | name: "conv5_2_res075" 1395 | type: "Convolution" 1396 | param { 1397 | name: "conv5_2_w" 1398 | lr_mult: 1 1399 | decay_mult: 1 1400 | } 1401 | param { 1402 | name: "conv5_2_b" 1403 | lr_mult: 2 1404 | decay_mult: 0 1405 | } 1406 | convolution_param { 1407 | num_output: 512 1408 | #pad: 1 1409 | pad: 2 1410 | dilation: 2 1411 | kernel_size: 3 1412 | } 1413 | } 1414 | layer { 1415 | bottom: "conv5_2_res075" 1416 | top: "conv5_2_res075" 1417 | name: "relu5_2_res075" 1418 | type: "ReLU" 1419 | } 1420 | layer { 1421 | bottom: "conv5_2_res075" 1422 | top: "conv5_3_res075" 1423 | name: "conv5_3_res075" 1424 | type: "Convolution" 1425 | param { 1426 | name: "conv5_3_w" 1427 | lr_mult: 1 1428 | decay_mult: 1 1429 | } 1430 | param { 1431 | name: "conv5_3_b" 1432 | lr_mult: 2 1433 | decay_mult: 0 1434 | } 1435 | convolution_param { 1436 | num_output: 512 1437 | #pad: 1 1438 | pad: 2 1439 | dilation: 2 1440 | kernel_size: 3 1441 | } 1442 | } 1443 | layer { 1444 | bottom: "conv5_3_res075" 1445 | top: "conv5_3_res075" 1446 | name: "relu5_3_res075" 1447 | type: "ReLU" 1448 | } 1449 | layer { 1450 | bottom: "conv5_3_res075" 1451 | top: "pool5_res075" 1452 | name: "pool5_res075" 1453 | type: "Pooling" 1454 | pooling_param { 1455 | pool: MAX 1456 | #kernel_size: 2 1457 | #stride: 2 1458 | kernel_size: 3 1459 | stride: 1 1460 | pad: 1 1461 | } 1462 | } 1463 | 1464 | layer { 1465 | bottom: "pool5_res075" 1466 | top: "fc6_res075" 1467 | name: "fc6_res075" 1468 | type: "Convolution" 1469 | param { 1470 | name: "fc6_w" 1471 | lr_mult: 1 1472 | decay_mult: 1 1473 | } 1474 | param { 1475 | name: "fc6_b" 1476 | lr_mult: 2 1477 | decay_mult: 0 1478 | } 1479 | convolution_param { 1480 | num_output: 1024 1481 | pad: 12 1482 | dilation: 12 1483 | kernel_size: 3 1484 | } 1485 | } 1486 | layer { 1487 | bottom: "fc6_res075" 1488 | top: "fc6_res075" 1489 | name: "relu6_res075" 1490 | type: "ReLU" 1491 | } 1492 | layer { 1493 | bottom: "fc6_res075" 1494 | top: "fc6_res075" 1495 | name: "drop6_res075" 1496 | type: "Dropout" 1497 | dropout_param { 1498 | dropout_ratio: 0.5 1499 | } 1500 | } 1501 | layer { 1502 | bottom: "fc6_res075" 1503 | top: "fc7_res075" 1504 | name: "fc7_res075" 1505 | type: "Convolution" 1506 | param { 1507 | name: "fc7_w" 1508 | lr_mult: 1 1509 | decay_mult: 1 1510 | } 1511 | param { 1512 | name: "fc7_b" 1513 | lr_mult: 2 1514 | decay_mult: 0 1515 | } 1516 | convolution_param { 1517 | num_output: 1024 1518 | kernel_size: 1 1519 | } 1520 | } 1521 | layer { 1522 | bottom: "fc7_res075" 1523 | top: "fc7_res075" 1524 | name: "relu7_res075" 1525 | type: "ReLU" 1526 | } 1527 | layer { 1528 | bottom: "fc7_res075" 1529 | top: "fc7_res075" 1530 | name: "drop7_res075" 1531 | type: "Dropout" 1532 | dropout_param { 1533 | dropout_ratio: 0.5 1534 | } 1535 | } 1536 | 1537 | ############### classifier for resolution 1################### 1538 | layer { 1539 | bottom: "fc7" 1540 | top: "fc8_${EXP}" 1541 | name: "fc8_${EXP}" 1542 | type: "Convolution" 1543 | param { 1544 | name: "fc8_w" 1545 | lr_mult: 10 1546 | decay_mult: 1 1547 | } 1548 | param { 1549 | name: "fc8_b" 1550 | lr_mult: 20 1551 | decay_mult: 0 1552 | } 1553 | convolution_param { 1554 | num_output: ${NUM_LABELS} 1555 | kernel_size: 1 1556 | } 1557 | } 1558 | 1559 | ############### classifier for resolution 1/2 ################### 1560 | layer { 1561 | bottom: "fc7_res05" 1562 | top: "fc8_${EXP}_res05" 1563 | name: "fc8_${EXP}_res05" 1564 | type: "Convolution" 1565 | param { 1566 | name: "fc8_w" 1567 | lr_mult: 10 1568 | decay_mult: 1 1569 | } 1570 | param { 1571 | name: "fc8_b" 1572 | lr_mult: 20 1573 | decay_mult: 0 1574 | } 1575 | convolution_param { 1576 | num_output: ${NUM_LABELS} 1577 | kernel_size: 1 1578 | } 1579 | } 1580 | 1581 | ############### classifier for resolution 3/4 ################### 1582 | layer { 1583 | bottom: "fc7_res075" 1584 | top: "fc8_${EXP}_res075" 1585 | name: "fc8_${EXP}_res075" 1586 | type: "Convolution" 1587 | param { 1588 | name: "fc8_w" 1589 | lr_mult: 10 1590 | decay_mult: 1 1591 | } 1592 | param { 1593 | name: "fc8_b" 1594 | lr_mult: 20 1595 | decay_mult: 0 1596 | } 1597 | convolution_param { 1598 | num_output: ${NUM_LABELS} 1599 | kernel_size: 1 1600 | } 1601 | } 1602 | 1603 | ############### upsampling ################## 1604 | layer { 1605 | bottom: "fc7_res05" 1606 | top: "fc7_res05_interp" 1607 | name: "fc7_res05_interp" 1608 | type: "Interp" 1609 | interp_param { 1610 | zoom_factor: 2 1611 | pad_beg: 0 1612 | pad_end: 0 1613 | } 1614 | } 1615 | layer { 1616 | bottom: "fc7_res075" 1617 | top: "fc7_res075_interp" 1618 | name: "fc7_res075_interp" 1619 | type: "Interp" 1620 | interp_param { 1621 | zoom_factor: 4 1622 | shrink_factor: 3 1623 | pad_beg: 0 1624 | pad_end: 0 1625 | } 1626 | } 1627 | layer { 1628 | bottom: "fc8_${EXP}_res05" 1629 | top: "fc8_${EXP}_res05_interp" 1630 | name: "fc8_${EXP}_res05_interp" 1631 | type: "Interp" 1632 | interp_param { 1633 | zoom_factor: 2 1634 | pad_beg: 0 1635 | pad_end: 0 1636 | } 1637 | } 1638 | layer { 1639 | bottom: "fc8_${EXP}_res075" 1640 | top: "fc8_${EXP}_res075_interp" 1641 | name: "fc8_${EXP}_res075_interp" 1642 | type: "Interp" 1643 | interp_param { 1644 | zoom_factor: 4 1645 | shrink_factor: 3 1646 | pad_beg: 0 1647 | pad_end: 0 1648 | } 1649 | } 1650 | 1651 | ############### concatenation and pass through attention model ######### 1652 | layer { 1653 | bottom: "fc7" 1654 | bottom: "fc7_res075_interp" 1655 | bottom: "fc7_res05_interp" 1656 | top: "fc7_concat" 1657 | name: "fc7_concat" 1658 | type: "Concat" 1659 | concat_param { 1660 | axis: 1 1661 | } 1662 | } 1663 | 1664 | ### attention model 1665 | layer { 1666 | bottom: "fc7_concat" 1667 | top: "att_conv1" 1668 | name: "att_conv1" 1669 | type: "Convolution" 1670 | convolution_param { 1671 | num_output: 512 1672 | kernel_size: 3 1673 | pad: 1 1674 | } 1675 | } 1676 | layer { 1677 | bottom: "att_conv1" 1678 | top: "att_conv1" 1679 | name: "relu_att_conv1" 1680 | type: "ReLU" 1681 | } 1682 | layer { 1683 | bottom: "att_conv1" 1684 | top: "att_conv1" 1685 | name: "drop_att_conv1" 1686 | type: "Dropout" 1687 | dropout_param { 1688 | dropout_ratio: 0.5 1689 | } 1690 | } 1691 | 1692 | layer { 1693 | bottom: "att_conv1" 1694 | top: "att_fc" 1695 | name: "att_fc" 1696 | type: "Convolution" 1697 | convolution_param { 1698 | num_output: 3 1699 | kernel_size: 1 1700 | } 1701 | } 1702 | layer { 1703 | bottom: "att_fc" 1704 | top: "attention" 1705 | name: "attention" 1706 | type: "Softmax" 1707 | } 1708 | 1709 | ############### collect the output from attention model ######## 1710 | layer { 1711 | bottom: "attention" 1712 | top: "attention1" 1713 | top: "attention2" 1714 | top: "attention3" 1715 | name: "slice_attention" 1716 | type: "Slice" 1717 | slice_param { 1718 | axis: 1 1719 | slice_point: 1 1720 | slice_point: 2 1721 | } 1722 | } 1723 | 1724 | ############### scale features ############# 1725 | layer { 1726 | bottom: "fc8_${EXP}" 1727 | bottom: "attention1" 1728 | top: "fc8_product" 1729 | name: "fc8_product" 1730 | type: "SpatialProduct" 1731 | } 1732 | 1733 | layer { 1734 | bottom: "fc8_${EXP}_res075_interp" 1735 | bottom: "attention2" 1736 | top: "fc8_res075_product" 1737 | name: "fc8_res075_product" 1738 | type: "SpatialProduct" 1739 | } 1740 | 1741 | layer { 1742 | bottom: "fc8_${EXP}_res05_interp" 1743 | bottom: "attention3" 1744 | top: "fc8_res05_product" 1745 | name: "fc8_res05_product" 1746 | type: "SpatialProduct" 1747 | } 1748 | 1749 | ### add features ### 1750 | layer { 1751 | bottom: "fc8_product" 1752 | bottom: "fc8_res075_product" 1753 | bottom: "fc8_res05_product" 1754 | top: "fc8_fusion" 1755 | name: "fc8_fusion" 1756 | type: "Eltwise" 1757 | eltwise_param { 1758 | operation: SUM 1759 | } 1760 | } 1761 | ############### upsampling ################ 1762 | #layer { 1763 | # bottom: "label" 1764 | # top: "label_shrink8" 1765 | # name: "label_shrink8" 1766 | # type: "Interp" 1767 | # interp_param { 1768 | # shrink_factor: 8 1769 | # pad_beg: 0 1770 | # pad_end: 0 1771 | # } 1772 | #} 1773 | layer { 1774 | bottom: "fc8_fusion" 1775 | top: "fc8_interp" 1776 | name: "fc8_interp" 1777 | type: "Interp" 1778 | interp_param { 1779 | zoom_factor: 8 1780 | } 1781 | } 1782 | 1783 | layer { 1784 | bottom: "fc8_interp" 1785 | top: "fc8_mask" 1786 | name: "fc8_mask" 1787 | type: "MaskCreate" 1788 | mask_create_param{ 1789 | num_cls: 20 1790 | } 1791 | } 1792 | 1793 | layer { 1794 | name: "fc8_mat" 1795 | type: "MatWrite" 1796 | bottom: "fc8_mask" 1797 | mat_write_param { 1798 | prefix: "${FEATURE_DIR}/${TEST_SET}/fc8_mask/" 1799 | source: "${EXP}/list/${TEST_SET}_id.txt" 1800 | strip: 0 1801 | period: 1 1802 | } 1803 | include: { phase: TEST } 1804 | } 1805 | #layer { 1806 | # name: "accuracy" 1807 | # type: "SegAccuracy" 1808 | # bottom: "fc8_fusion" 1809 | # bottom: "label_shrink8" 1810 | # top: "accuracy" 1811 | # seg_accuracy_param { 1812 | # ignore_label: 255 1813 | # } 1814 | #} 1815 | # 1816 | layer { 1817 | name: "silence" 1818 | type: "Silence" 1819 | bottom: "label" 1820 | } 1821 | -------------------------------------------------------------------------------- /human/config/no-ssl/test.prototxt: -------------------------------------------------------------------------------- 1 | # VGG 16-layer network convolutional finetuning 2 | # Network modified to have smaller receptive field (128 pixels) 3 | # and smaller stride (8 pixels) when run in convolutional mode. 4 | # 5 | # In this model we also change max pooling size in the first 4 layers 6 | # from 2 to 3 while retaining stride = 2 7 | # which makes it easier to exactly align responses at different layers. 8 | # 9 | name: "${NET_ID}" 10 | 11 | layer { 12 | name: "data" 13 | type: "ImageSegData" 14 | top: "data" 15 | top: "label" 16 | image_data_param { 17 | root_folder: "${DATA_ROOT}" 18 | source: "${EXP}/list/${TEST_SET}.txt" 19 | batch_size: 1 20 | label_type: NONE 21 | # label_type: PIXEL 22 | } 23 | transform_param { 24 | mean_value: 104.008 25 | mean_value: 116.669 26 | mean_value: 122.675 27 | crop_size: 640 28 | mirror: false 29 | } 30 | include: { phase: TEST } 31 | } 32 | ### shrink data ### 33 | layer { 34 | bottom: "data" 35 | top: "shrink_data05" 36 | name: "shrink_data05" 37 | type: "Interp" 38 | interp_param { 39 | shrink_factor: 2 40 | pad_beg: 0 41 | pad_end: 0 42 | } 43 | } 44 | 45 | layer { 46 | bottom: "data" 47 | top: "shrink_data075" 48 | name: "shrink_data075" 49 | type: "Interp" 50 | interp_param { 51 | shrink_factor: 4 52 | zoom_factor: 3 53 | pad_beg: 0 54 | pad_end: 0 55 | } 56 | } 57 | 58 | ### NETWORK for resolution 1 ### 59 | 60 | layer { 61 | bottom: "data" 62 | top: "conv1_1" 63 | name: "conv1_1" 64 | type: "Convolution" 65 | param { 66 | name: "conv1_1_w" 67 | lr_mult: 1 68 | decay_mult: 1 69 | } 70 | param { 71 | name: "conv1_1_b" 72 | lr_mult: 2 73 | decay_mult: 0 74 | } 75 | convolution_param { 76 | num_output: 64 77 | pad: 1 78 | kernel_size: 3 79 | } 80 | } 81 | layer { 82 | bottom: "conv1_1" 83 | top: "conv1_1" 84 | name: "relu1_1" 85 | type: "ReLU" 86 | } 87 | layer { 88 | bottom: "conv1_1" 89 | top: "conv1_2" 90 | name: "conv1_2" 91 | type: "Convolution" 92 | param { 93 | name: "conv1_2_w" 94 | lr_mult: 1 95 | decay_mult: 1 96 | } 97 | param { 98 | name: "conv1_2_b" 99 | lr_mult: 2 100 | decay_mult: 0 101 | } 102 | convolution_param { 103 | num_output: 64 104 | pad: 1 105 | kernel_size: 3 106 | } 107 | } 108 | layer { 109 | bottom: "conv1_2" 110 | top: "conv1_2" 111 | name: "relu1_2" 112 | type: "ReLU" 113 | } 114 | layer { 115 | bottom: "conv1_2" 116 | top: "pool1" 117 | name: "pool1" 118 | type: "Pooling" 119 | pooling_param { 120 | pool: MAX 121 | kernel_size: 3 122 | stride: 2 123 | pad: 1 124 | } 125 | } 126 | layer { 127 | bottom: "pool1" 128 | top: "conv2_1" 129 | name: "conv2_1" 130 | type: "Convolution" 131 | param { 132 | name: "conv2_1_w" 133 | lr_mult: 1 134 | decay_mult: 1 135 | } 136 | param { 137 | name: "conv2_1_b" 138 | lr_mult: 2 139 | decay_mult: 0 140 | } 141 | convolution_param { 142 | num_output: 128 143 | pad: 1 144 | kernel_size: 3 145 | } 146 | } 147 | layer { 148 | bottom: "conv2_1" 149 | top: "conv2_1" 150 | name: "relu2_1" 151 | type: "ReLU" 152 | } 153 | layer { 154 | bottom: "conv2_1" 155 | top: "conv2_2" 156 | name: "conv2_2" 157 | type: "Convolution" 158 | param { 159 | name: "conv2_2_w" 160 | lr_mult: 1 161 | decay_mult: 1 162 | } 163 | param { 164 | name: "conv2_2_b" 165 | lr_mult: 2 166 | decay_mult: 0 167 | } 168 | convolution_param { 169 | num_output: 128 170 | pad: 1 171 | kernel_size: 3 172 | } 173 | } 174 | layer { 175 | bottom: "conv2_2" 176 | top: "conv2_2" 177 | name: "relu2_2" 178 | type: "ReLU" 179 | } 180 | layer { 181 | bottom: "conv2_2" 182 | top: "pool2" 183 | name: "pool2" 184 | type: "Pooling" 185 | pooling_param { 186 | pool: MAX 187 | kernel_size: 3 188 | stride: 2 189 | pad: 1 190 | } 191 | } 192 | layer { 193 | bottom: "pool2" 194 | top: "conv3_1" 195 | name: "conv3_1" 196 | type: "Convolution" 197 | param { 198 | name: "conv3_1_w" 199 | lr_mult: 1 200 | decay_mult: 1 201 | } 202 | param { 203 | name: "conv3_1_b" 204 | lr_mult: 2 205 | decay_mult: 0 206 | } 207 | convolution_param { 208 | num_output: 256 209 | pad: 1 210 | kernel_size: 3 211 | } 212 | } 213 | layer { 214 | bottom: "conv3_1" 215 | top: "conv3_1" 216 | name: "relu3_1" 217 | type: "ReLU" 218 | } 219 | layer { 220 | bottom: "conv3_1" 221 | top: "conv3_2" 222 | name: "conv3_2" 223 | type: "Convolution" 224 | param { 225 | name: "conv3_2_w" 226 | lr_mult: 1 227 | decay_mult: 1 228 | } 229 | param { 230 | name: "conv3_2_b" 231 | lr_mult: 2 232 | decay_mult: 0 233 | } 234 | convolution_param { 235 | num_output: 256 236 | pad: 1 237 | kernel_size: 3 238 | } 239 | } 240 | layer { 241 | bottom: "conv3_2" 242 | top: "conv3_2" 243 | name: "relu3_2" 244 | type: "ReLU" 245 | } 246 | layer { 247 | bottom: "conv3_2" 248 | top: "conv3_3" 249 | name: "conv3_3" 250 | type: "Convolution" 251 | param { 252 | name: "conv3_3_w" 253 | lr_mult: 1 254 | decay_mult: 1 255 | } 256 | param { 257 | name: "conv3_3_b" 258 | lr_mult: 2 259 | decay_mult: 0 260 | } 261 | convolution_param { 262 | num_output: 256 263 | pad: 1 264 | kernel_size: 3 265 | } 266 | } 267 | layer { 268 | bottom: "conv3_3" 269 | top: "conv3_3" 270 | name: "relu3_3" 271 | type: "ReLU" 272 | } 273 | layer { 274 | bottom: "conv3_3" 275 | top: "pool3" 276 | name: "pool3" 277 | type: "Pooling" 278 | pooling_param { 279 | pool: MAX 280 | kernel_size: 3 281 | stride: 2 282 | pad: 1 283 | } 284 | } 285 | layer { 286 | bottom: "pool3" 287 | top: "conv4_1" 288 | name: "conv4_1" 289 | type: "Convolution" 290 | param { 291 | name: "conv4_1_w" 292 | lr_mult: 1 293 | decay_mult: 1 294 | } 295 | param { 296 | name: "conv4_1_b" 297 | lr_mult: 2 298 | decay_mult: 0 299 | } 300 | convolution_param { 301 | num_output: 512 302 | pad: 1 303 | kernel_size: 3 304 | } 305 | } 306 | layer { 307 | bottom: "conv4_1" 308 | top: "conv4_1" 309 | name: "relu4_1" 310 | type: "ReLU" 311 | } 312 | layer { 313 | bottom: "conv4_1" 314 | top: "conv4_2" 315 | name: "conv4_2" 316 | type: "Convolution" 317 | param { 318 | name: "conv4_2_w" 319 | lr_mult: 1 320 | decay_mult: 1 321 | } 322 | param { 323 | name: "conv4_2_b" 324 | lr_mult: 2 325 | decay_mult: 0 326 | } 327 | convolution_param { 328 | num_output: 512 329 | pad: 1 330 | kernel_size: 3 331 | } 332 | } 333 | layer { 334 | bottom: "conv4_2" 335 | top: "conv4_2" 336 | name: "relu4_2" 337 | type: "ReLU" 338 | } 339 | layer { 340 | bottom: "conv4_2" 341 | top: "conv4_3" 342 | name: "conv4_3" 343 | type: "Convolution" 344 | param { 345 | name: "conv4_3_w" 346 | lr_mult: 1 347 | decay_mult: 1 348 | } 349 | param { 350 | name: "conv4_3_b" 351 | lr_mult: 2 352 | decay_mult: 0 353 | } 354 | convolution_param { 355 | num_output: 512 356 | pad: 1 357 | kernel_size: 3 358 | } 359 | } 360 | layer { 361 | bottom: "conv4_3" 362 | top: "conv4_3" 363 | name: "relu4_3" 364 | type: "ReLU" 365 | } 366 | layer { 367 | bottom: "conv4_3" 368 | top: "pool4" 369 | name: "pool4" 370 | type: "Pooling" 371 | pooling_param { 372 | pool: MAX 373 | kernel_size: 3 374 | pad: 1 375 | stride: 1 376 | } 377 | } 378 | layer { 379 | bottom: "pool4" 380 | top: "conv5_1" 381 | name: "conv5_1" 382 | type: "Convolution" 383 | param { 384 | name: "conv5_1_w" 385 | lr_mult: 1 386 | decay_mult: 1 387 | } 388 | param { 389 | name: "conv5_1_b" 390 | lr_mult: 2 391 | decay_mult: 0 392 | } 393 | convolution_param { 394 | num_output: 512 395 | #pad: 1 396 | pad: 2 397 | dilation: 2 398 | kernel_size: 3 399 | } 400 | } 401 | layer { 402 | bottom: "conv5_1" 403 | top: "conv5_1" 404 | name: "relu5_1" 405 | type: "ReLU" 406 | } 407 | layer { 408 | bottom: "conv5_1" 409 | top: "conv5_2" 410 | name: "conv5_2" 411 | type: "Convolution" 412 | param { 413 | name: "conv5_2_w" 414 | lr_mult: 1 415 | decay_mult: 1 416 | } 417 | param { 418 | name: "conv5_2_b" 419 | lr_mult: 2 420 | decay_mult: 0 421 | } 422 | convolution_param { 423 | num_output: 512 424 | #pad: 1 425 | pad: 2 426 | dilation: 2 427 | kernel_size: 3 428 | } 429 | } 430 | layer { 431 | bottom: "conv5_2" 432 | top: "conv5_2" 433 | name: "relu5_2" 434 | type: "ReLU" 435 | } 436 | layer { 437 | bottom: "conv5_2" 438 | top: "conv5_3" 439 | name: "conv5_3" 440 | type: "Convolution" 441 | param { 442 | name: "conv5_3_w" 443 | lr_mult: 1 444 | decay_mult: 1 445 | } 446 | param { 447 | name: "conv5_3_b" 448 | lr_mult: 2 449 | decay_mult: 0 450 | } 451 | convolution_param { 452 | num_output: 512 453 | #pad: 1 454 | pad: 2 455 | dilation: 2 456 | kernel_size: 3 457 | } 458 | } 459 | layer { 460 | bottom: "conv5_3" 461 | top: "conv5_3" 462 | name: "relu5_3" 463 | type: "ReLU" 464 | } 465 | layer { 466 | bottom: "conv5_3" 467 | top: "pool5" 468 | name: "pool5" 469 | type: "Pooling" 470 | pooling_param { 471 | pool: MAX 472 | #kernel_size: 2 473 | #stride: 2 474 | kernel_size: 3 475 | stride: 1 476 | pad: 1 477 | } 478 | } 479 | 480 | layer { 481 | bottom: "pool5" 482 | top: "fc6" 483 | name: "fc6" 484 | type: "Convolution" 485 | param { 486 | name: "fc6_w" 487 | lr_mult: 1 488 | decay_mult: 1 489 | } 490 | param { 491 | name: "fc6_b" 492 | lr_mult: 2 493 | decay_mult: 0 494 | } 495 | convolution_param { 496 | num_output: 1024 497 | pad: 12 498 | dilation: 12 499 | kernel_size: 3 500 | } 501 | } 502 | layer { 503 | bottom: "fc6" 504 | top: "fc6" 505 | name: "relu6" 506 | type: "ReLU" 507 | } 508 | layer { 509 | bottom: "fc6" 510 | top: "fc6" 511 | name: "drop6" 512 | type: "Dropout" 513 | dropout_param { 514 | dropout_ratio: 0.5 515 | } 516 | } 517 | layer { 518 | bottom: "fc6" 519 | top: "fc7" 520 | name: "fc7" 521 | type: "Convolution" 522 | param { 523 | name: "fc7_w" 524 | lr_mult: 1 525 | decay_mult: 1 526 | } 527 | param { 528 | name: "fc7_b" 529 | lr_mult: 2 530 | decay_mult: 0 531 | } 532 | convolution_param { 533 | num_output: 1024 534 | kernel_size: 1 535 | } 536 | } 537 | layer { 538 | bottom: "fc7" 539 | top: "fc7" 540 | name: "relu7" 541 | type: "ReLU" 542 | } 543 | layer { 544 | bottom: "fc7" 545 | top: "fc7" 546 | name: "drop7" 547 | type: "Dropout" 548 | dropout_param { 549 | dropout_ratio: 0.5 550 | } 551 | } 552 | 553 | ### NETWORK for resolution 1/2 ### 554 | 555 | layer { 556 | bottom: "shrink_data05" 557 | top: "conv1_1_res05" 558 | name: "conv1_1_res05" 559 | type: "Convolution" 560 | param { 561 | name: "conv1_1_w" 562 | lr_mult: 1 563 | decay_mult: 1 564 | } 565 | param { 566 | name: "conv1_1_b" 567 | lr_mult: 2 568 | decay_mult: 0 569 | } 570 | convolution_param { 571 | num_output: 64 572 | pad: 1 573 | kernel_size: 3 574 | } 575 | } 576 | layer { 577 | bottom: "conv1_1_res05" 578 | top: "conv1_1_res05" 579 | name: "relu1_1_res05" 580 | type: "ReLU" 581 | } 582 | layer { 583 | bottom: "conv1_1_res05" 584 | top: "conv1_2_res05" 585 | name: "conv1_2_res05" 586 | type: "Convolution" 587 | param { 588 | name: "conv1_2_w" 589 | lr_mult: 1 590 | decay_mult: 1 591 | } 592 | param { 593 | name: "conv1_2_b" 594 | lr_mult: 2 595 | decay_mult: 0 596 | } 597 | convolution_param { 598 | num_output: 64 599 | pad: 1 600 | kernel_size: 3 601 | } 602 | } 603 | layer { 604 | bottom: "conv1_2_res05" 605 | top: "conv1_2_res05" 606 | name: "relu1_2_res05" 607 | type: "ReLU" 608 | } 609 | layer { 610 | bottom: "conv1_2_res05" 611 | top: "pool1_res05" 612 | name: "pool1_res05" 613 | type: "Pooling" 614 | pooling_param { 615 | pool: MAX 616 | kernel_size: 3 617 | stride: 2 618 | pad: 1 619 | } 620 | } 621 | layer { 622 | bottom: "pool1_res05" 623 | top: "conv2_1_res05" 624 | name: "conv2_1_res05" 625 | type: "Convolution" 626 | param { 627 | name: "conv2_1_w" 628 | lr_mult: 1 629 | decay_mult: 1 630 | } 631 | param { 632 | name: "conv2_1_b" 633 | lr_mult: 2 634 | decay_mult: 0 635 | } 636 | convolution_param { 637 | num_output: 128 638 | pad: 1 639 | kernel_size: 3 640 | } 641 | } 642 | layer { 643 | bottom: "conv2_1_res05" 644 | top: "conv2_1_res05" 645 | name: "relu2_1_res05" 646 | type: "ReLU" 647 | } 648 | layer { 649 | bottom: "conv2_1_res05" 650 | top: "conv2_2_res05" 651 | name: "conv2_2_res05" 652 | type: "Convolution" 653 | param { 654 | name: "conv2_2_w" 655 | lr_mult: 1 656 | decay_mult: 1 657 | } 658 | param { 659 | name: "conv2_2_b" 660 | lr_mult: 2 661 | decay_mult: 0 662 | } 663 | convolution_param { 664 | num_output: 128 665 | pad: 1 666 | kernel_size: 3 667 | } 668 | } 669 | layer { 670 | bottom: "conv2_2_res05" 671 | top: "conv2_2_res05" 672 | name: "relu2_2_res05" 673 | type: "ReLU" 674 | } 675 | layer { 676 | bottom: "conv2_2_res05" 677 | top: "pool2_res05" 678 | name: "pool2_res05" 679 | type: "Pooling" 680 | pooling_param { 681 | pool: MAX 682 | kernel_size: 3 683 | stride: 2 684 | pad: 1 685 | } 686 | } 687 | layer { 688 | bottom: "pool2_res05" 689 | top: "conv3_1_res05" 690 | name: "conv3_1_res05" 691 | type: "Convolution" 692 | param { 693 | name: "conv3_1_w" 694 | lr_mult: 1 695 | decay_mult: 1 696 | } 697 | param { 698 | name: "conv3_1_b" 699 | lr_mult: 2 700 | decay_mult: 0 701 | } 702 | convolution_param { 703 | num_output: 256 704 | pad: 1 705 | kernel_size: 3 706 | } 707 | } 708 | layer { 709 | bottom: "conv3_1_res05" 710 | top: "conv3_1_res05" 711 | name: "relu3_1_res05" 712 | type: "ReLU" 713 | } 714 | layer { 715 | bottom: "conv3_1_res05" 716 | top: "conv3_2_res05" 717 | name: "conv3_2_res05" 718 | type: "Convolution" 719 | param { 720 | name: "conv3_2_w" 721 | lr_mult: 1 722 | decay_mult: 1 723 | } 724 | param { 725 | name: "conv3_2_b" 726 | lr_mult: 2 727 | decay_mult: 0 728 | } 729 | convolution_param { 730 | num_output: 256 731 | pad: 1 732 | kernel_size: 3 733 | } 734 | } 735 | layer { 736 | bottom: "conv3_2_res05" 737 | top: "conv3_2_res05" 738 | name: "relu3_2_res05" 739 | type: "ReLU" 740 | } 741 | layer { 742 | bottom: "conv3_2_res05" 743 | top: "conv3_3_res05" 744 | name: "conv3_3_res05" 745 | type: "Convolution" 746 | param { 747 | name: "conv3_3_w" 748 | lr_mult: 1 749 | decay_mult: 1 750 | } 751 | param { 752 | name: "conv3_3_b" 753 | lr_mult: 2 754 | decay_mult: 0 755 | } 756 | convolution_param { 757 | num_output: 256 758 | pad: 1 759 | kernel_size: 3 760 | } 761 | } 762 | layer { 763 | bottom: "conv3_3_res05" 764 | top: "conv3_3_res05" 765 | name: "relu3_3_res05" 766 | type: "ReLU" 767 | } 768 | layer { 769 | bottom: "conv3_3_res05" 770 | top: "pool3_res05" 771 | name: "pool3_res05" 772 | type: "Pooling" 773 | pooling_param { 774 | pool: MAX 775 | kernel_size: 3 776 | stride: 2 777 | pad: 1 778 | } 779 | } 780 | layer { 781 | bottom: "pool3_res05" 782 | top: "conv4_1_res05" 783 | name: "conv4_1_res05" 784 | type: "Convolution" 785 | param { 786 | name: "conv4_1_w" 787 | lr_mult: 1 788 | decay_mult: 1 789 | } 790 | param { 791 | name: "conv4_1_b" 792 | lr_mult: 2 793 | decay_mult: 0 794 | } 795 | convolution_param { 796 | num_output: 512 797 | pad: 1 798 | kernel_size: 3 799 | } 800 | } 801 | layer { 802 | bottom: "conv4_1_res05" 803 | top: "conv4_1_res05" 804 | name: "relu4_1_res05" 805 | type: "ReLU" 806 | } 807 | layer { 808 | bottom: "conv4_1_res05" 809 | top: "conv4_2_res05" 810 | name: "conv4_2_res05" 811 | type: "Convolution" 812 | param { 813 | name: "conv4_2_w" 814 | lr_mult: 1 815 | decay_mult: 1 816 | } 817 | param { 818 | name: "conv4_2_b" 819 | lr_mult: 2 820 | decay_mult: 0 821 | } 822 | convolution_param { 823 | num_output: 512 824 | pad: 1 825 | kernel_size: 3 826 | } 827 | } 828 | layer { 829 | bottom: "conv4_2_res05" 830 | top: "conv4_2_res05" 831 | name: "relu4_2_res05" 832 | type: "ReLU" 833 | } 834 | layer { 835 | bottom: "conv4_2_res05" 836 | top: "conv4_3_res05" 837 | name: "conv4_3_res05" 838 | type: "Convolution" 839 | param { 840 | name: "conv4_3_w" 841 | lr_mult: 1 842 | decay_mult: 1 843 | } 844 | param { 845 | name: "conv4_3_b" 846 | lr_mult: 2 847 | decay_mult: 0 848 | } 849 | convolution_param { 850 | num_output: 512 851 | pad: 1 852 | kernel_size: 3 853 | } 854 | } 855 | layer { 856 | bottom: "conv4_3_res05" 857 | top: "conv4_3_res05" 858 | name: "relu4_3_res05" 859 | type: "ReLU" 860 | } 861 | layer { 862 | bottom: "conv4_3_res05" 863 | top: "pool4_res05" 864 | name: "pool4_res05" 865 | type: "Pooling" 866 | pooling_param { 867 | pool: MAX 868 | kernel_size: 3 869 | pad: 1 870 | stride: 1 871 | } 872 | } 873 | layer { 874 | bottom: "pool4_res05" 875 | top: "conv5_1_res05" 876 | name: "conv5_1_res05" 877 | type: "Convolution" 878 | param { 879 | name: "conv5_1_w" 880 | lr_mult: 1 881 | decay_mult: 1 882 | } 883 | param { 884 | name: "conv5_1_b" 885 | lr_mult: 2 886 | decay_mult: 0 887 | } 888 | convolution_param { 889 | num_output: 512 890 | #pad: 1 891 | pad: 2 892 | dilation: 2 893 | kernel_size: 3 894 | } 895 | } 896 | layer { 897 | bottom: "conv5_1_res05" 898 | top: "conv5_1_res05" 899 | name: "relu5_1_res05" 900 | type: "ReLU" 901 | } 902 | layer { 903 | bottom: "conv5_1_res05" 904 | top: "conv5_2_res05" 905 | name: "conv5_2_res05" 906 | type: "Convolution" 907 | param { 908 | name: "conv5_2_w" 909 | lr_mult: 1 910 | decay_mult: 1 911 | } 912 | param { 913 | name: "conv5_2_b" 914 | lr_mult: 2 915 | decay_mult: 0 916 | } 917 | convolution_param { 918 | num_output: 512 919 | #pad: 1 920 | pad: 2 921 | dilation: 2 922 | kernel_size: 3 923 | } 924 | } 925 | layer { 926 | bottom: "conv5_2_res05" 927 | top: "conv5_2_res05" 928 | name: "relu5_2_res05" 929 | type: "ReLU" 930 | } 931 | layer { 932 | bottom: "conv5_2_res05" 933 | top: "conv5_3_res05" 934 | name: "conv5_3_res05" 935 | type: "Convolution" 936 | param { 937 | name: "conv5_3_w" 938 | lr_mult: 1 939 | decay_mult: 1 940 | } 941 | param { 942 | name: "conv5_3_b" 943 | lr_mult: 2 944 | decay_mult: 0 945 | } 946 | convolution_param { 947 | num_output: 512 948 | #pad: 1 949 | pad: 2 950 | dilation: 2 951 | kernel_size: 3 952 | } 953 | } 954 | layer { 955 | bottom: "conv5_3_res05" 956 | top: "conv5_3_res05" 957 | name: "relu5_3_res05" 958 | type: "ReLU" 959 | } 960 | layer { 961 | bottom: "conv5_3_res05" 962 | top: "pool5_res05" 963 | name: "pool5_res05" 964 | type: "Pooling" 965 | pooling_param { 966 | pool: MAX 967 | #kernel_size: 2 968 | #stride: 2 969 | kernel_size: 3 970 | stride: 1 971 | pad: 1 972 | } 973 | } 974 | 975 | layer { 976 | bottom: "pool5_res05" 977 | top: "fc6_res05" 978 | name: "fc6_res05" 979 | type: "Convolution" 980 | param { 981 | name: "fc6_w" 982 | lr_mult: 1 983 | decay_mult: 1 984 | } 985 | param { 986 | name: "fc6_b" 987 | lr_mult: 2 988 | decay_mult: 0 989 | } 990 | convolution_param { 991 | num_output: 1024 992 | pad: 12 993 | dilation: 12 994 | kernel_size: 3 995 | } 996 | } 997 | layer { 998 | bottom: "fc6_res05" 999 | top: "fc6_res05" 1000 | name: "relu6_res05" 1001 | type: "ReLU" 1002 | } 1003 | layer { 1004 | bottom: "fc6_res05" 1005 | top: "fc6_res05" 1006 | name: "drop6_res05" 1007 | type: "Dropout" 1008 | dropout_param { 1009 | dropout_ratio: 0.5 1010 | } 1011 | } 1012 | layer { 1013 | bottom: "fc6_res05" 1014 | top: "fc7_res05" 1015 | name: "fc7_res05" 1016 | type: "Convolution" 1017 | param { 1018 | name: "fc7_w" 1019 | lr_mult: 1 1020 | decay_mult: 1 1021 | } 1022 | param { 1023 | name: "fc7_b" 1024 | lr_mult: 2 1025 | decay_mult: 0 1026 | } 1027 | convolution_param { 1028 | num_output: 1024 1029 | kernel_size: 1 1030 | } 1031 | } 1032 | layer { 1033 | bottom: "fc7_res05" 1034 | top: "fc7_res05" 1035 | name: "relu7_res05" 1036 | type: "ReLU" 1037 | } 1038 | layer { 1039 | bottom: "fc7_res05" 1040 | top: "fc7_res05" 1041 | name: "drop7_res05" 1042 | type: "Dropout" 1043 | dropout_param { 1044 | dropout_ratio: 0.5 1045 | } 1046 | } 1047 | 1048 | ### NETWORK for resolution 3/4 ### 1049 | 1050 | layer { 1051 | bottom: "shrink_data075" 1052 | top: "conv1_1_res075" 1053 | name: "conv1_1_res075" 1054 | type: "Convolution" 1055 | param { 1056 | name: "conv1_1_w" 1057 | lr_mult: 1 1058 | decay_mult: 1 1059 | } 1060 | param { 1061 | name: "conv1_1_b" 1062 | lr_mult: 2 1063 | decay_mult: 0 1064 | } 1065 | convolution_param { 1066 | num_output: 64 1067 | pad: 1 1068 | kernel_size: 3 1069 | } 1070 | } 1071 | layer { 1072 | bottom: "conv1_1_res075" 1073 | top: "conv1_1_res075" 1074 | name: "relu1_1_res075" 1075 | type: "ReLU" 1076 | } 1077 | layer { 1078 | bottom: "conv1_1_res075" 1079 | top: "conv1_2_res075" 1080 | name: "conv1_2_res075" 1081 | type: "Convolution" 1082 | param { 1083 | name: "conv1_2_w" 1084 | lr_mult: 1 1085 | decay_mult: 1 1086 | } 1087 | param { 1088 | name: "conv1_2_b" 1089 | lr_mult: 2 1090 | decay_mult: 0 1091 | } 1092 | convolution_param { 1093 | num_output: 64 1094 | pad: 1 1095 | kernel_size: 3 1096 | } 1097 | } 1098 | layer { 1099 | bottom: "conv1_2_res075" 1100 | top: "conv1_2_res075" 1101 | name: "relu1_2_res075" 1102 | type: "ReLU" 1103 | } 1104 | layer { 1105 | bottom: "conv1_2_res075" 1106 | top: "pool1_res075" 1107 | name: "pool1_res075" 1108 | type: "Pooling" 1109 | pooling_param { 1110 | pool: MAX 1111 | kernel_size: 3 1112 | stride: 2 1113 | pad: 1 1114 | } 1115 | } 1116 | layer { 1117 | bottom: "pool1_res075" 1118 | top: "conv2_1_res075" 1119 | name: "conv2_1_res075" 1120 | type: "Convolution" 1121 | param { 1122 | name: "conv2_1_w" 1123 | lr_mult: 1 1124 | decay_mult: 1 1125 | } 1126 | param { 1127 | name: "conv2_1_b" 1128 | lr_mult: 2 1129 | decay_mult: 0 1130 | } 1131 | convolution_param { 1132 | num_output: 128 1133 | pad: 1 1134 | kernel_size: 3 1135 | } 1136 | } 1137 | layer { 1138 | bottom: "conv2_1_res075" 1139 | top: "conv2_1_res075" 1140 | name: "relu2_1_res075" 1141 | type: "ReLU" 1142 | } 1143 | layer { 1144 | bottom: "conv2_1_res075" 1145 | top: "conv2_2_res075" 1146 | name: "conv2_2_res075" 1147 | type: "Convolution" 1148 | param { 1149 | name: "conv2_2_w" 1150 | lr_mult: 1 1151 | decay_mult: 1 1152 | } 1153 | param { 1154 | name: "conv2_2_b" 1155 | lr_mult: 2 1156 | decay_mult: 0 1157 | } 1158 | convolution_param { 1159 | num_output: 128 1160 | pad: 1 1161 | kernel_size: 3 1162 | } 1163 | } 1164 | layer { 1165 | bottom: "conv2_2_res075" 1166 | top: "conv2_2_res075" 1167 | name: "relu2_2_res075" 1168 | type: "ReLU" 1169 | } 1170 | layer { 1171 | bottom: "conv2_2_res075" 1172 | top: "pool2_res075" 1173 | name: "pool2_res075" 1174 | type: "Pooling" 1175 | pooling_param { 1176 | pool: MAX 1177 | kernel_size: 3 1178 | stride: 2 1179 | pad: 1 1180 | } 1181 | } 1182 | layer { 1183 | bottom: "pool2_res075" 1184 | top: "conv3_1_res075" 1185 | name: "conv3_1_res075" 1186 | type: "Convolution" 1187 | param { 1188 | name: "conv3_1_w" 1189 | lr_mult: 1 1190 | decay_mult: 1 1191 | } 1192 | param { 1193 | name: "conv3_1_b" 1194 | lr_mult: 2 1195 | decay_mult: 0 1196 | } 1197 | convolution_param { 1198 | num_output: 256 1199 | pad: 1 1200 | kernel_size: 3 1201 | } 1202 | } 1203 | layer { 1204 | bottom: "conv3_1_res075" 1205 | top: "conv3_1_res075" 1206 | name: "relu3_1_res075" 1207 | type: "ReLU" 1208 | } 1209 | layer { 1210 | bottom: "conv3_1_res075" 1211 | top: "conv3_2_res075" 1212 | name: "conv3_2_res075" 1213 | type: "Convolution" 1214 | param { 1215 | name: "conv3_2_w" 1216 | lr_mult: 1 1217 | decay_mult: 1 1218 | } 1219 | param { 1220 | name: "conv3_2_b" 1221 | lr_mult: 2 1222 | decay_mult: 0 1223 | } 1224 | convolution_param { 1225 | num_output: 256 1226 | pad: 1 1227 | kernel_size: 3 1228 | } 1229 | } 1230 | layer { 1231 | bottom: "conv3_2_res075" 1232 | top: "conv3_2_res075" 1233 | name: "relu3_2_res075" 1234 | type: "ReLU" 1235 | } 1236 | layer { 1237 | bottom: "conv3_2_res075" 1238 | top: "conv3_3_res075" 1239 | name: "conv3_3_res075" 1240 | type: "Convolution" 1241 | param { 1242 | name: "conv3_3_w" 1243 | lr_mult: 1 1244 | decay_mult: 1 1245 | } 1246 | param { 1247 | name: "conv3_3_b" 1248 | lr_mult: 2 1249 | decay_mult: 0 1250 | } 1251 | convolution_param { 1252 | num_output: 256 1253 | pad: 1 1254 | kernel_size: 3 1255 | } 1256 | } 1257 | layer { 1258 | bottom: "conv3_3_res075" 1259 | top: "conv3_3_res075" 1260 | name: "relu3_3_res075" 1261 | type: "ReLU" 1262 | } 1263 | layer { 1264 | bottom: "conv3_3_res075" 1265 | top: "pool3_res075" 1266 | name: "pool3_res075" 1267 | type: "Pooling" 1268 | pooling_param { 1269 | pool: MAX 1270 | kernel_size: 3 1271 | stride: 2 1272 | pad: 1 1273 | } 1274 | } 1275 | layer { 1276 | bottom: "pool3_res075" 1277 | top: "conv4_1_res075" 1278 | name: "conv4_1_res075" 1279 | type: "Convolution" 1280 | param { 1281 | name: "conv4_1_w" 1282 | lr_mult: 1 1283 | decay_mult: 1 1284 | } 1285 | param { 1286 | name: "conv4_1_b" 1287 | lr_mult: 2 1288 | decay_mult: 0 1289 | } 1290 | convolution_param { 1291 | num_output: 512 1292 | pad: 1 1293 | kernel_size: 3 1294 | } 1295 | } 1296 | layer { 1297 | bottom: "conv4_1_res075" 1298 | top: "conv4_1_res075" 1299 | name: "relu4_1_res075" 1300 | type: "ReLU" 1301 | } 1302 | layer { 1303 | bottom: "conv4_1_res075" 1304 | top: "conv4_2_res075" 1305 | name: "conv4_2_res075" 1306 | type: "Convolution" 1307 | param { 1308 | name: "conv4_2_w" 1309 | lr_mult: 1 1310 | decay_mult: 1 1311 | } 1312 | param { 1313 | name: "conv4_2_b" 1314 | lr_mult: 2 1315 | decay_mult: 0 1316 | } 1317 | convolution_param { 1318 | num_output: 512 1319 | pad: 1 1320 | kernel_size: 3 1321 | } 1322 | } 1323 | layer { 1324 | bottom: "conv4_2_res075" 1325 | top: "conv4_2_res075" 1326 | name: "relu4_2_res075" 1327 | type: "ReLU" 1328 | } 1329 | layer { 1330 | bottom: "conv4_2_res075" 1331 | top: "conv4_3_res075" 1332 | name: "conv4_3_res075" 1333 | type: "Convolution" 1334 | param { 1335 | name: "conv4_3_w" 1336 | lr_mult: 1 1337 | decay_mult: 1 1338 | } 1339 | param { 1340 | name: "conv4_3_b" 1341 | lr_mult: 2 1342 | decay_mult: 0 1343 | } 1344 | convolution_param { 1345 | num_output: 512 1346 | pad: 1 1347 | kernel_size: 3 1348 | } 1349 | } 1350 | layer { 1351 | bottom: "conv4_3_res075" 1352 | top: "conv4_3_res075" 1353 | name: "relu4_3_res075" 1354 | type: "ReLU" 1355 | } 1356 | layer { 1357 | bottom: "conv4_3_res075" 1358 | top: "pool4_res075" 1359 | name: "pool4_res075" 1360 | type: "Pooling" 1361 | pooling_param { 1362 | pool: MAX 1363 | kernel_size: 3 1364 | pad: 1 1365 | stride: 1 1366 | } 1367 | } 1368 | layer { 1369 | bottom: "pool4_res075" 1370 | top: "conv5_1_res075" 1371 | name: "conv5_1_res075" 1372 | type: "Convolution" 1373 | param { 1374 | name: "conv5_1_w" 1375 | lr_mult: 1 1376 | decay_mult: 1 1377 | } 1378 | param { 1379 | name: "conv5_1_b" 1380 | lr_mult: 2 1381 | decay_mult: 0 1382 | } 1383 | convolution_param { 1384 | num_output: 512 1385 | #pad: 1 1386 | pad: 2 1387 | dilation: 2 1388 | kernel_size: 3 1389 | } 1390 | } 1391 | layer { 1392 | bottom: "conv5_1_res075" 1393 | top: "conv5_1_res075" 1394 | name: "relu5_1_res075" 1395 | type: "ReLU" 1396 | } 1397 | layer { 1398 | bottom: "conv5_1_res075" 1399 | top: "conv5_2_res075" 1400 | name: "conv5_2_res075" 1401 | type: "Convolution" 1402 | param { 1403 | name: "conv5_2_w" 1404 | lr_mult: 1 1405 | decay_mult: 1 1406 | } 1407 | param { 1408 | name: "conv5_2_b" 1409 | lr_mult: 2 1410 | decay_mult: 0 1411 | } 1412 | convolution_param { 1413 | num_output: 512 1414 | #pad: 1 1415 | pad: 2 1416 | dilation: 2 1417 | kernel_size: 3 1418 | } 1419 | } 1420 | layer { 1421 | bottom: "conv5_2_res075" 1422 | top: "conv5_2_res075" 1423 | name: "relu5_2_res075" 1424 | type: "ReLU" 1425 | } 1426 | layer { 1427 | bottom: "conv5_2_res075" 1428 | top: "conv5_3_res075" 1429 | name: "conv5_3_res075" 1430 | type: "Convolution" 1431 | param { 1432 | name: "conv5_3_w" 1433 | lr_mult: 1 1434 | decay_mult: 1 1435 | } 1436 | param { 1437 | name: "conv5_3_b" 1438 | lr_mult: 2 1439 | decay_mult: 0 1440 | } 1441 | convolution_param { 1442 | num_output: 512 1443 | #pad: 1 1444 | pad: 2 1445 | dilation: 2 1446 | kernel_size: 3 1447 | } 1448 | } 1449 | layer { 1450 | bottom: "conv5_3_res075" 1451 | top: "conv5_3_res075" 1452 | name: "relu5_3_res075" 1453 | type: "ReLU" 1454 | } 1455 | layer { 1456 | bottom: "conv5_3_res075" 1457 | top: "pool5_res075" 1458 | name: "pool5_res075" 1459 | type: "Pooling" 1460 | pooling_param { 1461 | pool: MAX 1462 | #kernel_size: 2 1463 | #stride: 2 1464 | kernel_size: 3 1465 | stride: 1 1466 | pad: 1 1467 | } 1468 | } 1469 | 1470 | layer { 1471 | bottom: "pool5_res075" 1472 | top: "fc6_res075" 1473 | name: "fc6_res075" 1474 | type: "Convolution" 1475 | param { 1476 | name: "fc6_w" 1477 | lr_mult: 1 1478 | decay_mult: 1 1479 | } 1480 | param { 1481 | name: "fc6_b" 1482 | lr_mult: 2 1483 | decay_mult: 0 1484 | } 1485 | convolution_param { 1486 | num_output: 1024 1487 | pad: 12 1488 | dilation: 12 1489 | kernel_size: 3 1490 | } 1491 | } 1492 | layer { 1493 | bottom: "fc6_res075" 1494 | top: "fc6_res075" 1495 | name: "relu6_res075" 1496 | type: "ReLU" 1497 | } 1498 | layer { 1499 | bottom: "fc6_res075" 1500 | top: "fc6_res075" 1501 | name: "drop6_res075" 1502 | type: "Dropout" 1503 | dropout_param { 1504 | dropout_ratio: 0.5 1505 | } 1506 | } 1507 | layer { 1508 | bottom: "fc6_res075" 1509 | top: "fc7_res075" 1510 | name: "fc7_res075" 1511 | type: "Convolution" 1512 | param { 1513 | name: "fc7_w" 1514 | lr_mult: 1 1515 | decay_mult: 1 1516 | } 1517 | param { 1518 | name: "fc7_b" 1519 | lr_mult: 2 1520 | decay_mult: 0 1521 | } 1522 | convolution_param { 1523 | num_output: 1024 1524 | kernel_size: 1 1525 | } 1526 | } 1527 | layer { 1528 | bottom: "fc7_res075" 1529 | top: "fc7_res075" 1530 | name: "relu7_res075" 1531 | type: "ReLU" 1532 | } 1533 | layer { 1534 | bottom: "fc7_res075" 1535 | top: "fc7_res075" 1536 | name: "drop7_res075" 1537 | type: "Dropout" 1538 | dropout_param { 1539 | dropout_ratio: 0.5 1540 | } 1541 | } 1542 | 1543 | ############### classifier for resolution 1################### 1544 | layer { 1545 | bottom: "fc7" 1546 | top: "fc8_${EXP}" 1547 | name: "fc8_${EXP}" 1548 | type: "Convolution" 1549 | param { 1550 | name: "fc8_w" 1551 | lr_mult: 10 1552 | decay_mult: 1 1553 | } 1554 | param { 1555 | name: "fc8_b" 1556 | lr_mult: 20 1557 | decay_mult: 0 1558 | } 1559 | convolution_param { 1560 | num_output: ${NUM_LABELS} 1561 | kernel_size: 1 1562 | } 1563 | } 1564 | 1565 | ############### classifier for resolution 1/2 ################### 1566 | layer { 1567 | bottom: "fc7_res05" 1568 | top: "fc8_${EXP}_res05" 1569 | name: "fc8_${EXP}_res05" 1570 | type: "Convolution" 1571 | param { 1572 | name: "fc8_w" 1573 | lr_mult: 10 1574 | decay_mult: 1 1575 | } 1576 | param { 1577 | name: "fc8_b" 1578 | lr_mult: 20 1579 | decay_mult: 0 1580 | } 1581 | convolution_param { 1582 | num_output: ${NUM_LABELS} 1583 | kernel_size: 1 1584 | } 1585 | } 1586 | 1587 | ############### classifier for resolution 3/4 ################### 1588 | layer { 1589 | bottom: "fc7_res075" 1590 | top: "fc8_${EXP}_res075" 1591 | name: "fc8_${EXP}_res075" 1592 | type: "Convolution" 1593 | param { 1594 | name: "fc8_w" 1595 | lr_mult: 10 1596 | decay_mult: 1 1597 | } 1598 | param { 1599 | name: "fc8_b" 1600 | lr_mult: 20 1601 | decay_mult: 0 1602 | } 1603 | convolution_param { 1604 | num_output: ${NUM_LABELS} 1605 | kernel_size: 1 1606 | } 1607 | } 1608 | 1609 | ############### upsampling ################## 1610 | layer { 1611 | bottom: "fc7_res05" 1612 | top: "fc7_res05_interp" 1613 | name: "fc7_res05_interp" 1614 | type: "Interp" 1615 | interp_param { 1616 | zoom_factor: 2 1617 | pad_beg: 0 1618 | pad_end: 0 1619 | } 1620 | } 1621 | layer { 1622 | bottom: "fc7_res075" 1623 | top: "fc7_res075_interp" 1624 | name: "fc7_res075_interp" 1625 | type: "Interp" 1626 | interp_param { 1627 | zoom_factor: 4 1628 | shrink_factor: 3 1629 | pad_beg: 0 1630 | pad_end: 0 1631 | } 1632 | } 1633 | layer { 1634 | bottom: "fc8_${EXP}_res05" 1635 | top: "fc8_${EXP}_res05_interp" 1636 | name: "fc8_${EXP}_res05_interp" 1637 | type: "Interp" 1638 | interp_param { 1639 | zoom_factor: 2 1640 | pad_beg: 0 1641 | pad_end: 0 1642 | } 1643 | } 1644 | layer { 1645 | bottom: "fc8_${EXP}_res075" 1646 | top: "fc8_${EXP}_res075_interp" 1647 | name: "fc8_${EXP}_res075_interp" 1648 | type: "Interp" 1649 | interp_param { 1650 | zoom_factor: 4 1651 | shrink_factor: 3 1652 | pad_beg: 0 1653 | pad_end: 0 1654 | } 1655 | } 1656 | 1657 | ############### concatenation and pass through attention model ######### 1658 | layer { 1659 | bottom: "fc7" 1660 | bottom: "fc7_res075_interp" 1661 | bottom: "fc7_res05_interp" 1662 | top: "fc7_concat" 1663 | name: "fc7_concat" 1664 | type: "Concat" 1665 | concat_param { 1666 | axis: 1 1667 | } 1668 | } 1669 | 1670 | ### attention model 1671 | layer { 1672 | bottom: "fc7_concat" 1673 | top: "att_conv1" 1674 | name: "att_conv1" 1675 | type: "Convolution" 1676 | convolution_param { 1677 | num_output: 512 1678 | kernel_size: 3 1679 | pad: 1 1680 | } 1681 | } 1682 | layer { 1683 | bottom: "att_conv1" 1684 | top: "att_conv1" 1685 | name: "relu_att_conv1" 1686 | type: "ReLU" 1687 | } 1688 | layer { 1689 | bottom: "att_conv1" 1690 | top: "att_conv1" 1691 | name: "drop_att_conv1" 1692 | type: "Dropout" 1693 | dropout_param { 1694 | dropout_ratio: 0.5 1695 | } 1696 | } 1697 | 1698 | layer { 1699 | bottom: "att_conv1" 1700 | top: "att_fc" 1701 | name: "att_fc" 1702 | type: "Convolution" 1703 | convolution_param { 1704 | num_output: 3 1705 | kernel_size: 1 1706 | } 1707 | } 1708 | layer { 1709 | bottom: "att_fc" 1710 | top: "attention" 1711 | name: "attention" 1712 | type: "Softmax" 1713 | } 1714 | 1715 | ############### collect the output from attention model ######## 1716 | layer { 1717 | bottom: "attention" 1718 | top: "attention1" 1719 | top: "attention2" 1720 | top: "attention3" 1721 | name: "slice_attention" 1722 | type: "Slice" 1723 | slice_param { 1724 | axis: 1 1725 | slice_point: 1 1726 | slice_point: 2 1727 | } 1728 | } 1729 | 1730 | ############### scale features ############# 1731 | layer { 1732 | bottom: "fc8_${EXP}" 1733 | bottom: "attention1" 1734 | top: "fc8_product" 1735 | name: "fc8_product" 1736 | type: "SpatialProduct" 1737 | } 1738 | 1739 | layer { 1740 | bottom: "fc8_${EXP}_res075_interp" 1741 | bottom: "attention2" 1742 | top: "fc8_res075_product" 1743 | name: "fc8_res075_product" 1744 | type: "SpatialProduct" 1745 | } 1746 | 1747 | layer { 1748 | bottom: "fc8_${EXP}_res05_interp" 1749 | bottom: "attention3" 1750 | top: "fc8_res05_product" 1751 | name: "fc8_res05_product" 1752 | type: "SpatialProduct" 1753 | } 1754 | 1755 | ### add features ### 1756 | layer { 1757 | bottom: "fc8_product" 1758 | bottom: "fc8_res075_product" 1759 | bottom: "fc8_res05_product" 1760 | top: "fc8_fusion" 1761 | name: "fc8_fusion" 1762 | type: "Eltwise" 1763 | eltwise_param { 1764 | operation: SUM 1765 | } 1766 | } 1767 | ############### upsampling ################ 1768 | #layer { 1769 | # bottom: "label" 1770 | # top: "label_shrink8" 1771 | # name: "label_shrink8" 1772 | # type: "Interp" 1773 | # interp_param { 1774 | # shrink_factor: 8 1775 | # pad_beg: 0 1776 | # pad_end: 0 1777 | # } 1778 | #} 1779 | layer { 1780 | bottom: "fc8_fusion" 1781 | top: "fc8_interp" 1782 | name: "fc8_interp" 1783 | type: "Interp" 1784 | interp_param { 1785 | zoom_factor: 8 1786 | } 1787 | } 1788 | 1789 | layer { 1790 | bottom: "fc8_interp" 1791 | top: "fc8_mask" 1792 | name: "fc8_mask" 1793 | type: "MaskCreate" 1794 | mask_create_param{ 1795 | num_cls: 20 1796 | } 1797 | } 1798 | 1799 | layer { 1800 | name: "fc8_mat" 1801 | type: "MatWrite" 1802 | bottom: "fc8_mask" 1803 | mat_write_param { 1804 | prefix: "${FEATURE_DIR}/${TEST_SET}/fc8_mask/" 1805 | source: "${EXP}/list/${TEST_SET}_id.txt" 1806 | strip: 0 1807 | period: 1 1808 | } 1809 | include: { phase: TEST } 1810 | } 1811 | #layer { 1812 | # name: "accuracy" 1813 | # type: "SegAccuracy" 1814 | # bottom: "fc8_fusion" 1815 | # bottom: "label_shrink8" 1816 | # top: "accuracy" 1817 | # seg_accuracy_param { 1818 | # ignore_label: 255 1819 | # } 1820 | #} 1821 | # 1822 | layer { 1823 | name: "silence" 1824 | type: "Silence" 1825 | bottom: "label" 1826 | } 1827 | -------------------------------------------------------------------------------- /human/config/no-ssl/train.prototxt: -------------------------------------------------------------------------------- 1 | # VGG 16-layer network convolutional finetuning 2 | # Network modified to have smaller receptive field (128 pixels) 3 | # and smaller stride (8 pixels) when run in convolutional mode. 4 | # 5 | # In this model we also change max pooling size in the first 4 layers 6 | # from 2 to 3 while retaining stride = 2 7 | # which makes it easier to exactly align responses at different layers. 8 | # 9 | # For alignment to work, we set (we choose 32x so as to be able to evaluate 10 | # the model for all different subsampling sizes): 11 | # (1) input dimension equal to 12 | # $n = 32 * k - 31$, e.g., 321 (for k = 11) 13 | # Dimension after pooling w. subsampling: 14 | # (16 * k - 15); (8 * k - 7); (4 * k - 3); (2 * k - 1); (k). 15 | # For k = 11, these translate to 16 | # 161; 81; 41; 21; 11 17 | # 18 | 19 | name: "${NET_ID}" 20 | 21 | ### load data ### 22 | layer { 23 | name: "data" 24 | type: "ImageSegData" 25 | top: "data" 26 | top: "label" 27 | image_data_param { 28 | root_folder: "${DATA_ROOT}" 29 | source: "${EXP}/list/${TRAIN_SET}.txt" 30 | label_type: PIXEL 31 | batch_size: 10 32 | shuffle: true 33 | } 34 | transform_param { 35 | mean_value: 104.008 36 | mean_value: 116.669 37 | mean_value: 122.675 38 | crop_size: 321 39 | scale_factors: 0.6 40 | scale_factors: 0.8 41 | scale_factors: 1 42 | scale_factors: 1.2 43 | scale_factors: 1.4 44 | mirror: true 45 | } 46 | include: { phase: TRAIN } 47 | } 48 | 49 | ### shrink data ### 50 | layer { 51 | bottom: "data" 52 | top: "shrink_data05" 53 | name: "shrink_data05" 54 | type: "Interp" 55 | interp_param { 56 | shrink_factor: 2 57 | pad_beg: 0 58 | pad_end: 0 59 | } 60 | } 61 | 62 | layer { 63 | bottom: "data" 64 | top: "shrink_data075" 65 | name: "shrink_data075" 66 | type: "Interp" 67 | interp_param { 68 | shrink_factor: 4 69 | zoom_factor: 3 70 | pad_beg: 0 71 | pad_end: 0 72 | } 73 | } 74 | 75 | ### NETWORK for resolution 1 ### 76 | 77 | layer { 78 | bottom: "data" 79 | top: "conv1_1" 80 | name: "conv1_1" 81 | type: "Convolution" 82 | param { 83 | name: "conv1_1_w" 84 | lr_mult: 1 85 | decay_mult: 1 86 | } 87 | param { 88 | name: "conv1_1_b" 89 | lr_mult: 2 90 | decay_mult: 0 91 | } 92 | convolution_param { 93 | num_output: 64 94 | pad: 1 95 | kernel_size: 3 96 | } 97 | } 98 | layer { 99 | bottom: "conv1_1" 100 | top: "conv1_1" 101 | name: "relu1_1" 102 | type: "ReLU" 103 | } 104 | layer { 105 | bottom: "conv1_1" 106 | top: "conv1_2" 107 | name: "conv1_2" 108 | type: "Convolution" 109 | param { 110 | name: "conv1_2_w" 111 | lr_mult: 1 112 | decay_mult: 1 113 | } 114 | param { 115 | name: "conv1_2_b" 116 | lr_mult: 2 117 | decay_mult: 0 118 | } 119 | convolution_param { 120 | num_output: 64 121 | pad: 1 122 | kernel_size: 3 123 | } 124 | } 125 | layer { 126 | bottom: "conv1_2" 127 | top: "conv1_2" 128 | name: "relu1_2" 129 | type: "ReLU" 130 | } 131 | layer { 132 | bottom: "conv1_2" 133 | top: "pool1" 134 | name: "pool1" 135 | type: "Pooling" 136 | pooling_param { 137 | pool: MAX 138 | kernel_size: 3 139 | stride: 2 140 | pad: 1 141 | } 142 | } 143 | layer { 144 | bottom: "pool1" 145 | top: "conv2_1" 146 | name: "conv2_1" 147 | type: "Convolution" 148 | param { 149 | name: "conv2_1_w" 150 | lr_mult: 1 151 | decay_mult: 1 152 | } 153 | param { 154 | name: "conv2_1_b" 155 | lr_mult: 2 156 | decay_mult: 0 157 | } 158 | convolution_param { 159 | num_output: 128 160 | pad: 1 161 | kernel_size: 3 162 | } 163 | } 164 | layer { 165 | bottom: "conv2_1" 166 | top: "conv2_1" 167 | name: "relu2_1" 168 | type: "ReLU" 169 | } 170 | layer { 171 | bottom: "conv2_1" 172 | top: "conv2_2" 173 | name: "conv2_2" 174 | type: "Convolution" 175 | param { 176 | name: "conv2_2_w" 177 | lr_mult: 1 178 | decay_mult: 1 179 | } 180 | param { 181 | name: "conv2_2_b" 182 | lr_mult: 2 183 | decay_mult: 0 184 | } 185 | convolution_param { 186 | num_output: 128 187 | pad: 1 188 | kernel_size: 3 189 | } 190 | } 191 | layer { 192 | bottom: "conv2_2" 193 | top: "conv2_2" 194 | name: "relu2_2" 195 | type: "ReLU" 196 | } 197 | layer { 198 | bottom: "conv2_2" 199 | top: "pool2" 200 | name: "pool2" 201 | type: "Pooling" 202 | pooling_param { 203 | pool: MAX 204 | kernel_size: 3 205 | stride: 2 206 | pad: 1 207 | } 208 | } 209 | layer { 210 | bottom: "pool2" 211 | top: "conv3_1" 212 | name: "conv3_1" 213 | type: "Convolution" 214 | param { 215 | name: "conv3_1_w" 216 | lr_mult: 1 217 | decay_mult: 1 218 | } 219 | param { 220 | name: "conv3_1_b" 221 | lr_mult: 2 222 | decay_mult: 0 223 | } 224 | convolution_param { 225 | num_output: 256 226 | pad: 1 227 | kernel_size: 3 228 | } 229 | } 230 | layer { 231 | bottom: "conv3_1" 232 | top: "conv3_1" 233 | name: "relu3_1" 234 | type: "ReLU" 235 | } 236 | layer { 237 | bottom: "conv3_1" 238 | top: "conv3_2" 239 | name: "conv3_2" 240 | type: "Convolution" 241 | param { 242 | name: "conv3_2_w" 243 | lr_mult: 1 244 | decay_mult: 1 245 | } 246 | param { 247 | name: "conv3_2_b" 248 | lr_mult: 2 249 | decay_mult: 0 250 | } 251 | convolution_param { 252 | num_output: 256 253 | pad: 1 254 | kernel_size: 3 255 | } 256 | } 257 | layer { 258 | bottom: "conv3_2" 259 | top: "conv3_2" 260 | name: "relu3_2" 261 | type: "ReLU" 262 | } 263 | layer { 264 | bottom: "conv3_2" 265 | top: "conv3_3" 266 | name: "conv3_3" 267 | type: "Convolution" 268 | param { 269 | name: "conv3_3_w" 270 | lr_mult: 1 271 | decay_mult: 1 272 | } 273 | param { 274 | name: "conv3_3_b" 275 | lr_mult: 2 276 | decay_mult: 0 277 | } 278 | convolution_param { 279 | num_output: 256 280 | pad: 1 281 | kernel_size: 3 282 | } 283 | } 284 | layer { 285 | bottom: "conv3_3" 286 | top: "conv3_3" 287 | name: "relu3_3" 288 | type: "ReLU" 289 | } 290 | layer { 291 | bottom: "conv3_3" 292 | top: "pool3" 293 | name: "pool3" 294 | type: "Pooling" 295 | pooling_param { 296 | pool: MAX 297 | kernel_size: 3 298 | stride: 2 299 | pad: 1 300 | } 301 | } 302 | layer { 303 | bottom: "pool3" 304 | top: "conv4_1" 305 | name: "conv4_1" 306 | type: "Convolution" 307 | param { 308 | name: "conv4_1_w" 309 | lr_mult: 1 310 | decay_mult: 1 311 | } 312 | param { 313 | name: "conv4_1_b" 314 | lr_mult: 2 315 | decay_mult: 0 316 | } 317 | convolution_param { 318 | num_output: 512 319 | pad: 1 320 | kernel_size: 3 321 | } 322 | } 323 | layer { 324 | bottom: "conv4_1" 325 | top: "conv4_1" 326 | name: "relu4_1" 327 | type: "ReLU" 328 | } 329 | layer { 330 | bottom: "conv4_1" 331 | top: "conv4_2" 332 | name: "conv4_2" 333 | type: "Convolution" 334 | param { 335 | name: "conv4_2_w" 336 | lr_mult: 1 337 | decay_mult: 1 338 | } 339 | param { 340 | name: "conv4_2_b" 341 | lr_mult: 2 342 | decay_mult: 0 343 | } 344 | convolution_param { 345 | num_output: 512 346 | pad: 1 347 | kernel_size: 3 348 | } 349 | } 350 | layer { 351 | bottom: "conv4_2" 352 | top: "conv4_2" 353 | name: "relu4_2" 354 | type: "ReLU" 355 | } 356 | layer { 357 | bottom: "conv4_2" 358 | top: "conv4_3" 359 | name: "conv4_3" 360 | type: "Convolution" 361 | param { 362 | name: "conv4_3_w" 363 | lr_mult: 1 364 | decay_mult: 1 365 | } 366 | param { 367 | name: "conv4_3_b" 368 | lr_mult: 2 369 | decay_mult: 0 370 | } 371 | convolution_param { 372 | num_output: 512 373 | pad: 1 374 | kernel_size: 3 375 | } 376 | } 377 | layer { 378 | bottom: "conv4_3" 379 | top: "conv4_3" 380 | name: "relu4_3" 381 | type: "ReLU" 382 | } 383 | layer { 384 | bottom: "conv4_3" 385 | top: "pool4" 386 | name: "pool4" 387 | type: "Pooling" 388 | pooling_param { 389 | pool: MAX 390 | kernel_size: 3 391 | pad: 1 392 | stride: 1 393 | } 394 | } 395 | layer { 396 | bottom: "pool4" 397 | top: "conv5_1" 398 | name: "conv5_1" 399 | type: "Convolution" 400 | param { 401 | name: "conv5_1_w" 402 | lr_mult: 1 403 | decay_mult: 1 404 | } 405 | param { 406 | name: "conv5_1_b" 407 | lr_mult: 2 408 | decay_mult: 0 409 | } 410 | convolution_param { 411 | num_output: 512 412 | #pad: 1 413 | pad: 2 414 | dilation: 2 415 | kernel_size: 3 416 | } 417 | } 418 | layer { 419 | bottom: "conv5_1" 420 | top: "conv5_1" 421 | name: "relu5_1" 422 | type: "ReLU" 423 | } 424 | layer { 425 | bottom: "conv5_1" 426 | top: "conv5_2" 427 | name: "conv5_2" 428 | type: "Convolution" 429 | param { 430 | name: "conv5_2_w" 431 | lr_mult: 1 432 | decay_mult: 1 433 | } 434 | param { 435 | name: "conv5_2_b" 436 | lr_mult: 2 437 | decay_mult: 0 438 | } 439 | convolution_param { 440 | num_output: 512 441 | #pad: 1 442 | pad: 2 443 | dilation: 2 444 | kernel_size: 3 445 | } 446 | } 447 | layer { 448 | bottom: "conv5_2" 449 | top: "conv5_2" 450 | name: "relu5_2" 451 | type: "ReLU" 452 | } 453 | layer { 454 | bottom: "conv5_2" 455 | top: "conv5_3" 456 | name: "conv5_3" 457 | type: "Convolution" 458 | param { 459 | name: "conv5_3_w" 460 | lr_mult: 1 461 | decay_mult: 1 462 | } 463 | param { 464 | name: "conv5_3_b" 465 | lr_mult: 2 466 | decay_mult: 0 467 | } 468 | convolution_param { 469 | num_output: 512 470 | #pad: 1 471 | pad: 2 472 | dilation: 2 473 | kernel_size: 3 474 | } 475 | } 476 | layer { 477 | bottom: "conv5_3" 478 | top: "conv5_3" 479 | name: "relu5_3" 480 | type: "ReLU" 481 | } 482 | layer { 483 | bottom: "conv5_3" 484 | top: "pool5" 485 | name: "pool5" 486 | type: "Pooling" 487 | pooling_param { 488 | pool: MAX 489 | #kernel_size: 2 490 | #stride: 2 491 | kernel_size: 3 492 | stride: 1 493 | pad: 1 494 | } 495 | } 496 | 497 | layer { 498 | bottom: "pool5" 499 | top: "fc6" 500 | name: "fc6" 501 | type: "Convolution" 502 | param { 503 | name: "fc6_w" 504 | lr_mult: 1 505 | decay_mult: 1 506 | } 507 | param { 508 | name: "fc6_b" 509 | lr_mult: 2 510 | decay_mult: 0 511 | } 512 | convolution_param { 513 | num_output: 1024 514 | pad: 12 515 | dilation: 12 516 | kernel_size: 3 517 | } 518 | } 519 | layer { 520 | bottom: "fc6" 521 | top: "fc6" 522 | name: "relu6" 523 | type: "ReLU" 524 | } 525 | layer { 526 | bottom: "fc6" 527 | top: "fc6" 528 | name: "drop6" 529 | type: "Dropout" 530 | dropout_param { 531 | dropout_ratio: 0.5 532 | } 533 | } 534 | layer { 535 | bottom: "fc6" 536 | top: "fc7" 537 | name: "fc7" 538 | type: "Convolution" 539 | param { 540 | name: "fc7_w" 541 | lr_mult: 1 542 | decay_mult: 1 543 | } 544 | param { 545 | name: "fc7_b" 546 | lr_mult: 2 547 | decay_mult: 0 548 | } 549 | convolution_param { 550 | num_output: 1024 551 | kernel_size: 1 552 | } 553 | } 554 | layer { 555 | bottom: "fc7" 556 | top: "fc7" 557 | name: "relu7" 558 | type: "ReLU" 559 | } 560 | layer { 561 | bottom: "fc7" 562 | top: "fc7" 563 | name: "drop7" 564 | type: "Dropout" 565 | dropout_param { 566 | dropout_ratio: 0.5 567 | } 568 | } 569 | 570 | ### NETWORK for resolution 1/2 ### 571 | 572 | layer { 573 | bottom: "shrink_data05" 574 | top: "conv1_1_res05" 575 | name: "conv1_1_res05" 576 | type: "Convolution" 577 | param { 578 | name: "conv1_1_w" 579 | lr_mult: 1 580 | decay_mult: 1 581 | } 582 | param { 583 | name: "conv1_1_b" 584 | lr_mult: 2 585 | decay_mult: 0 586 | } 587 | convolution_param { 588 | num_output: 64 589 | pad: 1 590 | kernel_size: 3 591 | } 592 | } 593 | layer { 594 | bottom: "conv1_1_res05" 595 | top: "conv1_1_res05" 596 | name: "relu1_1_res05" 597 | type: "ReLU" 598 | } 599 | layer { 600 | bottom: "conv1_1_res05" 601 | top: "conv1_2_res05" 602 | name: "conv1_2_res05" 603 | type: "Convolution" 604 | param { 605 | name: "conv1_2_w" 606 | lr_mult: 1 607 | decay_mult: 1 608 | } 609 | param { 610 | name: "conv1_2_b" 611 | lr_mult: 2 612 | decay_mult: 0 613 | } 614 | convolution_param { 615 | num_output: 64 616 | pad: 1 617 | kernel_size: 3 618 | } 619 | } 620 | layer { 621 | bottom: "conv1_2_res05" 622 | top: "conv1_2_res05" 623 | name: "relu1_2_res05" 624 | type: "ReLU" 625 | } 626 | layer { 627 | bottom: "conv1_2_res05" 628 | top: "pool1_res05" 629 | name: "pool1_res05" 630 | type: "Pooling" 631 | pooling_param { 632 | pool: MAX 633 | kernel_size: 3 634 | stride: 2 635 | pad: 1 636 | } 637 | } 638 | layer { 639 | bottom: "pool1_res05" 640 | top: "conv2_1_res05" 641 | name: "conv2_1_res05" 642 | type: "Convolution" 643 | param { 644 | name: "conv2_1_w" 645 | lr_mult: 1 646 | decay_mult: 1 647 | } 648 | param { 649 | name: "conv2_1_b" 650 | lr_mult: 2 651 | decay_mult: 0 652 | } 653 | convolution_param { 654 | num_output: 128 655 | pad: 1 656 | kernel_size: 3 657 | } 658 | } 659 | layer { 660 | bottom: "conv2_1_res05" 661 | top: "conv2_1_res05" 662 | name: "relu2_1_res05" 663 | type: "ReLU" 664 | } 665 | layer { 666 | bottom: "conv2_1_res05" 667 | top: "conv2_2_res05" 668 | name: "conv2_2_res05" 669 | type: "Convolution" 670 | param { 671 | name: "conv2_2_w" 672 | lr_mult: 1 673 | decay_mult: 1 674 | } 675 | param { 676 | name: "conv2_2_b" 677 | lr_mult: 2 678 | decay_mult: 0 679 | } 680 | convolution_param { 681 | num_output: 128 682 | pad: 1 683 | kernel_size: 3 684 | } 685 | } 686 | layer { 687 | bottom: "conv2_2_res05" 688 | top: "conv2_2_res05" 689 | name: "relu2_2_res05" 690 | type: "ReLU" 691 | } 692 | layer { 693 | bottom: "conv2_2_res05" 694 | top: "pool2_res05" 695 | name: "pool2_res05" 696 | type: "Pooling" 697 | pooling_param { 698 | pool: MAX 699 | kernel_size: 3 700 | stride: 2 701 | pad: 1 702 | } 703 | } 704 | layer { 705 | bottom: "pool2_res05" 706 | top: "conv3_1_res05" 707 | name: "conv3_1_res05" 708 | type: "Convolution" 709 | param { 710 | name: "conv3_1_w" 711 | lr_mult: 1 712 | decay_mult: 1 713 | } 714 | param { 715 | name: "conv3_1_b" 716 | lr_mult: 2 717 | decay_mult: 0 718 | } 719 | convolution_param { 720 | num_output: 256 721 | pad: 1 722 | kernel_size: 3 723 | } 724 | } 725 | layer { 726 | bottom: "conv3_1_res05" 727 | top: "conv3_1_res05" 728 | name: "relu3_1_res05" 729 | type: "ReLU" 730 | } 731 | layer { 732 | bottom: "conv3_1_res05" 733 | top: "conv3_2_res05" 734 | name: "conv3_2_res05" 735 | type: "Convolution" 736 | param { 737 | name: "conv3_2_w" 738 | lr_mult: 1 739 | decay_mult: 1 740 | } 741 | param { 742 | name: "conv3_2_b" 743 | lr_mult: 2 744 | decay_mult: 0 745 | } 746 | convolution_param { 747 | num_output: 256 748 | pad: 1 749 | kernel_size: 3 750 | } 751 | } 752 | layer { 753 | bottom: "conv3_2_res05" 754 | top: "conv3_2_res05" 755 | name: "relu3_2_res05" 756 | type: "ReLU" 757 | } 758 | layer { 759 | bottom: "conv3_2_res05" 760 | top: "conv3_3_res05" 761 | name: "conv3_3_res05" 762 | type: "Convolution" 763 | param { 764 | name: "conv3_3_w" 765 | lr_mult: 1 766 | decay_mult: 1 767 | } 768 | param { 769 | name: "conv3_3_b" 770 | lr_mult: 2 771 | decay_mult: 0 772 | } 773 | convolution_param { 774 | num_output: 256 775 | pad: 1 776 | kernel_size: 3 777 | } 778 | } 779 | layer { 780 | bottom: "conv3_3_res05" 781 | top: "conv3_3_res05" 782 | name: "relu3_3_res05" 783 | type: "ReLU" 784 | } 785 | layer { 786 | bottom: "conv3_3_res05" 787 | top: "pool3_res05" 788 | name: "pool3_res05" 789 | type: "Pooling" 790 | pooling_param { 791 | pool: MAX 792 | kernel_size: 3 793 | stride: 2 794 | pad: 1 795 | } 796 | } 797 | layer { 798 | bottom: "pool3_res05" 799 | top: "conv4_1_res05" 800 | name: "conv4_1_res05" 801 | type: "Convolution" 802 | param { 803 | name: "conv4_1_w" 804 | lr_mult: 1 805 | decay_mult: 1 806 | } 807 | param { 808 | name: "conv4_1_b" 809 | lr_mult: 2 810 | decay_mult: 0 811 | } 812 | convolution_param { 813 | num_output: 512 814 | pad: 1 815 | kernel_size: 3 816 | } 817 | } 818 | layer { 819 | bottom: "conv4_1_res05" 820 | top: "conv4_1_res05" 821 | name: "relu4_1_res05" 822 | type: "ReLU" 823 | } 824 | layer { 825 | bottom: "conv4_1_res05" 826 | top: "conv4_2_res05" 827 | name: "conv4_2_res05" 828 | type: "Convolution" 829 | param { 830 | name: "conv4_2_w" 831 | lr_mult: 1 832 | decay_mult: 1 833 | } 834 | param { 835 | name: "conv4_2_b" 836 | lr_mult: 2 837 | decay_mult: 0 838 | } 839 | convolution_param { 840 | num_output: 512 841 | pad: 1 842 | kernel_size: 3 843 | } 844 | } 845 | layer { 846 | bottom: "conv4_2_res05" 847 | top: "conv4_2_res05" 848 | name: "relu4_2_res05" 849 | type: "ReLU" 850 | } 851 | layer { 852 | bottom: "conv4_2_res05" 853 | top: "conv4_3_res05" 854 | name: "conv4_3_res05" 855 | type: "Convolution" 856 | param { 857 | name: "conv4_3_w" 858 | lr_mult: 1 859 | decay_mult: 1 860 | } 861 | param { 862 | name: "conv4_3_b" 863 | lr_mult: 2 864 | decay_mult: 0 865 | } 866 | convolution_param { 867 | num_output: 512 868 | pad: 1 869 | kernel_size: 3 870 | } 871 | } 872 | layer { 873 | bottom: "conv4_3_res05" 874 | top: "conv4_3_res05" 875 | name: "relu4_3_res05" 876 | type: "ReLU" 877 | } 878 | layer { 879 | bottom: "conv4_3_res05" 880 | top: "pool4_res05" 881 | name: "pool4_res05" 882 | type: "Pooling" 883 | pooling_param { 884 | pool: MAX 885 | kernel_size: 3 886 | pad: 1 887 | stride: 1 888 | } 889 | } 890 | layer { 891 | bottom: "pool4_res05" 892 | top: "conv5_1_res05" 893 | name: "conv5_1_res05" 894 | type: "Convolution" 895 | param { 896 | name: "conv5_1_w" 897 | lr_mult: 1 898 | decay_mult: 1 899 | } 900 | param { 901 | name: "conv5_1_b" 902 | lr_mult: 2 903 | decay_mult: 0 904 | } 905 | convolution_param { 906 | num_output: 512 907 | #pad: 1 908 | pad: 2 909 | dilation: 2 910 | kernel_size: 3 911 | } 912 | } 913 | layer { 914 | bottom: "conv5_1_res05" 915 | top: "conv5_1_res05" 916 | name: "relu5_1_res05" 917 | type: "ReLU" 918 | } 919 | layer { 920 | bottom: "conv5_1_res05" 921 | top: "conv5_2_res05" 922 | name: "conv5_2_res05" 923 | type: "Convolution" 924 | param { 925 | name: "conv5_2_w" 926 | lr_mult: 1 927 | decay_mult: 1 928 | } 929 | param { 930 | name: "conv5_2_b" 931 | lr_mult: 2 932 | decay_mult: 0 933 | } 934 | convolution_param { 935 | num_output: 512 936 | #pad: 1 937 | pad: 2 938 | dilation: 2 939 | kernel_size: 3 940 | } 941 | } 942 | layer { 943 | bottom: "conv5_2_res05" 944 | top: "conv5_2_res05" 945 | name: "relu5_2_res05" 946 | type: "ReLU" 947 | } 948 | layer { 949 | bottom: "conv5_2_res05" 950 | top: "conv5_3_res05" 951 | name: "conv5_3_res05" 952 | type: "Convolution" 953 | param { 954 | name: "conv5_3_w" 955 | lr_mult: 1 956 | decay_mult: 1 957 | } 958 | param { 959 | name: "conv5_3_b" 960 | lr_mult: 2 961 | decay_mult: 0 962 | } 963 | convolution_param { 964 | num_output: 512 965 | #pad: 1 966 | pad: 2 967 | dilation: 2 968 | kernel_size: 3 969 | } 970 | } 971 | layer { 972 | bottom: "conv5_3_res05" 973 | top: "conv5_3_res05" 974 | name: "relu5_3_res05" 975 | type: "ReLU" 976 | } 977 | layer { 978 | bottom: "conv5_3_res05" 979 | top: "pool5_res05" 980 | name: "pool5_res05" 981 | type: "Pooling" 982 | pooling_param { 983 | pool: MAX 984 | #kernel_size: 2 985 | #stride: 2 986 | kernel_size: 3 987 | stride: 1 988 | pad: 1 989 | } 990 | } 991 | 992 | layer { 993 | bottom: "pool5_res05" 994 | top: "fc6_res05" 995 | name: "fc6_res05" 996 | type: "Convolution" 997 | param { 998 | name: "fc6_w" 999 | lr_mult: 1 1000 | decay_mult: 1 1001 | } 1002 | param { 1003 | name: "fc6_b" 1004 | lr_mult: 2 1005 | decay_mult: 0 1006 | } 1007 | convolution_param { 1008 | num_output: 1024 1009 | pad: 12 1010 | dilation: 12 1011 | kernel_size: 3 1012 | } 1013 | } 1014 | layer { 1015 | bottom: "fc6_res05" 1016 | top: "fc6_res05" 1017 | name: "relu6_res05" 1018 | type: "ReLU" 1019 | } 1020 | layer { 1021 | bottom: "fc6_res05" 1022 | top: "fc6_res05" 1023 | name: "drop6_res05" 1024 | type: "Dropout" 1025 | dropout_param { 1026 | dropout_ratio: 0.5 1027 | } 1028 | } 1029 | layer { 1030 | bottom: "fc6_res05" 1031 | top: "fc7_res05" 1032 | name: "fc7_res05" 1033 | type: "Convolution" 1034 | param { 1035 | name: "fc7_w" 1036 | lr_mult: 1 1037 | decay_mult: 1 1038 | } 1039 | param { 1040 | name: "fc7_b" 1041 | lr_mult: 2 1042 | decay_mult: 0 1043 | } 1044 | convolution_param { 1045 | num_output: 1024 1046 | kernel_size: 1 1047 | } 1048 | } 1049 | layer { 1050 | bottom: "fc7_res05" 1051 | top: "fc7_res05" 1052 | name: "relu7_res05" 1053 | type: "ReLU" 1054 | } 1055 | layer { 1056 | bottom: "fc7_res05" 1057 | top: "fc7_res05" 1058 | name: "drop7_res05" 1059 | type: "Dropout" 1060 | dropout_param { 1061 | dropout_ratio: 0.5 1062 | } 1063 | } 1064 | 1065 | ### NETWORK for resolution 3/4 ### 1066 | 1067 | layer { 1068 | bottom: "shrink_data075" 1069 | top: "conv1_1_res075" 1070 | name: "conv1_1_res075" 1071 | type: "Convolution" 1072 | param { 1073 | name: "conv1_1_w" 1074 | lr_mult: 1 1075 | decay_mult: 1 1076 | } 1077 | param { 1078 | name: "conv1_1_b" 1079 | lr_mult: 2 1080 | decay_mult: 0 1081 | } 1082 | convolution_param { 1083 | num_output: 64 1084 | pad: 1 1085 | kernel_size: 3 1086 | } 1087 | } 1088 | layer { 1089 | bottom: "conv1_1_res075" 1090 | top: "conv1_1_res075" 1091 | name: "relu1_1_res075" 1092 | type: "ReLU" 1093 | } 1094 | layer { 1095 | bottom: "conv1_1_res075" 1096 | top: "conv1_2_res075" 1097 | name: "conv1_2_res075" 1098 | type: "Convolution" 1099 | param { 1100 | name: "conv1_2_w" 1101 | lr_mult: 1 1102 | decay_mult: 1 1103 | } 1104 | param { 1105 | name: "conv1_2_b" 1106 | lr_mult: 2 1107 | decay_mult: 0 1108 | } 1109 | convolution_param { 1110 | num_output: 64 1111 | pad: 1 1112 | kernel_size: 3 1113 | } 1114 | } 1115 | layer { 1116 | bottom: "conv1_2_res075" 1117 | top: "conv1_2_res075" 1118 | name: "relu1_2_res075" 1119 | type: "ReLU" 1120 | } 1121 | layer { 1122 | bottom: "conv1_2_res075" 1123 | top: "pool1_res075" 1124 | name: "pool1_res075" 1125 | type: "Pooling" 1126 | pooling_param { 1127 | pool: MAX 1128 | kernel_size: 3 1129 | stride: 2 1130 | pad: 1 1131 | } 1132 | } 1133 | layer { 1134 | bottom: "pool1_res075" 1135 | top: "conv2_1_res075" 1136 | name: "conv2_1_res075" 1137 | type: "Convolution" 1138 | param { 1139 | name: "conv2_1_w" 1140 | lr_mult: 1 1141 | decay_mult: 1 1142 | } 1143 | param { 1144 | name: "conv2_1_b" 1145 | lr_mult: 2 1146 | decay_mult: 0 1147 | } 1148 | convolution_param { 1149 | num_output: 128 1150 | pad: 1 1151 | kernel_size: 3 1152 | } 1153 | } 1154 | layer { 1155 | bottom: "conv2_1_res075" 1156 | top: "conv2_1_res075" 1157 | name: "relu2_1_res075" 1158 | type: "ReLU" 1159 | } 1160 | layer { 1161 | bottom: "conv2_1_res075" 1162 | top: "conv2_2_res075" 1163 | name: "conv2_2_res075" 1164 | type: "Convolution" 1165 | param { 1166 | name: "conv2_2_w" 1167 | lr_mult: 1 1168 | decay_mult: 1 1169 | } 1170 | param { 1171 | name: "conv2_2_b" 1172 | lr_mult: 2 1173 | decay_mult: 0 1174 | } 1175 | convolution_param { 1176 | num_output: 128 1177 | pad: 1 1178 | kernel_size: 3 1179 | } 1180 | } 1181 | layer { 1182 | bottom: "conv2_2_res075" 1183 | top: "conv2_2_res075" 1184 | name: "relu2_2_res075" 1185 | type: "ReLU" 1186 | } 1187 | layer { 1188 | bottom: "conv2_2_res075" 1189 | top: "pool2_res075" 1190 | name: "pool2_res075" 1191 | type: "Pooling" 1192 | pooling_param { 1193 | pool: MAX 1194 | kernel_size: 3 1195 | stride: 2 1196 | pad: 1 1197 | } 1198 | } 1199 | layer { 1200 | bottom: "pool2_res075" 1201 | top: "conv3_1_res075" 1202 | name: "conv3_1_res075" 1203 | type: "Convolution" 1204 | param { 1205 | name: "conv3_1_w" 1206 | lr_mult: 1 1207 | decay_mult: 1 1208 | } 1209 | param { 1210 | name: "conv3_1_b" 1211 | lr_mult: 2 1212 | decay_mult: 0 1213 | } 1214 | convolution_param { 1215 | num_output: 256 1216 | pad: 1 1217 | kernel_size: 3 1218 | } 1219 | } 1220 | layer { 1221 | bottom: "conv3_1_res075" 1222 | top: "conv3_1_res075" 1223 | name: "relu3_1_res075" 1224 | type: "ReLU" 1225 | } 1226 | layer { 1227 | bottom: "conv3_1_res075" 1228 | top: "conv3_2_res075" 1229 | name: "conv3_2_res075" 1230 | type: "Convolution" 1231 | param { 1232 | name: "conv3_2_w" 1233 | lr_mult: 1 1234 | decay_mult: 1 1235 | } 1236 | param { 1237 | name: "conv3_2_b" 1238 | lr_mult: 2 1239 | decay_mult: 0 1240 | } 1241 | convolution_param { 1242 | num_output: 256 1243 | pad: 1 1244 | kernel_size: 3 1245 | } 1246 | } 1247 | layer { 1248 | bottom: "conv3_2_res075" 1249 | top: "conv3_2_res075" 1250 | name: "relu3_2_res075" 1251 | type: "ReLU" 1252 | } 1253 | layer { 1254 | bottom: "conv3_2_res075" 1255 | top: "conv3_3_res075" 1256 | name: "conv3_3_res075" 1257 | type: "Convolution" 1258 | param { 1259 | name: "conv3_3_w" 1260 | lr_mult: 1 1261 | decay_mult: 1 1262 | } 1263 | param { 1264 | name: "conv3_3_b" 1265 | lr_mult: 2 1266 | decay_mult: 0 1267 | } 1268 | convolution_param { 1269 | num_output: 256 1270 | pad: 1 1271 | kernel_size: 3 1272 | } 1273 | } 1274 | layer { 1275 | bottom: "conv3_3_res075" 1276 | top: "conv3_3_res075" 1277 | name: "relu3_3_res075" 1278 | type: "ReLU" 1279 | } 1280 | layer { 1281 | bottom: "conv3_3_res075" 1282 | top: "pool3_res075" 1283 | name: "pool3_res075" 1284 | type: "Pooling" 1285 | pooling_param { 1286 | pool: MAX 1287 | kernel_size: 3 1288 | stride: 2 1289 | pad: 1 1290 | } 1291 | } 1292 | layer { 1293 | bottom: "pool3_res075" 1294 | top: "conv4_1_res075" 1295 | name: "conv4_1_res075" 1296 | type: "Convolution" 1297 | param { 1298 | name: "conv4_1_w" 1299 | lr_mult: 1 1300 | decay_mult: 1 1301 | } 1302 | param { 1303 | name: "conv4_1_b" 1304 | lr_mult: 2 1305 | decay_mult: 0 1306 | } 1307 | convolution_param { 1308 | num_output: 512 1309 | pad: 1 1310 | kernel_size: 3 1311 | } 1312 | } 1313 | layer { 1314 | bottom: "conv4_1_res075" 1315 | top: "conv4_1_res075" 1316 | name: "relu4_1_res075" 1317 | type: "ReLU" 1318 | } 1319 | layer { 1320 | bottom: "conv4_1_res075" 1321 | top: "conv4_2_res075" 1322 | name: "conv4_2_res075" 1323 | type: "Convolution" 1324 | param { 1325 | name: "conv4_2_w" 1326 | lr_mult: 1 1327 | decay_mult: 1 1328 | } 1329 | param { 1330 | name: "conv4_2_b" 1331 | lr_mult: 2 1332 | decay_mult: 0 1333 | } 1334 | convolution_param { 1335 | num_output: 512 1336 | pad: 1 1337 | kernel_size: 3 1338 | } 1339 | } 1340 | layer { 1341 | bottom: "conv4_2_res075" 1342 | top: "conv4_2_res075" 1343 | name: "relu4_2_res075" 1344 | type: "ReLU" 1345 | } 1346 | layer { 1347 | bottom: "conv4_2_res075" 1348 | top: "conv4_3_res075" 1349 | name: "conv4_3_res075" 1350 | type: "Convolution" 1351 | param { 1352 | name: "conv4_3_w" 1353 | lr_mult: 1 1354 | decay_mult: 1 1355 | } 1356 | param { 1357 | name: "conv4_3_b" 1358 | lr_mult: 2 1359 | decay_mult: 0 1360 | } 1361 | convolution_param { 1362 | num_output: 512 1363 | pad: 1 1364 | kernel_size: 3 1365 | } 1366 | } 1367 | layer { 1368 | bottom: "conv4_3_res075" 1369 | top: "conv4_3_res075" 1370 | name: "relu4_3_res075" 1371 | type: "ReLU" 1372 | } 1373 | layer { 1374 | bottom: "conv4_3_res075" 1375 | top: "pool4_res075" 1376 | name: "pool4_res075" 1377 | type: "Pooling" 1378 | pooling_param { 1379 | pool: MAX 1380 | kernel_size: 3 1381 | pad: 1 1382 | stride: 1 1383 | } 1384 | } 1385 | layer { 1386 | bottom: "pool4_res075" 1387 | top: "conv5_1_res075" 1388 | name: "conv5_1_res075" 1389 | type: "Convolution" 1390 | param { 1391 | name: "conv5_1_w" 1392 | lr_mult: 1 1393 | decay_mult: 1 1394 | } 1395 | param { 1396 | name: "conv5_1_b" 1397 | lr_mult: 2 1398 | decay_mult: 0 1399 | } 1400 | convolution_param { 1401 | num_output: 512 1402 | #pad: 1 1403 | pad: 2 1404 | dilation: 2 1405 | kernel_size: 3 1406 | } 1407 | } 1408 | layer { 1409 | bottom: "conv5_1_res075" 1410 | top: "conv5_1_res075" 1411 | name: "relu5_1_res075" 1412 | type: "ReLU" 1413 | } 1414 | layer { 1415 | bottom: "conv5_1_res075" 1416 | top: "conv5_2_res075" 1417 | name: "conv5_2_res075" 1418 | type: "Convolution" 1419 | param { 1420 | name: "conv5_2_w" 1421 | lr_mult: 1 1422 | decay_mult: 1 1423 | } 1424 | param { 1425 | name: "conv5_2_b" 1426 | lr_mult: 2 1427 | decay_mult: 0 1428 | } 1429 | convolution_param { 1430 | num_output: 512 1431 | #pad: 1 1432 | pad: 2 1433 | dilation: 2 1434 | kernel_size: 3 1435 | } 1436 | } 1437 | layer { 1438 | bottom: "conv5_2_res075" 1439 | top: "conv5_2_res075" 1440 | name: "relu5_2_res075" 1441 | type: "ReLU" 1442 | } 1443 | layer { 1444 | bottom: "conv5_2_res075" 1445 | top: "conv5_3_res075" 1446 | name: "conv5_3_res075" 1447 | type: "Convolution" 1448 | param { 1449 | name: "conv5_3_w" 1450 | lr_mult: 1 1451 | decay_mult: 1 1452 | } 1453 | param { 1454 | name: "conv5_3_b" 1455 | lr_mult: 2 1456 | decay_mult: 0 1457 | } 1458 | convolution_param { 1459 | num_output: 512 1460 | #pad: 1 1461 | pad: 2 1462 | dilation: 2 1463 | kernel_size: 3 1464 | } 1465 | } 1466 | layer { 1467 | bottom: "conv5_3_res075" 1468 | top: "conv5_3_res075" 1469 | name: "relu5_3_res075" 1470 | type: "ReLU" 1471 | } 1472 | layer { 1473 | bottom: "conv5_3_res075" 1474 | top: "pool5_res075" 1475 | name: "pool5_res075" 1476 | type: "Pooling" 1477 | pooling_param { 1478 | pool: MAX 1479 | #kernel_size: 2 1480 | #stride: 2 1481 | kernel_size: 3 1482 | stride: 1 1483 | pad: 1 1484 | } 1485 | } 1486 | 1487 | layer { 1488 | bottom: "pool5_res075" 1489 | top: "fc6_res075" 1490 | name: "fc6_res075" 1491 | type: "Convolution" 1492 | param { 1493 | name: "fc6_w" 1494 | lr_mult: 1 1495 | decay_mult: 1 1496 | } 1497 | param { 1498 | name: "fc6_b" 1499 | lr_mult: 2 1500 | decay_mult: 0 1501 | } 1502 | convolution_param { 1503 | num_output: 1024 1504 | pad: 12 1505 | dilation: 12 1506 | kernel_size: 3 1507 | } 1508 | } 1509 | layer { 1510 | bottom: "fc6_res075" 1511 | top: "fc6_res075" 1512 | name: "relu6_res075" 1513 | type: "ReLU" 1514 | } 1515 | layer { 1516 | bottom: "fc6_res075" 1517 | top: "fc6_res075" 1518 | name: "drop6_res075" 1519 | type: "Dropout" 1520 | dropout_param { 1521 | dropout_ratio: 0.5 1522 | } 1523 | } 1524 | layer { 1525 | bottom: "fc6_res075" 1526 | top: "fc7_res075" 1527 | name: "fc7_res075" 1528 | type: "Convolution" 1529 | param { 1530 | name: "fc7_w" 1531 | lr_mult: 1 1532 | decay_mult: 1 1533 | } 1534 | param { 1535 | name: "fc7_b" 1536 | lr_mult: 2 1537 | decay_mult: 0 1538 | } 1539 | convolution_param { 1540 | num_output: 1024 1541 | kernel_size: 1 1542 | } 1543 | } 1544 | layer { 1545 | bottom: "fc7_res075" 1546 | top: "fc7_res075" 1547 | name: "relu7_res075" 1548 | type: "ReLU" 1549 | } 1550 | layer { 1551 | bottom: "fc7_res075" 1552 | top: "fc7_res075" 1553 | name: "drop7_res075" 1554 | type: "Dropout" 1555 | dropout_param { 1556 | dropout_ratio: 0.5 1557 | } 1558 | } 1559 | 1560 | ############### classifier for resolution 1################### 1561 | layer { 1562 | bottom: "fc7" 1563 | top: "fc8_${EXP}" 1564 | name: "fc8_${EXP}" 1565 | type: "Convolution" 1566 | param { 1567 | name: "fc8_w" 1568 | lr_mult: 10 1569 | decay_mult: 1 1570 | } 1571 | param { 1572 | name: "fc8_b" 1573 | lr_mult: 20 1574 | decay_mult: 0 1575 | } 1576 | convolution_param { 1577 | num_output: ${NUM_LABELS} 1578 | kernel_size: 1 1579 | weight_filler { 1580 | type: "gaussian" 1581 | std: 0.01 1582 | } 1583 | bias_filler { 1584 | type: "constant" 1585 | value: 0 1586 | } 1587 | } 1588 | } 1589 | 1590 | ############### classifier for resolution 1/2 ################### 1591 | layer { 1592 | bottom: "fc7_res05" 1593 | top: "fc8_${EXP}_res05" 1594 | name: "fc8_${EXP}_res05" 1595 | type: "Convolution" 1596 | param { 1597 | name: "fc8_w" 1598 | lr_mult: 10 1599 | decay_mult: 1 1600 | } 1601 | param { 1602 | name: "fc8_b" 1603 | lr_mult: 20 1604 | decay_mult: 0 1605 | } 1606 | convolution_param { 1607 | num_output: ${NUM_LABELS} 1608 | kernel_size: 1 1609 | weight_filler { 1610 | type: "gaussian" 1611 | std: 0.01 1612 | } 1613 | bias_filler { 1614 | type: "constant" 1615 | value: 0 1616 | } 1617 | } 1618 | } 1619 | 1620 | ############### classifier for resolution 3/4 ################### 1621 | layer { 1622 | bottom: "fc7_res075" 1623 | top: "fc8_${EXP}_res075" 1624 | name: "fc8_${EXP}_res075" 1625 | type: "Convolution" 1626 | param { 1627 | name: "fc8_w" 1628 | lr_mult: 10 1629 | decay_mult: 1 1630 | } 1631 | param { 1632 | name: "fc8_b" 1633 | lr_mult: 20 1634 | decay_mult: 0 1635 | } 1636 | convolution_param { 1637 | num_output: ${NUM_LABELS} 1638 | kernel_size: 1 1639 | weight_filler { 1640 | type: "gaussian" 1641 | std: 0.01 1642 | } 1643 | bias_filler { 1644 | type: "constant" 1645 | value: 0 1646 | } 1647 | } 1648 | } 1649 | 1650 | ############### upsampling ################## 1651 | layer { 1652 | bottom: "fc7_res05" 1653 | top: "fc7_res05_interp" 1654 | name: "fc7_res05_interp" 1655 | type: "Interp" 1656 | interp_param { 1657 | zoom_factor: 2 1658 | pad_beg: 0 1659 | pad_end: 0 1660 | } 1661 | } 1662 | layer { 1663 | bottom: "fc7_res075" 1664 | top: "fc7_res075_interp" 1665 | name: "fc7_res075_interp" 1666 | type: "Interp" 1667 | interp_param { 1668 | zoom_factor: 4 1669 | shrink_factor: 3 1670 | pad_beg: 0 1671 | pad_end: 0 1672 | } 1673 | } 1674 | 1675 | layer { 1676 | bottom: "fc8_${EXP}_res05" 1677 | top: "fc8_${EXP}_res05_interp" 1678 | name: "fc8_${EXP}_res05_interp" 1679 | type: "Interp" 1680 | interp_param { 1681 | zoom_factor: 2 1682 | pad_beg: 0 1683 | pad_end: 0 1684 | } 1685 | } 1686 | layer { 1687 | bottom: "fc8_${EXP}_res075" 1688 | top: "fc8_${EXP}_res075_interp" 1689 | name: "fc8_${EXP}_res075_interp" 1690 | type: "Interp" 1691 | interp_param { 1692 | zoom_factor: 4 1693 | shrink_factor: 3 1694 | pad_beg: 0 1695 | pad_end: 0 1696 | } 1697 | } 1698 | 1699 | ############### concatenation and pass through attention model ######### 1700 | layer { 1701 | bottom: "fc7" 1702 | bottom: "fc7_res075_interp" 1703 | bottom: "fc7_res05_interp" 1704 | top: "fc7_concat" 1705 | name: "fc7_concat" 1706 | type: "Concat" 1707 | concat_param { 1708 | axis: 1 1709 | } 1710 | } 1711 | 1712 | ### attention model 1713 | # change lr_mult to be 1, since it has been pretrained on coco 1714 | layer { 1715 | bottom: "fc7_concat" 1716 | top: "att_conv1" 1717 | name: "att_conv1" 1718 | type: "Convolution" 1719 | param { 1720 | lr_mult: 1 1721 | decay_mult: 1 1722 | } 1723 | param { 1724 | lr_mult: 2 1725 | decay_mult: 0 1726 | } 1727 | convolution_param { 1728 | num_output: 512 1729 | kernel_size: 3 1730 | pad: 1 1731 | } 1732 | } 1733 | layer { 1734 | bottom: "att_conv1" 1735 | top: "att_conv1" 1736 | name: "relu_att_conv1" 1737 | type: "ReLU" 1738 | } 1739 | layer { 1740 | bottom: "att_conv1" 1741 | top: "att_conv1" 1742 | name: "drop_att_conv1" 1743 | type: "Dropout" 1744 | dropout_param { 1745 | dropout_ratio: 0.5 1746 | } 1747 | } 1748 | 1749 | layer { 1750 | bottom: "att_conv1" 1751 | top: "att_fc" 1752 | name: "att_fc" 1753 | type: "Convolution" 1754 | param { 1755 | lr_mult: 1 1756 | decay_mult: 1 1757 | } 1758 | param { 1759 | lr_mult: 2 1760 | decay_mult: 0 1761 | } 1762 | convolution_param { 1763 | num_output: 3 1764 | kernel_size: 1 1765 | } 1766 | } 1767 | layer { 1768 | bottom: "att_fc" 1769 | top: "attention" 1770 | name: "attention" 1771 | type: "Softmax" 1772 | } 1773 | 1774 | ############### collect the output from attention model ######## 1775 | layer { 1776 | bottom: "attention" 1777 | top: "attention1" 1778 | top: "attention2" 1779 | top: "attention3" 1780 | name: "slice_attention" 1781 | type: "Slice" 1782 | slice_param { 1783 | axis: 1 1784 | slice_point: 1 1785 | slice_point: 2 1786 | } 1787 | } 1788 | 1789 | ############### scale features ############# 1790 | layer { 1791 | bottom: "fc8_${EXP}" 1792 | bottom: "attention1" 1793 | top: "fc8_product" 1794 | name: "fc8_product" 1795 | type: "SpatialProduct" 1796 | } 1797 | 1798 | layer { 1799 | bottom: "fc8_${EXP}_res075_interp" 1800 | bottom: "attention2" 1801 | top: "fc8_res075_product" 1802 | name: "fc8_res075_product" 1803 | type: "SpatialProduct" 1804 | } 1805 | 1806 | layer { 1807 | bottom: "fc8_${EXP}_res05_interp" 1808 | bottom: "attention3" 1809 | top: "fc8_res05_product" 1810 | name: "fc8_res05_product" 1811 | type: "SpatialProduct" 1812 | } 1813 | 1814 | ### add features ### 1815 | layer { 1816 | bottom: "fc8_product" 1817 | bottom: "fc8_res075_product" 1818 | bottom: "fc8_res05_product" 1819 | top: "fc8_fusion" 1820 | name: "fc8_fusion" 1821 | type: "Eltwise" 1822 | eltwise_param { 1823 | operation: SUM 1824 | } 1825 | } 1826 | ############### shrink label ################ 1827 | layer { 1828 | bottom: "label" 1829 | top: "label_shrink8" 1830 | name: "label_shrink8" 1831 | type: "Interp" 1832 | interp_param { 1833 | shrink_factor: 8 1834 | pad_beg: 0 1835 | pad_end: 0 1836 | } 1837 | } 1838 | ############### compute loss ################# 1839 | layer { 1840 | name: "loss" 1841 | type: "SoftmaxWithLoss" 1842 | bottom: "fc8_fusion" 1843 | bottom: "label_shrink8" 1844 | loss_param { 1845 | ignore_label: 255 1846 | } 1847 | include: { phase: TRAIN } 1848 | } 1849 | layer { 1850 | name: "accuracy" 1851 | type: "SegAccuracy" 1852 | bottom: "fc8_fusion" 1853 | bottom: "label_shrink8" 1854 | top: "accuracy" 1855 | seg_accuracy_param { 1856 | ignore_label: 255 1857 | } 1858 | } 1859 | 1860 | ############## add supervsion to fc8's directly ########## 1861 | ## resolution 1 1862 | layer { 1863 | name: "loss_res1" 1864 | type: "SoftmaxWithLoss" 1865 | bottom: "fc8_${EXP}" 1866 | bottom: "label_shrink8" 1867 | loss_param { 1868 | ignore_label: 255 1869 | } 1870 | include: { phase: TRAIN } 1871 | } 1872 | layer { 1873 | name: "accuracy_res1" 1874 | type: "SegAccuracy" 1875 | bottom: "fc8_${EXP}" 1876 | bottom: "label_shrink8" 1877 | top: "accuracy_res1" 1878 | seg_accuracy_param { 1879 | ignore_label: 255 1880 | } 1881 | } 1882 | 1883 | ## resolution 3/4 1884 | # interp layer does not support 3/4 scale for ground truth 1885 | # instead we interpolate score map 1886 | layer { 1887 | name: "loss_res075" 1888 | type: "SoftmaxWithLoss" 1889 | bottom: "fc8_${EXP}_res075_interp" 1890 | bottom: "label_shrink8" 1891 | loss_param { 1892 | ignore_label: 255 1893 | } 1894 | include: { phase: TRAIN } 1895 | } 1896 | layer { 1897 | name: "accuracy_res075" 1898 | type: "SegAccuracy" 1899 | bottom: "fc8_${EXP}_res075_interp" 1900 | bottom: "label_shrink8" 1901 | top: "accuracy_res075" 1902 | seg_accuracy_param { 1903 | ignore_label: 255 1904 | } 1905 | } 1906 | 1907 | ## resolution 1/2 1908 | layer { 1909 | bottom: "label" 1910 | top: "label_shrink16" 1911 | name: "label_shrink16" 1912 | type: "Interp" 1913 | interp_param { 1914 | shrink_factor: 16 1915 | pad_beg: 0 1916 | pad_end: 0 1917 | } 1918 | } 1919 | layer { 1920 | name: "loss_res05" 1921 | type: "SoftmaxWithLoss" 1922 | bottom: "fc8_${EXP}_res05" 1923 | bottom: "label_shrink16" 1924 | loss_param { 1925 | ignore_label: 255 1926 | } 1927 | include: { phase: TRAIN } 1928 | } 1929 | layer { 1930 | name: "accuracy_res05" 1931 | type: "SegAccuracy" 1932 | bottom: "fc8_${EXP}_res05" 1933 | bottom: "label_shrink16" 1934 | top: "accuracy_res05" 1935 | seg_accuracy_param { 1936 | ignore_label: 255 1937 | } 1938 | } 1939 | -------------------------------------------------------------------------------- /human/config/attention/train.prototxt: -------------------------------------------------------------------------------- 1 | ## train_attention_ssl 2 | # 3 | name: "${NET_ID}" 4 | 5 | ### load data ### 6 | layer { 7 | name: "data" 8 | type: "ImageSegData" 9 | top: "data" 10 | top: "label" 11 | image_data_param { 12 | root_folder: "${DATA_ROOT}" 13 | source: "${EXP}/list/${TRAIN_SET}.txt" 14 | label_type: PIXEL 15 | batch_size: 1 16 | shuffle: true 17 | } 18 | transform_param { 19 | mean_value: 104.008 20 | mean_value: 116.669 21 | mean_value: 122.675 22 | crop_size: 321 23 | scale_factors: 0.6 24 | scale_factors: 0.8 25 | scale_factors: 1 26 | scale_factors: 1.2 27 | scale_factors: 1.4 28 | mirror: true 29 | } 30 | include: { phase: TRAIN } 31 | } 32 | 33 | ### shrink data ### 34 | layer { 35 | bottom: "data" 36 | top: "shrink_data05" 37 | name: "shrink_data05" 38 | type: "Interp" 39 | interp_param { 40 | shrink_factor: 2 41 | pad_beg: 0 42 | pad_end: 0 43 | } 44 | } 45 | 46 | layer { 47 | bottom: "data" 48 | top: "shrink_data075" 49 | name: "shrink_data075" 50 | type: "Interp" 51 | interp_param { 52 | shrink_factor: 4 53 | zoom_factor: 3 54 | pad_beg: 0 55 | pad_end: 0 56 | } 57 | } 58 | 59 | ### NETWORK for resolution 1 ### 60 | 61 | layer { 62 | bottom: "data" 63 | top: "conv1_1" 64 | name: "conv1_1" 65 | type: "Convolution" 66 | param { 67 | name: "conv1_1_w" 68 | lr_mult: 1 69 | decay_mult: 1 70 | } 71 | param { 72 | name: "conv1_1_b" 73 | lr_mult: 2 74 | decay_mult: 0 75 | } 76 | convolution_param { 77 | num_output: 64 78 | pad: 1 79 | kernel_size: 3 80 | } 81 | } 82 | layer { 83 | bottom: "conv1_1" 84 | top: "conv1_1" 85 | name: "relu1_1" 86 | type: "ReLU" 87 | } 88 | layer { 89 | bottom: "conv1_1" 90 | top: "conv1_2" 91 | name: "conv1_2" 92 | type: "Convolution" 93 | param { 94 | name: "conv1_2_w" 95 | lr_mult: 1 96 | decay_mult: 1 97 | } 98 | param { 99 | name: "conv1_2_b" 100 | lr_mult: 2 101 | decay_mult: 0 102 | } 103 | convolution_param { 104 | num_output: 64 105 | pad: 1 106 | kernel_size: 3 107 | } 108 | } 109 | layer { 110 | bottom: "conv1_2" 111 | top: "conv1_2" 112 | name: "relu1_2" 113 | type: "ReLU" 114 | } 115 | layer { 116 | bottom: "conv1_2" 117 | top: "pool1" 118 | name: "pool1" 119 | type: "Pooling" 120 | pooling_param { 121 | pool: MAX 122 | kernel_size: 3 123 | stride: 2 124 | pad: 1 125 | } 126 | } 127 | layer { 128 | bottom: "pool1" 129 | top: "conv2_1" 130 | name: "conv2_1" 131 | type: "Convolution" 132 | param { 133 | name: "conv2_1_w" 134 | lr_mult: 1 135 | decay_mult: 1 136 | } 137 | param { 138 | name: "conv2_1_b" 139 | lr_mult: 2 140 | decay_mult: 0 141 | } 142 | convolution_param { 143 | num_output: 128 144 | pad: 1 145 | kernel_size: 3 146 | } 147 | } 148 | layer { 149 | bottom: "conv2_1" 150 | top: "conv2_1" 151 | name: "relu2_1" 152 | type: "ReLU" 153 | } 154 | layer { 155 | bottom: "conv2_1" 156 | top: "conv2_2" 157 | name: "conv2_2" 158 | type: "Convolution" 159 | param { 160 | name: "conv2_2_w" 161 | lr_mult: 1 162 | decay_mult: 1 163 | } 164 | param { 165 | name: "conv2_2_b" 166 | lr_mult: 2 167 | decay_mult: 0 168 | } 169 | convolution_param { 170 | num_output: 128 171 | pad: 1 172 | kernel_size: 3 173 | } 174 | } 175 | layer { 176 | bottom: "conv2_2" 177 | top: "conv2_2" 178 | name: "relu2_2" 179 | type: "ReLU" 180 | } 181 | layer { 182 | bottom: "conv2_2" 183 | top: "pool2" 184 | name: "pool2" 185 | type: "Pooling" 186 | pooling_param { 187 | pool: MAX 188 | kernel_size: 3 189 | stride: 2 190 | pad: 1 191 | } 192 | } 193 | layer { 194 | bottom: "pool2" 195 | top: "conv3_1" 196 | name: "conv3_1" 197 | type: "Convolution" 198 | param { 199 | name: "conv3_1_w" 200 | lr_mult: 1 201 | decay_mult: 1 202 | } 203 | param { 204 | name: "conv3_1_b" 205 | lr_mult: 2 206 | decay_mult: 0 207 | } 208 | convolution_param { 209 | num_output: 256 210 | pad: 1 211 | kernel_size: 3 212 | } 213 | } 214 | layer { 215 | bottom: "conv3_1" 216 | top: "conv3_1" 217 | name: "relu3_1" 218 | type: "ReLU" 219 | } 220 | layer { 221 | bottom: "conv3_1" 222 | top: "conv3_2" 223 | name: "conv3_2" 224 | type: "Convolution" 225 | param { 226 | name: "conv3_2_w" 227 | lr_mult: 1 228 | decay_mult: 1 229 | } 230 | param { 231 | name: "conv3_2_b" 232 | lr_mult: 2 233 | decay_mult: 0 234 | } 235 | convolution_param { 236 | num_output: 256 237 | pad: 1 238 | kernel_size: 3 239 | } 240 | } 241 | layer { 242 | bottom: "conv3_2" 243 | top: "conv3_2" 244 | name: "relu3_2" 245 | type: "ReLU" 246 | } 247 | layer { 248 | bottom: "conv3_2" 249 | top: "conv3_3" 250 | name: "conv3_3" 251 | type: "Convolution" 252 | param { 253 | name: "conv3_3_w" 254 | lr_mult: 1 255 | decay_mult: 1 256 | } 257 | param { 258 | name: "conv3_3_b" 259 | lr_mult: 2 260 | decay_mult: 0 261 | } 262 | convolution_param { 263 | num_output: 256 264 | pad: 1 265 | kernel_size: 3 266 | } 267 | } 268 | layer { 269 | bottom: "conv3_3" 270 | top: "conv3_3" 271 | name: "relu3_3" 272 | type: "ReLU" 273 | } 274 | layer { 275 | bottom: "conv3_3" 276 | top: "pool3" 277 | name: "pool3" 278 | type: "Pooling" 279 | pooling_param { 280 | pool: MAX 281 | kernel_size: 3 282 | stride: 2 283 | pad: 1 284 | } 285 | } 286 | layer { 287 | bottom: "pool3" 288 | top: "conv4_1" 289 | name: "conv4_1" 290 | type: "Convolution" 291 | param { 292 | name: "conv4_1_w" 293 | lr_mult: 1 294 | decay_mult: 1 295 | } 296 | param { 297 | name: "conv4_1_b" 298 | lr_mult: 2 299 | decay_mult: 0 300 | } 301 | convolution_param { 302 | num_output: 512 303 | pad: 1 304 | kernel_size: 3 305 | } 306 | } 307 | layer { 308 | bottom: "conv4_1" 309 | top: "conv4_1" 310 | name: "relu4_1" 311 | type: "ReLU" 312 | } 313 | layer { 314 | bottom: "conv4_1" 315 | top: "conv4_2" 316 | name: "conv4_2" 317 | type: "Convolution" 318 | param { 319 | name: "conv4_2_w" 320 | lr_mult: 1 321 | decay_mult: 1 322 | } 323 | param { 324 | name: "conv4_2_b" 325 | lr_mult: 2 326 | decay_mult: 0 327 | } 328 | convolution_param { 329 | num_output: 512 330 | pad: 1 331 | kernel_size: 3 332 | } 333 | } 334 | layer { 335 | bottom: "conv4_2" 336 | top: "conv4_2" 337 | name: "relu4_2" 338 | type: "ReLU" 339 | } 340 | layer { 341 | bottom: "conv4_2" 342 | top: "conv4_3" 343 | name: "conv4_3" 344 | type: "Convolution" 345 | param { 346 | name: "conv4_3_w" 347 | lr_mult: 1 348 | decay_mult: 1 349 | } 350 | param { 351 | name: "conv4_3_b" 352 | lr_mult: 2 353 | decay_mult: 0 354 | } 355 | convolution_param { 356 | num_output: 512 357 | pad: 1 358 | kernel_size: 3 359 | } 360 | } 361 | layer { 362 | bottom: "conv4_3" 363 | top: "conv4_3" 364 | name: "relu4_3" 365 | type: "ReLU" 366 | } 367 | layer { 368 | bottom: "conv4_3" 369 | top: "pool4" 370 | name: "pool4" 371 | type: "Pooling" 372 | pooling_param { 373 | pool: MAX 374 | kernel_size: 3 375 | pad: 1 376 | stride: 1 377 | } 378 | } 379 | layer { 380 | bottom: "pool4" 381 | top: "conv5_1" 382 | name: "conv5_1" 383 | type: "Convolution" 384 | param { 385 | name: "conv5_1_w" 386 | lr_mult: 1 387 | decay_mult: 1 388 | } 389 | param { 390 | name: "conv5_1_b" 391 | lr_mult: 2 392 | decay_mult: 0 393 | } 394 | convolution_param { 395 | num_output: 512 396 | #pad: 1 397 | pad: 2 398 | dilation: 2 399 | kernel_size: 3 400 | } 401 | } 402 | layer { 403 | bottom: "conv5_1" 404 | top: "conv5_1" 405 | name: "relu5_1" 406 | type: "ReLU" 407 | } 408 | layer { 409 | bottom: "conv5_1" 410 | top: "conv5_2" 411 | name: "conv5_2" 412 | type: "Convolution" 413 | param { 414 | name: "conv5_2_w" 415 | lr_mult: 1 416 | decay_mult: 1 417 | } 418 | param { 419 | name: "conv5_2_b" 420 | lr_mult: 2 421 | decay_mult: 0 422 | } 423 | convolution_param { 424 | num_output: 512 425 | #pad: 1 426 | pad: 2 427 | dilation: 2 428 | kernel_size: 3 429 | } 430 | } 431 | layer { 432 | bottom: "conv5_2" 433 | top: "conv5_2" 434 | name: "relu5_2" 435 | type: "ReLU" 436 | } 437 | layer { 438 | bottom: "conv5_2" 439 | top: "conv5_3" 440 | name: "conv5_3" 441 | type: "Convolution" 442 | param { 443 | name: "conv5_3_w" 444 | lr_mult: 1 445 | decay_mult: 1 446 | } 447 | param { 448 | name: "conv5_3_b" 449 | lr_mult: 2 450 | decay_mult: 0 451 | } 452 | convolution_param { 453 | num_output: 512 454 | #pad: 1 455 | pad: 2 456 | dilation: 2 457 | kernel_size: 3 458 | } 459 | } 460 | layer { 461 | bottom: "conv5_3" 462 | top: "conv5_3" 463 | name: "relu5_3" 464 | type: "ReLU" 465 | } 466 | layer { 467 | bottom: "conv5_3" 468 | top: "pool5" 469 | name: "pool5" 470 | type: "Pooling" 471 | pooling_param { 472 | pool: MAX 473 | #kernel_size: 2 474 | #stride: 2 475 | kernel_size: 3 476 | stride: 1 477 | pad: 1 478 | } 479 | } 480 | 481 | layer { 482 | bottom: "pool5" 483 | top: "fc6" 484 | name: "fc6" 485 | type: "Convolution" 486 | param { 487 | name: "fc6_w" 488 | lr_mult: 1 489 | decay_mult: 1 490 | } 491 | param { 492 | name: "fc6_b" 493 | lr_mult: 2 494 | decay_mult: 0 495 | } 496 | convolution_param { 497 | num_output: 1024 498 | pad: 12 499 | dilation: 12 500 | kernel_size: 3 501 | } 502 | } 503 | layer { 504 | bottom: "fc6" 505 | top: "fc6" 506 | name: "relu6" 507 | type: "ReLU" 508 | } 509 | layer { 510 | bottom: "fc6" 511 | top: "fc6" 512 | name: "drop6" 513 | type: "Dropout" 514 | dropout_param { 515 | dropout_ratio: 0.5 516 | } 517 | } 518 | layer { 519 | bottom: "fc6" 520 | top: "fc7" 521 | name: "fc7" 522 | type: "Convolution" 523 | param { 524 | name: "fc7_w" 525 | lr_mult: 1 526 | decay_mult: 1 527 | } 528 | param { 529 | name: "fc7_b" 530 | lr_mult: 2 531 | decay_mult: 0 532 | } 533 | convolution_param { 534 | num_output: 1024 535 | kernel_size: 1 536 | } 537 | } 538 | layer { 539 | bottom: "fc7" 540 | top: "fc7" 541 | name: "relu7" 542 | type: "ReLU" 543 | } 544 | layer { 545 | bottom: "fc7" 546 | top: "fc7" 547 | name: "drop7" 548 | type: "Dropout" 549 | dropout_param { 550 | dropout_ratio: 0.5 551 | } 552 | } 553 | 554 | ### NETWORK for resolution 1/2 ### 555 | 556 | layer { 557 | bottom: "shrink_data05" 558 | top: "conv1_1_res05" 559 | name: "conv1_1_res05" 560 | type: "Convolution" 561 | param { 562 | name: "conv1_1_w" 563 | lr_mult: 1 564 | decay_mult: 1 565 | } 566 | param { 567 | name: "conv1_1_b" 568 | lr_mult: 2 569 | decay_mult: 0 570 | } 571 | convolution_param { 572 | num_output: 64 573 | pad: 1 574 | kernel_size: 3 575 | } 576 | } 577 | layer { 578 | bottom: "conv1_1_res05" 579 | top: "conv1_1_res05" 580 | name: "relu1_1_res05" 581 | type: "ReLU" 582 | } 583 | layer { 584 | bottom: "conv1_1_res05" 585 | top: "conv1_2_res05" 586 | name: "conv1_2_res05" 587 | type: "Convolution" 588 | param { 589 | name: "conv1_2_w" 590 | lr_mult: 1 591 | decay_mult: 1 592 | } 593 | param { 594 | name: "conv1_2_b" 595 | lr_mult: 2 596 | decay_mult: 0 597 | } 598 | convolution_param { 599 | num_output: 64 600 | pad: 1 601 | kernel_size: 3 602 | } 603 | } 604 | layer { 605 | bottom: "conv1_2_res05" 606 | top: "conv1_2_res05" 607 | name: "relu1_2_res05" 608 | type: "ReLU" 609 | } 610 | layer { 611 | bottom: "conv1_2_res05" 612 | top: "pool1_res05" 613 | name: "pool1_res05" 614 | type: "Pooling" 615 | pooling_param { 616 | pool: MAX 617 | kernel_size: 3 618 | stride: 2 619 | pad: 1 620 | } 621 | } 622 | layer { 623 | bottom: "pool1_res05" 624 | top: "conv2_1_res05" 625 | name: "conv2_1_res05" 626 | type: "Convolution" 627 | param { 628 | name: "conv2_1_w" 629 | lr_mult: 1 630 | decay_mult: 1 631 | } 632 | param { 633 | name: "conv2_1_b" 634 | lr_mult: 2 635 | decay_mult: 0 636 | } 637 | convolution_param { 638 | num_output: 128 639 | pad: 1 640 | kernel_size: 3 641 | } 642 | } 643 | layer { 644 | bottom: "conv2_1_res05" 645 | top: "conv2_1_res05" 646 | name: "relu2_1_res05" 647 | type: "ReLU" 648 | } 649 | layer { 650 | bottom: "conv2_1_res05" 651 | top: "conv2_2_res05" 652 | name: "conv2_2_res05" 653 | type: "Convolution" 654 | param { 655 | name: "conv2_2_w" 656 | lr_mult: 1 657 | decay_mult: 1 658 | } 659 | param { 660 | name: "conv2_2_b" 661 | lr_mult: 2 662 | decay_mult: 0 663 | } 664 | convolution_param { 665 | num_output: 128 666 | pad: 1 667 | kernel_size: 3 668 | } 669 | } 670 | layer { 671 | bottom: "conv2_2_res05" 672 | top: "conv2_2_res05" 673 | name: "relu2_2_res05" 674 | type: "ReLU" 675 | } 676 | layer { 677 | bottom: "conv2_2_res05" 678 | top: "pool2_res05" 679 | name: "pool2_res05" 680 | type: "Pooling" 681 | pooling_param { 682 | pool: MAX 683 | kernel_size: 3 684 | stride: 2 685 | pad: 1 686 | } 687 | } 688 | layer { 689 | bottom: "pool2_res05" 690 | top: "conv3_1_res05" 691 | name: "conv3_1_res05" 692 | type: "Convolution" 693 | param { 694 | name: "conv3_1_w" 695 | lr_mult: 1 696 | decay_mult: 1 697 | } 698 | param { 699 | name: "conv3_1_b" 700 | lr_mult: 2 701 | decay_mult: 0 702 | } 703 | convolution_param { 704 | num_output: 256 705 | pad: 1 706 | kernel_size: 3 707 | } 708 | } 709 | layer { 710 | bottom: "conv3_1_res05" 711 | top: "conv3_1_res05" 712 | name: "relu3_1_res05" 713 | type: "ReLU" 714 | } 715 | layer { 716 | bottom: "conv3_1_res05" 717 | top: "conv3_2_res05" 718 | name: "conv3_2_res05" 719 | type: "Convolution" 720 | param { 721 | name: "conv3_2_w" 722 | lr_mult: 1 723 | decay_mult: 1 724 | } 725 | param { 726 | name: "conv3_2_b" 727 | lr_mult: 2 728 | decay_mult: 0 729 | } 730 | convolution_param { 731 | num_output: 256 732 | pad: 1 733 | kernel_size: 3 734 | } 735 | } 736 | layer { 737 | bottom: "conv3_2_res05" 738 | top: "conv3_2_res05" 739 | name: "relu3_2_res05" 740 | type: "ReLU" 741 | } 742 | layer { 743 | bottom: "conv3_2_res05" 744 | top: "conv3_3_res05" 745 | name: "conv3_3_res05" 746 | type: "Convolution" 747 | param { 748 | name: "conv3_3_w" 749 | lr_mult: 1 750 | decay_mult: 1 751 | } 752 | param { 753 | name: "conv3_3_b" 754 | lr_mult: 2 755 | decay_mult: 0 756 | } 757 | convolution_param { 758 | num_output: 256 759 | pad: 1 760 | kernel_size: 3 761 | } 762 | } 763 | layer { 764 | bottom: "conv3_3_res05" 765 | top: "conv3_3_res05" 766 | name: "relu3_3_res05" 767 | type: "ReLU" 768 | } 769 | layer { 770 | bottom: "conv3_3_res05" 771 | top: "pool3_res05" 772 | name: "pool3_res05" 773 | type: "Pooling" 774 | pooling_param { 775 | pool: MAX 776 | kernel_size: 3 777 | stride: 2 778 | pad: 1 779 | } 780 | } 781 | layer { 782 | bottom: "pool3_res05" 783 | top: "conv4_1_res05" 784 | name: "conv4_1_res05" 785 | type: "Convolution" 786 | param { 787 | name: "conv4_1_w" 788 | lr_mult: 1 789 | decay_mult: 1 790 | } 791 | param { 792 | name: "conv4_1_b" 793 | lr_mult: 2 794 | decay_mult: 0 795 | } 796 | convolution_param { 797 | num_output: 512 798 | pad: 1 799 | kernel_size: 3 800 | } 801 | } 802 | layer { 803 | bottom: "conv4_1_res05" 804 | top: "conv4_1_res05" 805 | name: "relu4_1_res05" 806 | type: "ReLU" 807 | } 808 | layer { 809 | bottom: "conv4_1_res05" 810 | top: "conv4_2_res05" 811 | name: "conv4_2_res05" 812 | type: "Convolution" 813 | param { 814 | name: "conv4_2_w" 815 | lr_mult: 1 816 | decay_mult: 1 817 | } 818 | param { 819 | name: "conv4_2_b" 820 | lr_mult: 2 821 | decay_mult: 0 822 | } 823 | convolution_param { 824 | num_output: 512 825 | pad: 1 826 | kernel_size: 3 827 | } 828 | } 829 | layer { 830 | bottom: "conv4_2_res05" 831 | top: "conv4_2_res05" 832 | name: "relu4_2_res05" 833 | type: "ReLU" 834 | } 835 | layer { 836 | bottom: "conv4_2_res05" 837 | top: "conv4_3_res05" 838 | name: "conv4_3_res05" 839 | type: "Convolution" 840 | param { 841 | name: "conv4_3_w" 842 | lr_mult: 1 843 | decay_mult: 1 844 | } 845 | param { 846 | name: "conv4_3_b" 847 | lr_mult: 2 848 | decay_mult: 0 849 | } 850 | convolution_param { 851 | num_output: 512 852 | pad: 1 853 | kernel_size: 3 854 | } 855 | } 856 | layer { 857 | bottom: "conv4_3_res05" 858 | top: "conv4_3_res05" 859 | name: "relu4_3_res05" 860 | type: "ReLU" 861 | } 862 | layer { 863 | bottom: "conv4_3_res05" 864 | top: "pool4_res05" 865 | name: "pool4_res05" 866 | type: "Pooling" 867 | pooling_param { 868 | pool: MAX 869 | kernel_size: 3 870 | pad: 1 871 | stride: 1 872 | } 873 | } 874 | layer { 875 | bottom: "pool4_res05" 876 | top: "conv5_1_res05" 877 | name: "conv5_1_res05" 878 | type: "Convolution" 879 | param { 880 | name: "conv5_1_w" 881 | lr_mult: 1 882 | decay_mult: 1 883 | } 884 | param { 885 | name: "conv5_1_b" 886 | lr_mult: 2 887 | decay_mult: 0 888 | } 889 | convolution_param { 890 | num_output: 512 891 | #pad: 1 892 | pad: 2 893 | dilation: 2 894 | kernel_size: 3 895 | } 896 | } 897 | layer { 898 | bottom: "conv5_1_res05" 899 | top: "conv5_1_res05" 900 | name: "relu5_1_res05" 901 | type: "ReLU" 902 | } 903 | layer { 904 | bottom: "conv5_1_res05" 905 | top: "conv5_2_res05" 906 | name: "conv5_2_res05" 907 | type: "Convolution" 908 | param { 909 | name: "conv5_2_w" 910 | lr_mult: 1 911 | decay_mult: 1 912 | } 913 | param { 914 | name: "conv5_2_b" 915 | lr_mult: 2 916 | decay_mult: 0 917 | } 918 | convolution_param { 919 | num_output: 512 920 | #pad: 1 921 | pad: 2 922 | dilation: 2 923 | kernel_size: 3 924 | } 925 | } 926 | layer { 927 | bottom: "conv5_2_res05" 928 | top: "conv5_2_res05" 929 | name: "relu5_2_res05" 930 | type: "ReLU" 931 | } 932 | layer { 933 | bottom: "conv5_2_res05" 934 | top: "conv5_3_res05" 935 | name: "conv5_3_res05" 936 | type: "Convolution" 937 | param { 938 | name: "conv5_3_w" 939 | lr_mult: 1 940 | decay_mult: 1 941 | } 942 | param { 943 | name: "conv5_3_b" 944 | lr_mult: 2 945 | decay_mult: 0 946 | } 947 | convolution_param { 948 | num_output: 512 949 | #pad: 1 950 | pad: 2 951 | dilation: 2 952 | kernel_size: 3 953 | } 954 | } 955 | layer { 956 | bottom: "conv5_3_res05" 957 | top: "conv5_3_res05" 958 | name: "relu5_3_res05" 959 | type: "ReLU" 960 | } 961 | layer { 962 | bottom: "conv5_3_res05" 963 | top: "pool5_res05" 964 | name: "pool5_res05" 965 | type: "Pooling" 966 | pooling_param { 967 | pool: MAX 968 | #kernel_size: 2 969 | #stride: 2 970 | kernel_size: 3 971 | stride: 1 972 | pad: 1 973 | } 974 | } 975 | 976 | layer { 977 | bottom: "pool5_res05" 978 | top: "fc6_res05" 979 | name: "fc6_res05" 980 | type: "Convolution" 981 | param { 982 | name: "fc6_w" 983 | lr_mult: 1 984 | decay_mult: 1 985 | } 986 | param { 987 | name: "fc6_b" 988 | lr_mult: 2 989 | decay_mult: 0 990 | } 991 | convolution_param { 992 | num_output: 1024 993 | pad: 12 994 | dilation: 12 995 | kernel_size: 3 996 | } 997 | } 998 | layer { 999 | bottom: "fc6_res05" 1000 | top: "fc6_res05" 1001 | name: "relu6_res05" 1002 | type: "ReLU" 1003 | } 1004 | layer { 1005 | bottom: "fc6_res05" 1006 | top: "fc6_res05" 1007 | name: "drop6_res05" 1008 | type: "Dropout" 1009 | dropout_param { 1010 | dropout_ratio: 0.5 1011 | } 1012 | } 1013 | layer { 1014 | bottom: "fc6_res05" 1015 | top: "fc7_res05" 1016 | name: "fc7_res05" 1017 | type: "Convolution" 1018 | param { 1019 | name: "fc7_w" 1020 | lr_mult: 1 1021 | decay_mult: 1 1022 | } 1023 | param { 1024 | name: "fc7_b" 1025 | lr_mult: 2 1026 | decay_mult: 0 1027 | } 1028 | convolution_param { 1029 | num_output: 1024 1030 | kernel_size: 1 1031 | } 1032 | } 1033 | layer { 1034 | bottom: "fc7_res05" 1035 | top: "fc7_res05" 1036 | name: "relu7_res05" 1037 | type: "ReLU" 1038 | } 1039 | layer { 1040 | bottom: "fc7_res05" 1041 | top: "fc7_res05" 1042 | name: "drop7_res05" 1043 | type: "Dropout" 1044 | dropout_param { 1045 | dropout_ratio: 0.5 1046 | } 1047 | } 1048 | 1049 | ### NETWORK for resolution 3/4 ### 1050 | 1051 | layer { 1052 | bottom: "shrink_data075" 1053 | top: "conv1_1_res075" 1054 | name: "conv1_1_res075" 1055 | type: "Convolution" 1056 | param { 1057 | name: "conv1_1_w" 1058 | lr_mult: 1 1059 | decay_mult: 1 1060 | } 1061 | param { 1062 | name: "conv1_1_b" 1063 | lr_mult: 2 1064 | decay_mult: 0 1065 | } 1066 | convolution_param { 1067 | num_output: 64 1068 | pad: 1 1069 | kernel_size: 3 1070 | } 1071 | } 1072 | layer { 1073 | bottom: "conv1_1_res075" 1074 | top: "conv1_1_res075" 1075 | name: "relu1_1_res075" 1076 | type: "ReLU" 1077 | } 1078 | layer { 1079 | bottom: "conv1_1_res075" 1080 | top: "conv1_2_res075" 1081 | name: "conv1_2_res075" 1082 | type: "Convolution" 1083 | param { 1084 | name: "conv1_2_w" 1085 | lr_mult: 1 1086 | decay_mult: 1 1087 | } 1088 | param { 1089 | name: "conv1_2_b" 1090 | lr_mult: 2 1091 | decay_mult: 0 1092 | } 1093 | convolution_param { 1094 | num_output: 64 1095 | pad: 1 1096 | kernel_size: 3 1097 | } 1098 | } 1099 | layer { 1100 | bottom: "conv1_2_res075" 1101 | top: "conv1_2_res075" 1102 | name: "relu1_2_res075" 1103 | type: "ReLU" 1104 | } 1105 | layer { 1106 | bottom: "conv1_2_res075" 1107 | top: "pool1_res075" 1108 | name: "pool1_res075" 1109 | type: "Pooling" 1110 | pooling_param { 1111 | pool: MAX 1112 | kernel_size: 3 1113 | stride: 2 1114 | pad: 1 1115 | } 1116 | } 1117 | layer { 1118 | bottom: "pool1_res075" 1119 | top: "conv2_1_res075" 1120 | name: "conv2_1_res075" 1121 | type: "Convolution" 1122 | param { 1123 | name: "conv2_1_w" 1124 | lr_mult: 1 1125 | decay_mult: 1 1126 | } 1127 | param { 1128 | name: "conv2_1_b" 1129 | lr_mult: 2 1130 | decay_mult: 0 1131 | } 1132 | convolution_param { 1133 | num_output: 128 1134 | pad: 1 1135 | kernel_size: 3 1136 | } 1137 | } 1138 | layer { 1139 | bottom: "conv2_1_res075" 1140 | top: "conv2_1_res075" 1141 | name: "relu2_1_res075" 1142 | type: "ReLU" 1143 | } 1144 | layer { 1145 | bottom: "conv2_1_res075" 1146 | top: "conv2_2_res075" 1147 | name: "conv2_2_res075" 1148 | type: "Convolution" 1149 | param { 1150 | name: "conv2_2_w" 1151 | lr_mult: 1 1152 | decay_mult: 1 1153 | } 1154 | param { 1155 | name: "conv2_2_b" 1156 | lr_mult: 2 1157 | decay_mult: 0 1158 | } 1159 | convolution_param { 1160 | num_output: 128 1161 | pad: 1 1162 | kernel_size: 3 1163 | } 1164 | } 1165 | layer { 1166 | bottom: "conv2_2_res075" 1167 | top: "conv2_2_res075" 1168 | name: "relu2_2_res075" 1169 | type: "ReLU" 1170 | } 1171 | layer { 1172 | bottom: "conv2_2_res075" 1173 | top: "pool2_res075" 1174 | name: "pool2_res075" 1175 | type: "Pooling" 1176 | pooling_param { 1177 | pool: MAX 1178 | kernel_size: 3 1179 | stride: 2 1180 | pad: 1 1181 | } 1182 | } 1183 | layer { 1184 | bottom: "pool2_res075" 1185 | top: "conv3_1_res075" 1186 | name: "conv3_1_res075" 1187 | type: "Convolution" 1188 | param { 1189 | name: "conv3_1_w" 1190 | lr_mult: 1 1191 | decay_mult: 1 1192 | } 1193 | param { 1194 | name: "conv3_1_b" 1195 | lr_mult: 2 1196 | decay_mult: 0 1197 | } 1198 | convolution_param { 1199 | num_output: 256 1200 | pad: 1 1201 | kernel_size: 3 1202 | } 1203 | } 1204 | layer { 1205 | bottom: "conv3_1_res075" 1206 | top: "conv3_1_res075" 1207 | name: "relu3_1_res075" 1208 | type: "ReLU" 1209 | } 1210 | layer { 1211 | bottom: "conv3_1_res075" 1212 | top: "conv3_2_res075" 1213 | name: "conv3_2_res075" 1214 | type: "Convolution" 1215 | param { 1216 | name: "conv3_2_w" 1217 | lr_mult: 1 1218 | decay_mult: 1 1219 | } 1220 | param { 1221 | name: "conv3_2_b" 1222 | lr_mult: 2 1223 | decay_mult: 0 1224 | } 1225 | convolution_param { 1226 | num_output: 256 1227 | pad: 1 1228 | kernel_size: 3 1229 | } 1230 | } 1231 | layer { 1232 | bottom: "conv3_2_res075" 1233 | top: "conv3_2_res075" 1234 | name: "relu3_2_res075" 1235 | type: "ReLU" 1236 | } 1237 | layer { 1238 | bottom: "conv3_2_res075" 1239 | top: "conv3_3_res075" 1240 | name: "conv3_3_res075" 1241 | type: "Convolution" 1242 | param { 1243 | name: "conv3_3_w" 1244 | lr_mult: 1 1245 | decay_mult: 1 1246 | } 1247 | param { 1248 | name: "conv3_3_b" 1249 | lr_mult: 2 1250 | decay_mult: 0 1251 | } 1252 | convolution_param { 1253 | num_output: 256 1254 | pad: 1 1255 | kernel_size: 3 1256 | } 1257 | } 1258 | layer { 1259 | bottom: "conv3_3_res075" 1260 | top: "conv3_3_res075" 1261 | name: "relu3_3_res075" 1262 | type: "ReLU" 1263 | } 1264 | layer { 1265 | bottom: "conv3_3_res075" 1266 | top: "pool3_res075" 1267 | name: "pool3_res075" 1268 | type: "Pooling" 1269 | pooling_param { 1270 | pool: MAX 1271 | kernel_size: 3 1272 | stride: 2 1273 | pad: 1 1274 | } 1275 | } 1276 | layer { 1277 | bottom: "pool3_res075" 1278 | top: "conv4_1_res075" 1279 | name: "conv4_1_res075" 1280 | type: "Convolution" 1281 | param { 1282 | name: "conv4_1_w" 1283 | lr_mult: 1 1284 | decay_mult: 1 1285 | } 1286 | param { 1287 | name: "conv4_1_b" 1288 | lr_mult: 2 1289 | decay_mult: 0 1290 | } 1291 | convolution_param { 1292 | num_output: 512 1293 | pad: 1 1294 | kernel_size: 3 1295 | } 1296 | } 1297 | layer { 1298 | bottom: "conv4_1_res075" 1299 | top: "conv4_1_res075" 1300 | name: "relu4_1_res075" 1301 | type: "ReLU" 1302 | } 1303 | layer { 1304 | bottom: "conv4_1_res075" 1305 | top: "conv4_2_res075" 1306 | name: "conv4_2_res075" 1307 | type: "Convolution" 1308 | param { 1309 | name: "conv4_2_w" 1310 | lr_mult: 1 1311 | decay_mult: 1 1312 | } 1313 | param { 1314 | name: "conv4_2_b" 1315 | lr_mult: 2 1316 | decay_mult: 0 1317 | } 1318 | convolution_param { 1319 | num_output: 512 1320 | pad: 1 1321 | kernel_size: 3 1322 | } 1323 | } 1324 | layer { 1325 | bottom: "conv4_2_res075" 1326 | top: "conv4_2_res075" 1327 | name: "relu4_2_res075" 1328 | type: "ReLU" 1329 | } 1330 | layer { 1331 | bottom: "conv4_2_res075" 1332 | top: "conv4_3_res075" 1333 | name: "conv4_3_res075" 1334 | type: "Convolution" 1335 | param { 1336 | name: "conv4_3_w" 1337 | lr_mult: 1 1338 | decay_mult: 1 1339 | } 1340 | param { 1341 | name: "conv4_3_b" 1342 | lr_mult: 2 1343 | decay_mult: 0 1344 | } 1345 | convolution_param { 1346 | num_output: 512 1347 | pad: 1 1348 | kernel_size: 3 1349 | } 1350 | } 1351 | layer { 1352 | bottom: "conv4_3_res075" 1353 | top: "conv4_3_res075" 1354 | name: "relu4_3_res075" 1355 | type: "ReLU" 1356 | } 1357 | layer { 1358 | bottom: "conv4_3_res075" 1359 | top: "pool4_res075" 1360 | name: "pool4_res075" 1361 | type: "Pooling" 1362 | pooling_param { 1363 | pool: MAX 1364 | kernel_size: 3 1365 | pad: 1 1366 | stride: 1 1367 | } 1368 | } 1369 | layer { 1370 | bottom: "pool4_res075" 1371 | top: "conv5_1_res075" 1372 | name: "conv5_1_res075" 1373 | type: "Convolution" 1374 | param { 1375 | name: "conv5_1_w" 1376 | lr_mult: 1 1377 | decay_mult: 1 1378 | } 1379 | param { 1380 | name: "conv5_1_b" 1381 | lr_mult: 2 1382 | decay_mult: 0 1383 | } 1384 | convolution_param { 1385 | num_output: 512 1386 | #pad: 1 1387 | pad: 2 1388 | dilation: 2 1389 | kernel_size: 3 1390 | } 1391 | } 1392 | layer { 1393 | bottom: "conv5_1_res075" 1394 | top: "conv5_1_res075" 1395 | name: "relu5_1_res075" 1396 | type: "ReLU" 1397 | } 1398 | layer { 1399 | bottom: "conv5_1_res075" 1400 | top: "conv5_2_res075" 1401 | name: "conv5_2_res075" 1402 | type: "Convolution" 1403 | param { 1404 | name: "conv5_2_w" 1405 | lr_mult: 1 1406 | decay_mult: 1 1407 | } 1408 | param { 1409 | name: "conv5_2_b" 1410 | lr_mult: 2 1411 | decay_mult: 0 1412 | } 1413 | convolution_param { 1414 | num_output: 512 1415 | #pad: 1 1416 | pad: 2 1417 | dilation: 2 1418 | kernel_size: 3 1419 | } 1420 | } 1421 | layer { 1422 | bottom: "conv5_2_res075" 1423 | top: "conv5_2_res075" 1424 | name: "relu5_2_res075" 1425 | type: "ReLU" 1426 | } 1427 | layer { 1428 | bottom: "conv5_2_res075" 1429 | top: "conv5_3_res075" 1430 | name: "conv5_3_res075" 1431 | type: "Convolution" 1432 | param { 1433 | name: "conv5_3_w" 1434 | lr_mult: 1 1435 | decay_mult: 1 1436 | } 1437 | param { 1438 | name: "conv5_3_b" 1439 | lr_mult: 2 1440 | decay_mult: 0 1441 | } 1442 | convolution_param { 1443 | num_output: 512 1444 | #pad: 1 1445 | pad: 2 1446 | dilation: 2 1447 | kernel_size: 3 1448 | } 1449 | } 1450 | layer { 1451 | bottom: "conv5_3_res075" 1452 | top: "conv5_3_res075" 1453 | name: "relu5_3_res075" 1454 | type: "ReLU" 1455 | } 1456 | layer { 1457 | bottom: "conv5_3_res075" 1458 | top: "pool5_res075" 1459 | name: "pool5_res075" 1460 | type: "Pooling" 1461 | pooling_param { 1462 | pool: MAX 1463 | #kernel_size: 2 1464 | #stride: 2 1465 | kernel_size: 3 1466 | stride: 1 1467 | pad: 1 1468 | } 1469 | } 1470 | 1471 | layer { 1472 | bottom: "pool5_res075" 1473 | top: "fc6_res075" 1474 | name: "fc6_res075" 1475 | type: "Convolution" 1476 | param { 1477 | name: "fc6_w" 1478 | lr_mult: 1 1479 | decay_mult: 1 1480 | } 1481 | param { 1482 | name: "fc6_b" 1483 | lr_mult: 2 1484 | decay_mult: 0 1485 | } 1486 | convolution_param { 1487 | num_output: 1024 1488 | pad: 12 1489 | dilation: 12 1490 | kernel_size: 3 1491 | } 1492 | } 1493 | layer { 1494 | bottom: "fc6_res075" 1495 | top: "fc6_res075" 1496 | name: "relu6_res075" 1497 | type: "ReLU" 1498 | } 1499 | layer { 1500 | bottom: "fc6_res075" 1501 | top: "fc6_res075" 1502 | name: "drop6_res075" 1503 | type: "Dropout" 1504 | dropout_param { 1505 | dropout_ratio: 0.5 1506 | } 1507 | } 1508 | layer { 1509 | bottom: "fc6_res075" 1510 | top: "fc7_res075" 1511 | name: "fc7_res075" 1512 | type: "Convolution" 1513 | param { 1514 | name: "fc7_w" 1515 | lr_mult: 1 1516 | decay_mult: 1 1517 | } 1518 | param { 1519 | name: "fc7_b" 1520 | lr_mult: 2 1521 | decay_mult: 0 1522 | } 1523 | convolution_param { 1524 | num_output: 1024 1525 | kernel_size: 1 1526 | } 1527 | } 1528 | layer { 1529 | bottom: "fc7_res075" 1530 | top: "fc7_res075" 1531 | name: "relu7_res075" 1532 | type: "ReLU" 1533 | } 1534 | layer { 1535 | bottom: "fc7_res075" 1536 | top: "fc7_res075" 1537 | name: "drop7_res075" 1538 | type: "Dropout" 1539 | dropout_param { 1540 | dropout_ratio: 0.5 1541 | } 1542 | } 1543 | 1544 | ############### classifier for resolution 1################### 1545 | layer { 1546 | bottom: "fc7" 1547 | top: "fc8_${EXP}" 1548 | name: "fc8_${EXP}" 1549 | type: "Convolution" 1550 | param { 1551 | name: "fc8_w" 1552 | lr_mult: 10 1553 | decay_mult: 1 1554 | } 1555 | param { 1556 | name: "fc8_b" 1557 | lr_mult: 20 1558 | decay_mult: 0 1559 | } 1560 | convolution_param { 1561 | num_output: ${NUM_LABELS} 1562 | kernel_size: 1 1563 | weight_filler { 1564 | type: "gaussian" 1565 | std: 0.01 1566 | } 1567 | bias_filler { 1568 | type: "constant" 1569 | value: 0 1570 | } 1571 | } 1572 | } 1573 | 1574 | 1575 | ############### classifier for resolution 1/2 ################### 1576 | layer { 1577 | bottom: "fc7_res05" 1578 | top: "fc8_${EXP}_res05" 1579 | name: "fc8_${EXP}_res05" 1580 | type: "Convolution" 1581 | param { 1582 | name: "fc8_w" 1583 | lr_mult: 10 1584 | decay_mult: 1 1585 | } 1586 | param { 1587 | name: "fc8_b" 1588 | lr_mult: 20 1589 | decay_mult: 0 1590 | } 1591 | convolution_param { 1592 | num_output: ${NUM_LABELS} 1593 | kernel_size: 1 1594 | weight_filler { 1595 | type: "gaussian" 1596 | std: 0.01 1597 | } 1598 | bias_filler { 1599 | type: "constant" 1600 | value: 0 1601 | } 1602 | } 1603 | } 1604 | ############### classifier for resolution 3/4 ################### 1605 | layer { 1606 | bottom: "fc7_res075" 1607 | top: "fc8_${EXP}_res075" 1608 | name: "fc8_${EXP}_res075" 1609 | type: "Convolution" 1610 | param { 1611 | name: "fc8_w" 1612 | lr_mult: 10 1613 | decay_mult: 1 1614 | } 1615 | param { 1616 | name: "fc8_b" 1617 | lr_mult: 20 1618 | decay_mult: 0 1619 | } 1620 | convolution_param { 1621 | num_output: ${NUM_LABELS} 1622 | kernel_size: 1 1623 | weight_filler { 1624 | type: "gaussian" 1625 | std: 0.01 1626 | } 1627 | bias_filler { 1628 | type: "constant" 1629 | value: 0 1630 | } 1631 | } 1632 | } 1633 | ############### upsampling ################## 1634 | layer { 1635 | bottom: "fc7_res05" 1636 | top: "fc7_res05_interp" 1637 | name: "fc7_res05_interp" 1638 | type: "Interp" 1639 | interp_param { 1640 | zoom_factor: 2 1641 | pad_beg: 0 1642 | pad_end: 0 1643 | } 1644 | } 1645 | layer { 1646 | bottom: "fc7_res075" 1647 | top: "fc7_res075_interp" 1648 | name: "fc7_res075_interp" 1649 | type: "Interp" 1650 | interp_param { 1651 | zoom_factor: 4 1652 | shrink_factor: 3 1653 | pad_beg: 0 1654 | pad_end: 0 1655 | } 1656 | } 1657 | layer { 1658 | bottom: "fc8_${EXP}_res05" 1659 | top: "fc8_${EXP}_res05_interp" 1660 | name: "fc8_${EXP}_res05_interp" 1661 | type: "Interp" 1662 | interp_param { 1663 | zoom_factor: 2 1664 | pad_beg: 0 1665 | pad_end: 0 1666 | } 1667 | } 1668 | 1669 | 1670 | layer { 1671 | bottom: "fc8_${EXP}_res075" 1672 | top: "fc8_${EXP}_res075_interp" 1673 | name: "fc8_${EXP}_res075_interp" 1674 | type: "Interp" 1675 | interp_param { 1676 | zoom_factor: 4 1677 | shrink_factor: 3 1678 | pad_beg: 0 1679 | pad_end: 0 1680 | } 1681 | } 1682 | 1683 | 1684 | ############### concatenation and pass through attention model ######### 1685 | layer { 1686 | bottom: "fc7" 1687 | bottom: "fc7_res075_interp" 1688 | bottom: "fc7_res05_interp" 1689 | top: "fc7_concat" 1690 | name: "fc7_concat" 1691 | type: "Concat" 1692 | concat_param { 1693 | axis: 1 1694 | } 1695 | } 1696 | 1697 | ### attention model 1698 | # change lr_mult to be 1, since it has been pretrained on coco 1699 | layer { 1700 | bottom: "fc7_concat" 1701 | top: "att_conv1" 1702 | name: "att_conv1" 1703 | type: "Convolution" 1704 | param { 1705 | lr_mult: 1 1706 | decay_mult: 1 1707 | } 1708 | param { 1709 | lr_mult: 2 1710 | decay_mult: 0 1711 | } 1712 | convolution_param { 1713 | num_output: 512 1714 | kernel_size: 3 1715 | pad: 1 1716 | } 1717 | } 1718 | layer { 1719 | bottom: "att_conv1" 1720 | top: "att_conv1" 1721 | name: "relu_att_conv1" 1722 | type: "ReLU" 1723 | } 1724 | layer { 1725 | bottom: "att_conv1" 1726 | top: "att_conv1" 1727 | name: "drop_att_conv1" 1728 | type: "Dropout" 1729 | dropout_param { 1730 | dropout_ratio: 0.5 1731 | } 1732 | } 1733 | 1734 | layer { 1735 | bottom: "att_conv1" 1736 | top: "att_fc" 1737 | name: "att_fc" 1738 | type: "Convolution" 1739 | param { 1740 | lr_mult: 1 1741 | decay_mult: 1 1742 | } 1743 | param { 1744 | lr_mult: 2 1745 | decay_mult: 0 1746 | } 1747 | convolution_param { 1748 | num_output: 3 1749 | kernel_size: 1 1750 | } 1751 | } 1752 | layer { 1753 | bottom: "att_fc" 1754 | top: "attention" 1755 | name: "attention" 1756 | type: "Softmax" 1757 | } 1758 | 1759 | ############### collect the output from attention model ######## 1760 | layer { 1761 | bottom: "attention" 1762 | top: "attention1" 1763 | top: "attention2" 1764 | top: "attention3" 1765 | name: "slice_attention" 1766 | type: "Slice" 1767 | slice_param { 1768 | axis: 1 1769 | slice_point: 1 1770 | slice_point: 2 1771 | } 1772 | } 1773 | 1774 | ############### scale features ############# 1775 | layer { 1776 | bottom: "fc8_${EXP}" 1777 | bottom: "attention1" 1778 | top: "fc8_product" 1779 | name: "fc8_product" 1780 | type: "SpatialProduct" 1781 | } 1782 | 1783 | 1784 | layer { 1785 | bottom: "fc8_${EXP}_res075_interp" 1786 | bottom: "attention2" 1787 | top: "fc8_res075_product" 1788 | name: "fc8_res075_product" 1789 | type: "SpatialProduct" 1790 | } 1791 | 1792 | 1793 | layer { 1794 | bottom: "fc8_${EXP}_res05_interp" 1795 | bottom: "attention3" 1796 | top: "fc8_res05_product" 1797 | name: "fc8_res05_product" 1798 | type: "SpatialProduct" 1799 | } 1800 | 1801 | 1802 | ### add features ### 1803 | layer { 1804 | bottom: "fc8_product" 1805 | bottom: "fc8_res075_product" 1806 | bottom: "fc8_res05_product" 1807 | top: "fc8_fusion" 1808 | name: "fc8_fusion" 1809 | type: "Eltwise" 1810 | eltwise_param { 1811 | operation: SUM 1812 | } 1813 | } 1814 | ############### shrink label ################ 1815 | layer { 1816 | bottom: "label" 1817 | top: "label_shrink8" 1818 | name: "label_shrink8" 1819 | type: "Interp" 1820 | interp_param { 1821 | shrink_factor: 8 1822 | pad_beg: 0 1823 | pad_end: 0 1824 | } 1825 | } 1826 | layer { 1827 | name: "label_pose_1st" 1828 | type: "PoseEvaluate" 1829 | bottom: "label_shrink8" 1830 | top: "label_pose_1st" 1831 | pose_evaluate_param { 1832 | num_joint: 9 1833 | } 1834 | } 1835 | layer{ 1836 | name: "label_heatmap" 1837 | type: "PoseCreate" 1838 | bottom: "label_pose_1st" 1839 | bottom: "label_shrink8" 1840 | top: "label_heatmap" 1841 | pose_create_param { 1842 | num_joint: 9 1843 | } 1844 | } 1845 | ############### compute loss first order################# 1846 | layer { 1847 | name: "fc8_mask_1st" 1848 | type: "MaskCreate" 1849 | bottom: "fc8_fusion" 1850 | top: "fc8_mask_1st" 1851 | mask_create_param { 1852 | num_cls: 20 1853 | } 1854 | } 1855 | layer { 1856 | name: "fc8_pose_1st" 1857 | type: "PoseEvaluate" 1858 | bottom: "fc8_mask_1st" 1859 | top: "fc8_pose_1st" 1860 | pose_evaluate_param { 1861 | num_joint: 9 1862 | } 1863 | } 1864 | layer { 1865 | name: "predict_heatmap" 1866 | type: "PoseCreate" 1867 | bottom: "fc8_pose_1st" 1868 | bottom: "fc8_mask_1st" 1869 | top: "predict_heatmap" 1870 | pose_create_param { 1871 | num_joint: 9 1872 | } 1873 | } 1874 | layer { 1875 | name: "heatmap_error" 1876 | type: "HeatmapError" 1877 | bottom: "predict_heatmap" 1878 | bottom: "label_heatmap" 1879 | top: "heatmap_error" 1880 | heatmap_error_param { 1881 | scale: 0.05 1882 | } 1883 | propagate_down:false 1884 | propagate_down:false 1885 | } 1886 | layer { 1887 | name: "loss_first" 1888 | type: "SoftmaxWithLoss" 1889 | bottom: "fc8_fusion" 1890 | bottom: "label_shrink8" 1891 | bottom: "heatmap_error" 1892 | top: "loss_first" 1893 | loss_param { 1894 | ignore_label: 255 1895 | } 1896 | } 1897 | layer { 1898 | name: "accuracy_first" 1899 | type: "SegAccuracy" 1900 | bottom: "fc8_fusion" 1901 | bottom: "label_shrink8" 1902 | top: "accuracy_first" 1903 | seg_accuracy_param { 1904 | ignore_label: 255 1905 | } 1906 | } 1907 | 1908 | ############## add supervsion to fc8's directly ########## 1909 | ## resolution 1 1910 | layer { 1911 | name: "loss_first_res1" 1912 | type: "SoftmaxWithLoss" 1913 | bottom: "fc8_${EXP}" 1914 | bottom: "label_shrink8" 1915 | bottom: "heatmap_error" 1916 | top: "loss_first_res1" 1917 | loss_param { 1918 | ignore_label: 255 1919 | } 1920 | include: { phase: TRAIN } 1921 | } 1922 | layer { 1923 | name: "accuracy_first_res1" 1924 | type: "SegAccuracy" 1925 | bottom: "fc8_${EXP}" 1926 | bottom: "label_shrink8" 1927 | top: "accuracy_first_res1" 1928 | seg_accuracy_param { 1929 | ignore_label: 255 1930 | } 1931 | } 1932 | 1933 | ## resolution 3/4 1934 | # interp layer does not support 3/4 scale for ground truth 1935 | # instead we interpolate score map 1936 | layer { 1937 | name: "loss_first_res075" 1938 | type: "SoftmaxWithLoss" 1939 | bottom: "fc8_${EXP}_res075_interp" 1940 | bottom: "label_shrink8" 1941 | bottom: "heatmap_error" 1942 | top: "loss_first_res075" 1943 | loss_param { 1944 | ignore_label: 255 1945 | } 1946 | include: { phase: TRAIN } 1947 | } 1948 | layer { 1949 | name: "accuracy_first_res075" 1950 | type: "SegAccuracy" 1951 | bottom: "fc8_${EXP}_res075_interp" 1952 | bottom: "label_shrink8" 1953 | top: "accuracy_first_res075" 1954 | seg_accuracy_param { 1955 | ignore_label: 255 1956 | } 1957 | } 1958 | 1959 | ## resolution 1/2 1960 | layer { 1961 | bottom: "label" 1962 | top: "label_shrink16" 1963 | name: "label_shrink16" 1964 | type: "Interp" 1965 | interp_param { 1966 | shrink_factor: 16 1967 | pad_beg: 0 1968 | pad_end: 0 1969 | } 1970 | } 1971 | layer { 1972 | name: "loss_first_res05" 1973 | type: "SoftmaxWithLoss" 1974 | bottom: "fc8_${EXP}_res05" 1975 | bottom: "label_shrink16" 1976 | bottom: "heatmap_error" 1977 | top: "loss_first_res05" 1978 | loss_param { 1979 | ignore_label: 255 1980 | } 1981 | include: { phase: TRAIN } 1982 | } 1983 | layer { 1984 | name: "accuracy_first_res05" 1985 | type: "SegAccuracy" 1986 | bottom: "fc8_${EXP}_res05" 1987 | bottom: "label_shrink16" 1988 | top: "accuracy_first_res05" 1989 | seg_accuracy_param { 1990 | ignore_label: 255 1991 | } 1992 | } 1993 | 1994 | 1995 | 1996 | --------------------------------------------------------------------------------