├── lib └── .gitignore ├── .gitmodules ├── cnn_finetuning ├── vgg19 │ ├── mean.mat │ ├── mean.binaryproto │ ├── solver_template.prototxt │ ├── deploy.prototxt │ ├── deploy_template.prototxt │ └── train_val_template.prototxt ├── googlenet │ ├── mean.mat │ ├── mean.npy │ ├── mean.binaryproto │ ├── solver_template.prototxt │ ├── deploy.prototxt │ └── deploy_template.prototxt ├── caffe_reference │ ├── mean.mat │ ├── mean.binaryproto │ ├── solver_template.prototxt │ ├── deploy.prototxt │ ├── deploy_template.prototxt │ └── train_val_template.prototxt └── finetuning.m ├── part_based_classification ├── .activity.csv ├── createTrainTest.m └── part_box_classification_multiscale.m ├── part_generation ├── convert_locs_to_CUB200_format.m ├── parts_locs_from_grads.m └── fitGMMToGradient.m ├── README.md ├── setup.m ├── start.m ├── patch_filtering └── selsearch_object_detector.m └── part_selection └── evaluate_part_locs_anchor_multiview.m /lib/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !caffe_pp 4 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib/caffe_pp"] 2 | path = lib/caffe_pp 3 | url = https://github.com/cvjena/caffe_pp.git 4 | -------------------------------------------------------------------------------- /cnn_finetuning/vgg19/mean.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/vgg19/mean.mat -------------------------------------------------------------------------------- /cnn_finetuning/googlenet/mean.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/googlenet/mean.mat 
-------------------------------------------------------------------------------- /cnn_finetuning/googlenet/mean.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/googlenet/mean.npy -------------------------------------------------------------------------------- /cnn_finetuning/vgg19/mean.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/vgg19/mean.binaryproto -------------------------------------------------------------------------------- /cnn_finetuning/caffe_reference/mean.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/caffe_reference/mean.mat -------------------------------------------------------------------------------- /cnn_finetuning/googlenet/mean.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/googlenet/mean.binaryproto -------------------------------------------------------------------------------- /cnn_finetuning/caffe_reference/mean.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/caffe_reference/mean.binaryproto -------------------------------------------------------------------------------- /part_based_classification/.activity.csv: -------------------------------------------------------------------------------- 1 | Logged off at Fr 3. Apr 12:04:17 CEST 2015 2 | Logged off at Mo 13. Apr 10:34:12 CEST 2015 3 | Logged off at Di 14. Apr 18:41:13 CEST 2015 4 | Logged off at Fr 17. 
Apr 14:46:19 CEST 2015 5 | Logged off at Di 21. Apr 07:23:58 CEST 2015 6 | Logged off at Do 23. Apr 01:49:19 CEST 2015 7 | -------------------------------------------------------------------------------- /cnn_finetuning/vgg19/solver_template.prototxt: -------------------------------------------------------------------------------- 1 | net: "train_val.prototxt" 2 | test_iter: 40 3 | test_interval: ##MAX_ITER## 4 | base_lr: 0.001 5 | lr_policy: "step" 6 | gamma: 0.1 7 | stepsize: 9000 8 | display: 20 9 | max_iter: ##MAX_ITER## 10 | momentum: 0.9 11 | weight_decay: 0.0005 12 | snapshot: ##MAX_ITER## 13 | snapshot_prefix: "model_ft" 14 | -------------------------------------------------------------------------------- /cnn_finetuning/caffe_reference/solver_template.prototxt: -------------------------------------------------------------------------------- 1 | net: "train_val.prototxt" 2 | test_iter: 40 3 | test_interval: ##MAX_ITER## 4 | base_lr: 0.001 5 | lr_policy: "step" 6 | gamma: 0.1 7 | stepsize: 9000 8 | display: 20 9 | max_iter: ##MAX_ITER## 10 | momentum: 0.9 11 | weight_decay: 0.0005 12 | snapshot: ##MAX_ITER## 13 | snapshot_prefix: "model_ft" 14 | -------------------------------------------------------------------------------- /cnn_finetuning/googlenet/solver_template.prototxt: -------------------------------------------------------------------------------- 1 | net: "train_val.prototxt" 2 | test_iter: 40 3 | test_interval: ##MAX_ITER## 4 | test_initialization: true 5 | display: 20 6 | average_loss: 40 7 | base_lr: 0.001 8 | lr_policy: "step" 9 | gamma: 0.1 10 | stepsize: 24000 11 | max_iter: ##MAX_ITER## 12 | momentum: 0.9 13 | weight_decay: 0.0002 14 | snapshot: ##MAX_ITER## 15 | snapshot_prefix: "model_ft" 16 | solver_mode: GPU 17 | -------------------------------------------------------------------------------- /part_based_classification/createTrainTest.m: --------------------------------------------------------------------------------
function [ tr_ID ] = createTrainTest( labels, min_tr, min_tr_percentage )
%CREATETRAINTEST Draw a random per-class training/test split.
%   tr_ID = createTrainTest(labels, min_tr, min_tr_percentage) returns a
%   logical array with the same size as LABELS that is true for every
%   element selected for training.
%
%   labels            - numeric class label of every sample.
%   min_tr            - a class with at most this many samples is used
%                       entirely for training; larger classes contribute
%                       at least this many training samples.
%   min_tr_percentage - fraction of each larger class drawn for training.
%
%   The selection uses randperm, so the result depends on the state of the
%   global random number generator.

    tr_ID = false(size(labels));
    % Iterate over the label values that actually occur. Counting 1..K and
    % testing labels==c would skip classes whenever the labels are not
    % exactly the integers 1..K (e.g. zero-based or with gaps).
    class_values = unique(labels);
    for k = 1:numel(class_values)
        class_elements = find(labels == class_values(k));
        n_class = numel(class_elements);
        if n_class <= min_tr
            tr_ID(class_elements) = true;
        else
            % Never request more samples than the class holds; randperm
            % raises an error otherwise (possible if the fraction is > 1).
            n_train = min(n_class, max(min_tr, ceil(min_tr_percentage * n_class)));
            tr_ID(class_elements(randperm(n_class, n_train))) = true;
        end
    end
end
-------------------------------------------------------------------------------- /part_generation/convert_locs_to_CUB200_format.m: --------------------------------------------------------------------------------
function [ part_locs2 ] = convert_locs_to_CUB200_format( part_locs )
%CONVERT_LOCS_TO_CUB200_FORMAT Flatten part locations into a 5-column table.
%   part_locs  - image_count x part_count x 2 array of locations; an entry
%                containing NaN marks a part without a location.
%   part_locs2 - (image_count*part_count) x 5 matrix ordered image-major,
%                part-minor. Each row is [image part loc1 loc2 1] for a
%                located part and [image part -1 -1 0] otherwise.

    [image_count, part_count, ~] = size(part_locs);
    part_locs2 = nan(image_count * part_count, 5);
    for i = 1:image_count
        for p = 1:part_count
            row = (i-1)*part_count + p;
            if any(isnan(part_locs(i,p,:)))
                part_locs2(row,:) = [i p -1 -1 0];
            else
                part_locs2(row,:) = [i p reshape(part_locs(i,p,:),1,2) 1];
            end
        end
    end
end
-------------------------------------------------------------------------------- /part_generation/parts_locs_from_grads.m: --------------------------------------------------------------------------------
function [ part_locs ] = parts_locs_from_grads(opts)
%PARTS_LOCS_FROM_GRADS Estimate one location per channel from gradient maps.
%   For every image named in opts.imagelist_file and every channel
%   1..opts.part_layer_channel_count, the gradient returned by
%   caffe_gradients is reduced to a 2-D map and fitGMMToGradient estimates a
%   location on it. The result is converted with
%   convert_locs_to_CUB200_format, saved to opts.part_loc_file under the
%   variable name 'part_locs', and returned.
%
%   Fields of opts read here: imagedir, imagelist_file, part_layer,
%   part_layer_channel_count, mean_mat_file, batch_size, crop_size, deploy,
%   model, parfor_workers, parfor_arg, gpu_count, verbose_output,
%   part_loc_file.

    imagedir = opts.imagedir;

    % Read the image list (one whitespace-delimited entry per image).
    fid = fopen(opts.imagelist_file,'r');
    if fid < 0
        error('Could not open image list %s', opts.imagelist_file);
    end
    imagelist = textscan(fid,'%s');
    fclose(fid);
    imagelist = imagelist{1};

    layer = opts.part_layer;
    part_count = opts.part_layer_channel_count;

    mean_file = opts.mean_mat_file;
    batch_size = opts.batch_size;
    crop_size = opts.crop_size;
    deploy = opts.deploy;
    model = opts.model;

    % Initialise the network once per worker.
    % NOTE(review): the last argument is presumably the GPU id, spreading
    % the workers over opts.gpu_count devices - confirm against matcaffe_init.
    parfor (i=1:opts.parfor_workers, opts.parfor_arg)
        matcaffe_init(1,deploy,model,1,mod(i,opts.gpu_count));
    end

    fprintf('%s\n',datestr(now));
    % Estimated part locations for all images and parts; NaN = no estimate.
    part_locs=nan(size(imagelist,1), part_count,2);
    parfor (i = 1:size(imagelist,1), opts.parfor_arg)
        if opts.verbose_output
            fprintf('Image %i: %s\n',i, imagelist{i});
        end
        g=caffe_gradients(imread([imagedir '/' imagelist{i}]),layer,(1:part_count)',mean_file,batch_size,crop_size);
        for p=1:part_count
            % Collapse the third dimension of the gradient into one map.
            gmap = squeeze(sum(abs(g(:,:,:,p)),3));
            % Leave the location at NaN when the map contains NaN or is
            % zero everywhere.
            if sum(isnan(gmap(:))) >0 || sum(gmap(:)~=0)<1
                continue
            end
            % A zero image is passed in place of the real image; the last
            % argument is the num_clusters parameter of fitGMMToGradient.
            [est_x,est_y]=fitGMMToGradient(zeros(crop_size,crop_size,3),gmap,[],2);
            part_locs(i,p,:)=[est_x,est_y];
        end
    end
    part_locs = convert_locs_to_CUB200_format(part_locs);
    save(opts.part_loc_file,'part_locs');
end
-------------------------------------------------------------------------------- /cnn_finetuning/finetuning.m: --------------------------------------------------------------------------------
function [ ] = finetuning( num_classes, opts )
%FINETUNING Prepare a Caffe fine-tuning directory and wait for manual training.
%   Fills the prototxt templates found in opts.cnn_dir, writes them together
%   with the mean file to opts.finetuning_dir, prints the caffe command the
%   user has to run, waits for <enter> and finally removes the
%   *.solverstate files from opts.finetuning_dir.
%
%   num_classes - number of classes of the data set.
%   opts        - struct; fields read here: finetuning_dir, cnn_dir,
%                 finetuning_iters, mean_proto_file, caffe_executable,
%                 finetuning_gpu.
%
%   Raises an error when one of the files cannot be created.

    olddir = pwd;
    % mkdir warns when the directory already exists, so only create it
    % when it is missing.
    if ~exist(opts.finetuning_dir,'dir')
        mkdir(opts.finetuning_dir);
    end

    % Adjust and copy proto files.
    % NOTE(review): the networks get num_classes+1 outputs - presumably one
    % extra (background) class; confirm.
    finetuning_fill_template('NUM_CLASSES', num_classes+1, ...
        [opts.cnn_dir '/train_val_template.prototxt'], ...
        [opts.finetuning_dir '/train_val.prototxt'], 'train_val.prototxt');
    finetuning_fill_template('NUM_CLASSES', num_classes+1, ...
        [opts.cnn_dir '/deploy_template.prototxt'], ...
        [opts.finetuning_dir '/deploy_ft.prototxt'], 'deploy_ft.prototxt');
    finetuning_fill_template('MAX_ITER', opts.finetuning_iters, ...
        [opts.cnn_dir '/solver_template.prototxt'], ...
        [opts.finetuning_dir '/solver.prototxt'], 'solver.prototxt');
    if 0~=system(['cp ''' opts.mean_proto_file ''' ''' opts.finetuning_dir '/mean.binaryproto'''])
        error('Error copying mean.binaryproto')
    end

    % Paths are passed as arguments, not as part of the format string, so
    % that '%' or '\' inside a path is printed literally.
    fprintf('\n\nNow open a bash, go to %s and run:\n', opts.finetuning_dir);
    fprintf('# %s train -solver=solver.prototxt -weights=''%s/model'' -gpu=%s \n', ...
        opts.caffe_executable, opts.cnn_dir, int2str(opts.finetuning_gpu));
    fprintf('Hit enter when training has finished!');
    input('','s');

    % Remove the solver states left behind by the training run.
    cd(opts.finetuning_dir);
    if 0~=system('rm ./*.solverstate')
        warning('Did not delete any solverstate files.')
    end
    cd(olddir)
end

function finetuning_fill_template( placeholder, value, template_file, output_file, label )
% Replace every ##placeholder## in template_file by the integer value and
% write the result to output_file; raise an error naming label on failure.
    cmd = ['sed ''s/##' placeholder '##/' int2str(value) '/g'' ''' template_file ''' > ''' output_file ''''];
    if 0~=system(cmd)
        error('Error creating %s', label);
    end
end
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Part Constellation Models 2 | 3 | This is the code used in our paper "Neural Activation Constellations: Unsupervised Part Model Discovery with Convolutional Networks" by Marcel Simon and Erik Rodner published at ICCV 2015.
4 | If you would like to refer to this work, please cite the corresponding paper: 5 | 6 | @inproceedings{Simon15:NAC, 7 | author = {Marcel Simon and Erik Rodner}, 8 | booktitle = {International Conference on Computer Vision (ICCV)}, 9 | title = {Neural Activation Constellations: Unsupervised Part Model Discovery with Convolutional Networks}, 10 | year = {2015}, 11 | } 12 | 13 | The following steps will guide you through the usage of the code. 14 | 15 | ## 1. Setup 16 | 1. Open Matlab and go to the folder containing this package 17 | 2. Run setup.m to download all libraries 18 | 3. Run `git submodule update --init --recursive` in the main folder to fetch caffe_pp, then go to lib/caffe_pp and build it with `make`. You will need to create a Makefile.config first. If you have an existing Caffe installation, you can reuse its Makefile.config, BUT DO NOT USE THE EXISTING CAFFE ITSELF, as caffe_pp is a modified version. 19 | 4. Execute `make mat` in `lib/caffe_pp` 20 | 5. Go to lib/liblinear-2.1 and make it 21 | 6. Go to lib/liblinear-2.1/matlab and make it 22 | 23 | ## 2. Running the code 24 | 25 | The `start.m` in the root folder of the package is all you need. You will want to override the paths to the data set by passing them as name-value pairs, for example `start('basedir','/path/to/dataset/')`. Open `start.m` to see all available options. Additional parameters are passed as further name-value pairs: `start('basedir','/path/to/dataset/','cnn_dir','./cnn_finetuning/vgg19/','crop_size',224);`. 26 | 27 | The dataset files should contain a list of absolute image paths, a list of corresponding labels starting from 1, and a list of the corresponding assignment to train and test, where 1 indicates training and 0 test. 28 | 29 | 30 | imagelist.txt 31 | 32 | ``` 33 | /path/to/image1.jpg 34 | /path/to/image2.jpg 35 | /path/to/image3.jpg 36 | /path/to/image4.jpg 37 | /path/to/image5.jpg 38 | ... 39 | ``` 40 | 41 | labels.txt 42 | 43 | ``` 44 | 1 45 | 1 46 | 1 47 | 2 48 | 2 49 | 2 50 | ... 51 | ``` 52 | 53 | tr_ID.txt 54 | 55 | ``` 56 | 0 57 | 1 58 | 1 59 | 0 60 | 1 61 | 1 62 | ... 63 | ``` 64 | 65 | ## 3. 
Testing the models from the paper 66 | The models of the paper are available at [https://drive.google.com/file/d/0B6VgjAr4t_oTQXN2Y3VYaEMwVDA/view?usp=sharing](https://drive.google.com/file/d/0B6VgjAr4t_oTQXN2Y3VYaEMwVDA/view?usp=sharing). Download and unzip them to the root folder of the code. You can run them by executing, for example, `start('cache_dir','./cache_iccv_cub200','cnn_dir','./cnn_finetuning/vgg19/','crop_size',224,'basedir','/home/simon/Datasets/CUB_200_2011/')`. 67 | 68 | ## License 69 | The Part Constellation Models Framework by [Marcel Simon](http://www.inf-cv.uni-jena.de/simon.html) and [Erik Rodner](http://www.inf-cv.uni-jena.de/rodner.html) is licensed under the non-commercial license [Creative Commons Attribution 4.0 International License](http://creativecommons.org/licenses/by-nc-sa/4.0/). For usage beyond the scope of this license, please contact [Marcel Simon](http://www.inf-cv.uni-jena.de/simon.html). 70 | -------------------------------------------------------------------------------- /setup.m: --------------------------------------------------------------------------------
function setup()
%SETUP Download the third-party libraries (and optionally models) into ./lib.
%   Must be run from the root folder of the package, because all paths are
%   relative to it. Libraries that are already present are skipped. Raises
%   an error when a download or an unpacking step fails. Prints the
%   remaining manual build steps at the end.

    %% Get libs
    if ~exist('lib/SelectiveSearchCodeIJCV','file')
        getlib('http://koen.me/research/downloads/SelectiveSearchCodeIJCV.zip');
    else
        fprintf('Selective search exists already, skipping...\n');
    end

    % The wildcard accepts any liblinear version that is already unpacked.
    na = dir('lib/liblinea*');
    if numel(na)==0
        getlib('http://www.csie.ntu.edu.tw/~cjlin/cgi-bin/liblinear.cgi?+http://www.csie.ntu.edu.tw/~cjlin/liblinear+zip');
    else
        fprintf('liblinear exists already, skipping...\n');
    end

    if ~exist('lib/GetFullPath.m','file')
        getlib('http://www.mathworks.com/matlabcentral/mlc-downloads/downloads/submissions/28249/versions/8/download/zip')
        % Remove the other files that come with the archive.
        system('rm -f lib/GetFullPath.c lib/InstallMex.m lib/Readme.txt lib/license.txt lib/uTest_GetFullPath.m');
    else
        fprintf('GetFullPath.m exists already, skipping...\n');
    end

    if ~exist('lib/vl_argparse.m','file')
        if 0~=system('wget --no-check-certificate -O lib/vl_argparse.m https://raw.githubusercontent.com/vlfeat/matconvnet/master/matlab/vl_argparse.m')
            % wget -O creates the target file even when the download fails;
            % remove it so that the next run does not skip the download.
            system('rm -f lib/vl_argparse.m');
            error('Could not download vl_argparse.m');
        end
    else
        fprintf('vl_argparse exists already, skipping...\n');
    end

    %% Get models
    % Each row is {target file, download URL, expected SHA-1 checksum}.
    required_files = {};
    % required_files = [required_files;{'cnn_finetuning/googlenet/model','http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel','405fc5acd08a3bb12de8ee5e23a96bec22f08204'}];
    % required_files = [required_files;{'cnn_finetuning/vgg19/model','http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel','239785e7862442717d831f682bb824055e51e9ba'}];
    % required_files = [required_files;{'cnn_finetuning/caffe_reference/model','http://dl.caffe.berkeleyvision.org/bvlc_reference_caffenet.caffemodel','4c8d77deb20ea792f84eb5e6d0a11ca0a8660a46'}];

    for i=1:size(required_files,1)
        target = required_files{i,1};
        url = required_files{i,2};
        if exist(target,'file')
            % Re-download when the checksum of the existing file differs.
            [~,sha1sum] = system(['sha1sum ''' target ''' | awk ''{ print $1 }''']);
            if strcmp(strtrim(sha1sum), required_files{i,3})
                fprintf('%s exists already, skipping...\n',target);
                continue
            else
                fprintf('%s exists but is corrupt, downloading again...\n',target);
            end
        end
        if 0~=system(['wget -O ''' target ''' ''' url ''''])
            error('Could not download file %s from %s\n',target,url);
        end
    end

    fprintf('\n\nSetup done, now clone caffe_pp and go to ./lib/ and compile all libraries and Matlab interfaces!\n');
    fprintf('1. ''git submodule update --init --recursive'' in the main folder\n');
    fprintf('2. ''make'' in ./lib/caffe_pp/\n');
    fprintf('3. ''make mat'' in ./lib/caffe_pp/\n');
    fprintf('4. ''make'' in ./lib/liblinear-2.1/\n');
    fprintf('5. ''make'' in ./lib/liblinear-2.1/matlab/\n');
end

function getlib(url)
%GETLIB Download the zip archive at URL and unpack it into ./lib.
%   The archive is stored temporarily as tmp/lib.zip; the file and the tmp
%   directory are removed afterwards. Raises an error when a step fails.

    % -p keeps the call silent when tmp is left over from an earlier run.
    if 0~= system('mkdir -p tmp')
        error('Could not create directory tmp');
    end
    % The URL is quoted so that the shell does not interpret characters
    % such as '?', '~' or '&' inside it.
    if 0~= system(['wget -O tmp/lib.zip ''' url ''''])
        % Best-effort clean-up of the partial download.
        system('rm -f tmp/lib.zip');
        system('rmdir tmp');
        error('Could not download code');
    end
    if 0~= system('cd lib && unzip ../tmp/lib.zip ')
        error('Could not unzip');
    end
    if 0~= system('rm tmp/lib.zip')
        error('Could not remove temporary file');
    end
    if 0~= system('rmdir tmp')
        error('Could not remove directory tmp');
    end
end
-------------------------------------------------------------------------------- /cnn_finetuning/caffe_reference/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | input: "data" 3 | input_dim: 11 4 | input_dim: 3 5 | input_dim: 227 6 | input_dim: 227 7 | layers { 8 | name: "conv1" 9 | type: CONVOLUTION 10 | bottom: "data" 11 | top: "conv1" 12 | convolution_param { 13 | num_output: 96 14 | kernel_size: 11 15 | stride: 4 16 | } 17 | } 18 | layers { 19 | name: "relu1" 20 | type: RELU 21 | bottom: "conv1" 22 | top: "conv1" 23 | } 24 | layers { 25 | name: "pool1" 26 | type: POOLING 27 | bottom: "conv1" 28 | top: "pool1" 29 | pooling_param { 30 | pool: MAX 31 | kernel_size: 3 32 | stride: 2 33 | } 34 | } 35 | layers { 36 | name: "norm1" 37 | type: LRN 38 | bottom: "pool1" 39 | top: "norm1" 40 | lrn_param { 41 | local_size: 5 42 | alpha: 0.0001 43 | beta: 0.75 44 | } 45 | } 46 | layers { 47 | name: "conv2" 48 | type: CONVOLUTION 49 | bottom: "norm1" 50 | top: "conv2" 51 | convolution_param { 52 | num_output: 256 53 | pad: 2 54 | kernel_size: 5 55 | group: 2 56 | } 57 | } 58 | layers { 59 | name: "relu2" 60 | type: RELU 61 | bottom: "conv2" 62 | top: "conv2" 63 | } 64 | layers { 65 | name: "pool2" 66 | type: POOLING 67 | bottom: "conv2" 68 | top: "pool2" 69 | pooling_param { 70 | pool: MAX 71 | kernel_size: 3 72 | stride: 2 73 | } 74 | } 75 | layers { 76 | name: "norm2" 77 | type: LRN 78 | bottom: "pool2" 79 | top: "norm2"
80 | lrn_param { 81 | local_size: 5 82 | alpha: 0.0001 83 | beta: 0.75 84 | } 85 | } 86 | layers { 87 | name: "conv3" 88 | type: CONVOLUTION 89 | bottom: "norm2" 90 | top: "conv3" 91 | convolution_param { 92 | num_output: 384 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layers { 98 | name: "relu3" 99 | type: RELU 100 | bottom: "conv3" 101 | top: "conv3" 102 | } 103 | layers { 104 | name: "conv4" 105 | type: CONVOLUTION 106 | bottom: "conv3" 107 | top: "conv4" 108 | convolution_param { 109 | num_output: 384 110 | pad: 1 111 | kernel_size: 3 112 | group: 2 113 | } 114 | } 115 | layers { 116 | name: "relu4" 117 | type: RELU 118 | bottom: "conv4" 119 | top: "conv4" 120 | } 121 | layers { 122 | name: "conv5" 123 | type: CONVOLUTION 124 | bottom: "conv4" 125 | top: "conv5" 126 | convolution_param { 127 | num_output: 256 128 | pad: 1 129 | kernel_size: 3 130 | group: 2 131 | } 132 | } 133 | layers { 134 | name: "relu5" 135 | type: RELU 136 | bottom: "conv5" 137 | top: "conv5" 138 | } 139 | layers { 140 | name: "pool5" 141 | type: POOLING 142 | bottom: "conv5" 143 | top: "pool5" 144 | pooling_param { 145 | pool: MAX 146 | kernel_size: 3 147 | stride: 2 148 | } 149 | } 150 | layers { 151 | name: "fc6" 152 | type: INNER_PRODUCT 153 | bottom: "pool5" 154 | top: "fc6" 155 | inner_product_param { 156 | num_output: 4096 157 | } 158 | } 159 | layers { 160 | name: "relu6" 161 | type: RELU 162 | bottom: "fc6" 163 | top: "fc6" 164 | } 165 | layers { 166 | name: "drop6" 167 | type: DROPOUT 168 | bottom: "fc6" 169 | top: "fc6" 170 | dropout_param { 171 | dropout_ratio: 0.5 172 | } 173 | } 174 | layers { 175 | name: "fc7" 176 | type: INNER_PRODUCT 177 | bottom: "fc6" 178 | top: "fc7" 179 | inner_product_param { 180 | num_output: 4096 181 | } 182 | } 183 | layers { 184 | name: "relu7" 185 | type: RELU 186 | bottom: "fc7" 187 | top: "fc7" 188 | } 189 | layers { 190 | name: "drop7" 191 | type: DROPOUT 192 | bottom: "fc7" 193 | top: "fc7" 194 | dropout_param { 195 | dropout_ratio: 
0.5 196 | } 197 | } 198 | layers { 199 | name: "fc8" 200 | type: INNER_PRODUCT 201 | bottom: "fc7" 202 | top: "fc8" 203 | inner_product_param { 204 | num_output: 1000 205 | } 206 | } 207 | layers { 208 | name: "prob" 209 | type: SOFTMAX 210 | bottom: "fc8" 211 | top: "prob" 212 | } 213 | -------------------------------------------------------------------------------- /cnn_finetuning/caffe_reference/deploy_template.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | input: "data" 3 | input_dim: 11 4 | input_dim: 3 5 | input_dim: 227 6 | input_dim: 227 7 | layers { 8 | name: "conv1" 9 | type: CONVOLUTION 10 | bottom: "data" 11 | top: "conv1" 12 | convolution_param { 13 | num_output: 96 14 | kernel_size: 11 15 | stride: 4 16 | } 17 | } 18 | layers { 19 | name: "relu1" 20 | type: RELU 21 | bottom: "conv1" 22 | top: "conv1" 23 | } 24 | layers { 25 | name: "pool1" 26 | type: POOLING 27 | bottom: "conv1" 28 | top: "pool1" 29 | pooling_param { 30 | pool: MAX 31 | kernel_size: 3 32 | stride: 2 33 | } 34 | } 35 | layers { 36 | name: "norm1" 37 | type: LRN 38 | bottom: "pool1" 39 | top: "norm1" 40 | lrn_param { 41 | local_size: 5 42 | alpha: 0.0001 43 | beta: 0.75 44 | } 45 | } 46 | layers { 47 | name: "conv2" 48 | type: CONVOLUTION 49 | bottom: "norm1" 50 | top: "conv2" 51 | convolution_param { 52 | num_output: 256 53 | pad: 2 54 | kernel_size: 5 55 | group: 2 56 | } 57 | } 58 | layers { 59 | name: "relu2" 60 | type: RELU 61 | bottom: "conv2" 62 | top: "conv2" 63 | } 64 | layers { 65 | name: "pool2" 66 | type: POOLING 67 | bottom: "conv2" 68 | top: "pool2" 69 | pooling_param { 70 | pool: MAX 71 | kernel_size: 3 72 | stride: 2 73 | } 74 | } 75 | layers { 76 | name: "norm2" 77 | type: LRN 78 | bottom: "pool2" 79 | top: "norm2" 80 | lrn_param { 81 | local_size: 5 82 | alpha: 0.0001 83 | beta: 0.75 84 | } 85 | } 86 | layers { 87 | name: "conv3" 88 | type: CONVOLUTION 89 | bottom: "norm2" 90 | top: "conv3" 91 | 
convolution_param { 92 | num_output: 384 93 | pad: 1 94 | kernel_size: 3 95 | } 96 | } 97 | layers { 98 | name: "relu3" 99 | type: RELU 100 | bottom: "conv3" 101 | top: "conv3" 102 | } 103 | layers { 104 | name: "conv4" 105 | type: CONVOLUTION 106 | bottom: "conv3" 107 | top: "conv4" 108 | convolution_param { 109 | num_output: 384 110 | pad: 1 111 | kernel_size: 3 112 | group: 2 113 | } 114 | } 115 | layers { 116 | name: "relu4" 117 | type: RELU 118 | bottom: "conv4" 119 | top: "conv4" 120 | } 121 | layers { 122 | name: "conv5" 123 | type: CONVOLUTION 124 | bottom: "conv4" 125 | top: "conv5" 126 | convolution_param { 127 | num_output: 256 128 | pad: 1 129 | kernel_size: 3 130 | group: 2 131 | } 132 | } 133 | layers { 134 | name: "relu5" 135 | type: RELU 136 | bottom: "conv5" 137 | top: "conv5" 138 | } 139 | layers { 140 | name: "pool5" 141 | type: POOLING 142 | bottom: "conv5" 143 | top: "pool5" 144 | pooling_param { 145 | pool: MAX 146 | kernel_size: 3 147 | stride: 2 148 | } 149 | } 150 | layers { 151 | name: "fc6" 152 | type: INNER_PRODUCT 153 | bottom: "pool5" 154 | top: "fc6" 155 | inner_product_param { 156 | num_output: 4096 157 | } 158 | } 159 | layers { 160 | name: "relu6" 161 | type: RELU 162 | bottom: "fc6" 163 | top: "fc6" 164 | } 165 | layers { 166 | name: "drop6" 167 | type: DROPOUT 168 | bottom: "fc6" 169 | top: "fc6" 170 | dropout_param { 171 | dropout_ratio: 0.5 172 | } 173 | } 174 | layers { 175 | name: "fc7" 176 | type: INNER_PRODUCT 177 | bottom: "fc6" 178 | top: "fc7" 179 | inner_product_param { 180 | num_output: 4096 181 | } 182 | } 183 | layers { 184 | name: "relu7" 185 | type: RELU 186 | bottom: "fc7" 187 | top: "fc7" 188 | } 189 | layers { 190 | name: "drop7" 191 | type: DROPOUT 192 | bottom: "fc7" 193 | top: "fc7" 194 | dropout_param { 195 | dropout_ratio: 0.5 196 | } 197 | } 198 | layers { 199 | name: "fc8_ft" 200 | type: INNER_PRODUCT 201 | bottom: "fc7" 202 | top: "fc8_ft" 203 | inner_product_param { 204 | num_output: ##NUM_CLASSES## 205 
| } 206 | } 207 | layers { 208 | name: "prob" 209 | type: SOFTMAX 210 | bottom: "fc8_ft" 211 | top: "prob" 212 | } 213 | -------------------------------------------------------------------------------- /part_generation/fitGMMToGradient.m: -------------------------------------------------------------------------------- 1 | function [ x,y ] = fitGMMToGradient(imagepath, gmap,bbox, num_clusters ) 2 | %fitGMMToGradient: 3 | % bbox = [col row width height] 4 | 5 | 6 | % d=load(gradient_path); 7 | % gmap=d.gradient_map; 8 | img=zeros(227,227,3);%imread(imagepath); 9 | % % set gradient outside of bounding box to 0 10 | % bbox_orig=bbox; 11 | % rect_size=min(size(img(:,:,1))); 12 | % ratio = max(227.0 / size(img,1), 227.0 / size(img,2)); 13 | % bbox=int32(floor(bbox*ratio)); 14 | % h_offset = ceil(size(img,1)*ratio - 227) / 2; 15 | % w_offset = ceil(size(img,2)*ratio - 227) / 2; 16 | % bbox(1)=max(w_offset+1,bbox(1)); 17 | % bbox(2)=max(h_offset+1,bbox(2)); 18 | % bbox(3)=bbox(3)-max(w_offset-bbox(1),0); 19 | % bbox(4)=bbox(4)-max(h_offset-bbox(2),0); 20 | % bbox(1)=bbox(1)-w_offset+1; 21 | % bbox(2)=bbox(2)-h_offset+1; 22 | % bbox(3)=min(227-bbox(1),bbox(3)); 23 | % bbox(4)=min(227-bbox(2),bbox(4)); 24 | % mask = ones(size(gmap)); 25 | % mask(bbox(2):bbox(2)+bbox(4),bbox(1):bbox(1)+bbox(3))=0; 26 | % gmap(logical(mask))=0; 27 | 28 | if (false) 29 | % simplify calculation 30 | gmap(gmap0 42 | [ tr_ID ] = createTrainTest( labels, rand_tr_images, opts.rand_tr_part ); 43 | else 44 | train_test=logical(load(tr_ID_file)); 45 | tr_ID = train_test;%(:,2); 46 | end 47 | % Bounding boxes 48 | bboxes = []; 49 | if use_bounding_box 50 | % bboxes = load([basedir '/bounding_boxes.txt'])+1; 51 | bboxes = load(bbox_file); 52 | end 53 | 54 | if use_parts 55 | parts = array2table(part_locs,'VariableNames',{'Var1','Var2','Var3','Var4','Var5'}); 56 | % parts = readtable('/home/simon/Datasets/CUB_200_2011/parts/part_locs.txt','Delimiter',' ','ReadVariableNames',false); 57 | % parts = 
readtable('/home/simon/Datasets/CUB_200_2011/parts/est_part_locs.txt','Delimiter',' ','ReadVariableNames',false); 58 | parts.Properties.VariableNames{'Var1'} = 'image'; 59 | parts.Properties.VariableNames{'Var2'} = 'part'; 60 | parts.Properties.VariableNames{'Var3'} = 'x'; 61 | parts.Properties.VariableNames{'Var4'} = 'y'; 62 | parts.Properties.VariableNames{'Var5'} = 'visible'; 63 | 64 | part_ids = unique(parts.part); 65 | image_ids = unique(parts.image); 66 | part_count = numel(part_ids); 67 | image_count = numel(image_ids); 68 | 69 | parts_x = reshape(parts.x,part_count,image_count); 70 | parts_y = reshape(parts.y,part_count,image_count); 71 | else 72 | image_count = size(imagelist,1); 73 | channel_ids = []; 74 | parts = []; 75 | part_count = []; 76 | parts_x = []; 77 | parts_y = []; 78 | end 79 | 80 | if use_flipped 81 | flipped_image_count = image_count + sum(tr_ID); 82 | labels = [labels;labels(tr_ID)]; 83 | image_idx = [(1:image_count)';find(tr_ID)]; 84 | tr_ID = [tr_ID;true(sum(tr_ID),1)]; 85 | else 86 | flipped_image_count = image_count; 87 | image_idx = (1:image_count)'; 88 | end 89 | 90 | matcaffe_init(1,deploy,model,1,0); 91 | if opts.use_parts 92 | f = caffe_features({[0]},layer_parts,mean_file,batch_size,crop_size); 93 | else 94 | f = []; 95 | end 96 | f2 = caffe_features({[0]},layer_image,mean_file,batch_size,crop_size); 97 | caffe('reset'); 98 | 99 | if use_parfor 100 | if ~isempty(gcp('nocreate'))%matlabpool('size') 101 | pctRunOnAll caffe('reset') 102 | else 103 | caffe('reset'); 104 | parpool(parfor_workers); 105 | end 106 | end 107 | parfor (i=1:parfor_workers, opts.parfor_arg) 108 | matcaffe_init(1,deploy,model,1,mod(i,opts.gpu_count)); 109 | end 110 | 111 | num_patches_per_image = 0; 112 | if use_parts 113 | num_patches_per_image = num_patches_per_image + numel(part_scales)*numel(channel_ids); 114 | end 115 | if use_bounding_box 116 | num_patches_per_image = num_patches_per_image+1; 117 | end 118 | if pyramid_levels>0 119 | 
num_patches_per_image = num_patches_per_image+(1-4^(pyramid_levels+1))/-3 - 1; 120 | end 121 | 122 | features = sparse(flipped_image_count, num_patches_per_image*size(f,2)+size(f2,2)); 123 | % visible = true(flipped_image_count, numel(channel_ids)); 124 | feature_count = size(features,2); 125 | parfor (i=1:flipped_image_count, opts.parfor_arg) % randperm(image_count)%[1:10 11788+(1:10)]% 126 | cur_image_idx = image_idx(i); 127 | if opts.verbose_output 128 | fprintf('Working on %i: %s\n',i,imagelist{cur_image_idx}); 129 | end 130 | im = imread([imagedir '/' imagelist{cur_image_idx}]); 131 | if i>image_count 132 | im = flip(im,2); 133 | end 134 | batch_data = {}; 135 | missing_data = false(0,0); 136 | 137 | if use_bounding_box 138 | cur_box = bboxes(cur_image_idx,:); 139 | cur_box(4) = min(cur_box(4),size(im,2)-cur_box(2)+1); 140 | cur_box(5) = min(cur_box(5),size(im,1)-cur_box(3)+1); 141 | batch_data = [batch_data;im(cur_box(3):(cur_box(3)+cur_box(5)-1),cur_box(2):(cur_box(2)+cur_box(4)-1),:)]; 142 | missing_data = [missing_data;false]; 143 | % box_size = 0.5*sqrt(cur_box(4)*cur_box(5)); 144 | end 145 | 146 | if use_parts 147 | for part_scale = part_scales 148 | % Get all relevant and visible part positions 149 | selection = parts.visible((cur_image_idx-1)*part_count + channel_ids); 150 | visible_channels = channel_ids(logical(selection)); 151 | cur_locs = [parts_x(channel_ids,cur_image_idx) parts_y(channel_ids,cur_image_idx)]; 152 | if scale_relative_to_bbox 153 | box_size = 0.5*sqrt(cur_box(4)*cur_box(5)); 154 | else 155 | box_size = part_scale*sqrt(size(im,1)*size(im,2)); 156 | end 157 | for c=1:size(cur_locs,1) 158 | if parts.visible((cur_image_idx-1)*part_count + channel_ids(c)) 159 | x=cur_locs(c,1); 160 | y=cur_locs(c,2); 161 | if i>image_count 162 | x=227-x; 163 | end 164 | ratio_x = 227.0 / size(im,2); 165 | ratio_y = 227.0 / size(im,1); 166 | x=int32(x/ratio_x); 167 | y=int32(y/ratio_y); 168 | x_min = max(x-box_size/2, 1); 169 | x_max = min(x+box_size/2, 
size(im,2)); 170 | y_min = max(y-box_size/2, 1); 171 | y_max = min(y+box_size/2, size(im,1)); 172 | batch_data = [batch_data; im(int32(y_min:y_max),int32(x_min:x_max),:)]; 173 | missing_data = [missing_data;false]; 174 | else 175 | batch_data = [batch_data; [125]]; 176 | missing_data = [missing_data;true]; 177 | end 178 | end 179 | end 180 | end 181 | % Add spatial pyramid levels of image 182 | for l=pyramid_levels:-1:1 183 | x = fix(size(im,2)/(2^l)); 184 | y = fix(size(im,1)/(2^l)); 185 | if (x==0 || y==0) 186 | error('Image too small for spm'); 187 | end 188 | xx=0; 189 | yy=0; 190 | while xx+x<=size(im,2) 191 | while yy +y <=size(im,1) 192 | batch_data = [batch_data;im(yy+1:yy+y,xx+1:xx+x,:)]; 193 | missing_data = [missing_data;false]; 194 | yy = yy+y; 195 | end 196 | yy = 0; 197 | xx = xx+x; 198 | end 199 | end 200 | % Add the image 201 | batch_data = [batch_data; im]; 202 | missing_data = [missing_data;false]; 203 | tmp = caffe_features(batch_data,layer_image,mean_file,batch_size,crop_size)'; 204 | features(i,:) = tmp(:); 205 | end 206 | 207 | if opts.store_features 208 | save([opts.cache_dir '/feats.mat'],'features','labels','tr_ID','-v7.3'); 209 | end 210 | 211 | ORR_total = ones(opts.repetitions,1); 212 | ARR_total = ones(opts.repetitions,1); 213 | for i=1:opts.repetitions 214 | if rand_tr_images>0 215 | [ tr_ID ] = createTrainTest( labels, rand_tr_images, rand_tr_part ); 216 | end 217 | % Train and test 218 | model = train(labels(tr_ID,:),(features(tr_ID,:)),params); 219 | [pred,acc_cur,~] = predict(labels(~tr_ID,:),(features(~tr_ID,:)),model); 220 | 221 | % evaluate 222 | cm = confusionmat(labels(~tr_ID),pred); 223 | acc=sum(diag(cm))/sum(cm(:))*100; 224 | cm = cm./repmat(sum(cm,2),1,size(cm,2)); 225 | map=nanmean(diag(cm)./sum(cm,2))*100; 226 | ORR_total(i,1)=acc; 227 | ARR_total(i,1)=map; 228 | fprintf('Run %i ORR=%5.2f ARR=%5.2f\n',i, ORR_total(i,1),ARR_total(i,1)); 229 | end 230 | fprintf('Mean over %i runs:\n',opts.repetitions); 231 | 
function [ output_args ] = selsearch_object_detector( channel_ids, part_locs, part_visibility, opts )
%SELSEARCH_OBJECT_DETECTOR Write Caffe window files from proposals and part patches.
%
%   For every image this function
%     1. computes selective-search proposals and drops boxes that are not
%        more than 30 px wide and high,
%     2. appends one square patch per visible part in CHANNEL_IDS and per
%        scale in opts.part_scales,
%     3. optionally (opts.estimate_bbox) classifies all boxes with the CNN
%        and stores the most confident one as estimated bounding box,
%     4. writes all boxes to the train or validation window file. The
%        "overlap" column holds 1 for boxes selected as foreground and 0
%        otherwise.
%
%   Inputs:
%     channel_ids     - part indices for which part patches are generated
%     part_locs       - N x 5 numeric matrix, columns
%                       [image, part, x, y, visible]. x/y are rescaled by
%                       (image size / opts.crop_size) before use.
%                       NOTE(review): the reshape below assumes the rows are
%                       sorted by image, then by part - confirm with caller.
%     part_visibility - indexed as part_visibility(i,:); the non-zero
%                       entries of row i are the parts used for the
%                       foreground vote of image i
%     opts            - struct; fields read here: caffe_window_file_train,
%                       caffe_window_file_val, est_bbox_file, imagedir,
%                       imagelist_file, tr_ID_file, labels_file,
%                       estimate_bbox, part_scales, mean_mat_file,
%                       batch_size, crop_size, deploy, model,
%                       verbose_output, calculate_mean
%
%   Output:
%     output_args     - unused; set to [] only when an output is requested
%                       so that "x = selsearch_object_detector(...)" does
%                       not fail.
%
%   Side effects: writes both window files, opts.est_bbox_file (if
%   opts.estimate_bbox) and 'tmp_mean.mat' (if opts.calculate_mean).

    if nargout > 0
        output_args = [];
    end

    %% Settings
    channels_for_boxes = channel_ids;
    imagedir = opts.imagedir;
    part_scales = opts.part_scales;

    add_est_bbox = opts.estimate_bbox;
    write_bbox = opts.estimate_bbox;
    add_part_patches = true;
    add_proposals = true;
    write_proposals = true;

    % Selective search configuration (loop invariant, hence set up once).
    % The original picked the first entry of
    % {'Hsv','Lab','RGI','H','Intensity'} via a comma-separated list.
    color_type = 'Hsv';
    sim_functions = {@SSSimColourTextureSizeFillOrig, @SSSimTextureSizeFill, ...
                     @SSSimBoxFillOrig, @SSSimSize};
    % Thresholds for the Felzenszwalb and Huttenlocher segmentation.
    ss_k = 200;          % controls size of segments of initial segmentation
    ss_min_size = ss_k;
    ss_sigma = 0.8;

    % Side length of the accumulated mean image.
    % NOTE(review): fixed to 224 although opts.crop_size is used elsewhere
    % in this function - confirm whether this should follow crop_size.
    mean_size = 224;

    % Init caffe
    mean_file = opts.mean_mat_file;
    batch_size = opts.batch_size;
    crop_size = opts.crop_size;
    if write_bbox
        matcaffe_init(1,opts.deploy,opts.model,1,0);
    end

    %% Read image list, train/test split and labels
    fid = fopen(opts.imagelist_file,'r');
    if fid < 0
        error('selsearch_object_detector:fileOpen', ...
            'Could not open image list %s', opts.imagelist_file);
    end
    imagelist = textscan(fid,'%s');
    imagelist = imagelist{1};
    fclose(fid);

    tr_ID = logical(load(opts.tr_ID_file));
    % Make sure labels start at 1
    labels = load(opts.labels_file);
    labels = labels-min(labels(:))+1;

    %% Preparation
    parts_visible = part_locs(:,5);
    part_count = numel(unique(part_locs(:,2)));
    image_count = numel(unique(part_locs(:,1)));
    parts_x = reshape(part_locs(:,3),part_count,image_count);
    parts_y = reshape(part_locs(:,4),part_count,image_count);

    if write_proposals
        fid_train = fopen(opts.caffe_window_file_train,'w');
        if fid_train < 0
            error('selsearch_object_detector:fileOpen', ...
                'Could not open %s for writing', opts.caffe_window_file_train);
        end
        % onCleanup closes the file on normal exit as well as on error
        close_train = onCleanup(@() fclose(fid_train)); %#ok<NASGU>
        fid_val = fopen(opts.caffe_window_file_val,'w');
        if fid_val < 0
            error('selsearch_object_detector:fileOpen', ...
                'Could not open %s for writing', opts.caffe_window_file_val);
        end
        close_val = onCleanup(@() fclose(fid_val)); %#ok<NASGU>
        i_train = 0;
        i_test = 0;
        % Sum of all resized windows, divided by their count at the end
        mean_image = uint64(zeros(mean_size,mean_size,3));
        total_image_count = 0;
    end
    if write_bbox
        bbox_locs = nan(image_count,5);
    end

    %% Calculation starts
    fprintf('%s\n',datestr(now));
    for i=1:image_count
        if opts.verbose_output
            fprintf('Working on %i: %s\n',i,imagelist{i});
        end

        % fullfile inserts the separator only when it is missing, so the
        % path used for reading and the path written to the window file
        % are always identical and valid.
        image_path = fullfile(imagedir, imagelist{i});
        im = imread(image_path);
        if size(im,3)==1
            im = repmat(im,1,1,3);
        end
        % Part coordinates are divided by these ratios to map them into
        % the pixel grid of the image.
        ratio_x = crop_size / size(im,2);
        ratio_y = crop_size / size(im,1);

        %% Get the proposals for the image, rows are [y_min x_min y_max x_max]
        if add_proposals
            boxes = Image2HierarchicalGrouping(im, ss_sigma, ss_k, ss_min_size, ...
                color_type, sim_functions);
            boxes = BoxRemoveDuplicates(boxes);
        else
            boxes = [];
        end

        %% Filter out boxes which are too small or too narrow
        if ~isempty(boxes)
            box_height = boxes(:,3)-boxes(:,1);
            box_width = boxes(:,4)-boxes(:,2);
            boxes = boxes(box_height>30 & box_width>30,:);
        end
        if isempty(boxes)
            % Always keep the whole image, both when selective search
            % returns nothing and when the filter removed every proposal.
            boxes = [1 1 size(im,1) size(im,2)];
        end

        %% Transform part locs to actual part locations in the image
        if add_proposals
            % Get all relevant and visible part positions
            visible_ids = find(part_visibility(i,:));
            selection = parts_visible((i-1)*part_count + visible_ids);
            cur_channels = visible_ids(logical(selection));
            cur_locs = [parts_x(cur_channels,i) parts_y(cur_channels,i)];
            % int32() rounds to the nearest integer like the original
            % per-element loop did
            cur_locs(:,1) = double(int32(cur_locs(:,1)/ratio_x));
            cur_locs(:,2) = double(int32(cur_locs(:,2)/ratio_y));
        else
            cur_channels = [];
            cur_locs = [];
        end

        %% Add part based boxes
        box_part_selection = false(size(boxes,1),1);
        if add_part_patches
            for part_scale = part_scales
                % Patch side length relative to the image size
                box_size = part_scale*sqrt(size(im,1)*size(im,2));
                for c=1:numel(channels_for_boxes)
                    channel = channels_for_boxes(c);
                    if parts_visible((i-1)*part_count + channel)
                        x = int32(parts_x(channel,i)/ratio_x);
                        y = int32(parts_y(channel,i)/ratio_y);
                        % Clip the patch to the image borders
                        x_min = max(x-box_size/2, 1);
                        x_max = min(x+box_size/2, size(im,2));
                        y_min = max(y-box_size/2, 1);
                        y_max = min(y+box_size/2, size(im,1));
                        boxes = [boxes;y_min x_min y_max x_max]; %#ok<AGROW>
                        box_part_selection = [box_part_selection;true]; %#ok<AGROW>
                    end
                end
            end
        end

        %% Bounding box estimation
        if write_bbox
            % Classify all boxes
            batch_data = cell(size(boxes,1),1);
            for b=1:size(boxes,1)
                batch_data{b} = im(boxes(b,1):boxes(b,3),boxes(b,2):boxes(b,4),:);
            end
            probs = caffe_features(batch_data,'prob',mean_file,batch_size,crop_size);
            if tr_ID(i)
                pred_class = labels(i,:);
            else
                % Use the most confident classification result over all
                % boxes as class prediction; column 1 is skipped
                [~,pred_class] = max(max(probs(:,2:end),[],1),[],2);
            end
            % Take the bbox with the most sure classification
            [~,idx] = sort(-probs(:,pred_class+1));
            % Stored as [image, x, y, width, height]
            bbox_locs(i,:) = [i, ...
                boxes(idx(1),2), boxes(idx(1),1), ...
                boxes(idx(1),4)-boxes(idx(1),2), boxes(idx(1),3)-boxes(idx(1),1)];
        end

        %% Proposals
        if write_proposals
            if add_proposals
                %% Decide foreground and background boxes according to part location
                % Count how many parts are inside each box
                part_hits = zeros(size(boxes,1),1);
                for loc = cur_locs'
                    % loc has shape [x=col; y=row]
                    part_hits = part_hits + ...
                        (boxes(:,1)<=loc(2) & boxes(:,3)>=loc(2) & ...
                         boxes(:,2)<=loc(1) & boxes(:,4)>=loc(1));
                end
                % Foreground = box misses at most two of the visible parts.
                % NOTE(review): with fewer than three visible parts every
                % box passes this test - confirm this is intended.
                fg_bg_selection = part_hits>numel(cur_channels)-3;

                box_selection = fg_bg_selection | box_part_selection;
                % The first box is always marked as foreground.
                % NOTE(review): the original comment calls it "the full
                % image"; that only holds for the fallback box - confirm
                % the ordering returned by selective search.
                box_selection(1) = true;
            elseif add_part_patches
                box_selection = box_part_selection;
            else
                box_selection = [];
            end
            % Add the estimated bounding box
            if add_est_bbox
                boxes = [boxes;[bbox_locs(i,3),bbox_locs(i,2),...
                    bbox_locs(i,5)+bbox_locs(i,3),bbox_locs(i,4)+bbox_locs(i,2)]];
                box_selection = [box_selection;true];
            end

            if opts.verbose_output
                fprintf('Found %i boxes\n',sum(box_selection));
            end

            %% Now store these bboxes in text file
            if tr_ID(i,:)
                fid = fid_train;
                fprintf(fid,'# %i\n',i_train); % Image id
                i_train = i_train + 1;
            else
                fid = fid_val;
                fprintf(fid,'# %i\n',i_test); % Image id
                i_test = i_test + 1;
            end
            fprintf(fid,'%s\n',image_path);     % image path
            fprintf(fid,'%i\n',3);              % num channels
            fprintf(fid,'%i\n',size(im,1));     % height
            fprintf(fid,'%i\n',size(im,2));     % width
            fprintf(fid,'%i\n',size(boxes,1));  % num_windows
            for b = 1:size(boxes,1)
                % class, foreground flag, x_min, y_min, x_max, y_max
                fprintf(fid,'%i %i %.0f %.0f %.0f %.0f\n',labels(i,:),box_selection(b,:),...
                    boxes(b,2),boxes(b,1),boxes(b,4),boxes(b,3));
                if opts.calculate_mean
                    window = im(boxes(b,1):boxes(b,3),boxes(b,2):boxes(b,4),:);
                    mean_image = mean_image + uint64(imresize(window,[mean_size mean_size]));
                    total_image_count = total_image_count + 1;
                end
            end
        end
    end

    if write_proposals && opts.calculate_mean
        % Convert before dividing: an integer division would round the
        % mean to whole numbers.
        mean_image = double(mean_image);
        if total_image_count > 0
            mean_image = mean_image/total_image_count;
        end
        save('tmp_mean.mat','mean_image');
    end
    if write_bbox
        dlmwrite(opts.est_bbox_file,bbox_locs,'Delimiter',' ');
    end
end
52 | blobs_lr: 1 53 | blobs_lr: 2 54 | weight_decay: 1 55 | weight_decay: 0 56 | convolution_param { 57 | num_output: 64 58 | pad: 1 59 | kernel_size: 3 60 | weight_filler { 61 | type: "gaussian" 62 | std: 0.01 63 | } 64 | bias_filler { 65 | type: "constant" 66 | value: 0 67 | } 68 | } 69 | } 70 | layers { 71 | bottom: "conv1_1" 72 | top: "conv1_1" 73 | name: "relu1_1" 74 | type: RELU 75 | } 76 | layers { 77 | bottom: "conv1_1" 78 | top: "conv1_2" 79 | name: "conv1_2" 80 | type: CONVOLUTION 81 | blobs_lr: 1 82 | blobs_lr: 2 83 | weight_decay: 1 84 | weight_decay: 0 85 | convolution_param { 86 | num_output: 64 87 | pad: 1 88 | kernel_size: 3 89 | weight_filler { 90 | type: "gaussian" 91 | std: 0.01 92 | } 93 | bias_filler { 94 | type: "constant" 95 | value: 0 96 | } 97 | } 98 | } 99 | layers { 100 | bottom: "conv1_2" 101 | top: "conv1_2" 102 | name: "relu1_2" 103 | type: RELU 104 | } 105 | layers { 106 | bottom: "conv1_2" 107 | top: "pool1" 108 | name: "pool1" 109 | type: POOLING 110 | pooling_param { 111 | pool: MAX 112 | kernel_size: 2 113 | stride: 2 114 | } 115 | } 116 | layers { 117 | bottom: "pool1" 118 | top: "conv2_1" 119 | name: "conv2_1" 120 | type: CONVOLUTION 121 | blobs_lr: 1 122 | blobs_lr: 2 123 | weight_decay: 1 124 | weight_decay: 0 125 | convolution_param { 126 | num_output: 128 127 | pad: 1 128 | kernel_size: 3 129 | weight_filler { 130 | type: "gaussian" 131 | std: 0.01 132 | } 133 | bias_filler { 134 | type: "constant" 135 | value: 0 136 | } 137 | } 138 | } 139 | layers { 140 | bottom: "conv2_1" 141 | top: "conv2_1" 142 | name: "relu2_1" 143 | type: RELU 144 | } 145 | layers { 146 | bottom: "conv2_1" 147 | top: "conv2_2" 148 | name: "conv2_2" 149 | type: CONVOLUTION 150 | blobs_lr: 1 151 | blobs_lr: 2 152 | weight_decay: 1 153 | weight_decay: 0 154 | convolution_param { 155 | num_output: 128 156 | pad: 1 157 | kernel_size: 3 158 | weight_filler { 159 | type: "gaussian" 160 | std: 0.01 161 | } 162 | bias_filler { 163 | type: "constant" 164 | 
value: 0 165 | } 166 | } 167 | } 168 | layers { 169 | bottom: "conv2_2" 170 | top: "conv2_2" 171 | name: "relu2_2" 172 | type: RELU 173 | } 174 | layers { 175 | bottom: "conv2_2" 176 | top: "pool2" 177 | name: "pool2" 178 | type: POOLING 179 | pooling_param { 180 | pool: MAX 181 | kernel_size: 2 182 | stride: 2 183 | } 184 | } 185 | layers { 186 | bottom: "pool2" 187 | top: "conv3_1" 188 | name: "conv3_1" 189 | type: CONVOLUTION 190 | blobs_lr: 1 191 | blobs_lr: 2 192 | weight_decay: 1 193 | weight_decay: 0 194 | convolution_param { 195 | num_output: 256 196 | pad: 1 197 | kernel_size: 3 198 | weight_filler { 199 | type: "gaussian" 200 | std: 0.01 201 | } 202 | bias_filler { 203 | type: "constant" 204 | value: 0 205 | } 206 | } 207 | } 208 | layers { 209 | bottom: "conv3_1" 210 | top: "conv3_1" 211 | name: "relu3_1" 212 | type: RELU 213 | } 214 | layers { 215 | bottom: "conv3_1" 216 | top: "conv3_2" 217 | name: "conv3_2" 218 | type: CONVOLUTION 219 | blobs_lr: 1 220 | blobs_lr: 2 221 | weight_decay: 1 222 | weight_decay: 0 223 | convolution_param { 224 | num_output: 256 225 | pad: 1 226 | kernel_size: 3 227 | weight_filler { 228 | type: "gaussian" 229 | std: 0.01 230 | } 231 | bias_filler { 232 | type: "constant" 233 | value: 0 234 | } 235 | } 236 | } 237 | layers { 238 | bottom: "conv3_2" 239 | top: "conv3_2" 240 | name: "relu3_2" 241 | type: RELU 242 | } 243 | layers { 244 | bottom: "conv3_2" 245 | top: "conv3_3" 246 | name: "conv3_3" 247 | type: CONVOLUTION 248 | blobs_lr: 1 249 | blobs_lr: 2 250 | weight_decay: 1 251 | weight_decay: 0 252 | convolution_param { 253 | num_output: 256 254 | pad: 1 255 | kernel_size: 3 256 | weight_filler { 257 | type: "gaussian" 258 | std: 0.01 259 | } 260 | bias_filler { 261 | type: "constant" 262 | value: 0 263 | } 264 | } 265 | } 266 | layers { 267 | bottom: "conv3_3" 268 | top: "conv3_3" 269 | name: "relu3_3" 270 | type: RELU 271 | } 272 | layers { 273 | bottom: "conv3_3" 274 | top: "conv3_4" 275 | name: "conv3_4" 276 | type: 
CONVOLUTION 277 | blobs_lr: 1 278 | blobs_lr: 2 279 | weight_decay: 1 280 | weight_decay: 0 281 | convolution_param { 282 | num_output: 256 283 | pad: 1 284 | kernel_size: 3 285 | weight_filler { 286 | type: "gaussian" 287 | std: 0.01 288 | } 289 | bias_filler { 290 | type: "constant" 291 | value: 0 292 | } 293 | } 294 | } 295 | layers { 296 | bottom: "conv3_4" 297 | top: "conv3_4" 298 | name: "relu3_4" 299 | type: RELU 300 | } 301 | layers { 302 | bottom: "conv3_4" 303 | top: "pool3" 304 | name: "pool3" 305 | type: POOLING 306 | pooling_param { 307 | pool: MAX 308 | kernel_size: 2 309 | stride: 2 310 | } 311 | } 312 | layers { 313 | bottom: "pool3" 314 | top: "conv4_1" 315 | name: "conv4_1" 316 | type: CONVOLUTION 317 | blobs_lr: 1 318 | blobs_lr: 2 319 | weight_decay: 1 320 | weight_decay: 0 321 | convolution_param { 322 | num_output: 512 323 | pad: 1 324 | kernel_size: 3 325 | weight_filler { 326 | type: "gaussian" 327 | std: 0.01 328 | } 329 | bias_filler { 330 | type: "constant" 331 | value: 0 332 | } 333 | } 334 | } 335 | layers { 336 | bottom: "conv4_1" 337 | top: "conv4_1" 338 | name: "relu4_1" 339 | type: RELU 340 | } 341 | layers { 342 | bottom: "conv4_1" 343 | top: "conv4_2" 344 | name: "conv4_2" 345 | type: CONVOLUTION 346 | blobs_lr: 1 347 | blobs_lr: 2 348 | weight_decay: 1 349 | weight_decay: 0 350 | convolution_param { 351 | num_output: 512 352 | pad: 1 353 | kernel_size: 3 354 | weight_filler { 355 | type: "gaussian" 356 | std: 0.01 357 | } 358 | bias_filler { 359 | type: "constant" 360 | value: 0 361 | } 362 | } 363 | } 364 | layers { 365 | bottom: "conv4_2" 366 | top: "conv4_2" 367 | name: "relu4_2" 368 | type: RELU 369 | } 370 | layers { 371 | bottom: "conv4_2" 372 | top: "conv4_3" 373 | name: "conv4_3" 374 | type: CONVOLUTION 375 | blobs_lr: 1 376 | blobs_lr: 2 377 | weight_decay: 1 378 | weight_decay: 0 379 | convolution_param { 380 | num_output: 512 381 | pad: 1 382 | kernel_size: 3 383 | weight_filler { 384 | type: "gaussian" 385 | std: 0.01 
386 | } 387 | bias_filler { 388 | type: "constant" 389 | value: 0 390 | } 391 | } 392 | } 393 | layers { 394 | bottom: "conv4_3" 395 | top: "conv4_3" 396 | name: "relu4_3" 397 | type: RELU 398 | } 399 | layers { 400 | bottom: "conv4_3" 401 | top: "conv4_4" 402 | name: "conv4_4" 403 | type: CONVOLUTION 404 | blobs_lr: 1 405 | blobs_lr: 2 406 | weight_decay: 1 407 | weight_decay: 0 408 | convolution_param { 409 | num_output: 512 410 | pad: 1 411 | kernel_size: 3 412 | weight_filler { 413 | type: "gaussian" 414 | std: 0.01 415 | } 416 | bias_filler { 417 | type: "constant" 418 | value: 0 419 | } 420 | } 421 | } 422 | layers { 423 | bottom: "conv4_4" 424 | top: "conv4_4" 425 | name: "relu4_4" 426 | type: RELU 427 | } 428 | layers { 429 | bottom: "conv4_4" 430 | top: "pool4" 431 | name: "pool4" 432 | type: POOLING 433 | pooling_param { 434 | pool: MAX 435 | kernel_size: 2 436 | stride: 2 437 | } 438 | } 439 | layers { 440 | bottom: "pool4" 441 | top: "conv5_1" 442 | name: "conv5_1" 443 | type: CONVOLUTION 444 | blobs_lr: 1 445 | blobs_lr: 2 446 | weight_decay: 1 447 | weight_decay: 0 448 | convolution_param { 449 | num_output: 512 450 | pad: 1 451 | kernel_size: 3 452 | weight_filler { 453 | type: "gaussian" 454 | std: 0.01 455 | } 456 | bias_filler { 457 | type: "constant" 458 | value: 0 459 | } 460 | } 461 | } 462 | layers { 463 | bottom: "conv5_1" 464 | top: "conv5_1" 465 | name: "relu5_1" 466 | type: RELU 467 | } 468 | layers { 469 | bottom: "conv5_1" 470 | top: "conv5_2" 471 | name: "conv5_2" 472 | type: CONVOLUTION 473 | blobs_lr: 1 474 | blobs_lr: 2 475 | weight_decay: 1 476 | weight_decay: 0 477 | convolution_param { 478 | num_output: 512 479 | pad: 1 480 | kernel_size: 3 481 | weight_filler { 482 | type: "gaussian" 483 | std: 0.01 484 | } 485 | bias_filler { 486 | type: "constant" 487 | value: 0 488 | } 489 | } 490 | } 491 | layers { 492 | bottom: "conv5_2" 493 | top: "conv5_2" 494 | name: "relu5_2" 495 | type: RELU 496 | } 497 | layers { 498 | bottom: 
"conv5_2" 499 | top: "conv5_3" 500 | name: "conv5_3" 501 | type: CONVOLUTION 502 | blobs_lr: 1 503 | blobs_lr: 2 504 | weight_decay: 1 505 | weight_decay: 0 506 | convolution_param { 507 | num_output: 512 508 | pad: 1 509 | kernel_size: 3 510 | weight_filler { 511 | type: "gaussian" 512 | std: 0.01 513 | } 514 | bias_filler { 515 | type: "constant" 516 | value: 0 517 | } 518 | } 519 | } 520 | layers { 521 | bottom: "conv5_3" 522 | top: "conv5_3" 523 | name: "relu5_3" 524 | type: RELU 525 | } 526 | layers { 527 | bottom: "conv5_3" 528 | top: "conv5_4" 529 | name: "conv5_4" 530 | type: CONVOLUTION 531 | blobs_lr: 1 532 | blobs_lr: 2 533 | weight_decay: 1 534 | weight_decay: 0 535 | convolution_param { 536 | num_output: 512 537 | pad: 1 538 | kernel_size: 3 539 | weight_filler { 540 | type: "gaussian" 541 | std: 0.01 542 | } 543 | bias_filler { 544 | type: "constant" 545 | value: 0 546 | } 547 | } 548 | } 549 | layers { 550 | bottom: "conv5_4" 551 | top: "conv5_4" 552 | name: "relu5_4" 553 | type: RELU 554 | } 555 | layers { 556 | bottom: "conv5_4" 557 | top: "pool5" 558 | name: "pool5" 559 | type: POOLING 560 | pooling_param { 561 | pool: MAX 562 | kernel_size: 2 563 | stride: 2 564 | } 565 | } 566 | layers { 567 | bottom: "pool5" 568 | top: "fc6" 569 | name: "fc6" 570 | type: INNER_PRODUCT 571 | blobs_lr: 1 572 | blobs_lr: 2 573 | weight_decay: 1 574 | weight_decay: 0 575 | inner_product_param { 576 | num_output: 4096 577 | weight_filler { 578 | type: "gaussian" 579 | std: 0.005 580 | } 581 | bias_filler { 582 | type: "constant" 583 | value: 1 584 | } 585 | } 586 | } 587 | layers { 588 | bottom: "fc6" 589 | top: "fc6" 590 | name: "relu6" 591 | type: RELU 592 | } 593 | layers { 594 | bottom: "fc6" 595 | top: "fc6" 596 | name: "drop6" 597 | type: DROPOUT 598 | dropout_param { 599 | dropout_ratio: 0.5 600 | } 601 | } 602 | layers { 603 | bottom: "fc6" 604 | top: "fc7" 605 | name: "fc7" 606 | type: INNER_PRODUCT 607 | blobs_lr: 1 608 | blobs_lr: 2 609 | weight_decay: 1 
function [ channel_ids, part_visibility, anchor_points, shift_vectors, view_assignment, obj_value, err ] = ...
    evaluate_part_locs_anchor_multiview(part_locs, tr_ID, labels, no_selected_parts, no_visible_parts, view_count, iterations)
%EVALUATE_PART_LOCS_ANCHOR_MULTIVIEW Fit a multi-view anchor/shift part model per class.
%
%   For each class the model is fitted ITERATIONS times on the training
%   images of that class with random initialisation; the run with the
%   highest objective value is kept.
%
%   Inputs:
%     part_locs         - N x 5 numeric matrix, columns
%                         [image, part, x, y, visible].
%                         NOTE(review): the reshape below assumes the rows
%                         are sorted by image, then by part - confirm.
%     tr_ID             - per-image flag, true for training images
%     labels            - per-image class label (required).
%                         NOTE(review): label values are used directly as
%                         row index into shift_vectors, so they are
%                         presumably 1..C - confirm with caller.
%     no_selected_parts - parts to select per view (default 5)
%     no_visible_parts  - visible parts per image; NaN (default) skips the
%                         visibility estimation and keeps the visibility
%                         given in part_locs
%     view_count        - number of views (default 3)
%     iterations        - random restarts per class (default 1)
%
%   Outputs:
%     channel_ids     - part indices sorted by descending number of
%                       training images in which the part is visible
%     part_visibility - image_count x part_count logical; all parts are
%                       marked visible for test images
%     anchor_points   - image_count x 2, NaN for test images
%     shift_vectors   - classes x part_count x view_count x 2
%     view_assignment - image_count x view_count logical
%     obj_value, err  - values of the LAST fit that was run (last restart
%                       of the last class with training images), not of
%                       the best one; NaN / [] if no fit was run.

    if nargin<3
        error('evaluate_part_locs_anchor_multiview:notEnoughInputs', ...
            'part_locs, tr_ID and labels are required.');
    end

    %% Constraints
    % Number of parts to select
    if nargin<4
        no_selected_parts = 5;
    end
    % Number of visible parts per image
    if nargin<5
        no_visible_parts = NaN;
    end
    % Number of views
    if nargin<6
        view_count = 3;
    end
    % Number of random restarts per class
    if nargin<7
        iterations = 1;
    end
    if iterations<1
        error('evaluate_part_locs_anchor_multiview:invalidIterations', ...
            'iterations must be at least 1.');
    end

    % Column vectors, so that the class loop and masks work for row input too
    tr_ID = logical(tr_ID(:));
    labels = labels(:);

    %% Read part locations
    parts = array2table(part_locs,'VariableNames',{'image','part','x','y','visible'});
    part_count = numel(unique(parts.part));
    image_count = numel(unique(parts.image));
    % part_count x image_count x coordinates
    part_locs = cat(3, ...
        reshape(parts.x,part_count,image_count), ...
        reshape(parts.y,part_count,image_count));

    class_ids = reshape(unique(labels),1,[]);
    part_visibility = nan(image_count,part_count);
    anchor_points = nan(image_count,2);
    shift_vectors = nan(numel(class_ids),part_count,view_count,2);
    view_assignment = false(image_count,view_count);
    model_errors = nan(image_count,1);
    % Defined even if no class has a training image
    obj_value = NaN;
    err = [];

    fprintf('Working on class ');
    for c=class_ids
        fprintf('%i ',c);
        class_tr_ID = tr_ID & labels==c;
        if sum(class_tr_ID)<1
            continue
        end
        best_obj_value = -Inf;
        for k=1:iterations
            [ ~, h, a, d, s, obj_value, err ] = ...
                do_build_part_models(parts, part_locs, class_tr_ID,...
                no_selected_parts, no_visible_parts, view_count );
            % k==1 guarantees that best_* never carries over from the
            % previous class, whatever the objective value is
            if k==1 || obj_value>best_obj_value
                best_obj_value = obj_value;
                best_h=h;
                best_a=a;
                best_d=d;
                best_s=s;
                best_err=err;
            end
        end
        part_visibility(class_tr_ID,:)=best_h;
        anchor_points(class_tr_ID,:) = best_a;
        shift_vectors(c,:,:,:)=best_d;
        view_assignment(class_tr_ID,:)=best_s;
        model_errors(class_tr_ID,:)=best_err; %#ok<NASGU> kept for debugging
    end

    % Inference for test images
    [~,channel_ids] = sort(-nansum(part_visibility,1));
    % TODO: Here should be a proper inference
    part_visibility(~tr_ID,:)=false;
    part_visibility(~tr_ID,channel_ids)=true;
    part_visibility = logical(part_visibility);
end

function [ idx, part_visibility, anchor_points, shift_vectors, view_assignment, obj_value, err ] = do_build_part_models(parts, part_locs, tr_ID, no_selected_parts, no_visible_parts, view_count)
%DO_BUILD_PART_MODELS One randomly initialised alternating fit on the images in tr_ID.
%
%   Odd rounds update the shift vectors d and the anchor points a, even
%   rounds update the visibility estimate, the view assignment s and the
%   part selection b. The loop stops when b does not change in an even
%   round or when the round limit is reached.
%
%   Inputs:
%     parts     - table with variables image, part, x, y, visible
%     part_locs - part_count x image_count x 2 part coordinates
%     tr_ID     - logical mask of the images to fit on
%     no_selected_parts, no_visible_parts, view_count - see caller
%
%   Outputs:
%     idx             - indices of the selected parts per view from the
%                       last part selection step
%     part_visibility - images x parts logical; the estimated visibility,
%                       or the given visibility if no_visible_parts is NaN
%     anchor_points   - images x 2
%     shift_vectors   - part_count x view_count x 2
%     view_assignment - images x view_count logical
%     obj_value       - negative sum of err
%     err             - per-image squared error over selected, visible
%                       parts in the assigned view

    part_count = numel(unique(parts.part));
    image_count = sum(tr_ID);

    part_locs = part_locs(:,tr_ID,:);

    %% Initialization
    % Select a random view for each image
    s = false(image_count,view_count);
    for i=1:image_count
        s(i,randperm(view_count,1))=true;
    end
    % Select m random parts for each view
    b = false(view_count,part_count);
    for v=1:view_count
        b(v,randperm(part_count,no_selected_parts))=true;
    end
    % Set mean position of all visible parts as default anchor point
    is_visible = logical(parts.visible);
    a = repmat(mean([parts.x(is_visible) parts.y(is_visible)],1),image_count,1);
    % Set 0 as default shift vector, shape part_count x view_count x coordinates
    d = zeros(part_count, view_count,2);
    % Visibility of each part in each image as given in the part table.
    % (The former random initialisation of h was overwritten right here and
    % could fail in randperm, so it was removed.)
    h = logical(reshape(parts.visible,part_count,numel(unique(parts.image))))';
    h = h(tr_ID,:);
    % Returned as part_visibility; stays equal to h unless it is estimated
    est_h = h;

    i = 0;
    done = false;
    while ~done && ceil(i/2)<15
        i = i+1;

        % General preparations
        % Build image_count x view_count x part_count x coordinates arrays,
        % singleton dimensions are expanded by bsxfun.
        % part_locs has shape part_count x image_count x coordinates
        mu_tmp = permute(part_locs,[2 4 1 3]);
        % a has shape image_count x coordinates
        a_tmp = permute(a,[1 3 4 2]);
        % d has shape part_count x view_count x coordinates
        d_tmp = permute(d,[4 2 1 3]);

        if mod(i,2)==1
            %% Estimate d
            % d is the mean of (mu - a) over all images in which the part
            % is visible.
            % NOTE(review): the mask does not include the view assignment
            % s, so every view receives the same shift vector - confirm
            % that this is intended.
            mu_a = bsxfun(@minus,mu_tmp,a_tmp);
            mu_a = repmat(mu_a,1,view_count,1,1);
            % Mask out data that is not visible
            mask = true(image_count,view_count,part_count);
            mask = bsxfun(@and, mask, permute(h,[1 3 2]));
            mask = repmat(mask,1,1,1,2);
            mu_a(~mask) = NaN;
            d = nanmean(mu_a,1);
            d = permute(d,[3 2 4 1]);

            %% Estimate a
            % a is the mean of (mu - d) over all visible parts and all
            % views; d_tmp still holds the shift vectors from before this
            % round.
            mu_d = bsxfun(@minus,mu_tmp,d_tmp);
            mu_d(~mask) = NaN;
            a = nanmean(nanmean(mu_d,3),2);
            a = permute(a, [1 4 2 3]);
        else
            previous_b = b;

            %% Preparations for b, h and s
            % calculate mu - (a + d) using bsxfun to automatically duplicate axis
            mu_a_d = bsxfun(@minus,mu_tmp,bsxfun(@plus,a_tmp,d_tmp));
            % Squared norm along the coordinate axis,
            % shape image_count x view_count x part_count
            mu_a_d = sum(mu_a_d.^2,4);

            %% Estimate h
            if ~isnan(no_visible_parts)
                est_h = false(size(h));
                mask = true(image_count,view_count,part_count);
                mask = bsxfun(@and, mask, s);
                mask = bsxfun(@and, mask, permute(b,[3 1 2]));
                mu_a_d_tmp = mu_a_d;
                mu_a_d_tmp(~mask) = NaN;
                mu_a_d_tmp = nansum(mu_a_d_tmp,2);
                % Only select visible parts from the chosen parts: a sum of
                % 0 marks parts that were masked out completely
                mu_a_d_tmp(mu_a_d_tmp == 0) = Inf;
                [~,idx] = sort(mu_a_d_tmp, 3);
                idx = permute(idx,[1 3 2]);
                idx = idx(:,1:no_visible_parts);
                idx2 = repmat((1:size(idx,1))',1,size(idx,2));
                est_h(sub2ind(size(est_h), idx2(:), idx(:))) = true;
            end

            %% Estimate s
            % Assign each image to the view with the smallest error over
            % visible and selected parts
            s = false(size(s));
            mask = true(image_count,view_count,part_count);
            mask = bsxfun(@and, mask, permute(h,[1 3 2]));
            mask = bsxfun(@and, mask, permute(b,[3 1 2]));
            mu_a_d_tmp = mu_a_d;
            mu_a_d_tmp(~mask) = NaN;
            mu_a_d_tmp = nansum(mu_a_d_tmp,3);
            [~,idx] = sort(mu_a_d_tmp, 2);
            idx = idx(:,1);
            idx2 = (1:size(idx,1))';
            % size(s), not size(h): s has view_count columns
            s(sub2ind(size(s), idx2(:), idx(:))) = true;

            %% Estimate b
            % Per view, select the parts with the smallest error summed
            % over the images assigned to that view
            b = false(size(b));
            mask = true(image_count,view_count,part_count);
            mask = bsxfun(@and, mask, s);
            mu_a_d_tmp = mu_a_d;
            mu_a_d_tmp(~mask) = NaN;
            mu_a_d_tmp = nansum(mu_a_d_tmp,1);
            [~,idx] = sort(mu_a_d_tmp, 3);
            idx = permute(idx,[2 3 1]);
            idx = idx(:,1:no_selected_parts);
            idx2 = repmat((1:size(idx,1))',1,size(idx,2));
            b(sub2ind(size(b), idx2(:), idx(:))) = true;

            %% Converged when the part selection did not change
            done = isequal(previous_b, b);
        end
    end

    %% Get the error of each training image
    mu_tmp = permute(part_locs,[2 4 1 3]);
    a_tmp = permute(a,[1 3 4 2]);
    d_tmp = permute(d,[4 2 1 3]);
    % calculate mu - (a + d) using bsxfun to automatically duplicate axis
    mu_a_d = bsxfun(@minus,mu_tmp,bsxfun(@plus,a_tmp,d_tmp));
    % Squared norm along the coordinate axis
    mu_a_d = sum(mu_a_d.^2,4);
    % Only the assigned view and the selected, visible parts contribute
    mask = true(image_count,view_count,part_count);
    mask = bsxfun(@and, mask, s);
    mask = bsxfun(@and, mask, permute(h,[1 3 2]));
    mask = bsxfun(@and, mask, permute(b,[3 1 2]));
    mu_a_d(~mask) = NaN;
    err = nansum(nansum(mu_a_d,2),3);
    obj_value = -nansum(err);

    %% Return values
    part_visibility = est_h;
    anchor_points = a;
    shift_vectors = d;
    view_assignment = s;
end
/cnn_finetuning/googlenet/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "GoogleNet" 2 | input: "data" 3 | input_dim: 11 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | layer { 8 | name: "conv1/7x7_s2" 9 | type: "Convolution" 10 | bottom: "data" 11 | top: "conv1/7x7_s2" 12 | param { 13 | lr_mult: 1 14 | decay_mult: 1 15 | } 16 | param { 17 | lr_mult: 2 18 | decay_mult: 0 19 | } 20 | convolution_param { 21 | num_output: 64 22 | pad: 3 23 | kernel_size: 7 24 | stride: 2 25 | weight_filler { 26 | type: "xavier" 27 | std: 0.1 28 | } 29 | bias_filler { 30 | type: "constant" 31 | value: 0.2 32 | } 33 | } 34 | } 35 | layer { 36 | name: "conv1/relu_7x7" 37 | type: "ReLU" 38 | bottom: "conv1/7x7_s2" 39 | top: "conv1/7x7_s2" 40 | } 41 | layer { 42 | name: "pool1/3x3_s2" 43 | type: "Pooling" 44 | bottom: "conv1/7x7_s2" 45 | top: "pool1/3x3_s2" 46 | pooling_param { 47 | pool: MAX 48 | kernel_size: 3 49 | stride: 2 50 | } 51 | } 52 | layer { 53 | name: "pool1/norm1" 54 | type: "LRN" 55 | bottom: "pool1/3x3_s2" 56 | top: "pool1/norm1" 57 | lrn_param { 58 | local_size: 5 59 | alpha: 0.0001 60 | beta: 0.75 61 | } 62 | } 63 | layer { 64 | name: "conv2/3x3_reduce" 65 | type: "Convolution" 66 | bottom: "pool1/norm1" 67 | top: "conv2/3x3_reduce" 68 | param { 69 | lr_mult: 1 70 | decay_mult: 1 71 | } 72 | param { 73 | lr_mult: 2 74 | decay_mult: 0 75 | } 76 | convolution_param { 77 | num_output: 64 78 | kernel_size: 1 79 | weight_filler { 80 | type: "xavier" 81 | std: 0.1 82 | } 83 | bias_filler { 84 | type: "constant" 85 | value: 0.2 86 | } 87 | } 88 | } 89 | layer { 90 | name: "conv2/relu_3x3_reduce" 91 | type: "ReLU" 92 | bottom: "conv2/3x3_reduce" 93 | top: "conv2/3x3_reduce" 94 | } 95 | layer { 96 | name: "conv2/3x3" 97 | type: "Convolution" 98 | bottom: "conv2/3x3_reduce" 99 | top: "conv2/3x3" 100 | param { 101 | lr_mult: 1 102 | decay_mult: 1 103 | } 104 | param { 105 | lr_mult: 2 106 | decay_mult: 0 107 | } 
108 | convolution_param { 109 | num_output: 192 110 | pad: 1 111 | kernel_size: 3 112 | weight_filler { 113 | type: "xavier" 114 | std: 0.03 115 | } 116 | bias_filler { 117 | type: "constant" 118 | value: 0.2 119 | } 120 | } 121 | } 122 | layer { 123 | name: "conv2/relu_3x3" 124 | type: "ReLU" 125 | bottom: "conv2/3x3" 126 | top: "conv2/3x3" 127 | } 128 | layer { 129 | name: "conv2/norm2" 130 | type: "LRN" 131 | bottom: "conv2/3x3" 132 | top: "conv2/norm2" 133 | lrn_param { 134 | local_size: 5 135 | alpha: 0.0001 136 | beta: 0.75 137 | } 138 | } 139 | layer { 140 | name: "pool2/3x3_s2" 141 | type: "Pooling" 142 | bottom: "conv2/norm2" 143 | top: "pool2/3x3_s2" 144 | pooling_param { 145 | pool: MAX 146 | kernel_size: 3 147 | stride: 2 148 | } 149 | } 150 | layer { 151 | name: "inception_3a/1x1" 152 | type: "Convolution" 153 | bottom: "pool2/3x3_s2" 154 | top: "inception_3a/1x1" 155 | param { 156 | lr_mult: 1 157 | decay_mult: 1 158 | } 159 | param { 160 | lr_mult: 2 161 | decay_mult: 0 162 | } 163 | convolution_param { 164 | num_output: 64 165 | kernel_size: 1 166 | weight_filler { 167 | type: "xavier" 168 | std: 0.03 169 | } 170 | bias_filler { 171 | type: "constant" 172 | value: 0.2 173 | } 174 | } 175 | } 176 | layer { 177 | name: "inception_3a/relu_1x1" 178 | type: "ReLU" 179 | bottom: "inception_3a/1x1" 180 | top: "inception_3a/1x1" 181 | } 182 | layer { 183 | name: "inception_3a/3x3_reduce" 184 | type: "Convolution" 185 | bottom: "pool2/3x3_s2" 186 | top: "inception_3a/3x3_reduce" 187 | param { 188 | lr_mult: 1 189 | decay_mult: 1 190 | } 191 | param { 192 | lr_mult: 2 193 | decay_mult: 0 194 | } 195 | convolution_param { 196 | num_output: 96 197 | kernel_size: 1 198 | weight_filler { 199 | type: "xavier" 200 | std: 0.09 201 | } 202 | bias_filler { 203 | type: "constant" 204 | value: 0.2 205 | } 206 | } 207 | } 208 | layer { 209 | name: "inception_3a/relu_3x3_reduce" 210 | type: "ReLU" 211 | bottom: "inception_3a/3x3_reduce" 212 | top: 
"inception_3a/3x3_reduce" 213 | } 214 | layer { 215 | name: "inception_3a/3x3" 216 | type: "Convolution" 217 | bottom: "inception_3a/3x3_reduce" 218 | top: "inception_3a/3x3" 219 | param { 220 | lr_mult: 1 221 | decay_mult: 1 222 | } 223 | param { 224 | lr_mult: 2 225 | decay_mult: 0 226 | } 227 | convolution_param { 228 | num_output: 128 229 | pad: 1 230 | kernel_size: 3 231 | weight_filler { 232 | type: "xavier" 233 | std: 0.03 234 | } 235 | bias_filler { 236 | type: "constant" 237 | value: 0.2 238 | } 239 | } 240 | } 241 | layer { 242 | name: "inception_3a/relu_3x3" 243 | type: "ReLU" 244 | bottom: "inception_3a/3x3" 245 | top: "inception_3a/3x3" 246 | } 247 | layer { 248 | name: "inception_3a/5x5_reduce" 249 | type: "Convolution" 250 | bottom: "pool2/3x3_s2" 251 | top: "inception_3a/5x5_reduce" 252 | param { 253 | lr_mult: 1 254 | decay_mult: 1 255 | } 256 | param { 257 | lr_mult: 2 258 | decay_mult: 0 259 | } 260 | convolution_param { 261 | num_output: 16 262 | kernel_size: 1 263 | weight_filler { 264 | type: "xavier" 265 | std: 0.2 266 | } 267 | bias_filler { 268 | type: "constant" 269 | value: 0.2 270 | } 271 | } 272 | } 273 | layer { 274 | name: "inception_3a/relu_5x5_reduce" 275 | type: "ReLU" 276 | bottom: "inception_3a/5x5_reduce" 277 | top: "inception_3a/5x5_reduce" 278 | } 279 | layer { 280 | name: "inception_3a/5x5" 281 | type: "Convolution" 282 | bottom: "inception_3a/5x5_reduce" 283 | top: "inception_3a/5x5" 284 | param { 285 | lr_mult: 1 286 | decay_mult: 1 287 | } 288 | param { 289 | lr_mult: 2 290 | decay_mult: 0 291 | } 292 | convolution_param { 293 | num_output: 32 294 | pad: 2 295 | kernel_size: 5 296 | weight_filler { 297 | type: "xavier" 298 | std: 0.03 299 | } 300 | bias_filler { 301 | type: "constant" 302 | value: 0.2 303 | } 304 | } 305 | } 306 | layer { 307 | name: "inception_3a/relu_5x5" 308 | type: "ReLU" 309 | bottom: "inception_3a/5x5" 310 | top: "inception_3a/5x5" 311 | } 312 | layer { 313 | name: "inception_3a/pool" 314 | type: 
"Pooling" 315 | bottom: "pool2/3x3_s2" 316 | top: "inception_3a/pool" 317 | pooling_param { 318 | pool: MAX 319 | kernel_size: 3 320 | stride: 1 321 | pad: 1 322 | } 323 | } 324 | layer { 325 | name: "inception_3a/pool_proj" 326 | type: "Convolution" 327 | bottom: "inception_3a/pool" 328 | top: "inception_3a/pool_proj" 329 | param { 330 | lr_mult: 1 331 | decay_mult: 1 332 | } 333 | param { 334 | lr_mult: 2 335 | decay_mult: 0 336 | } 337 | convolution_param { 338 | num_output: 32 339 | kernel_size: 1 340 | weight_filler { 341 | type: "xavier" 342 | std: 0.1 343 | } 344 | bias_filler { 345 | type: "constant" 346 | value: 0.2 347 | } 348 | } 349 | } 350 | layer { 351 | name: "inception_3a/relu_pool_proj" 352 | type: "ReLU" 353 | bottom: "inception_3a/pool_proj" 354 | top: "inception_3a/pool_proj" 355 | } 356 | layer { 357 | name: "inception_3a/output" 358 | type: "Concat" 359 | bottom: "inception_3a/1x1" 360 | bottom: "inception_3a/3x3" 361 | bottom: "inception_3a/5x5" 362 | bottom: "inception_3a/pool_proj" 363 | top: "inception_3a/output" 364 | } 365 | layer { 366 | name: "inception_3b/1x1" 367 | type: "Convolution" 368 | bottom: "inception_3a/output" 369 | top: "inception_3b/1x1" 370 | param { 371 | lr_mult: 1 372 | decay_mult: 1 373 | } 374 | param { 375 | lr_mult: 2 376 | decay_mult: 0 377 | } 378 | convolution_param { 379 | num_output: 128 380 | kernel_size: 1 381 | weight_filler { 382 | type: "xavier" 383 | std: 0.03 384 | } 385 | bias_filler { 386 | type: "constant" 387 | value: 0.2 388 | } 389 | } 390 | } 391 | layer { 392 | name: "inception_3b/relu_1x1" 393 | type: "ReLU" 394 | bottom: "inception_3b/1x1" 395 | top: "inception_3b/1x1" 396 | } 397 | layer { 398 | name: "inception_3b/3x3_reduce" 399 | type: "Convolution" 400 | bottom: "inception_3a/output" 401 | top: "inception_3b/3x3_reduce" 402 | param { 403 | lr_mult: 1 404 | decay_mult: 1 405 | } 406 | param { 407 | lr_mult: 2 408 | decay_mult: 0 409 | } 410 | convolution_param { 411 | num_output: 128 412 
| kernel_size: 1 413 | weight_filler { 414 | type: "xavier" 415 | std: 0.09 416 | } 417 | bias_filler { 418 | type: "constant" 419 | value: 0.2 420 | } 421 | } 422 | } 423 | layer { 424 | name: "inception_3b/relu_3x3_reduce" 425 | type: "ReLU" 426 | bottom: "inception_3b/3x3_reduce" 427 | top: "inception_3b/3x3_reduce" 428 | } 429 | layer { 430 | name: "inception_3b/3x3" 431 | type: "Convolution" 432 | bottom: "inception_3b/3x3_reduce" 433 | top: "inception_3b/3x3" 434 | param { 435 | lr_mult: 1 436 | decay_mult: 1 437 | } 438 | param { 439 | lr_mult: 2 440 | decay_mult: 0 441 | } 442 | convolution_param { 443 | num_output: 192 444 | pad: 1 445 | kernel_size: 3 446 | weight_filler { 447 | type: "xavier" 448 | std: 0.03 449 | } 450 | bias_filler { 451 | type: "constant" 452 | value: 0.2 453 | } 454 | } 455 | } 456 | layer { 457 | name: "inception_3b/relu_3x3" 458 | type: "ReLU" 459 | bottom: "inception_3b/3x3" 460 | top: "inception_3b/3x3" 461 | } 462 | layer { 463 | name: "inception_3b/5x5_reduce" 464 | type: "Convolution" 465 | bottom: "inception_3a/output" 466 | top: "inception_3b/5x5_reduce" 467 | param { 468 | lr_mult: 1 469 | decay_mult: 1 470 | } 471 | param { 472 | lr_mult: 2 473 | decay_mult: 0 474 | } 475 | convolution_param { 476 | num_output: 32 477 | kernel_size: 1 478 | weight_filler { 479 | type: "xavier" 480 | std: 0.2 481 | } 482 | bias_filler { 483 | type: "constant" 484 | value: 0.2 485 | } 486 | } 487 | } 488 | layer { 489 | name: "inception_3b/relu_5x5_reduce" 490 | type: "ReLU" 491 | bottom: "inception_3b/5x5_reduce" 492 | top: "inception_3b/5x5_reduce" 493 | } 494 | layer { 495 | name: "inception_3b/5x5" 496 | type: "Convolution" 497 | bottom: "inception_3b/5x5_reduce" 498 | top: "inception_3b/5x5" 499 | param { 500 | lr_mult: 1 501 | decay_mult: 1 502 | } 503 | param { 504 | lr_mult: 2 505 | decay_mult: 0 506 | } 507 | convolution_param { 508 | num_output: 96 509 | pad: 2 510 | kernel_size: 5 511 | weight_filler { 512 | type: "xavier" 513 | 
std: 0.03 514 | } 515 | bias_filler { 516 | type: "constant" 517 | value: 0.2 518 | } 519 | } 520 | } 521 | layer { 522 | name: "inception_3b/relu_5x5" 523 | type: "ReLU" 524 | bottom: "inception_3b/5x5" 525 | top: "inception_3b/5x5" 526 | } 527 | layer { 528 | name: "inception_3b/pool" 529 | type: "Pooling" 530 | bottom: "inception_3a/output" 531 | top: "inception_3b/pool" 532 | pooling_param { 533 | pool: MAX 534 | kernel_size: 3 535 | stride: 1 536 | pad: 1 537 | } 538 | } 539 | layer { 540 | name: "inception_3b/pool_proj" 541 | type: "Convolution" 542 | bottom: "inception_3b/pool" 543 | top: "inception_3b/pool_proj" 544 | param { 545 | lr_mult: 1 546 | decay_mult: 1 547 | } 548 | param { 549 | lr_mult: 2 550 | decay_mult: 0 551 | } 552 | convolution_param { 553 | num_output: 64 554 | kernel_size: 1 555 | weight_filler { 556 | type: "xavier" 557 | std: 0.1 558 | } 559 | bias_filler { 560 | type: "constant" 561 | value: 0.2 562 | } 563 | } 564 | } 565 | layer { 566 | name: "inception_3b/relu_pool_proj" 567 | type: "ReLU" 568 | bottom: "inception_3b/pool_proj" 569 | top: "inception_3b/pool_proj" 570 | } 571 | layer { 572 | name: "inception_3b/output" 573 | type: "Concat" 574 | bottom: "inception_3b/1x1" 575 | bottom: "inception_3b/3x3" 576 | bottom: "inception_3b/5x5" 577 | bottom: "inception_3b/pool_proj" 578 | top: "inception_3b/output" 579 | } 580 | layer { 581 | name: "pool3/3x3_s2" 582 | type: "Pooling" 583 | bottom: "inception_3b/output" 584 | top: "pool3/3x3_s2" 585 | pooling_param { 586 | pool: MAX 587 | kernel_size: 3 588 | stride: 2 589 | } 590 | } 591 | layer { 592 | name: "inception_4a/1x1" 593 | type: "Convolution" 594 | bottom: "pool3/3x3_s2" 595 | top: "inception_4a/1x1" 596 | param { 597 | lr_mult: 1 598 | decay_mult: 1 599 | } 600 | param { 601 | lr_mult: 2 602 | decay_mult: 0 603 | } 604 | convolution_param { 605 | num_output: 192 606 | kernel_size: 1 607 | weight_filler { 608 | type: "xavier" 609 | std: 0.03 610 | } 611 | bias_filler { 612 | 
type: "constant" 613 | value: 0.2 614 | } 615 | } 616 | } 617 | layer { 618 | name: "inception_4a/relu_1x1" 619 | type: "ReLU" 620 | bottom: "inception_4a/1x1" 621 | top: "inception_4a/1x1" 622 | } 623 | layer { 624 | name: "inception_4a/3x3_reduce" 625 | type: "Convolution" 626 | bottom: "pool3/3x3_s2" 627 | top: "inception_4a/3x3_reduce" 628 | param { 629 | lr_mult: 1 630 | decay_mult: 1 631 | } 632 | param { 633 | lr_mult: 2 634 | decay_mult: 0 635 | } 636 | convolution_param { 637 | num_output: 96 638 | kernel_size: 1 639 | weight_filler { 640 | type: "xavier" 641 | std: 0.09 642 | } 643 | bias_filler { 644 | type: "constant" 645 | value: 0.2 646 | } 647 | } 648 | } 649 | layer { 650 | name: "inception_4a/relu_3x3_reduce" 651 | type: "ReLU" 652 | bottom: "inception_4a/3x3_reduce" 653 | top: "inception_4a/3x3_reduce" 654 | } 655 | layer { 656 | name: "inception_4a/3x3" 657 | type: "Convolution" 658 | bottom: "inception_4a/3x3_reduce" 659 | top: "inception_4a/3x3" 660 | param { 661 | lr_mult: 1 662 | decay_mult: 1 663 | } 664 | param { 665 | lr_mult: 2 666 | decay_mult: 0 667 | } 668 | convolution_param { 669 | num_output: 208 670 | pad: 1 671 | kernel_size: 3 672 | weight_filler { 673 | type: "xavier" 674 | std: 0.03 675 | } 676 | bias_filler { 677 | type: "constant" 678 | value: 0.2 679 | } 680 | } 681 | } 682 | layer { 683 | name: "inception_4a/relu_3x3" 684 | type: "ReLU" 685 | bottom: "inception_4a/3x3" 686 | top: "inception_4a/3x3" 687 | } 688 | layer { 689 | name: "inception_4a/5x5_reduce" 690 | type: "Convolution" 691 | bottom: "pool3/3x3_s2" 692 | top: "inception_4a/5x5_reduce" 693 | param { 694 | lr_mult: 1 695 | decay_mult: 1 696 | } 697 | param { 698 | lr_mult: 2 699 | decay_mult: 0 700 | } 701 | convolution_param { 702 | num_output: 16 703 | kernel_size: 1 704 | weight_filler { 705 | type: "xavier" 706 | std: 0.2 707 | } 708 | bias_filler { 709 | type: "constant" 710 | value: 0.2 711 | } 712 | } 713 | } 714 | layer { 715 | name: 
"inception_4a/relu_5x5_reduce" 716 | type: "ReLU" 717 | bottom: "inception_4a/5x5_reduce" 718 | top: "inception_4a/5x5_reduce" 719 | } 720 | layer { 721 | name: "inception_4a/5x5" 722 | type: "Convolution" 723 | bottom: "inception_4a/5x5_reduce" 724 | top: "inception_4a/5x5" 725 | param { 726 | lr_mult: 1 727 | decay_mult: 1 728 | } 729 | param { 730 | lr_mult: 2 731 | decay_mult: 0 732 | } 733 | convolution_param { 734 | num_output: 48 735 | pad: 2 736 | kernel_size: 5 737 | weight_filler { 738 | type: "xavier" 739 | std: 0.03 740 | } 741 | bias_filler { 742 | type: "constant" 743 | value: 0.2 744 | } 745 | } 746 | } 747 | layer { 748 | name: "inception_4a/relu_5x5" 749 | type: "ReLU" 750 | bottom: "inception_4a/5x5" 751 | top: "inception_4a/5x5" 752 | } 753 | layer { 754 | name: "inception_4a/pool" 755 | type: "Pooling" 756 | bottom: "pool3/3x3_s2" 757 | top: "inception_4a/pool" 758 | pooling_param { 759 | pool: MAX 760 | kernel_size: 3 761 | stride: 1 762 | pad: 1 763 | } 764 | } 765 | layer { 766 | name: "inception_4a/pool_proj" 767 | type: "Convolution" 768 | bottom: "inception_4a/pool" 769 | top: "inception_4a/pool_proj" 770 | param { 771 | lr_mult: 1 772 | decay_mult: 1 773 | } 774 | param { 775 | lr_mult: 2 776 | decay_mult: 0 777 | } 778 | convolution_param { 779 | num_output: 64 780 | kernel_size: 1 781 | weight_filler { 782 | type: "xavier" 783 | std: 0.1 784 | } 785 | bias_filler { 786 | type: "constant" 787 | value: 0.2 788 | } 789 | } 790 | } 791 | layer { 792 | name: "inception_4a/relu_pool_proj" 793 | type: "ReLU" 794 | bottom: "inception_4a/pool_proj" 795 | top: "inception_4a/pool_proj" 796 | } 797 | layer { 798 | name: "inception_4a/output" 799 | type: "Concat" 800 | bottom: "inception_4a/1x1" 801 | bottom: "inception_4a/3x3" 802 | bottom: "inception_4a/5x5" 803 | bottom: "inception_4a/pool_proj" 804 | top: "inception_4a/output" 805 | } 806 | layer { 807 | name: "inception_4b/1x1" 808 | type: "Convolution" 809 | bottom: "inception_4a/output" 810 | 
top: "inception_4b/1x1" 811 | param { 812 | lr_mult: 1 813 | decay_mult: 1 814 | } 815 | param { 816 | lr_mult: 2 817 | decay_mult: 0 818 | } 819 | convolution_param { 820 | num_output: 160 821 | kernel_size: 1 822 | weight_filler { 823 | type: "xavier" 824 | std: 0.03 825 | } 826 | bias_filler { 827 | type: "constant" 828 | value: 0.2 829 | } 830 | } 831 | } 832 | layer { 833 | name: "inception_4b/relu_1x1" 834 | type: "ReLU" 835 | bottom: "inception_4b/1x1" 836 | top: "inception_4b/1x1" 837 | } 838 | layer { 839 | name: "inception_4b/3x3_reduce" 840 | type: "Convolution" 841 | bottom: "inception_4a/output" 842 | top: "inception_4b/3x3_reduce" 843 | param { 844 | lr_mult: 1 845 | decay_mult: 1 846 | } 847 | param { 848 | lr_mult: 2 849 | decay_mult: 0 850 | } 851 | convolution_param { 852 | num_output: 112 853 | kernel_size: 1 854 | weight_filler { 855 | type: "xavier" 856 | std: 0.09 857 | } 858 | bias_filler { 859 | type: "constant" 860 | value: 0.2 861 | } 862 | } 863 | } 864 | layer { 865 | name: "inception_4b/relu_3x3_reduce" 866 | type: "ReLU" 867 | bottom: "inception_4b/3x3_reduce" 868 | top: "inception_4b/3x3_reduce" 869 | } 870 | layer { 871 | name: "inception_4b/3x3" 872 | type: "Convolution" 873 | bottom: "inception_4b/3x3_reduce" 874 | top: "inception_4b/3x3" 875 | param { 876 | lr_mult: 1 877 | decay_mult: 1 878 | } 879 | param { 880 | lr_mult: 2 881 | decay_mult: 0 882 | } 883 | convolution_param { 884 | num_output: 224 885 | pad: 1 886 | kernel_size: 3 887 | weight_filler { 888 | type: "xavier" 889 | std: 0.03 890 | } 891 | bias_filler { 892 | type: "constant" 893 | value: 0.2 894 | } 895 | } 896 | } 897 | layer { 898 | name: "inception_4b/relu_3x3" 899 | type: "ReLU" 900 | bottom: "inception_4b/3x3" 901 | top: "inception_4b/3x3" 902 | } 903 | layer { 904 | name: "inception_4b/5x5_reduce" 905 | type: "Convolution" 906 | bottom: "inception_4a/output" 907 | top: "inception_4b/5x5_reduce" 908 | param { 909 | lr_mult: 1 910 | decay_mult: 1 911 | } 912 | 
param { 913 | lr_mult: 2 914 | decay_mult: 0 915 | } 916 | convolution_param { 917 | num_output: 24 918 | kernel_size: 1 919 | weight_filler { 920 | type: "xavier" 921 | std: 0.2 922 | } 923 | bias_filler { 924 | type: "constant" 925 | value: 0.2 926 | } 927 | } 928 | } 929 | layer { 930 | name: "inception_4b/relu_5x5_reduce" 931 | type: "ReLU" 932 | bottom: "inception_4b/5x5_reduce" 933 | top: "inception_4b/5x5_reduce" 934 | } 935 | layer { 936 | name: "inception_4b/5x5" 937 | type: "Convolution" 938 | bottom: "inception_4b/5x5_reduce" 939 | top: "inception_4b/5x5" 940 | param { 941 | lr_mult: 1 942 | decay_mult: 1 943 | } 944 | param { 945 | lr_mult: 2 946 | decay_mult: 0 947 | } 948 | convolution_param { 949 | num_output: 64 950 | pad: 2 951 | kernel_size: 5 952 | weight_filler { 953 | type: "xavier" 954 | std: 0.03 955 | } 956 | bias_filler { 957 | type: "constant" 958 | value: 0.2 959 | } 960 | } 961 | } 962 | layer { 963 | name: "inception_4b/relu_5x5" 964 | type: "ReLU" 965 | bottom: "inception_4b/5x5" 966 | top: "inception_4b/5x5" 967 | } 968 | layer { 969 | name: "inception_4b/pool" 970 | type: "Pooling" 971 | bottom: "inception_4a/output" 972 | top: "inception_4b/pool" 973 | pooling_param { 974 | pool: MAX 975 | kernel_size: 3 976 | stride: 1 977 | pad: 1 978 | } 979 | } 980 | layer { 981 | name: "inception_4b/pool_proj" 982 | type: "Convolution" 983 | bottom: "inception_4b/pool" 984 | top: "inception_4b/pool_proj" 985 | param { 986 | lr_mult: 1 987 | decay_mult: 1 988 | } 989 | param { 990 | lr_mult: 2 991 | decay_mult: 0 992 | } 993 | convolution_param { 994 | num_output: 64 995 | kernel_size: 1 996 | weight_filler { 997 | type: "xavier" 998 | std: 0.1 999 | } 1000 | bias_filler { 1001 | type: "constant" 1002 | value: 0.2 1003 | } 1004 | } 1005 | } 1006 | layer { 1007 | name: "inception_4b/relu_pool_proj" 1008 | type: "ReLU" 1009 | bottom: "inception_4b/pool_proj" 1010 | top: "inception_4b/pool_proj" 1011 | } 1012 | layer { 1013 | name: 
"inception_4b/output" 1014 | type: "Concat" 1015 | bottom: "inception_4b/1x1" 1016 | bottom: "inception_4b/3x3" 1017 | bottom: "inception_4b/5x5" 1018 | bottom: "inception_4b/pool_proj" 1019 | top: "inception_4b/output" 1020 | } 1021 | layer { 1022 | name: "inception_4c/1x1" 1023 | type: "Convolution" 1024 | bottom: "inception_4b/output" 1025 | top: "inception_4c/1x1" 1026 | param { 1027 | lr_mult: 1 1028 | decay_mult: 1 1029 | } 1030 | param { 1031 | lr_mult: 2 1032 | decay_mult: 0 1033 | } 1034 | convolution_param { 1035 | num_output: 128 1036 | kernel_size: 1 1037 | weight_filler { 1038 | type: "xavier" 1039 | std: 0.03 1040 | } 1041 | bias_filler { 1042 | type: "constant" 1043 | value: 0.2 1044 | } 1045 | } 1046 | } 1047 | layer { 1048 | name: "inception_4c/relu_1x1" 1049 | type: "ReLU" 1050 | bottom: "inception_4c/1x1" 1051 | top: "inception_4c/1x1" 1052 | } 1053 | layer { 1054 | name: "inception_4c/3x3_reduce" 1055 | type: "Convolution" 1056 | bottom: "inception_4b/output" 1057 | top: "inception_4c/3x3_reduce" 1058 | param { 1059 | lr_mult: 1 1060 | decay_mult: 1 1061 | } 1062 | param { 1063 | lr_mult: 2 1064 | decay_mult: 0 1065 | } 1066 | convolution_param { 1067 | num_output: 128 1068 | kernel_size: 1 1069 | weight_filler { 1070 | type: "xavier" 1071 | std: 0.09 1072 | } 1073 | bias_filler { 1074 | type: "constant" 1075 | value: 0.2 1076 | } 1077 | } 1078 | } 1079 | layer { 1080 | name: "inception_4c/relu_3x3_reduce" 1081 | type: "ReLU" 1082 | bottom: "inception_4c/3x3_reduce" 1083 | top: "inception_4c/3x3_reduce" 1084 | } 1085 | layer { 1086 | name: "inception_4c/3x3" 1087 | type: "Convolution" 1088 | bottom: "inception_4c/3x3_reduce" 1089 | top: "inception_4c/3x3" 1090 | param { 1091 | lr_mult: 1 1092 | decay_mult: 1 1093 | } 1094 | param { 1095 | lr_mult: 2 1096 | decay_mult: 0 1097 | } 1098 | convolution_param { 1099 | num_output: 256 1100 | pad: 1 1101 | kernel_size: 3 1102 | weight_filler { 1103 | type: "xavier" 1104 | std: 0.03 1105 | } 1106 | 
bias_filler { 1107 | type: "constant" 1108 | value: 0.2 1109 | } 1110 | } 1111 | } 1112 | layer { 1113 | name: "inception_4c/relu_3x3" 1114 | type: "ReLU" 1115 | bottom: "inception_4c/3x3" 1116 | top: "inception_4c/3x3" 1117 | } 1118 | layer { 1119 | name: "inception_4c/5x5_reduce" 1120 | type: "Convolution" 1121 | bottom: "inception_4b/output" 1122 | top: "inception_4c/5x5_reduce" 1123 | param { 1124 | lr_mult: 1 1125 | decay_mult: 1 1126 | } 1127 | param { 1128 | lr_mult: 2 1129 | decay_mult: 0 1130 | } 1131 | convolution_param { 1132 | num_output: 24 1133 | kernel_size: 1 1134 | weight_filler { 1135 | type: "xavier" 1136 | std: 0.2 1137 | } 1138 | bias_filler { 1139 | type: "constant" 1140 | value: 0.2 1141 | } 1142 | } 1143 | } 1144 | layer { 1145 | name: "inception_4c/relu_5x5_reduce" 1146 | type: "ReLU" 1147 | bottom: "inception_4c/5x5_reduce" 1148 | top: "inception_4c/5x5_reduce" 1149 | } 1150 | layer { 1151 | name: "inception_4c/5x5" 1152 | type: "Convolution" 1153 | bottom: "inception_4c/5x5_reduce" 1154 | top: "inception_4c/5x5" 1155 | param { 1156 | lr_mult: 1 1157 | decay_mult: 1 1158 | } 1159 | param { 1160 | lr_mult: 2 1161 | decay_mult: 0 1162 | } 1163 | convolution_param { 1164 | num_output: 64 1165 | pad: 2 1166 | kernel_size: 5 1167 | weight_filler { 1168 | type: "xavier" 1169 | std: 0.03 1170 | } 1171 | bias_filler { 1172 | type: "constant" 1173 | value: 0.2 1174 | } 1175 | } 1176 | } 1177 | layer { 1178 | name: "inception_4c/relu_5x5" 1179 | type: "ReLU" 1180 | bottom: "inception_4c/5x5" 1181 | top: "inception_4c/5x5" 1182 | } 1183 | layer { 1184 | name: "inception_4c/pool" 1185 | type: "Pooling" 1186 | bottom: "inception_4b/output" 1187 | top: "inception_4c/pool" 1188 | pooling_param { 1189 | pool: MAX 1190 | kernel_size: 3 1191 | stride: 1 1192 | pad: 1 1193 | } 1194 | } 1195 | layer { 1196 | name: "inception_4c/pool_proj" 1197 | type: "Convolution" 1198 | bottom: "inception_4c/pool" 1199 | top: "inception_4c/pool_proj" 1200 | param { 1201 | 
lr_mult: 1 1202 | decay_mult: 1 1203 | } 1204 | param { 1205 | lr_mult: 2 1206 | decay_mult: 0 1207 | } 1208 | convolution_param { 1209 | num_output: 64 1210 | kernel_size: 1 1211 | weight_filler { 1212 | type: "xavier" 1213 | std: 0.1 1214 | } 1215 | bias_filler { 1216 | type: "constant" 1217 | value: 0.2 1218 | } 1219 | } 1220 | } 1221 | layer { 1222 | name: "inception_4c/relu_pool_proj" 1223 | type: "ReLU" 1224 | bottom: "inception_4c/pool_proj" 1225 | top: "inception_4c/pool_proj" 1226 | } 1227 | layer { 1228 | name: "inception_4c/output" 1229 | type: "Concat" 1230 | bottom: "inception_4c/1x1" 1231 | bottom: "inception_4c/3x3" 1232 | bottom: "inception_4c/5x5" 1233 | bottom: "inception_4c/pool_proj" 1234 | top: "inception_4c/output" 1235 | } 1236 | layer { 1237 | name: "inception_4d/1x1" 1238 | type: "Convolution" 1239 | bottom: "inception_4c/output" 1240 | top: "inception_4d/1x1" 1241 | param { 1242 | lr_mult: 1 1243 | decay_mult: 1 1244 | } 1245 | param { 1246 | lr_mult: 2 1247 | decay_mult: 0 1248 | } 1249 | convolution_param { 1250 | num_output: 112 1251 | kernel_size: 1 1252 | weight_filler { 1253 | type: "xavier" 1254 | std: 0.03 1255 | } 1256 | bias_filler { 1257 | type: "constant" 1258 | value: 0.2 1259 | } 1260 | } 1261 | } 1262 | layer { 1263 | name: "inception_4d/relu_1x1" 1264 | type: "ReLU" 1265 | bottom: "inception_4d/1x1" 1266 | top: "inception_4d/1x1" 1267 | } 1268 | layer { 1269 | name: "inception_4d/3x3_reduce" 1270 | type: "Convolution" 1271 | bottom: "inception_4c/output" 1272 | top: "inception_4d/3x3_reduce" 1273 | param { 1274 | lr_mult: 1 1275 | decay_mult: 1 1276 | } 1277 | param { 1278 | lr_mult: 2 1279 | decay_mult: 0 1280 | } 1281 | convolution_param { 1282 | num_output: 144 1283 | kernel_size: 1 1284 | weight_filler { 1285 | type: "xavier" 1286 | std: 0.09 1287 | } 1288 | bias_filler { 1289 | type: "constant" 1290 | value: 0.2 1291 | } 1292 | } 1293 | } 1294 | layer { 1295 | name: "inception_4d/relu_3x3_reduce" 1296 | type: "ReLU" 
1297 | bottom: "inception_4d/3x3_reduce" 1298 | top: "inception_4d/3x3_reduce" 1299 | } 1300 | layer { 1301 | name: "inception_4d/3x3" 1302 | type: "Convolution" 1303 | bottom: "inception_4d/3x3_reduce" 1304 | top: "inception_4d/3x3" 1305 | param { 1306 | lr_mult: 1 1307 | decay_mult: 1 1308 | } 1309 | param { 1310 | lr_mult: 2 1311 | decay_mult: 0 1312 | } 1313 | convolution_param { 1314 | num_output: 288 1315 | pad: 1 1316 | kernel_size: 3 1317 | weight_filler { 1318 | type: "xavier" 1319 | std: 0.03 1320 | } 1321 | bias_filler { 1322 | type: "constant" 1323 | value: 0.2 1324 | } 1325 | } 1326 | } 1327 | layer { 1328 | name: "inception_4d/relu_3x3" 1329 | type: "ReLU" 1330 | bottom: "inception_4d/3x3" 1331 | top: "inception_4d/3x3" 1332 | } 1333 | layer { 1334 | name: "inception_4d/5x5_reduce" 1335 | type: "Convolution" 1336 | bottom: "inception_4c/output" 1337 | top: "inception_4d/5x5_reduce" 1338 | param { 1339 | lr_mult: 1 1340 | decay_mult: 1 1341 | } 1342 | param { 1343 | lr_mult: 2 1344 | decay_mult: 0 1345 | } 1346 | convolution_param { 1347 | num_output: 32 1348 | kernel_size: 1 1349 | weight_filler { 1350 | type: "xavier" 1351 | std: 0.2 1352 | } 1353 | bias_filler { 1354 | type: "constant" 1355 | value: 0.2 1356 | } 1357 | } 1358 | } 1359 | layer { 1360 | name: "inception_4d/relu_5x5_reduce" 1361 | type: "ReLU" 1362 | bottom: "inception_4d/5x5_reduce" 1363 | top: "inception_4d/5x5_reduce" 1364 | } 1365 | layer { 1366 | name: "inception_4d/5x5" 1367 | type: "Convolution" 1368 | bottom: "inception_4d/5x5_reduce" 1369 | top: "inception_4d/5x5" 1370 | param { 1371 | lr_mult: 1 1372 | decay_mult: 1 1373 | } 1374 | param { 1375 | lr_mult: 2 1376 | decay_mult: 0 1377 | } 1378 | convolution_param { 1379 | num_output: 64 1380 | pad: 2 1381 | kernel_size: 5 1382 | weight_filler { 1383 | type: "xavier" 1384 | std: 0.03 1385 | } 1386 | bias_filler { 1387 | type: "constant" 1388 | value: 0.2 1389 | } 1390 | } 1391 | } 1392 | layer { 1393 | name: 
"inception_4d/relu_5x5" 1394 | type: "ReLU" 1395 | bottom: "inception_4d/5x5" 1396 | top: "inception_4d/5x5" 1397 | } 1398 | layer { 1399 | name: "inception_4d/pool" 1400 | type: "Pooling" 1401 | bottom: "inception_4c/output" 1402 | top: "inception_4d/pool" 1403 | pooling_param { 1404 | pool: MAX 1405 | kernel_size: 3 1406 | stride: 1 1407 | pad: 1 1408 | } 1409 | } 1410 | layer { 1411 | name: "inception_4d/pool_proj" 1412 | type: "Convolution" 1413 | bottom: "inception_4d/pool" 1414 | top: "inception_4d/pool_proj" 1415 | param { 1416 | lr_mult: 1 1417 | decay_mult: 1 1418 | } 1419 | param { 1420 | lr_mult: 2 1421 | decay_mult: 0 1422 | } 1423 | convolution_param { 1424 | num_output: 64 1425 | kernel_size: 1 1426 | weight_filler { 1427 | type: "xavier" 1428 | std: 0.1 1429 | } 1430 | bias_filler { 1431 | type: "constant" 1432 | value: 0.2 1433 | } 1434 | } 1435 | } 1436 | layer { 1437 | name: "inception_4d/relu_pool_proj" 1438 | type: "ReLU" 1439 | bottom: "inception_4d/pool_proj" 1440 | top: "inception_4d/pool_proj" 1441 | } 1442 | layer { 1443 | name: "inception_4d/output" 1444 | type: "Concat" 1445 | bottom: "inception_4d/1x1" 1446 | bottom: "inception_4d/3x3" 1447 | bottom: "inception_4d/5x5" 1448 | bottom: "inception_4d/pool_proj" 1449 | top: "inception_4d/output" 1450 | } 1451 | layer { 1452 | name: "inception_4e/1x1" 1453 | type: "Convolution" 1454 | bottom: "inception_4d/output" 1455 | top: "inception_4e/1x1" 1456 | param { 1457 | lr_mult: 1 1458 | decay_mult: 1 1459 | } 1460 | param { 1461 | lr_mult: 2 1462 | decay_mult: 0 1463 | } 1464 | convolution_param { 1465 | num_output: 256 1466 | kernel_size: 1 1467 | weight_filler { 1468 | type: "xavier" 1469 | std: 0.03 1470 | } 1471 | bias_filler { 1472 | type: "constant" 1473 | value: 0.2 1474 | } 1475 | } 1476 | } 1477 | layer { 1478 | name: "inception_4e/relu_1x1" 1479 | type: "ReLU" 1480 | bottom: "inception_4e/1x1" 1481 | top: "inception_4e/1x1" 1482 | } 1483 | layer { 1484 | name: "inception_4e/3x3_reduce" 
1485 | type: "Convolution" 1486 | bottom: "inception_4d/output" 1487 | top: "inception_4e/3x3_reduce" 1488 | param { 1489 | lr_mult: 1 1490 | decay_mult: 1 1491 | } 1492 | param { 1493 | lr_mult: 2 1494 | decay_mult: 0 1495 | } 1496 | convolution_param { 1497 | num_output: 160 1498 | kernel_size: 1 1499 | weight_filler { 1500 | type: "xavier" 1501 | std: 0.09 1502 | } 1503 | bias_filler { 1504 | type: "constant" 1505 | value: 0.2 1506 | } 1507 | } 1508 | } 1509 | layer { 1510 | name: "inception_4e/relu_3x3_reduce" 1511 | type: "ReLU" 1512 | bottom: "inception_4e/3x3_reduce" 1513 | top: "inception_4e/3x3_reduce" 1514 | } 1515 | layer { 1516 | name: "inception_4e/3x3" 1517 | type: "Convolution" 1518 | bottom: "inception_4e/3x3_reduce" 1519 | top: "inception_4e/3x3" 1520 | param { 1521 | lr_mult: 1 1522 | decay_mult: 1 1523 | } 1524 | param { 1525 | lr_mult: 2 1526 | decay_mult: 0 1527 | } 1528 | convolution_param { 1529 | num_output: 320 1530 | pad: 1 1531 | kernel_size: 3 1532 | weight_filler { 1533 | type: "xavier" 1534 | std: 0.03 1535 | } 1536 | bias_filler { 1537 | type: "constant" 1538 | value: 0.2 1539 | } 1540 | } 1541 | } 1542 | layer { 1543 | name: "inception_4e/relu_3x3" 1544 | type: "ReLU" 1545 | bottom: "inception_4e/3x3" 1546 | top: "inception_4e/3x3" 1547 | } 1548 | layer { 1549 | name: "inception_4e/5x5_reduce" 1550 | type: "Convolution" 1551 | bottom: "inception_4d/output" 1552 | top: "inception_4e/5x5_reduce" 1553 | param { 1554 | lr_mult: 1 1555 | decay_mult: 1 1556 | } 1557 | param { 1558 | lr_mult: 2 1559 | decay_mult: 0 1560 | } 1561 | convolution_param { 1562 | num_output: 32 1563 | kernel_size: 1 1564 | weight_filler { 1565 | type: "xavier" 1566 | std: 0.2 1567 | } 1568 | bias_filler { 1569 | type: "constant" 1570 | value: 0.2 1571 | } 1572 | } 1573 | } 1574 | layer { 1575 | name: "inception_4e/relu_5x5_reduce" 1576 | type: "ReLU" 1577 | bottom: "inception_4e/5x5_reduce" 1578 | top: "inception_4e/5x5_reduce" 1579 | } 1580 | layer { 1581 | 
name: "inception_4e/5x5" 1582 | type: "Convolution" 1583 | bottom: "inception_4e/5x5_reduce" 1584 | top: "inception_4e/5x5" 1585 | param { 1586 | lr_mult: 1 1587 | decay_mult: 1 1588 | } 1589 | param { 1590 | lr_mult: 2 1591 | decay_mult: 0 1592 | } 1593 | convolution_param { 1594 | num_output: 128 1595 | pad: 2 1596 | kernel_size: 5 1597 | weight_filler { 1598 | type: "xavier" 1599 | std: 0.03 1600 | } 1601 | bias_filler { 1602 | type: "constant" 1603 | value: 0.2 1604 | } 1605 | } 1606 | } 1607 | layer { 1608 | name: "inception_4e/relu_5x5" 1609 | type: "ReLU" 1610 | bottom: "inception_4e/5x5" 1611 | top: "inception_4e/5x5" 1612 | } 1613 | layer { 1614 | name: "inception_4e/pool" 1615 | type: "Pooling" 1616 | bottom: "inception_4d/output" 1617 | top: "inception_4e/pool" 1618 | pooling_param { 1619 | pool: MAX 1620 | kernel_size: 3 1621 | stride: 1 1622 | pad: 1 1623 | } 1624 | } 1625 | layer { 1626 | name: "inception_4e/pool_proj" 1627 | type: "Convolution" 1628 | bottom: "inception_4e/pool" 1629 | top: "inception_4e/pool_proj" 1630 | param { 1631 | lr_mult: 1 1632 | decay_mult: 1 1633 | } 1634 | param { 1635 | lr_mult: 2 1636 | decay_mult: 0 1637 | } 1638 | convolution_param { 1639 | num_output: 128 1640 | kernel_size: 1 1641 | weight_filler { 1642 | type: "xavier" 1643 | std: 0.1 1644 | } 1645 | bias_filler { 1646 | type: "constant" 1647 | value: 0.2 1648 | } 1649 | } 1650 | } 1651 | layer { 1652 | name: "inception_4e/relu_pool_proj" 1653 | type: "ReLU" 1654 | bottom: "inception_4e/pool_proj" 1655 | top: "inception_4e/pool_proj" 1656 | } 1657 | layer { 1658 | name: "inception_4e/output" 1659 | type: "Concat" 1660 | bottom: "inception_4e/1x1" 1661 | bottom: "inception_4e/3x3" 1662 | bottom: "inception_4e/5x5" 1663 | bottom: "inception_4e/pool_proj" 1664 | top: "inception_4e/output" 1665 | } 1666 | layer { 1667 | name: "pool4/3x3_s2" 1668 | type: "Pooling" 1669 | bottom: "inception_4e/output" 1670 | top: "pool4/3x3_s2" 1671 | pooling_param { 1672 | pool: MAX 1673 
| kernel_size: 3 1674 | stride: 2 1675 | } 1676 | } 1677 | layer { 1678 | name: "inception_5a/1x1" 1679 | type: "Convolution" 1680 | bottom: "pool4/3x3_s2" 1681 | top: "inception_5a/1x1" 1682 | param { 1683 | lr_mult: 1 1684 | decay_mult: 1 1685 | } 1686 | param { 1687 | lr_mult: 2 1688 | decay_mult: 0 1689 | } 1690 | convolution_param { 1691 | num_output: 256 1692 | kernel_size: 1 1693 | weight_filler { 1694 | type: "xavier" 1695 | std: 0.03 1696 | } 1697 | bias_filler { 1698 | type: "constant" 1699 | value: 0.2 1700 | } 1701 | } 1702 | } 1703 | layer { 1704 | name: "inception_5a/relu_1x1" 1705 | type: "ReLU" 1706 | bottom: "inception_5a/1x1" 1707 | top: "inception_5a/1x1" 1708 | } 1709 | layer { 1710 | name: "inception_5a/3x3_reduce" 1711 | type: "Convolution" 1712 | bottom: "pool4/3x3_s2" 1713 | top: "inception_5a/3x3_reduce" 1714 | param { 1715 | lr_mult: 1 1716 | decay_mult: 1 1717 | } 1718 | param { 1719 | lr_mult: 2 1720 | decay_mult: 0 1721 | } 1722 | convolution_param { 1723 | num_output: 160 1724 | kernel_size: 1 1725 | weight_filler { 1726 | type: "xavier" 1727 | std: 0.09 1728 | } 1729 | bias_filler { 1730 | type: "constant" 1731 | value: 0.2 1732 | } 1733 | } 1734 | } 1735 | layer { 1736 | name: "inception_5a/relu_3x3_reduce" 1737 | type: "ReLU" 1738 | bottom: "inception_5a/3x3_reduce" 1739 | top: "inception_5a/3x3_reduce" 1740 | } 1741 | layer { 1742 | name: "inception_5a/3x3" 1743 | type: "Convolution" 1744 | bottom: "inception_5a/3x3_reduce" 1745 | top: "inception_5a/3x3" 1746 | param { 1747 | lr_mult: 1 1748 | decay_mult: 1 1749 | } 1750 | param { 1751 | lr_mult: 2 1752 | decay_mult: 0 1753 | } 1754 | convolution_param { 1755 | num_output: 320 1756 | pad: 1 1757 | kernel_size: 3 1758 | weight_filler { 1759 | type: "xavier" 1760 | std: 0.03 1761 | } 1762 | bias_filler { 1763 | type: "constant" 1764 | value: 0.2 1765 | } 1766 | } 1767 | } 1768 | layer { 1769 | name: "inception_5a/relu_3x3" 1770 | type: "ReLU" 1771 | bottom: "inception_5a/3x3" 1772 | 
top: "inception_5a/3x3" 1773 | } 1774 | layer { 1775 | name: "inception_5a/5x5_reduce" 1776 | type: "Convolution" 1777 | bottom: "pool4/3x3_s2" 1778 | top: "inception_5a/5x5_reduce" 1779 | param { 1780 | lr_mult: 1 1781 | decay_mult: 1 1782 | } 1783 | param { 1784 | lr_mult: 2 1785 | decay_mult: 0 1786 | } 1787 | convolution_param { 1788 | num_output: 32 1789 | kernel_size: 1 1790 | weight_filler { 1791 | type: "xavier" 1792 | std: 0.2 1793 | } 1794 | bias_filler { 1795 | type: "constant" 1796 | value: 0.2 1797 | } 1798 | } 1799 | } 1800 | layer { 1801 | name: "inception_5a/relu_5x5_reduce" 1802 | type: "ReLU" 1803 | bottom: "inception_5a/5x5_reduce" 1804 | top: "inception_5a/5x5_reduce" 1805 | } 1806 | layer { 1807 | name: "inception_5a/5x5" 1808 | type: "Convolution" 1809 | bottom: "inception_5a/5x5_reduce" 1810 | top: "inception_5a/5x5" 1811 | param { 1812 | lr_mult: 1 1813 | decay_mult: 1 1814 | } 1815 | param { 1816 | lr_mult: 2 1817 | decay_mult: 0 1818 | } 1819 | convolution_param { 1820 | num_output: 128 1821 | pad: 2 1822 | kernel_size: 5 1823 | weight_filler { 1824 | type: "xavier" 1825 | std: 0.03 1826 | } 1827 | bias_filler { 1828 | type: "constant" 1829 | value: 0.2 1830 | } 1831 | } 1832 | } 1833 | layer { 1834 | name: "inception_5a/relu_5x5" 1835 | type: "ReLU" 1836 | bottom: "inception_5a/5x5" 1837 | top: "inception_5a/5x5" 1838 | } 1839 | layer { 1840 | name: "inception_5a/pool" 1841 | type: "Pooling" 1842 | bottom: "pool4/3x3_s2" 1843 | top: "inception_5a/pool" 1844 | pooling_param { 1845 | pool: MAX 1846 | kernel_size: 3 1847 | stride: 1 1848 | pad: 1 1849 | } 1850 | } 1851 | layer { 1852 | name: "inception_5a/pool_proj" 1853 | type: "Convolution" 1854 | bottom: "inception_5a/pool" 1855 | top: "inception_5a/pool_proj" 1856 | param { 1857 | lr_mult: 1 1858 | decay_mult: 1 1859 | } 1860 | param { 1861 | lr_mult: 2 1862 | decay_mult: 0 1863 | } 1864 | convolution_param { 1865 | num_output: 128 1866 | kernel_size: 1 1867 | weight_filler { 1868 | 
type: "xavier" 1869 | std: 0.1 1870 | } 1871 | bias_filler { 1872 | type: "constant" 1873 | value: 0.2 1874 | } 1875 | } 1876 | } 1877 | layer { 1878 | name: "inception_5a/relu_pool_proj" 1879 | type: "ReLU" 1880 | bottom: "inception_5a/pool_proj" 1881 | top: "inception_5a/pool_proj" 1882 | } 1883 | layer { 1884 | name: "inception_5a/output" 1885 | type: "Concat" 1886 | bottom: "inception_5a/1x1" 1887 | bottom: "inception_5a/3x3" 1888 | bottom: "inception_5a/5x5" 1889 | bottom: "inception_5a/pool_proj" 1890 | top: "inception_5a/output" 1891 | } 1892 | layer { 1893 | name: "inception_5b/1x1" 1894 | type: "Convolution" 1895 | bottom: "inception_5a/output" 1896 | top: "inception_5b/1x1" 1897 | param { 1898 | lr_mult: 1 1899 | decay_mult: 1 1900 | } 1901 | param { 1902 | lr_mult: 2 1903 | decay_mult: 0 1904 | } 1905 | convolution_param { 1906 | num_output: 384 1907 | kernel_size: 1 1908 | weight_filler { 1909 | type: "xavier" 1910 | std: 0.03 1911 | } 1912 | bias_filler { 1913 | type: "constant" 1914 | value: 0.2 1915 | } 1916 | } 1917 | } 1918 | layer { 1919 | name: "inception_5b/relu_1x1" 1920 | type: "ReLU" 1921 | bottom: "inception_5b/1x1" 1922 | top: "inception_5b/1x1" 1923 | } 1924 | layer { 1925 | name: "inception_5b/3x3_reduce" 1926 | type: "Convolution" 1927 | bottom: "inception_5a/output" 1928 | top: "inception_5b/3x3_reduce" 1929 | param { 1930 | lr_mult: 1 1931 | decay_mult: 1 1932 | } 1933 | param { 1934 | lr_mult: 2 1935 | decay_mult: 0 1936 | } 1937 | convolution_param { 1938 | num_output: 192 1939 | kernel_size: 1 1940 | weight_filler { 1941 | type: "xavier" 1942 | std: 0.09 1943 | } 1944 | bias_filler { 1945 | type: "constant" 1946 | value: 0.2 1947 | } 1948 | } 1949 | } 1950 | layer { 1951 | name: "inception_5b/relu_3x3_reduce" 1952 | type: "ReLU" 1953 | bottom: "inception_5b/3x3_reduce" 1954 | top: "inception_5b/3x3_reduce" 1955 | } 1956 | layer { 1957 | name: "inception_5b/3x3" 1958 | type: "Convolution" 1959 | bottom: "inception_5b/3x3_reduce" 1960 
| top: "inception_5b/3x3" 1961 | param { 1962 | lr_mult: 1 1963 | decay_mult: 1 1964 | } 1965 | param { 1966 | lr_mult: 2 1967 | decay_mult: 0 1968 | } 1969 | convolution_param { 1970 | num_output: 384 1971 | pad: 1 1972 | kernel_size: 3 1973 | weight_filler { 1974 | type: "xavier" 1975 | std: 0.03 1976 | } 1977 | bias_filler { 1978 | type: "constant" 1979 | value: 0.2 1980 | } 1981 | } 1982 | } 1983 | layer { 1984 | name: "inception_5b/relu_3x3" 1985 | type: "ReLU" 1986 | bottom: "inception_5b/3x3" 1987 | top: "inception_5b/3x3" 1988 | } 1989 | layer { 1990 | name: "inception_5b/5x5_reduce" 1991 | type: "Convolution" 1992 | bottom: "inception_5a/output" 1993 | top: "inception_5b/5x5_reduce" 1994 | param { 1995 | lr_mult: 1 1996 | decay_mult: 1 1997 | } 1998 | param { 1999 | lr_mult: 2 2000 | decay_mult: 0 2001 | } 2002 | convolution_param { 2003 | num_output: 48 2004 | kernel_size: 1 2005 | weight_filler { 2006 | type: "xavier" 2007 | std: 0.2 2008 | } 2009 | bias_filler { 2010 | type: "constant" 2011 | value: 0.2 2012 | } 2013 | } 2014 | } 2015 | layer { 2016 | name: "inception_5b/relu_5x5_reduce" 2017 | type: "ReLU" 2018 | bottom: "inception_5b/5x5_reduce" 2019 | top: "inception_5b/5x5_reduce" 2020 | } 2021 | layer { 2022 | name: "inception_5b/5x5" 2023 | type: "Convolution" 2024 | bottom: "inception_5b/5x5_reduce" 2025 | top: "inception_5b/5x5" 2026 | param { 2027 | lr_mult: 1 2028 | decay_mult: 1 2029 | } 2030 | param { 2031 | lr_mult: 2 2032 | decay_mult: 0 2033 | } 2034 | convolution_param { 2035 | num_output: 128 2036 | pad: 2 2037 | kernel_size: 5 2038 | weight_filler { 2039 | type: "xavier" 2040 | std: 0.03 2041 | } 2042 | bias_filler { 2043 | type: "constant" 2044 | value: 0.2 2045 | } 2046 | } 2047 | } 2048 | layer { 2049 | name: "inception_5b/relu_5x5" 2050 | type: "ReLU" 2051 | bottom: "inception_5b/5x5" 2052 | top: "inception_5b/5x5" 2053 | } 2054 | layer { 2055 | name: "inception_5b/pool" 2056 | type: "Pooling" 2057 | bottom: "inception_5a/output" 
2058 | top: "inception_5b/pool" 2059 | pooling_param { 2060 | pool: MAX 2061 | kernel_size: 3 2062 | stride: 1 2063 | pad: 1 2064 | } 2065 | } 2066 | layer { 2067 | name: "inception_5b/pool_proj" 2068 | type: "Convolution" 2069 | bottom: "inception_5b/pool" 2070 | top: "inception_5b/pool_proj" 2071 | param { 2072 | lr_mult: 1 2073 | decay_mult: 1 2074 | } 2075 | param { 2076 | lr_mult: 2 2077 | decay_mult: 0 2078 | } 2079 | convolution_param { 2080 | num_output: 128 2081 | kernel_size: 1 2082 | weight_filler { 2083 | type: "xavier" 2084 | std: 0.1 2085 | } 2086 | bias_filler { 2087 | type: "constant" 2088 | value: 0.2 2089 | } 2090 | } 2091 | } 2092 | layer { 2093 | name: "inception_5b/relu_pool_proj" 2094 | type: "ReLU" 2095 | bottom: "inception_5b/pool_proj" 2096 | top: "inception_5b/pool_proj" 2097 | } 2098 | layer { 2099 | name: "inception_5b/output" 2100 | type: "Concat" 2101 | bottom: "inception_5b/1x1" 2102 | bottom: "inception_5b/3x3" 2103 | bottom: "inception_5b/5x5" 2104 | bottom: "inception_5b/pool_proj" 2105 | top: "inception_5b/output" 2106 | } 2107 | layer { 2108 | name: "pool5/7x7_s1" 2109 | type: "Pooling" 2110 | bottom: "inception_5b/output" 2111 | top: "pool5/7x7_s1" 2112 | pooling_param { 2113 | pool: AVE 2114 | kernel_size: 7 2115 | stride: 1 2116 | } 2117 | } 2118 | layer { 2119 | name: "pool5/drop_7x7_s1" 2120 | type: "Dropout" 2121 | bottom: "pool5/7x7_s1" 2122 | top: "pool5/7x7_s1" 2123 | dropout_param { 2124 | dropout_ratio: 0.4 2125 | } 2126 | } 2127 | layer { 2128 | name: "loss3/classifier" 2129 | type: "InnerProduct" 2130 | bottom: "pool5/7x7_s1" 2131 | top: "loss3/classifier" 2132 | param { 2133 | lr_mult: 1 2134 | decay_mult: 1 2135 | } 2136 | param { 2137 | lr_mult: 2 2138 | decay_mult: 0 2139 | } 2140 | inner_product_param { 2141 | num_output: 1000 2142 | weight_filler { 2143 | type: "xavier" 2144 | } 2145 | bias_filler { 2146 | type: "constant" 2147 | value: 0 2148 | } 2149 | } 2150 | } 2151 | layer { 2152 | name: "prob" 2153 | 
type: "Softmax" 2154 | bottom: "loss3/classifier" 2155 | top: "prob" 2156 | } 2157 | -------------------------------------------------------------------------------- /cnn_finetuning/googlenet/deploy_template.prototxt: -------------------------------------------------------------------------------- 1 | name: "GoogleNet" 2 | input: "data" 3 | input_dim: 11 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | layer { 8 | name: "conv1/7x7_s2" 9 | type: "Convolution" 10 | bottom: "data" 11 | top: "conv1/7x7_s2" 12 | param { 13 | lr_mult: 1 14 | decay_mult: 1 15 | } 16 | param { 17 | lr_mult: 2 18 | decay_mult: 0 19 | } 20 | convolution_param { 21 | num_output: 64 22 | pad: 3 23 | kernel_size: 7 24 | stride: 2 25 | weight_filler { 26 | type: "xavier" 27 | std: 0.1 28 | } 29 | bias_filler { 30 | type: "constant" 31 | value: 0.2 32 | } 33 | } 34 | } 35 | layer { 36 | name: "conv1/relu_7x7" 37 | type: "ReLU" 38 | bottom: "conv1/7x7_s2" 39 | top: "conv1/7x7_s2" 40 | } 41 | layer { 42 | name: "pool1/3x3_s2" 43 | type: "Pooling" 44 | bottom: "conv1/7x7_s2" 45 | top: "pool1/3x3_s2" 46 | pooling_param { 47 | pool: MAX 48 | kernel_size: 3 49 | stride: 2 50 | } 51 | } 52 | layer { 53 | name: "pool1/norm1" 54 | type: "LRN" 55 | bottom: "pool1/3x3_s2" 56 | top: "pool1/norm1" 57 | lrn_param { 58 | local_size: 5 59 | alpha: 0.0001 60 | beta: 0.75 61 | } 62 | } 63 | layer { 64 | name: "conv2/3x3_reduce" 65 | type: "Convolution" 66 | bottom: "pool1/norm1" 67 | top: "conv2/3x3_reduce" 68 | param { 69 | lr_mult: 1 70 | decay_mult: 1 71 | } 72 | param { 73 | lr_mult: 2 74 | decay_mult: 0 75 | } 76 | convolution_param { 77 | num_output: 64 78 | kernel_size: 1 79 | weight_filler { 80 | type: "xavier" 81 | std: 0.1 82 | } 83 | bias_filler { 84 | type: "constant" 85 | value: 0.2 86 | } 87 | } 88 | } 89 | layer { 90 | name: "conv2/relu_3x3_reduce" 91 | type: "ReLU" 92 | bottom: "conv2/3x3_reduce" 93 | top: "conv2/3x3_reduce" 94 | } 95 | layer { 96 | name: "conv2/3x3" 97 | type: 
"Convolution" 98 | bottom: "conv2/3x3_reduce" 99 | top: "conv2/3x3" 100 | param { 101 | lr_mult: 1 102 | decay_mult: 1 103 | } 104 | param { 105 | lr_mult: 2 106 | decay_mult: 0 107 | } 108 | convolution_param { 109 | num_output: 192 110 | pad: 1 111 | kernel_size: 3 112 | weight_filler { 113 | type: "xavier" 114 | std: 0.03 115 | } 116 | bias_filler { 117 | type: "constant" 118 | value: 0.2 119 | } 120 | } 121 | } 122 | layer { 123 | name: "conv2/relu_3x3" 124 | type: "ReLU" 125 | bottom: "conv2/3x3" 126 | top: "conv2/3x3" 127 | } 128 | layer { 129 | name: "conv2/norm2" 130 | type: "LRN" 131 | bottom: "conv2/3x3" 132 | top: "conv2/norm2" 133 | lrn_param { 134 | local_size: 5 135 | alpha: 0.0001 136 | beta: 0.75 137 | } 138 | } 139 | layer { 140 | name: "pool2/3x3_s2" 141 | type: "Pooling" 142 | bottom: "conv2/norm2" 143 | top: "pool2/3x3_s2" 144 | pooling_param { 145 | pool: MAX 146 | kernel_size: 3 147 | stride: 2 148 | } 149 | } 150 | layer { 151 | name: "inception_3a/1x1" 152 | type: "Convolution" 153 | bottom: "pool2/3x3_s2" 154 | top: "inception_3a/1x1" 155 | param { 156 | lr_mult: 1 157 | decay_mult: 1 158 | } 159 | param { 160 | lr_mult: 2 161 | decay_mult: 0 162 | } 163 | convolution_param { 164 | num_output: 64 165 | kernel_size: 1 166 | weight_filler { 167 | type: "xavier" 168 | std: 0.03 169 | } 170 | bias_filler { 171 | type: "constant" 172 | value: 0.2 173 | } 174 | } 175 | } 176 | layer { 177 | name: "inception_3a/relu_1x1" 178 | type: "ReLU" 179 | bottom: "inception_3a/1x1" 180 | top: "inception_3a/1x1" 181 | } 182 | layer { 183 | name: "inception_3a/3x3_reduce" 184 | type: "Convolution" 185 | bottom: "pool2/3x3_s2" 186 | top: "inception_3a/3x3_reduce" 187 | param { 188 | lr_mult: 1 189 | decay_mult: 1 190 | } 191 | param { 192 | lr_mult: 2 193 | decay_mult: 0 194 | } 195 | convolution_param { 196 | num_output: 96 197 | kernel_size: 1 198 | weight_filler { 199 | type: "xavier" 200 | std: 0.09 201 | } 202 | bias_filler { 203 | type: "constant" 204 | 
value: 0.2 205 | } 206 | } 207 | } 208 | layer { 209 | name: "inception_3a/relu_3x3_reduce" 210 | type: "ReLU" 211 | bottom: "inception_3a/3x3_reduce" 212 | top: "inception_3a/3x3_reduce" 213 | } 214 | layer { 215 | name: "inception_3a/3x3" 216 | type: "Convolution" 217 | bottom: "inception_3a/3x3_reduce" 218 | top: "inception_3a/3x3" 219 | param { 220 | lr_mult: 1 221 | decay_mult: 1 222 | } 223 | param { 224 | lr_mult: 2 225 | decay_mult: 0 226 | } 227 | convolution_param { 228 | num_output: 128 229 | pad: 1 230 | kernel_size: 3 231 | weight_filler { 232 | type: "xavier" 233 | std: 0.03 234 | } 235 | bias_filler { 236 | type: "constant" 237 | value: 0.2 238 | } 239 | } 240 | } 241 | layer { 242 | name: "inception_3a/relu_3x3" 243 | type: "ReLU" 244 | bottom: "inception_3a/3x3" 245 | top: "inception_3a/3x3" 246 | } 247 | layer { 248 | name: "inception_3a/5x5_reduce" 249 | type: "Convolution" 250 | bottom: "pool2/3x3_s2" 251 | top: "inception_3a/5x5_reduce" 252 | param { 253 | lr_mult: 1 254 | decay_mult: 1 255 | } 256 | param { 257 | lr_mult: 2 258 | decay_mult: 0 259 | } 260 | convolution_param { 261 | num_output: 16 262 | kernel_size: 1 263 | weight_filler { 264 | type: "xavier" 265 | std: 0.2 266 | } 267 | bias_filler { 268 | type: "constant" 269 | value: 0.2 270 | } 271 | } 272 | } 273 | layer { 274 | name: "inception_3a/relu_5x5_reduce" 275 | type: "ReLU" 276 | bottom: "inception_3a/5x5_reduce" 277 | top: "inception_3a/5x5_reduce" 278 | } 279 | layer { 280 | name: "inception_3a/5x5" 281 | type: "Convolution" 282 | bottom: "inception_3a/5x5_reduce" 283 | top: "inception_3a/5x5" 284 | param { 285 | lr_mult: 1 286 | decay_mult: 1 287 | } 288 | param { 289 | lr_mult: 2 290 | decay_mult: 0 291 | } 292 | convolution_param { 293 | num_output: 32 294 | pad: 2 295 | kernel_size: 5 296 | weight_filler { 297 | type: "xavier" 298 | std: 0.03 299 | } 300 | bias_filler { 301 | type: "constant" 302 | value: 0.2 303 | } 304 | } 305 | } 306 | layer { 307 | name: 
"inception_3a/relu_5x5" 308 | type: "ReLU" 309 | bottom: "inception_3a/5x5" 310 | top: "inception_3a/5x5" 311 | } 312 | layer { 313 | name: "inception_3a/pool" 314 | type: "Pooling" 315 | bottom: "pool2/3x3_s2" 316 | top: "inception_3a/pool" 317 | pooling_param { 318 | pool: MAX 319 | kernel_size: 3 320 | stride: 1 321 | pad: 1 322 | } 323 | } 324 | layer { 325 | name: "inception_3a/pool_proj" 326 | type: "Convolution" 327 | bottom: "inception_3a/pool" 328 | top: "inception_3a/pool_proj" 329 | param { 330 | lr_mult: 1 331 | decay_mult: 1 332 | } 333 | param { 334 | lr_mult: 2 335 | decay_mult: 0 336 | } 337 | convolution_param { 338 | num_output: 32 339 | kernel_size: 1 340 | weight_filler { 341 | type: "xavier" 342 | std: 0.1 343 | } 344 | bias_filler { 345 | type: "constant" 346 | value: 0.2 347 | } 348 | } 349 | } 350 | layer { 351 | name: "inception_3a/relu_pool_proj" 352 | type: "ReLU" 353 | bottom: "inception_3a/pool_proj" 354 | top: "inception_3a/pool_proj" 355 | } 356 | layer { 357 | name: "inception_3a/output" 358 | type: "Concat" 359 | bottom: "inception_3a/1x1" 360 | bottom: "inception_3a/3x3" 361 | bottom: "inception_3a/5x5" 362 | bottom: "inception_3a/pool_proj" 363 | top: "inception_3a/output" 364 | } 365 | layer { 366 | name: "inception_3b/1x1" 367 | type: "Convolution" 368 | bottom: "inception_3a/output" 369 | top: "inception_3b/1x1" 370 | param { 371 | lr_mult: 1 372 | decay_mult: 1 373 | } 374 | param { 375 | lr_mult: 2 376 | decay_mult: 0 377 | } 378 | convolution_param { 379 | num_output: 128 380 | kernel_size: 1 381 | weight_filler { 382 | type: "xavier" 383 | std: 0.03 384 | } 385 | bias_filler { 386 | type: "constant" 387 | value: 0.2 388 | } 389 | } 390 | } 391 | layer { 392 | name: "inception_3b/relu_1x1" 393 | type: "ReLU" 394 | bottom: "inception_3b/1x1" 395 | top: "inception_3b/1x1" 396 | } 397 | layer { 398 | name: "inception_3b/3x3_reduce" 399 | type: "Convolution" 400 | bottom: "inception_3a/output" 401 | top: 
"inception_3b/3x3_reduce" 402 | param { 403 | lr_mult: 1 404 | decay_mult: 1 405 | } 406 | param { 407 | lr_mult: 2 408 | decay_mult: 0 409 | } 410 | convolution_param { 411 | num_output: 128 412 | kernel_size: 1 413 | weight_filler { 414 | type: "xavier" 415 | std: 0.09 416 | } 417 | bias_filler { 418 | type: "constant" 419 | value: 0.2 420 | } 421 | } 422 | } 423 | layer { 424 | name: "inception_3b/relu_3x3_reduce" 425 | type: "ReLU" 426 | bottom: "inception_3b/3x3_reduce" 427 | top: "inception_3b/3x3_reduce" 428 | } 429 | layer { 430 | name: "inception_3b/3x3" 431 | type: "Convolution" 432 | bottom: "inception_3b/3x3_reduce" 433 | top: "inception_3b/3x3" 434 | param { 435 | lr_mult: 1 436 | decay_mult: 1 437 | } 438 | param { 439 | lr_mult: 2 440 | decay_mult: 0 441 | } 442 | convolution_param { 443 | num_output: 192 444 | pad: 1 445 | kernel_size: 3 446 | weight_filler { 447 | type: "xavier" 448 | std: 0.03 449 | } 450 | bias_filler { 451 | type: "constant" 452 | value: 0.2 453 | } 454 | } 455 | } 456 | layer { 457 | name: "inception_3b/relu_3x3" 458 | type: "ReLU" 459 | bottom: "inception_3b/3x3" 460 | top: "inception_3b/3x3" 461 | } 462 | layer { 463 | name: "inception_3b/5x5_reduce" 464 | type: "Convolution" 465 | bottom: "inception_3a/output" 466 | top: "inception_3b/5x5_reduce" 467 | param { 468 | lr_mult: 1 469 | decay_mult: 1 470 | } 471 | param { 472 | lr_mult: 2 473 | decay_mult: 0 474 | } 475 | convolution_param { 476 | num_output: 32 477 | kernel_size: 1 478 | weight_filler { 479 | type: "xavier" 480 | std: 0.2 481 | } 482 | bias_filler { 483 | type: "constant" 484 | value: 0.2 485 | } 486 | } 487 | } 488 | layer { 489 | name: "inception_3b/relu_5x5_reduce" 490 | type: "ReLU" 491 | bottom: "inception_3b/5x5_reduce" 492 | top: "inception_3b/5x5_reduce" 493 | } 494 | layer { 495 | name: "inception_3b/5x5" 496 | type: "Convolution" 497 | bottom: "inception_3b/5x5_reduce" 498 | top: "inception_3b/5x5" 499 | param { 500 | lr_mult: 1 501 | decay_mult: 1 
502 | } 503 | param { 504 | lr_mult: 2 505 | decay_mult: 0 506 | } 507 | convolution_param { 508 | num_output: 96 509 | pad: 2 510 | kernel_size: 5 511 | weight_filler { 512 | type: "xavier" 513 | std: 0.03 514 | } 515 | bias_filler { 516 | type: "constant" 517 | value: 0.2 518 | } 519 | } 520 | } 521 | layer { 522 | name: "inception_3b/relu_5x5" 523 | type: "ReLU" 524 | bottom: "inception_3b/5x5" 525 | top: "inception_3b/5x5" 526 | } 527 | layer { 528 | name: "inception_3b/pool" 529 | type: "Pooling" 530 | bottom: "inception_3a/output" 531 | top: "inception_3b/pool" 532 | pooling_param { 533 | pool: MAX 534 | kernel_size: 3 535 | stride: 1 536 | pad: 1 537 | } 538 | } 539 | layer { 540 | name: "inception_3b/pool_proj" 541 | type: "Convolution" 542 | bottom: "inception_3b/pool" 543 | top: "inception_3b/pool_proj" 544 | param { 545 | lr_mult: 1 546 | decay_mult: 1 547 | } 548 | param { 549 | lr_mult: 2 550 | decay_mult: 0 551 | } 552 | convolution_param { 553 | num_output: 64 554 | kernel_size: 1 555 | weight_filler { 556 | type: "xavier" 557 | std: 0.1 558 | } 559 | bias_filler { 560 | type: "constant" 561 | value: 0.2 562 | } 563 | } 564 | } 565 | layer { 566 | name: "inception_3b/relu_pool_proj" 567 | type: "ReLU" 568 | bottom: "inception_3b/pool_proj" 569 | top: "inception_3b/pool_proj" 570 | } 571 | layer { 572 | name: "inception_3b/output" 573 | type: "Concat" 574 | bottom: "inception_3b/1x1" 575 | bottom: "inception_3b/3x3" 576 | bottom: "inception_3b/5x5" 577 | bottom: "inception_3b/pool_proj" 578 | top: "inception_3b/output" 579 | } 580 | layer { 581 | name: "pool3/3x3_s2" 582 | type: "Pooling" 583 | bottom: "inception_3b/output" 584 | top: "pool3/3x3_s2" 585 | pooling_param { 586 | pool: MAX 587 | kernel_size: 3 588 | stride: 2 589 | } 590 | } 591 | layer { 592 | name: "inception_4a/1x1" 593 | type: "Convolution" 594 | bottom: "pool3/3x3_s2" 595 | top: "inception_4a/1x1" 596 | param { 597 | lr_mult: 1 598 | decay_mult: 1 599 | } 600 | param { 601 | 
lr_mult: 2 602 | decay_mult: 0 603 | } 604 | convolution_param { 605 | num_output: 192 606 | kernel_size: 1 607 | weight_filler { 608 | type: "xavier" 609 | std: 0.03 610 | } 611 | bias_filler { 612 | type: "constant" 613 | value: 0.2 614 | } 615 | } 616 | } 617 | layer { 618 | name: "inception_4a/relu_1x1" 619 | type: "ReLU" 620 | bottom: "inception_4a/1x1" 621 | top: "inception_4a/1x1" 622 | } 623 | layer { 624 | name: "inception_4a/3x3_reduce" 625 | type: "Convolution" 626 | bottom: "pool3/3x3_s2" 627 | top: "inception_4a/3x3_reduce" 628 | param { 629 | lr_mult: 1 630 | decay_mult: 1 631 | } 632 | param { 633 | lr_mult: 2 634 | decay_mult: 0 635 | } 636 | convolution_param { 637 | num_output: 96 638 | kernel_size: 1 639 | weight_filler { 640 | type: "xavier" 641 | std: 0.09 642 | } 643 | bias_filler { 644 | type: "constant" 645 | value: 0.2 646 | } 647 | } 648 | } 649 | layer { 650 | name: "inception_4a/relu_3x3_reduce" 651 | type: "ReLU" 652 | bottom: "inception_4a/3x3_reduce" 653 | top: "inception_4a/3x3_reduce" 654 | } 655 | layer { 656 | name: "inception_4a/3x3" 657 | type: "Convolution" 658 | bottom: "inception_4a/3x3_reduce" 659 | top: "inception_4a/3x3" 660 | param { 661 | lr_mult: 1 662 | decay_mult: 1 663 | } 664 | param { 665 | lr_mult: 2 666 | decay_mult: 0 667 | } 668 | convolution_param { 669 | num_output: 208 670 | pad: 1 671 | kernel_size: 3 672 | weight_filler { 673 | type: "xavier" 674 | std: 0.03 675 | } 676 | bias_filler { 677 | type: "constant" 678 | value: 0.2 679 | } 680 | } 681 | } 682 | layer { 683 | name: "inception_4a/relu_3x3" 684 | type: "ReLU" 685 | bottom: "inception_4a/3x3" 686 | top: "inception_4a/3x3" 687 | } 688 | layer { 689 | name: "inception_4a/5x5_reduce" 690 | type: "Convolution" 691 | bottom: "pool3/3x3_s2" 692 | top: "inception_4a/5x5_reduce" 693 | param { 694 | lr_mult: 1 695 | decay_mult: 1 696 | } 697 | param { 698 | lr_mult: 2 699 | decay_mult: 0 700 | } 701 | convolution_param { 702 | num_output: 16 703 | 
kernel_size: 1 704 | weight_filler { 705 | type: "xavier" 706 | std: 0.2 707 | } 708 | bias_filler { 709 | type: "constant" 710 | value: 0.2 711 | } 712 | } 713 | } 714 | layer { 715 | name: "inception_4a/relu_5x5_reduce" 716 | type: "ReLU" 717 | bottom: "inception_4a/5x5_reduce" 718 | top: "inception_4a/5x5_reduce" 719 | } 720 | layer { 721 | name: "inception_4a/5x5" 722 | type: "Convolution" 723 | bottom: "inception_4a/5x5_reduce" 724 | top: "inception_4a/5x5" 725 | param { 726 | lr_mult: 1 727 | decay_mult: 1 728 | } 729 | param { 730 | lr_mult: 2 731 | decay_mult: 0 732 | } 733 | convolution_param { 734 | num_output: 48 735 | pad: 2 736 | kernel_size: 5 737 | weight_filler { 738 | type: "xavier" 739 | std: 0.03 740 | } 741 | bias_filler { 742 | type: "constant" 743 | value: 0.2 744 | } 745 | } 746 | } 747 | layer { 748 | name: "inception_4a/relu_5x5" 749 | type: "ReLU" 750 | bottom: "inception_4a/5x5" 751 | top: "inception_4a/5x5" 752 | } 753 | layer { 754 | name: "inception_4a/pool" 755 | type: "Pooling" 756 | bottom: "pool3/3x3_s2" 757 | top: "inception_4a/pool" 758 | pooling_param { 759 | pool: MAX 760 | kernel_size: 3 761 | stride: 1 762 | pad: 1 763 | } 764 | } 765 | layer { 766 | name: "inception_4a/pool_proj" 767 | type: "Convolution" 768 | bottom: "inception_4a/pool" 769 | top: "inception_4a/pool_proj" 770 | param { 771 | lr_mult: 1 772 | decay_mult: 1 773 | } 774 | param { 775 | lr_mult: 2 776 | decay_mult: 0 777 | } 778 | convolution_param { 779 | num_output: 64 780 | kernel_size: 1 781 | weight_filler { 782 | type: "xavier" 783 | std: 0.1 784 | } 785 | bias_filler { 786 | type: "constant" 787 | value: 0.2 788 | } 789 | } 790 | } 791 | layer { 792 | name: "inception_4a/relu_pool_proj" 793 | type: "ReLU" 794 | bottom: "inception_4a/pool_proj" 795 | top: "inception_4a/pool_proj" 796 | } 797 | layer { 798 | name: "inception_4a/output" 799 | type: "Concat" 800 | bottom: "inception_4a/1x1" 801 | bottom: "inception_4a/3x3" 802 | bottom: "inception_4a/5x5" 
803 | bottom: "inception_4a/pool_proj" 804 | top: "inception_4a/output" 805 | } 806 | layer { 807 | name: "inception_4b/1x1" 808 | type: "Convolution" 809 | bottom: "inception_4a/output" 810 | top: "inception_4b/1x1" 811 | param { 812 | lr_mult: 1 813 | decay_mult: 1 814 | } 815 | param { 816 | lr_mult: 2 817 | decay_mult: 0 818 | } 819 | convolution_param { 820 | num_output: 160 821 | kernel_size: 1 822 | weight_filler { 823 | type: "xavier" 824 | std: 0.03 825 | } 826 | bias_filler { 827 | type: "constant" 828 | value: 0.2 829 | } 830 | } 831 | } 832 | layer { 833 | name: "inception_4b/relu_1x1" 834 | type: "ReLU" 835 | bottom: "inception_4b/1x1" 836 | top: "inception_4b/1x1" 837 | } 838 | layer { 839 | name: "inception_4b/3x3_reduce" 840 | type: "Convolution" 841 | bottom: "inception_4a/output" 842 | top: "inception_4b/3x3_reduce" 843 | param { 844 | lr_mult: 1 845 | decay_mult: 1 846 | } 847 | param { 848 | lr_mult: 2 849 | decay_mult: 0 850 | } 851 | convolution_param { 852 | num_output: 112 853 | kernel_size: 1 854 | weight_filler { 855 | type: "xavier" 856 | std: 0.09 857 | } 858 | bias_filler { 859 | type: "constant" 860 | value: 0.2 861 | } 862 | } 863 | } 864 | layer { 865 | name: "inception_4b/relu_3x3_reduce" 866 | type: "ReLU" 867 | bottom: "inception_4b/3x3_reduce" 868 | top: "inception_4b/3x3_reduce" 869 | } 870 | layer { 871 | name: "inception_4b/3x3" 872 | type: "Convolution" 873 | bottom: "inception_4b/3x3_reduce" 874 | top: "inception_4b/3x3" 875 | param { 876 | lr_mult: 1 877 | decay_mult: 1 878 | } 879 | param { 880 | lr_mult: 2 881 | decay_mult: 0 882 | } 883 | convolution_param { 884 | num_output: 224 885 | pad: 1 886 | kernel_size: 3 887 | weight_filler { 888 | type: "xavier" 889 | std: 0.03 890 | } 891 | bias_filler { 892 | type: "constant" 893 | value: 0.2 894 | } 895 | } 896 | } 897 | layer { 898 | name: "inception_4b/relu_3x3" 899 | type: "ReLU" 900 | bottom: "inception_4b/3x3" 901 | top: "inception_4b/3x3" 902 | } 903 | layer { 904 | 
name: "inception_4b/5x5_reduce" 905 | type: "Convolution" 906 | bottom: "inception_4a/output" 907 | top: "inception_4b/5x5_reduce" 908 | param { 909 | lr_mult: 1 910 | decay_mult: 1 911 | } 912 | param { 913 | lr_mult: 2 914 | decay_mult: 0 915 | } 916 | convolution_param { 917 | num_output: 24 918 | kernel_size: 1 919 | weight_filler { 920 | type: "xavier" 921 | std: 0.2 922 | } 923 | bias_filler { 924 | type: "constant" 925 | value: 0.2 926 | } 927 | } 928 | } 929 | layer { 930 | name: "inception_4b/relu_5x5_reduce" 931 | type: "ReLU" 932 | bottom: "inception_4b/5x5_reduce" 933 | top: "inception_4b/5x5_reduce" 934 | } 935 | layer { 936 | name: "inception_4b/5x5" 937 | type: "Convolution" 938 | bottom: "inception_4b/5x5_reduce" 939 | top: "inception_4b/5x5" 940 | param { 941 | lr_mult: 1 942 | decay_mult: 1 943 | } 944 | param { 945 | lr_mult: 2 946 | decay_mult: 0 947 | } 948 | convolution_param { 949 | num_output: 64 950 | pad: 2 951 | kernel_size: 5 952 | weight_filler { 953 | type: "xavier" 954 | std: 0.03 955 | } 956 | bias_filler { 957 | type: "constant" 958 | value: 0.2 959 | } 960 | } 961 | } 962 | layer { 963 | name: "inception_4b/relu_5x5" 964 | type: "ReLU" 965 | bottom: "inception_4b/5x5" 966 | top: "inception_4b/5x5" 967 | } 968 | layer { 969 | name: "inception_4b/pool" 970 | type: "Pooling" 971 | bottom: "inception_4a/output" 972 | top: "inception_4b/pool" 973 | pooling_param { 974 | pool: MAX 975 | kernel_size: 3 976 | stride: 1 977 | pad: 1 978 | } 979 | } 980 | layer { 981 | name: "inception_4b/pool_proj" 982 | type: "Convolution" 983 | bottom: "inception_4b/pool" 984 | top: "inception_4b/pool_proj" 985 | param { 986 | lr_mult: 1 987 | decay_mult: 1 988 | } 989 | param { 990 | lr_mult: 2 991 | decay_mult: 0 992 | } 993 | convolution_param { 994 | num_output: 64 995 | kernel_size: 1 996 | weight_filler { 997 | type: "xavier" 998 | std: 0.1 999 | } 1000 | bias_filler { 1001 | type: "constant" 1002 | value: 0.2 1003 | } 1004 | } 1005 | } 1006 | layer 
{ 1007 | name: "inception_4b/relu_pool_proj" 1008 | type: "ReLU" 1009 | bottom: "inception_4b/pool_proj" 1010 | top: "inception_4b/pool_proj" 1011 | } 1012 | layer { 1013 | name: "inception_4b/output" 1014 | type: "Concat" 1015 | bottom: "inception_4b/1x1" 1016 | bottom: "inception_4b/3x3" 1017 | bottom: "inception_4b/5x5" 1018 | bottom: "inception_4b/pool_proj" 1019 | top: "inception_4b/output" 1020 | } 1021 | layer { 1022 | name: "inception_4c/1x1" 1023 | type: "Convolution" 1024 | bottom: "inception_4b/output" 1025 | top: "inception_4c/1x1" 1026 | param { 1027 | lr_mult: 1 1028 | decay_mult: 1 1029 | } 1030 | param { 1031 | lr_mult: 2 1032 | decay_mult: 0 1033 | } 1034 | convolution_param { 1035 | num_output: 128 1036 | kernel_size: 1 1037 | weight_filler { 1038 | type: "xavier" 1039 | std: 0.03 1040 | } 1041 | bias_filler { 1042 | type: "constant" 1043 | value: 0.2 1044 | } 1045 | } 1046 | } 1047 | layer { 1048 | name: "inception_4c/relu_1x1" 1049 | type: "ReLU" 1050 | bottom: "inception_4c/1x1" 1051 | top: "inception_4c/1x1" 1052 | } 1053 | layer { 1054 | name: "inception_4c/3x3_reduce" 1055 | type: "Convolution" 1056 | bottom: "inception_4b/output" 1057 | top: "inception_4c/3x3_reduce" 1058 | param { 1059 | lr_mult: 1 1060 | decay_mult: 1 1061 | } 1062 | param { 1063 | lr_mult: 2 1064 | decay_mult: 0 1065 | } 1066 | convolution_param { 1067 | num_output: 128 1068 | kernel_size: 1 1069 | weight_filler { 1070 | type: "xavier" 1071 | std: 0.09 1072 | } 1073 | bias_filler { 1074 | type: "constant" 1075 | value: 0.2 1076 | } 1077 | } 1078 | } 1079 | layer { 1080 | name: "inception_4c/relu_3x3_reduce" 1081 | type: "ReLU" 1082 | bottom: "inception_4c/3x3_reduce" 1083 | top: "inception_4c/3x3_reduce" 1084 | } 1085 | layer { 1086 | name: "inception_4c/3x3" 1087 | type: "Convolution" 1088 | bottom: "inception_4c/3x3_reduce" 1089 | top: "inception_4c/3x3" 1090 | param { 1091 | lr_mult: 1 1092 | decay_mult: 1 1093 | } 1094 | param { 1095 | lr_mult: 2 1096 | decay_mult: 0 
1097 | } 1098 | convolution_param { 1099 | num_output: 256 1100 | pad: 1 1101 | kernel_size: 3 1102 | weight_filler { 1103 | type: "xavier" 1104 | std: 0.03 1105 | } 1106 | bias_filler { 1107 | type: "constant" 1108 | value: 0.2 1109 | } 1110 | } 1111 | } 1112 | layer { 1113 | name: "inception_4c/relu_3x3" 1114 | type: "ReLU" 1115 | bottom: "inception_4c/3x3" 1116 | top: "inception_4c/3x3" 1117 | } 1118 | layer { 1119 | name: "inception_4c/5x5_reduce" 1120 | type: "Convolution" 1121 | bottom: "inception_4b/output" 1122 | top: "inception_4c/5x5_reduce" 1123 | param { 1124 | lr_mult: 1 1125 | decay_mult: 1 1126 | } 1127 | param { 1128 | lr_mult: 2 1129 | decay_mult: 0 1130 | } 1131 | convolution_param { 1132 | num_output: 24 1133 | kernel_size: 1 1134 | weight_filler { 1135 | type: "xavier" 1136 | std: 0.2 1137 | } 1138 | bias_filler { 1139 | type: "constant" 1140 | value: 0.2 1141 | } 1142 | } 1143 | } 1144 | layer { 1145 | name: "inception_4c/relu_5x5_reduce" 1146 | type: "ReLU" 1147 | bottom: "inception_4c/5x5_reduce" 1148 | top: "inception_4c/5x5_reduce" 1149 | } 1150 | layer { 1151 | name: "inception_4c/5x5" 1152 | type: "Convolution" 1153 | bottom: "inception_4c/5x5_reduce" 1154 | top: "inception_4c/5x5" 1155 | param { 1156 | lr_mult: 1 1157 | decay_mult: 1 1158 | } 1159 | param { 1160 | lr_mult: 2 1161 | decay_mult: 0 1162 | } 1163 | convolution_param { 1164 | num_output: 64 1165 | pad: 2 1166 | kernel_size: 5 1167 | weight_filler { 1168 | type: "xavier" 1169 | std: 0.03 1170 | } 1171 | bias_filler { 1172 | type: "constant" 1173 | value: 0.2 1174 | } 1175 | } 1176 | } 1177 | layer { 1178 | name: "inception_4c/relu_5x5" 1179 | type: "ReLU" 1180 | bottom: "inception_4c/5x5" 1181 | top: "inception_4c/5x5" 1182 | } 1183 | layer { 1184 | name: "inception_4c/pool" 1185 | type: "Pooling" 1186 | bottom: "inception_4b/output" 1187 | top: "inception_4c/pool" 1188 | pooling_param { 1189 | pool: MAX 1190 | kernel_size: 3 1191 | stride: 1 1192 | pad: 1 1193 | } 1194 | } 
1195 | layer { 1196 | name: "inception_4c/pool_proj" 1197 | type: "Convolution" 1198 | bottom: "inception_4c/pool" 1199 | top: "inception_4c/pool_proj" 1200 | param { 1201 | lr_mult: 1 1202 | decay_mult: 1 1203 | } 1204 | param { 1205 | lr_mult: 2 1206 | decay_mult: 0 1207 | } 1208 | convolution_param { 1209 | num_output: 64 1210 | kernel_size: 1 1211 | weight_filler { 1212 | type: "xavier" 1213 | std: 0.1 1214 | } 1215 | bias_filler { 1216 | type: "constant" 1217 | value: 0.2 1218 | } 1219 | } 1220 | } 1221 | layer { 1222 | name: "inception_4c/relu_pool_proj" 1223 | type: "ReLU" 1224 | bottom: "inception_4c/pool_proj" 1225 | top: "inception_4c/pool_proj" 1226 | } 1227 | layer { 1228 | name: "inception_4c/output" 1229 | type: "Concat" 1230 | bottom: "inception_4c/1x1" 1231 | bottom: "inception_4c/3x3" 1232 | bottom: "inception_4c/5x5" 1233 | bottom: "inception_4c/pool_proj" 1234 | top: "inception_4c/output" 1235 | } 1236 | layer { 1237 | name: "inception_4d/1x1" 1238 | type: "Convolution" 1239 | bottom: "inception_4c/output" 1240 | top: "inception_4d/1x1" 1241 | param { 1242 | lr_mult: 1 1243 | decay_mult: 1 1244 | } 1245 | param { 1246 | lr_mult: 2 1247 | decay_mult: 0 1248 | } 1249 | convolution_param { 1250 | num_output: 112 1251 | kernel_size: 1 1252 | weight_filler { 1253 | type: "xavier" 1254 | std: 0.03 1255 | } 1256 | bias_filler { 1257 | type: "constant" 1258 | value: 0.2 1259 | } 1260 | } 1261 | } 1262 | layer { 1263 | name: "inception_4d/relu_1x1" 1264 | type: "ReLU" 1265 | bottom: "inception_4d/1x1" 1266 | top: "inception_4d/1x1" 1267 | } 1268 | layer { 1269 | name: "inception_4d/3x3_reduce" 1270 | type: "Convolution" 1271 | bottom: "inception_4c/output" 1272 | top: "inception_4d/3x3_reduce" 1273 | param { 1274 | lr_mult: 1 1275 | decay_mult: 1 1276 | } 1277 | param { 1278 | lr_mult: 2 1279 | decay_mult: 0 1280 | } 1281 | convolution_param { 1282 | num_output: 144 1283 | kernel_size: 1 1284 | weight_filler { 1285 | type: "xavier" 1286 | std: 0.09 1287 | 
} 1288 | bias_filler { 1289 | type: "constant" 1290 | value: 0.2 1291 | } 1292 | } 1293 | } 1294 | layer { 1295 | name: "inception_4d/relu_3x3_reduce" 1296 | type: "ReLU" 1297 | bottom: "inception_4d/3x3_reduce" 1298 | top: "inception_4d/3x3_reduce" 1299 | } 1300 | layer { 1301 | name: "inception_4d/3x3" 1302 | type: "Convolution" 1303 | bottom: "inception_4d/3x3_reduce" 1304 | top: "inception_4d/3x3" 1305 | param { 1306 | lr_mult: 1 1307 | decay_mult: 1 1308 | } 1309 | param { 1310 | lr_mult: 2 1311 | decay_mult: 0 1312 | } 1313 | convolution_param { 1314 | num_output: 288 1315 | pad: 1 1316 | kernel_size: 3 1317 | weight_filler { 1318 | type: "xavier" 1319 | std: 0.03 1320 | } 1321 | bias_filler { 1322 | type: "constant" 1323 | value: 0.2 1324 | } 1325 | } 1326 | } 1327 | layer { 1328 | name: "inception_4d/relu_3x3" 1329 | type: "ReLU" 1330 | bottom: "inception_4d/3x3" 1331 | top: "inception_4d/3x3" 1332 | } 1333 | layer { 1334 | name: "inception_4d/5x5_reduce" 1335 | type: "Convolution" 1336 | bottom: "inception_4c/output" 1337 | top: "inception_4d/5x5_reduce" 1338 | param { 1339 | lr_mult: 1 1340 | decay_mult: 1 1341 | } 1342 | param { 1343 | lr_mult: 2 1344 | decay_mult: 0 1345 | } 1346 | convolution_param { 1347 | num_output: 32 1348 | kernel_size: 1 1349 | weight_filler { 1350 | type: "xavier" 1351 | std: 0.2 1352 | } 1353 | bias_filler { 1354 | type: "constant" 1355 | value: 0.2 1356 | } 1357 | } 1358 | } 1359 | layer { 1360 | name: "inception_4d/relu_5x5_reduce" 1361 | type: "ReLU" 1362 | bottom: "inception_4d/5x5_reduce" 1363 | top: "inception_4d/5x5_reduce" 1364 | } 1365 | layer { 1366 | name: "inception_4d/5x5" 1367 | type: "Convolution" 1368 | bottom: "inception_4d/5x5_reduce" 1369 | top: "inception_4d/5x5" 1370 | param { 1371 | lr_mult: 1 1372 | decay_mult: 1 1373 | } 1374 | param { 1375 | lr_mult: 2 1376 | decay_mult: 0 1377 | } 1378 | convolution_param { 1379 | num_output: 64 1380 | pad: 2 1381 | kernel_size: 5 1382 | weight_filler { 1383 | type: 
"xavier" 1384 | std: 0.03 1385 | } 1386 | bias_filler { 1387 | type: "constant" 1388 | value: 0.2 1389 | } 1390 | } 1391 | } 1392 | layer { 1393 | name: "inception_4d/relu_5x5" 1394 | type: "ReLU" 1395 | bottom: "inception_4d/5x5" 1396 | top: "inception_4d/5x5" 1397 | } 1398 | layer { 1399 | name: "inception_4d/pool" 1400 | type: "Pooling" 1401 | bottom: "inception_4c/output" 1402 | top: "inception_4d/pool" 1403 | pooling_param { 1404 | pool: MAX 1405 | kernel_size: 3 1406 | stride: 1 1407 | pad: 1 1408 | } 1409 | } 1410 | layer { 1411 | name: "inception_4d/pool_proj" 1412 | type: "Convolution" 1413 | bottom: "inception_4d/pool" 1414 | top: "inception_4d/pool_proj" 1415 | param { 1416 | lr_mult: 1 1417 | decay_mult: 1 1418 | } 1419 | param { 1420 | lr_mult: 2 1421 | decay_mult: 0 1422 | } 1423 | convolution_param { 1424 | num_output: 64 1425 | kernel_size: 1 1426 | weight_filler { 1427 | type: "xavier" 1428 | std: 0.1 1429 | } 1430 | bias_filler { 1431 | type: "constant" 1432 | value: 0.2 1433 | } 1434 | } 1435 | } 1436 | layer { 1437 | name: "inception_4d/relu_pool_proj" 1438 | type: "ReLU" 1439 | bottom: "inception_4d/pool_proj" 1440 | top: "inception_4d/pool_proj" 1441 | } 1442 | layer { 1443 | name: "inception_4d/output" 1444 | type: "Concat" 1445 | bottom: "inception_4d/1x1" 1446 | bottom: "inception_4d/3x3" 1447 | bottom: "inception_4d/5x5" 1448 | bottom: "inception_4d/pool_proj" 1449 | top: "inception_4d/output" 1450 | } 1451 | layer { 1452 | name: "inception_4e/1x1" 1453 | type: "Convolution" 1454 | bottom: "inception_4d/output" 1455 | top: "inception_4e/1x1" 1456 | param { 1457 | lr_mult: 1 1458 | decay_mult: 1 1459 | } 1460 | param { 1461 | lr_mult: 2 1462 | decay_mult: 0 1463 | } 1464 | convolution_param { 1465 | num_output: 256 1466 | kernel_size: 1 1467 | weight_filler { 1468 | type: "xavier" 1469 | std: 0.03 1470 | } 1471 | bias_filler { 1472 | type: "constant" 1473 | value: 0.2 1474 | } 1475 | } 1476 | } 1477 | layer { 1478 | name: 
"inception_4e/relu_1x1" 1479 | type: "ReLU" 1480 | bottom: "inception_4e/1x1" 1481 | top: "inception_4e/1x1" 1482 | } 1483 | layer { 1484 | name: "inception_4e/3x3_reduce" 1485 | type: "Convolution" 1486 | bottom: "inception_4d/output" 1487 | top: "inception_4e/3x3_reduce" 1488 | param { 1489 | lr_mult: 1 1490 | decay_mult: 1 1491 | } 1492 | param { 1493 | lr_mult: 2 1494 | decay_mult: 0 1495 | } 1496 | convolution_param { 1497 | num_output: 160 1498 | kernel_size: 1 1499 | weight_filler { 1500 | type: "xavier" 1501 | std: 0.09 1502 | } 1503 | bias_filler { 1504 | type: "constant" 1505 | value: 0.2 1506 | } 1507 | } 1508 | } 1509 | layer { 1510 | name: "inception_4e/relu_3x3_reduce" 1511 | type: "ReLU" 1512 | bottom: "inception_4e/3x3_reduce" 1513 | top: "inception_4e/3x3_reduce" 1514 | } 1515 | layer { 1516 | name: "inception_4e/3x3" 1517 | type: "Convolution" 1518 | bottom: "inception_4e/3x3_reduce" 1519 | top: "inception_4e/3x3" 1520 | param { 1521 | lr_mult: 1 1522 | decay_mult: 1 1523 | } 1524 | param { 1525 | lr_mult: 2 1526 | decay_mult: 0 1527 | } 1528 | convolution_param { 1529 | num_output: 320 1530 | pad: 1 1531 | kernel_size: 3 1532 | weight_filler { 1533 | type: "xavier" 1534 | std: 0.03 1535 | } 1536 | bias_filler { 1537 | type: "constant" 1538 | value: 0.2 1539 | } 1540 | } 1541 | } 1542 | layer { 1543 | name: "inception_4e/relu_3x3" 1544 | type: "ReLU" 1545 | bottom: "inception_4e/3x3" 1546 | top: "inception_4e/3x3" 1547 | } 1548 | layer { 1549 | name: "inception_4e/5x5_reduce" 1550 | type: "Convolution" 1551 | bottom: "inception_4d/output" 1552 | top: "inception_4e/5x5_reduce" 1553 | param { 1554 | lr_mult: 1 1555 | decay_mult: 1 1556 | } 1557 | param { 1558 | lr_mult: 2 1559 | decay_mult: 0 1560 | } 1561 | convolution_param { 1562 | num_output: 32 1563 | kernel_size: 1 1564 | weight_filler { 1565 | type: "xavier" 1566 | std: 0.2 1567 | } 1568 | bias_filler { 1569 | type: "constant" 1570 | value: 0.2 1571 | } 1572 | } 1573 | } 1574 | layer { 1575 | 
name: "inception_4e/relu_5x5_reduce" 1576 | type: "ReLU" 1577 | bottom: "inception_4e/5x5_reduce" 1578 | top: "inception_4e/5x5_reduce" 1579 | } 1580 | layer { 1581 | name: "inception_4e/5x5" 1582 | type: "Convolution" 1583 | bottom: "inception_4e/5x5_reduce" 1584 | top: "inception_4e/5x5" 1585 | param { 1586 | lr_mult: 1 1587 | decay_mult: 1 1588 | } 1589 | param { 1590 | lr_mult: 2 1591 | decay_mult: 0 1592 | } 1593 | convolution_param { 1594 | num_output: 128 1595 | pad: 2 1596 | kernel_size: 5 1597 | weight_filler { 1598 | type: "xavier" 1599 | std: 0.03 1600 | } 1601 | bias_filler { 1602 | type: "constant" 1603 | value: 0.2 1604 | } 1605 | } 1606 | } 1607 | layer { 1608 | name: "inception_4e/relu_5x5" 1609 | type: "ReLU" 1610 | bottom: "inception_4e/5x5" 1611 | top: "inception_4e/5x5" 1612 | } 1613 | layer { 1614 | name: "inception_4e/pool" 1615 | type: "Pooling" 1616 | bottom: "inception_4d/output" 1617 | top: "inception_4e/pool" 1618 | pooling_param { 1619 | pool: MAX 1620 | kernel_size: 3 1621 | stride: 1 1622 | pad: 1 1623 | } 1624 | } 1625 | layer { 1626 | name: "inception_4e/pool_proj" 1627 | type: "Convolution" 1628 | bottom: "inception_4e/pool" 1629 | top: "inception_4e/pool_proj" 1630 | param { 1631 | lr_mult: 1 1632 | decay_mult: 1 1633 | } 1634 | param { 1635 | lr_mult: 2 1636 | decay_mult: 0 1637 | } 1638 | convolution_param { 1639 | num_output: 128 1640 | kernel_size: 1 1641 | weight_filler { 1642 | type: "xavier" 1643 | std: 0.1 1644 | } 1645 | bias_filler { 1646 | type: "constant" 1647 | value: 0.2 1648 | } 1649 | } 1650 | } 1651 | layer { 1652 | name: "inception_4e/relu_pool_proj" 1653 | type: "ReLU" 1654 | bottom: "inception_4e/pool_proj" 1655 | top: "inception_4e/pool_proj" 1656 | } 1657 | layer { 1658 | name: "inception_4e/output" 1659 | type: "Concat" 1660 | bottom: "inception_4e/1x1" 1661 | bottom: "inception_4e/3x3" 1662 | bottom: "inception_4e/5x5" 1663 | bottom: "inception_4e/pool_proj" 1664 | top: "inception_4e/output" 1665 | } 1666 | 
layer { 1667 | name: "pool4/3x3_s2" 1668 | type: "Pooling" 1669 | bottom: "inception_4e/output" 1670 | top: "pool4/3x3_s2" 1671 | pooling_param { 1672 | pool: MAX 1673 | kernel_size: 3 1674 | stride: 2 1675 | } 1676 | } 1677 | layer { 1678 | name: "inception_5a/1x1" 1679 | type: "Convolution" 1680 | bottom: "pool4/3x3_s2" 1681 | top: "inception_5a/1x1" 1682 | param { 1683 | lr_mult: 1 1684 | decay_mult: 1 1685 | } 1686 | param { 1687 | lr_mult: 2 1688 | decay_mult: 0 1689 | } 1690 | convolution_param { 1691 | num_output: 256 1692 | kernel_size: 1 1693 | weight_filler { 1694 | type: "xavier" 1695 | std: 0.03 1696 | } 1697 | bias_filler { 1698 | type: "constant" 1699 | value: 0.2 1700 | } 1701 | } 1702 | } 1703 | layer { 1704 | name: "inception_5a/relu_1x1" 1705 | type: "ReLU" 1706 | bottom: "inception_5a/1x1" 1707 | top: "inception_5a/1x1" 1708 | } 1709 | layer { 1710 | name: "inception_5a/3x3_reduce" 1711 | type: "Convolution" 1712 | bottom: "pool4/3x3_s2" 1713 | top: "inception_5a/3x3_reduce" 1714 | param { 1715 | lr_mult: 1 1716 | decay_mult: 1 1717 | } 1718 | param { 1719 | lr_mult: 2 1720 | decay_mult: 0 1721 | } 1722 | convolution_param { 1723 | num_output: 160 1724 | kernel_size: 1 1725 | weight_filler { 1726 | type: "xavier" 1727 | std: 0.09 1728 | } 1729 | bias_filler { 1730 | type: "constant" 1731 | value: 0.2 1732 | } 1733 | } 1734 | } 1735 | layer { 1736 | name: "inception_5a/relu_3x3_reduce" 1737 | type: "ReLU" 1738 | bottom: "inception_5a/3x3_reduce" 1739 | top: "inception_5a/3x3_reduce" 1740 | } 1741 | layer { 1742 | name: "inception_5a/3x3" 1743 | type: "Convolution" 1744 | bottom: "inception_5a/3x3_reduce" 1745 | top: "inception_5a/3x3" 1746 | param { 1747 | lr_mult: 1 1748 | decay_mult: 1 1749 | } 1750 | param { 1751 | lr_mult: 2 1752 | decay_mult: 0 1753 | } 1754 | convolution_param { 1755 | num_output: 320 1756 | pad: 1 1757 | kernel_size: 3 1758 | weight_filler { 1759 | type: "xavier" 1760 | std: 0.03 1761 | } 1762 | bias_filler { 1763 | type: 
"constant" 1764 | value: 0.2 1765 | } 1766 | } 1767 | } 1768 | layer { 1769 | name: "inception_5a/relu_3x3" 1770 | type: "ReLU" 1771 | bottom: "inception_5a/3x3" 1772 | top: "inception_5a/3x3" 1773 | } 1774 | layer { 1775 | name: "inception_5a/5x5_reduce" 1776 | type: "Convolution" 1777 | bottom: "pool4/3x3_s2" 1778 | top: "inception_5a/5x5_reduce" 1779 | param { 1780 | lr_mult: 1 1781 | decay_mult: 1 1782 | } 1783 | param { 1784 | lr_mult: 2 1785 | decay_mult: 0 1786 | } 1787 | convolution_param { 1788 | num_output: 32 1789 | kernel_size: 1 1790 | weight_filler { 1791 | type: "xavier" 1792 | std: 0.2 1793 | } 1794 | bias_filler { 1795 | type: "constant" 1796 | value: 0.2 1797 | } 1798 | } 1799 | } 1800 | layer { 1801 | name: "inception_5a/relu_5x5_reduce" 1802 | type: "ReLU" 1803 | bottom: "inception_5a/5x5_reduce" 1804 | top: "inception_5a/5x5_reduce" 1805 | } 1806 | layer { 1807 | name: "inception_5a/5x5" 1808 | type: "Convolution" 1809 | bottom: "inception_5a/5x5_reduce" 1810 | top: "inception_5a/5x5" 1811 | param { 1812 | lr_mult: 1 1813 | decay_mult: 1 1814 | } 1815 | param { 1816 | lr_mult: 2 1817 | decay_mult: 0 1818 | } 1819 | convolution_param { 1820 | num_output: 128 1821 | pad: 2 1822 | kernel_size: 5 1823 | weight_filler { 1824 | type: "xavier" 1825 | std: 0.03 1826 | } 1827 | bias_filler { 1828 | type: "constant" 1829 | value: 0.2 1830 | } 1831 | } 1832 | } 1833 | layer { 1834 | name: "inception_5a/relu_5x5" 1835 | type: "ReLU" 1836 | bottom: "inception_5a/5x5" 1837 | top: "inception_5a/5x5" 1838 | } 1839 | layer { 1840 | name: "inception_5a/pool" 1841 | type: "Pooling" 1842 | bottom: "pool4/3x3_s2" 1843 | top: "inception_5a/pool" 1844 | pooling_param { 1845 | pool: MAX 1846 | kernel_size: 3 1847 | stride: 1 1848 | pad: 1 1849 | } 1850 | } 1851 | layer { 1852 | name: "inception_5a/pool_proj" 1853 | type: "Convolution" 1854 | bottom: "inception_5a/pool" 1855 | top: "inception_5a/pool_proj" 1856 | param { 1857 | lr_mult: 1 1858 | decay_mult: 1 1859 | } 
1860 | param { 1861 | lr_mult: 2 1862 | decay_mult: 0 1863 | } 1864 | convolution_param { 1865 | num_output: 128 1866 | kernel_size: 1 1867 | weight_filler { 1868 | type: "xavier" 1869 | std: 0.1 1870 | } 1871 | bias_filler { 1872 | type: "constant" 1873 | value: 0.2 1874 | } 1875 | } 1876 | } 1877 | layer { 1878 | name: "inception_5a/relu_pool_proj" 1879 | type: "ReLU" 1880 | bottom: "inception_5a/pool_proj" 1881 | top: "inception_5a/pool_proj" 1882 | } 1883 | layer { 1884 | name: "inception_5a/output" 1885 | type: "Concat" 1886 | bottom: "inception_5a/1x1" 1887 | bottom: "inception_5a/3x3" 1888 | bottom: "inception_5a/5x5" 1889 | bottom: "inception_5a/pool_proj" 1890 | top: "inception_5a/output" 1891 | } 1892 | layer { 1893 | name: "inception_5b/1x1" 1894 | type: "Convolution" 1895 | bottom: "inception_5a/output" 1896 | top: "inception_5b/1x1" 1897 | param { 1898 | lr_mult: 1 1899 | decay_mult: 1 1900 | } 1901 | param { 1902 | lr_mult: 2 1903 | decay_mult: 0 1904 | } 1905 | convolution_param { 1906 | num_output: 384 1907 | kernel_size: 1 1908 | weight_filler { 1909 | type: "xavier" 1910 | std: 0.03 1911 | } 1912 | bias_filler { 1913 | type: "constant" 1914 | value: 0.2 1915 | } 1916 | } 1917 | } 1918 | layer { 1919 | name: "inception_5b/relu_1x1" 1920 | type: "ReLU" 1921 | bottom: "inception_5b/1x1" 1922 | top: "inception_5b/1x1" 1923 | } 1924 | layer { 1925 | name: "inception_5b/3x3_reduce" 1926 | type: "Convolution" 1927 | bottom: "inception_5a/output" 1928 | top: "inception_5b/3x3_reduce" 1929 | param { 1930 | lr_mult: 1 1931 | decay_mult: 1 1932 | } 1933 | param { 1934 | lr_mult: 2 1935 | decay_mult: 0 1936 | } 1937 | convolution_param { 1938 | num_output: 192 1939 | kernel_size: 1 1940 | weight_filler { 1941 | type: "xavier" 1942 | std: 0.09 1943 | } 1944 | bias_filler { 1945 | type: "constant" 1946 | value: 0.2 1947 | } 1948 | } 1949 | } 1950 | layer { 1951 | name: "inception_5b/relu_3x3_reduce" 1952 | type: "ReLU" 1953 | bottom: "inception_5b/3x3_reduce" 
1954 | top: "inception_5b/3x3_reduce" 1955 | } 1956 | layer { 1957 | name: "inception_5b/3x3" 1958 | type: "Convolution" 1959 | bottom: "inception_5b/3x3_reduce" 1960 | top: "inception_5b/3x3" 1961 | param { 1962 | lr_mult: 1 1963 | decay_mult: 1 1964 | } 1965 | param { 1966 | lr_mult: 2 1967 | decay_mult: 0 1968 | } 1969 | convolution_param { 1970 | num_output: 384 1971 | pad: 1 1972 | kernel_size: 3 1973 | weight_filler { 1974 | type: "xavier" 1975 | std: 0.03 1976 | } 1977 | bias_filler { 1978 | type: "constant" 1979 | value: 0.2 1980 | } 1981 | } 1982 | } 1983 | layer { 1984 | name: "inception_5b/relu_3x3" 1985 | type: "ReLU" 1986 | bottom: "inception_5b/3x3" 1987 | top: "inception_5b/3x3" 1988 | } 1989 | layer { 1990 | name: "inception_5b/5x5_reduce" 1991 | type: "Convolution" 1992 | bottom: "inception_5a/output" 1993 | top: "inception_5b/5x5_reduce" 1994 | param { 1995 | lr_mult: 1 1996 | decay_mult: 1 1997 | } 1998 | param { 1999 | lr_mult: 2 2000 | decay_mult: 0 2001 | } 2002 | convolution_param { 2003 | num_output: 48 2004 | kernel_size: 1 2005 | weight_filler { 2006 | type: "xavier" 2007 | std: 0.2 2008 | } 2009 | bias_filler { 2010 | type: "constant" 2011 | value: 0.2 2012 | } 2013 | } 2014 | } 2015 | layer { 2016 | name: "inception_5b/relu_5x5_reduce" 2017 | type: "ReLU" 2018 | bottom: "inception_5b/5x5_reduce" 2019 | top: "inception_5b/5x5_reduce" 2020 | } 2021 | layer { 2022 | name: "inception_5b/5x5" 2023 | type: "Convolution" 2024 | bottom: "inception_5b/5x5_reduce" 2025 | top: "inception_5b/5x5" 2026 | param { 2027 | lr_mult: 1 2028 | decay_mult: 1 2029 | } 2030 | param { 2031 | lr_mult: 2 2032 | decay_mult: 0 2033 | } 2034 | convolution_param { 2035 | num_output: 128 2036 | pad: 2 2037 | kernel_size: 5 2038 | weight_filler { 2039 | type: "xavier" 2040 | std: 0.03 2041 | } 2042 | bias_filler { 2043 | type: "constant" 2044 | value: 0.2 2045 | } 2046 | } 2047 | } 2048 | layer { 2049 | name: "inception_5b/relu_5x5" 2050 | type: "ReLU" 2051 | bottom: 
"inception_5b/5x5" 2052 | top: "inception_5b/5x5" 2053 | } 2054 | layer { 2055 | name: "inception_5b/pool" 2056 | type: "Pooling" 2057 | bottom: "inception_5a/output" 2058 | top: "inception_5b/pool" 2059 | pooling_param { 2060 | pool: MAX 2061 | kernel_size: 3 2062 | stride: 1 2063 | pad: 1 2064 | } 2065 | } 2066 | layer { 2067 | name: "inception_5b/pool_proj" 2068 | type: "Convolution" 2069 | bottom: "inception_5b/pool" 2070 | top: "inception_5b/pool_proj" 2071 | param { 2072 | lr_mult: 1 2073 | decay_mult: 1 2074 | } 2075 | param { 2076 | lr_mult: 2 2077 | decay_mult: 0 2078 | } 2079 | convolution_param { 2080 | num_output: 128 2081 | kernel_size: 1 2082 | weight_filler { 2083 | type: "xavier" 2084 | std: 0.1 2085 | } 2086 | bias_filler { 2087 | type: "constant" 2088 | value: 0.2 2089 | } 2090 | } 2091 | } 2092 | layer { 2093 | name: "inception_5b/relu_pool_proj" 2094 | type: "ReLU" 2095 | bottom: "inception_5b/pool_proj" 2096 | top: "inception_5b/pool_proj" 2097 | } 2098 | layer { 2099 | name: "inception_5b/output" 2100 | type: "Concat" 2101 | bottom: "inception_5b/1x1" 2102 | bottom: "inception_5b/3x3" 2103 | bottom: "inception_5b/5x5" 2104 | bottom: "inception_5b/pool_proj" 2105 | top: "inception_5b/output" 2106 | } 2107 | layer { 2108 | name: "pool5/7x7_s1" 2109 | type: "Pooling" 2110 | bottom: "inception_5b/output" 2111 | top: "pool5/7x7_s1" 2112 | pooling_param { 2113 | pool: AVE 2114 | kernel_size: 7 2115 | stride: 1 2116 | } 2117 | } 2118 | layer { 2119 | name: "pool5/drop_7x7_s1" 2120 | type: "Dropout" 2121 | bottom: "pool5/7x7_s1" 2122 | top: "pool5/7x7_s1" 2123 | dropout_param { 2124 | dropout_ratio: 0.4 2125 | } 2126 | } 2127 | layer { 2128 | name: "loss3/classifier" 2129 | type: "InnerProduct" 2130 | bottom: "pool5/7x7_s1" 2131 | top: "loss3/classifier" 2132 | param { 2133 | lr_mult: 1 2134 | decay_mult: 1 2135 | } 2136 | param { 2137 | lr_mult: 2 2138 | decay_mult: 0 2139 | } 2140 | inner_product_param { 2141 | num_output: ##NUM_CLASSES## 2142 | 
weight_filler { 2143 | type: "xavier" 2144 | } 2145 | bias_filler { 2146 | type: "constant" 2147 | value: 0 2148 | } 2149 | } 2150 | } 2151 | layer { 2152 | name: "prob" 2153 | type: "Softmax" 2154 | bottom: "loss3/classifier" 2155 | top: "prob" 2156 | } 2157 | --------------------------------------------------------------------------------