├── lib
    └── .gitignore
├── .gitmodules
├── cnn_finetuning
    ├── vgg19
    │   ├── mean.mat
    │   ├── mean.binaryproto
    │   ├── solver_template.prototxt
    │   ├── deploy.prototxt
    │   ├── deploy_template.prototxt
    │   └── train_val_template.prototxt
    ├── googlenet
    │   ├── mean.mat
    │   ├── mean.npy
    │   ├── mean.binaryproto
    │   ├── solver_template.prototxt
    │   ├── deploy.prototxt
    │   └── deploy_template.prototxt
    ├── caffe_reference
    │   ├── mean.mat
    │   ├── mean.binaryproto
    │   ├── solver_template.prototxt
    │   ├── deploy.prototxt
    │   ├── deploy_template.prototxt
    │   └── train_val_template.prototxt
    └── finetuning.m
├── part_based_classification
    ├── .activity.csv
    ├── createTrainTest.m
    └── part_box_classification_multiscale.m
├── part_generation
    ├── convert_locs_to_CUB200_format.m
    ├── parts_locs_from_grads.m
    └── fitGMMToGradient.m
├── README.md
├── setup.m
├── start.m
├── patch_filtering
    └── selsearch_object_detector.m
└── part_selection
    └── evaluate_part_locs_anchor_multiview.m


/lib/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 | !caffe_pp
4 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "lib/caffe_pp"]
2 | 	path = lib/caffe_pp
3 | 	url = https://github.com/cvjena/caffe_pp.git
4 | 


--------------------------------------------------------------------------------
/cnn_finetuning/vgg19/mean.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/vgg19/mean.mat


--------------------------------------------------------------------------------
/cnn_finetuning/googlenet/mean.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/googlenet/mean.mat


--------------------------------------------------------------------------------
/cnn_finetuning/googlenet/mean.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/googlenet/mean.npy


--------------------------------------------------------------------------------
/cnn_finetuning/vgg19/mean.binaryproto:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/vgg19/mean.binaryproto


--------------------------------------------------------------------------------
/cnn_finetuning/caffe_reference/mean.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/caffe_reference/mean.mat


--------------------------------------------------------------------------------
/cnn_finetuning/googlenet/mean.binaryproto:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/googlenet/mean.binaryproto


--------------------------------------------------------------------------------
/cnn_finetuning/caffe_reference/mean.binaryproto:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvjena/part_constellation_models/HEAD/cnn_finetuning/caffe_reference/mean.binaryproto


--------------------------------------------------------------------------------
/part_based_classification/.activity.csv:
--------------------------------------------------------------------------------
1 | Logged off at Fr 3. Apr 12:04:17 CEST 2015
2 | Logged off at Mo 13. Apr 10:34:12 CEST 2015
3 | Logged off at Di 14. Apr 18:41:13 CEST 2015
4 | Logged off at Fr 17. Apr 14:46:19 CEST 2015
5 | Logged off at Di 21. Apr 07:23:58 CEST 2015
6 | Logged off at Do 23. Apr 01:49:19 CEST 2015
7 | 


--------------------------------------------------------------------------------
/cnn_finetuning/vgg19/solver_template.prototxt:
--------------------------------------------------------------------------------
 1 | net: "train_val.prototxt"  # Solver template for fine-tuning; finetuning.m writes the net definition next to the generated solver file
 2 | test_iter: 40  # number of test batches evaluated per test run
 3 | test_interval: ##MAX_ITER##  # placeholder filled in by finetuning.m with opts.finetuning_iters; same value as max_iter
 4 | base_lr: 0.001  # initial learning rate
 5 | lr_policy: "step"  # learning rate is multiplied by gamma every stepsize iterations
 6 | gamma: 0.1
 7 | stepsize: 9000
 8 | display: 20  # log output every 20 iterations
 9 | max_iter: ##MAX_ITER##  # total number of training iterations (placeholder, see test_interval)
10 | momentum: 0.9
11 | weight_decay: 0.0005
12 | snapshot: ##MAX_ITER##  # snapshot interval equals max_iter (same placeholder)
13 | snapshot_prefix: "model_ft"  # file name prefix of the written snapshots
14 | 


--------------------------------------------------------------------------------
/cnn_finetuning/caffe_reference/solver_template.prototxt:
--------------------------------------------------------------------------------
 1 | net: "train_val.prototxt"  # Solver template for fine-tuning; finetuning.m writes the net definition next to the generated solver file
 2 | test_iter: 40  # number of test batches evaluated per test run
 3 | test_interval: ##MAX_ITER##  # placeholder filled in by finetuning.m with opts.finetuning_iters; same value as max_iter
 4 | base_lr: 0.001  # initial learning rate
 5 | lr_policy: "step"  # learning rate is multiplied by gamma every stepsize iterations
 6 | gamma: 0.1
 7 | stepsize: 9000
 8 | display: 20  # log output every 20 iterations
 9 | max_iter: ##MAX_ITER##  # total number of training iterations (placeholder, see test_interval)
10 | momentum: 0.9
11 | weight_decay: 0.0005
12 | snapshot: ##MAX_ITER##  # snapshot interval equals max_iter (same placeholder)
13 | snapshot_prefix: "model_ft"  # file name prefix of the written snapshots
14 | 


--------------------------------------------------------------------------------
/cnn_finetuning/googlenet/solver_template.prototxt:
--------------------------------------------------------------------------------
 1 | net: "train_val.prototxt"  # Solver template for fine-tuning. NOTE(review): the repository tree lists no train_val_template.prototxt for googlenet -- confirm where this file comes from
 2 | test_iter: 40  # number of test batches evaluated per test run
 3 | test_interval: ##MAX_ITER##  # placeholder filled in by finetuning.m with opts.finetuning_iters; same value as max_iter
 4 | test_initialization: true  # run a test pass before the first training iteration
 5 | display: 20  # log output every 20 iterations
 6 | average_loss: 40  # displayed loss is averaged over the last 40 iterations
 7 | base_lr: 0.001  # initial learning rate
 8 | lr_policy: "step"  # learning rate is multiplied by gamma every stepsize iterations
 9 | gamma: 0.1
10 | stepsize: 24000
11 | max_iter: ##MAX_ITER##  # total number of training iterations (placeholder, see test_interval)
12 | momentum: 0.9
13 | weight_decay: 0.0002
14 | snapshot: ##MAX_ITER##  # snapshot interval equals max_iter (same placeholder)
15 | snapshot_prefix: "model_ft"  # file name prefix of the written snapshots
16 | solver_mode: GPU
17 | 


--------------------------------------------------------------------------------
/part_based_classification/createTrainTest.m:
--------------------------------------------------------------------------------
 1 | function [ tr_ID ] = createTrainTest( labels, min_tr, min_tr_percentage )
 2 | %CREATETRAINTEST Draw a random per-class training split.
 3 | %   tr_ID = createTrainTest(labels, min_tr, min_tr_percentage) returns a
 4 | %   logical array of the same size as labels that is true for every sample
 5 | %   selected for training.
 6 | %
 7 | %   labels             numeric class label of every sample
 8 | %   min_tr             minimum number of training samples per class; a class
 9 | %                      with at most min_tr samples is selected completely
10 | %   min_tr_percentage  fraction of every larger class that is drawn (rounded
11 | %                      up, but never fewer than min_tr samples)
12 | %
13 | %   The samples are drawn with randperm, so the result depends on the state
14 | %   of the global random number generator.
15 |     tr_ID = false(size(labels));
16 |     % Iterate over the label values that actually occur, so label sets with
17 |     % gaps (e.g. [1 3 7]) are handled as well as the contiguous 1..K case.
18 |     class_values = unique(labels);
19 |     for k = 1:numel(class_values)
20 |         members = find(labels == class_values(k));
21 |         n_members = numel(members);
22 |         if n_members <= min_tr
23 |             tr_ID(members) = true;
24 |         else
25 |             % Never request more samples than the class contains; randperm
26 |             % would fail otherwise when min_tr_percentage exceeds 1.
27 |             n_train = min(n_members, max(min_tr, ceil(min_tr_percentage * n_members)));
28 |             tr_ID(members(randperm(n_members, n_train))) = true;
29 |         end
30 |     end
31 | end
32 | 


--------------------------------------------------------------------------------
/part_generation/convert_locs_to_CUB200_format.m:
--------------------------------------------------------------------------------
 1 | function [ part_locs2 ] = convert_locs_to_CUB200_format( part_locs )
 2 | %CONVERT_LOCS_TO_CUB200_FORMAT Flatten part locations into a row table.
 3 | %   part_locs is indexed as (image, part, coordinate) and holds two
 4 | %   coordinates per image/part pair. The result has one row per pair,
 5 | %   ordered image by image:
 6 | %       [image_index, part_index, coord_1, coord_2, valid_flag]
 7 | %   A pair whose coordinates contain NaN is written as
 8 | %   [image part -1 -1 0]; every other pair keeps its two coordinates and
 9 | %   gets valid_flag = 1.
10 |     [num_images, num_parts, ~] = size(part_locs);
11 |     part_locs2 = nan(num_images * num_parts, 5);
12 |     row = 0;
13 |     for img = 1:num_images
14 |         for part = 1:num_parts
15 |             row = row + 1;    % equals (img-1)*num_parts + part
16 |             coords = part_locs(img, part, :);
17 |             if any(isnan(coords))
18 |                 part_locs2(row, :) = [img part -1 -1 0];
19 |             else
20 |                 part_locs2(row, :) = [img part reshape(coords, 1, 2) 1];
21 |             end
22 |         end
23 |     end
24 | end
25 | 


--------------------------------------------------------------------------------
/part_generation/parts_locs_from_grads.m:
--------------------------------------------------------------------------------
 1 | function [ part_locs ] = parts_locs_from_grads(opts)
 2 | %PARTS_LOCS_FROM_GRADS Estimate a location for every image and part channel.
 3 | %   For every image named in opts.imagelist_file, caffe_gradients is called
 4 | %   for the channels 1..opts.part_layer_channel_count of the layer
 5 | %   opts.part_layer. For each channel the absolute values of the result are
 6 | %   summed over the third dimension and handed to fitGMMToGradient, which
 7 | %   returns the estimated (x, y) location. Channels whose map contains NaN
 8 | %   or has no non-zero entry keep a NaN location.
 9 | %   The locations are converted with convert_locs_to_CUB200_format, saved
10 | %   as variable 'part_locs' to opts.part_loc_file and returned.
11 | %
12 | %   Fields read from opts: imagedir, imagelist_file, part_layer,
13 | %   part_layer_channel_count, mean_mat_file, batch_size, crop_size, deploy,
14 | %   model, parfor_workers, parfor_arg, gpu_count, verbose_output,
15 | %   part_loc_file.
16 |     imagedir = opts.imagedir;
17 |     % Read the image list
18 |     fid = fopen(opts.imagelist_file,'r');
19 |     if fid < 0
20 |         % Fail with a clear message instead of an obscure textscan error
21 |         error('Could not open image list file %s', opts.imagelist_file);
22 |     end
23 |     imagelist = textscan(fid,'%s');
24 |     fclose(fid);
25 |     imagelist = imagelist{1};
26 |     % Layer and number of its channels that are evaluated
27 |     layer = opts.part_layer;
28 |     part_count = opts.part_layer_channel_count;
29 | 
30 |     mean_file = opts.mean_mat_file;
31 |     batch_size = opts.batch_size;
32 |     crop_size = opts.crop_size;
33 |     deploy = opts.deploy;
34 |     model = opts.model;
35 | 
36 |     % Run matcaffe_init in opts.parfor_workers parallel iterations; the last
37 |     % argument cycles through 0..gpu_count-1 (presumably the GPU id -- confirm)
38 |     parfor (i=1:opts.parfor_workers, opts.parfor_arg)
39 |         matcaffe_init(1,deploy,model,1,mod(i,opts.gpu_count));
40 |     end
41 | 
42 |     fprintf('%s\n',datestr(now));
43 |     % The estimated part locations for all images and parts
44 |     part_locs=nan(size(imagelist,1), part_count,2);
45 |     parfor (i = 1:size(imagelist,1), opts.parfor_arg)
46 |         if opts.verbose_output
47 |             fprintf('Image %i: %s\n',i, imagelist{i});
48 |         end
49 |         g=caffe_gradients(imread([imagedir '/' imagelist{i}]),layer,(1:part_count)',mean_file,batch_size,crop_size);
50 |         for p=1:part_count
51 |             % Gradient map of channel p: absolute values summed over dimension 3
52 |             gmap = squeeze(sum(abs(g(:,:,:,p)),3));
53 |             % Skip maps that contain NaN or carry no signal at all
54 |             if any(isnan(gmap(:))) || ~any(gmap(:)~=0)
55 |                 continue
56 |             end
57 |             [est_x,est_y]=fitGMMToGradient(zeros(crop_size,crop_size,3),gmap,[],2);
58 |             part_locs(i,p,:)=[est_x,est_y];
59 |         end
60 |     end
61 |     part_locs = convert_locs_to_CUB200_format(part_locs);
62 |     save(opts.part_loc_file,'part_locs');
63 | end


--------------------------------------------------------------------------------
/cnn_finetuning/finetuning.m:
--------------------------------------------------------------------------------
 1 | function [  ] = finetuning( num_classes, opts )
 2 | %FINETUNING Prepare a Caffe fine-tuning run and wait until the user has run it.
 3 | %   Creates opts.finetuning_dir and fills it with
 4 | %     train_val.prototxt  from <opts.cnn_dir>/train_val_template.prototxt
 5 | %     deploy_ft.prototxt  from <opts.cnn_dir>/deploy_template.prototxt
 6 | %     solver.prototxt     from <opts.cnn_dir>/solver_template.prototxt
 7 | %     mean.binaryproto    copied from opts.mean_proto_file
 8 | %   In the templates ##NUM_CLASSES## is replaced by num_classes+1 and
 9 | %   ##MAX_ITER## by opts.finetuning_iters.
10 | %   Training itself is not started from here: the caffe command line is
11 | %   printed and the function blocks until the user hits enter. Afterwards
12 | %   the *.solverstate files in the fine-tuning directory are deleted.
13 | %
14 | %   Fields read from opts: finetuning_dir, cnn_dir, mean_proto_file,
15 | %   finetuning_iters, caffe_executable, finetuning_gpu.
16 | %   Relies on the shell tools sed, cp and rm.
17 |     olddir = pwd;
18 |     if ~exist(opts.finetuning_dir, 'dir')
19 |         mkdir(opts.finetuning_dir);
20 |     end
21 |     % Adjust and copy proto files; rows: {placeholder, value, template, output}
22 |     n_out = int2str(num_classes+1);
23 |     n_iter = int2str(opts.finetuning_iters);
24 |     jobs = { ...
25 |         '##NUM_CLASSES##', n_out,  'train_val_template.prototxt', 'train_val.prototxt'; ...
26 |         '##NUM_CLASSES##', n_out,  'deploy_template.prototxt',    'deploy_ft.prototxt'; ...
27 |         '##MAX_ITER##',    n_iter, 'solver_template.prototxt',    'solver.prototxt'};
28 |     for j = 1:size(jobs,1)
29 |         cmd = ['sed ''s/' jobs{j,1} '/' jobs{j,2} '/g'' ''' opts.cnn_dir '/' jobs{j,3} ''' > ''' opts.finetuning_dir '/' jobs{j,4} ''''];
30 |         if 0~=system(cmd)
31 |             % Report the file that actually failed
32 |             error('Error creating %s', jobs{j,4});
33 |         end
34 |     end
35 |     if 0~=system(['cp ''' opts.mean_proto_file ''' ''' opts.finetuning_dir '/mean.binaryproto'''])
36 |         error('Error copying %s to %s/mean.binaryproto', opts.mean_proto_file, opts.finetuning_dir);
37 |     end
38 | 
39 |     % Paths are passed as arguments, not as part of the format string, so
40 |     % that '%' or '\' inside a path is printed literally.
41 |     fprintf('\n\nNow open a bash, go to %s and run:\n', opts.finetuning_dir);
42 |     fprintf('# %s train -solver=solver.prototxt -weights=''%s/model'' -gpu=%s \n', ...
43 |         opts.caffe_executable, opts.cnn_dir, int2str(opts.finetuning_gpu));
44 |     fprintf('Hit enter when training has finished!');
45 |     input('','s');
46 |     % Clean up the solver states left behind by the training run
47 |     cd(opts.finetuning_dir);
48 |     if 0~=system('rm ./*.solverstate')
49 |         warning('Did not delete any solverstate files.')
50 |     end
51 |     cd(olddir)
52 | end
53 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Part Constellation Models
 2 | 
 3 | This is the code used in our paper "Neural Activation Constellations: Unsupervised Part Model Discovery with Convolutional Networks" by Marcel Simon and Erik Rodner published at ICCV 2015. 
 4 | If you would like to refer to this work, please cite the corresponding paper:
 5 | 
 6 |     @inproceedings{Simon15:NAC,
 7 | 	author = {Marcel Simon and Erik Rodner},
 8 | 	booktitle = {International Conference on Computer Vision (ICCV)},
 9 | 	title = {Neural Activation Constellations: Unsupervised Part Model Discovery with Convolutional Networks},
10 | 	year = {2015},
11 |     }
12 | 
13 | The following steps will guide you through the usage of the code.
14 | 
15 | ## 1. Setup
16 | 1. Open Matlab and go to the folder containing this package
17 | 2. Run setup.m to download all libraries
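18 | 3. Run `git submodule update --init --recursive` in the root folder to fetch caffe_pp, then go to `lib/caffe_pp` and build it with `make`. You will need to create a `Makefile.config`; if you already have a Caffe installation, you can reuse its `Makefile.config`, BUT DO NOT USE THE EXISTING CAFFE ITSELF, as caffe_pp is a modified version.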
19 | 4. Execute `make mat` in `lib/caffe_pp`
20 | 5. Go to lib/liblinear-2.1 and make it
21 | 6. Go to lib/liblinear-2.1/matlab and make it
22 | 
23 | ## 2. Running the code
24 | 
25 | The `start.m` in the root folder of the package is all you need. You will usually want to override the paths to the data set by passing them as name-value pairs, for example `start('basedir','/path/to/dataset/')`. Open the file to see all available options. Additional parameters are passed as further name-value pairs: `start('basedir','/path/to/dataset/','cnn_dir','./cnn_finetuning/vgg19/','crop_size',224);`.
26 | 
27 | The dataset files should contain a list of absolute image paths, a list of corresponding labels starting from 1, and a list of the corresponding assignment to train and test, where 1 indicates training and 0 test. 
28 | 
29 | 
30 | imagelist.txt
31 | 
32 | ```
33 | /path/to/image1.jpg
34 | /path/to/image2.jpg
35 | /path/to/image3.jpg
36 | /path/to/image4.jpg
37 | /path/to/image5.jpg
38 | ...
39 | ```
40 | 
41 | labels.txt
42 | 
43 | ```
44 | 1
45 | 1
46 | 1
47 | 2
48 | 2
49 | 2
50 | ...
51 | ```
52 | 
53 | tr_ID.txt
54 | 
55 | ```
56 | 0
57 | 1
58 | 1
59 | 0
60 | 1
61 | 1
62 | ...
63 | ```
64 | 
65 | ## 3. Testing the models from the paper
66 | The models of the paper are available at [https://drive.google.com/file/d/0B6VgjAr4t_oTQXN2Y3VYaEMwVDA/view?usp=sharing](https://drive.google.com/file/d/0B6VgjAr4t_oTQXN2Y3VYaEMwVDA/view?usp=sharing). Download and unzip them to the root folder of the code. You can run them by executing, for example, `start('cache_dir','./cache_iccv_cub200','cnn_dir','./cnn_finetuning/vgg19/','crop_size',224,'basedir','/home/simon/Datasets/CUB_200_2011/')`.
67 | 
68 | ## License 
69 | The Part Constellation Models Framework by [Marcel Simon](http://www.inf-cv.uni-jena.de/simon.html) and [Erik Rodner](http://www.inf-cv.uni-jena.de/rodner.html) is licensed under the non-commercial [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-nc-sa/4.0/). For usage beyond the scope of this license, please contact [Marcel Simon](http://www.inf-cv.uni-jena.de/simon.html).
70 | 


--------------------------------------------------------------------------------
/setup.m:
--------------------------------------------------------------------------------
 1 | function setup()
 2 | %SETUP Download the third-party libraries needed by this package.
 3 | %   Run it from the root folder of the package: all paths are relative to
 4 | %   the current directory. The following items are fetched into ./lib unless
 5 | %   they are already present: Selective Search, liblinear, GetFullPath.m
 6 | %   and vl_argparse.m. Files listed in required_files are downloaded and
 7 | %   checked against their SHA-1 sum; the list is currently empty because
 8 | %   all of its entries are commented out. Finally the remaining manual
 9 | %   build steps are printed.
10 | %
11 | %   Relies on the shell tools wget, unzip, rm, sha1sum and awk.
12 |     %% Get libs
13 |     if ~exist('lib/SelectiveSearchCodeIJCV','file')
14 |         getlib('http://koen.me/research/downloads/SelectiveSearchCodeIJCV.zip');
15 |     else
16 |         fprintf('Selective search exists already, skipping...\n');
17 |     end
18 |     % Wildcard: match the liblinear folder whatever its name suffix is
19 |     na = dir('lib/liblinea*');
20 |     if numel(na)==0
21 |         getlib('http://www.csie.ntu.edu.tw/~cjlin/cgi-bin/liblinear.cgi?+http://www.csie.ntu.edu.tw/~cjlin/liblinear+zip');
22 |     else
23 |         fprintf('liblinear exists already, skipping...\n');
24 |     end
25 |     if ~exist('lib/GetFullPath.m','file')
26 |         getlib('http://www.mathworks.com/matlabcentral/mlc-downloads/downloads/submissions/28249/versions/8/download/zip');
27 |         % Remove the other files that were unpacked from the archive
28 |         !rm lib/GetFullPath.c lib/InstallMex.m lib/Readme.txt lib/license.txt lib/uTest_GetFullPath.m
29 |     else
30 |         fprintf('GetFullPath.m exists already, skipping...\n');
31 |     end
32 |     if ~exist('lib/vl_argparse.m','file')
33 |         !wget --no-check-certificate -O lib/vl_argparse.m https://raw.githubusercontent.com/vlfeat/matconvnet/master/matlab/vl_argparse.m
34 |     else
35 |         fprintf('vl_argparse exists already, skipping...\n');
36 |     end
37 | 
38 |     %% Get models
39 |     % Rows: {target file, download URL, expected SHA-1 sum}
40 |     required_files = {};
41 | %     required_files = [required_files;{'cnn_finetuning/googlenet/model','http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel','405fc5acd08a3bb12de8ee5e23a96bec22f08204'}];
42 | %     required_files = [required_files;{'cnn_finetuning/vgg19/model','http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel','239785e7862442717d831f682bb824055e51e9ba'}];
43 | %     required_files = [required_files;{'cnn_finetuning/caffe_reference/model','http://dl.caffe.berkeleyvision.org/bvlc_reference_caffenet.caffemodel','4c8d77deb20ea792f84eb5e6d0a11ca0a8660a46'}];
44 | 
45 |     for i=1:size(required_files,1)
46 |         if exist(required_files{i,1},'file')
47 |             % Keep an existing file only if its checksum matches
48 |             [~,sha1sum] = system(['sha1sum ' required_files{i,1} '  | awk ''{ print $1 }''']);
49 |             if strcmp(strtrim(sha1sum), required_files{i,3})
50 |                 fprintf('%s exists already, skipping...\n',required_files{i,1});
51 |                 continue
52 |             else
53 |                 fprintf('%s exists but is corrupt, downloading again...\n',required_files{i,1});
54 |             end
55 |         end
56 |         % File name and URL are quoted so the shell passes them on verbatim
57 |         if 0~=system(['wget -O ''' required_files{i,1} ''' ''' required_files{i,2} ''''])
58 |             error('Could not download file %s from %s\n',required_files{i,1},required_files{i,2});
59 |         end
60 |     end
61 | 
62 |     fprintf('\n\nSetup done, now clone caffe_pp and go to ./lib/ and compile all libraries and Matlab interfaces!\n');
63 |     fprintf('1. ''git submodule update --init --recursive'' in the main folder\n');
64 |     fprintf('2. ''make'' in ./lib/caffe_pp/\n');
65 |     fprintf('3. ''make mat'' in ./lib/caffe_pp/\n');
66 |     fprintf('4. ''make'' in ./lib/liblinear-2.1/\n');
67 |     fprintf('5. ''make'' in ./lib/liblinear-2.1/matlab/\n');
68 | end
69 | 
70 | function getlib(url)
71 | %GETLIB Download the zip archive at url and unpack it into ./lib.
72 | %   ./tmp is used as scratch folder for the download and removed again
73 | %   afterwards. An error is raised as soon as one of the steps fails.
74 |     % -p: do not complain when an aborted earlier run left ./tmp behind
75 |     system('mkdir -p tmp');
76 |     % Quote the URL: it may contain shell metacharacters such as '?'
77 |     if 0~= system(['wget -O tmp/lib.zip ''' url ''''])
78 |         error('Could not download code');
79 |     end
80 |     if 0~= system('cd lib && unzip ../tmp/lib.zip ')
81 |         error('Could not unzip');
82 |     end
83 |     if 0~= system('rm tmp/lib.zip')
84 |         error('Could not remove temporary file');
85 |     end
86 |     if 0~= system('rmdir tmp')
87 |         error('Could not remove directory tmp');
88 |     end
89 | end


--------------------------------------------------------------------------------
/cnn_finetuning/caffe_reference/deploy.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"  # deployment (inference) definition of the network; input is declared below instead of a data layer
  2 | input: "data"  # name of the input blob, consumed by conv1
  3 | input_dim: 11  # batch size -- NOTE(review): unusual value, confirm it matches the batch size the callers feed
  4 | input_dim: 3  # channels
  5 | input_dim: 227  # height
  6 | input_dim: 227  # width
  7 | layers {
  8 |   name: "conv1"
  9 |   type: CONVOLUTION
 10 |   bottom: "data"
 11 |   top: "conv1"
 12 |   convolution_param {
 13 |     num_output: 96
 14 |     kernel_size: 11
 15 |     stride: 4
 16 |   }
 17 | }
 18 | layers {
 19 |   name: "relu1"
 20 |   type: RELU
 21 |   bottom: "conv1"
 22 |   top: "conv1"
 23 | }
 24 | layers {
 25 |   name: "pool1"
 26 |   type: POOLING
 27 |   bottom: "conv1"
 28 |   top: "pool1"
 29 |   pooling_param {
 30 |     pool: MAX
 31 |     kernel_size: 3
 32 |     stride: 2
 33 |   }
 34 | }
 35 | layers {
 36 |   name: "norm1"
 37 |   type: LRN
 38 |   bottom: "pool1"
 39 |   top: "norm1"
 40 |   lrn_param {
 41 |     local_size: 5
 42 |     alpha: 0.0001
 43 |     beta: 0.75
 44 |   }
 45 | }
 46 | layers {
 47 |   name: "conv2"
 48 |   type: CONVOLUTION
 49 |   bottom: "norm1"
 50 |   top: "conv2"
 51 |   convolution_param {
 52 |     num_output: 256
 53 |     pad: 2
 54 |     kernel_size: 5
 55 |     group: 2
 56 |   }
 57 | }
 58 | layers {
 59 |   name: "relu2"
 60 |   type: RELU
 61 |   bottom: "conv2"
 62 |   top: "conv2"
 63 | }
 64 | layers {
 65 |   name: "pool2"
 66 |   type: POOLING
 67 |   bottom: "conv2"
 68 |   top: "pool2"
 69 |   pooling_param {
 70 |     pool: MAX
 71 |     kernel_size: 3
 72 |     stride: 2
 73 |   }
 74 | }
 75 | layers {
 76 |   name: "norm2"
 77 |   type: LRN
 78 |   bottom: "pool2"
 79 |   top: "norm2"
 80 |   lrn_param {
 81 |     local_size: 5
 82 |     alpha: 0.0001
 83 |     beta: 0.75
 84 |   }
 85 | }
 86 | layers {
 87 |   name: "conv3"
 88 |   type: CONVOLUTION
 89 |   bottom: "norm2"
 90 |   top: "conv3"
 91 |   convolution_param {
 92 |     num_output: 384
 93 |     pad: 1
 94 |     kernel_size: 3
 95 |   }
 96 | }
 97 | layers {
 98 |   name: "relu3"
 99 |   type: RELU
100 |   bottom: "conv3"
101 |   top: "conv3"
102 | }
103 | layers {
104 |   name: "conv4"
105 |   type: CONVOLUTION
106 |   bottom: "conv3"
107 |   top: "conv4"
108 |   convolution_param {
109 |     num_output: 384
110 |     pad: 1
111 |     kernel_size: 3
112 |     group: 2
113 |   }
114 | }
115 | layers {
116 |   name: "relu4"
117 |   type: RELU
118 |   bottom: "conv4"
119 |   top: "conv4"
120 | }
121 | layers {
122 |   name: "conv5"
123 |   type: CONVOLUTION
124 |   bottom: "conv4"
125 |   top: "conv5"
126 |   convolution_param {
127 |     num_output: 256
128 |     pad: 1
129 |     kernel_size: 3
130 |     group: 2
131 |   }
132 | }
133 | layers {
134 |   name: "relu5"
135 |   type: RELU
136 |   bottom: "conv5"
137 |   top: "conv5"
138 | }
139 | layers {
140 |   name: "pool5"
141 |   type: POOLING
142 |   bottom: "conv5"
143 |   top: "pool5"
144 |   pooling_param {
145 |     pool: MAX
146 |     kernel_size: 3
147 |     stride: 2
148 |   }
149 | }
150 | layers {
151 |   name: "fc6"
152 |   type: INNER_PRODUCT
153 |   bottom: "pool5"
154 |   top: "fc6"
155 |   inner_product_param {
156 |     num_output: 4096
157 |   }
158 | }
159 | layers {
160 |   name: "relu6"
161 |   type: RELU
162 |   bottom: "fc6"
163 |   top: "fc6"
164 | }
165 | layers {
166 |   name: "drop6"
167 |   type: DROPOUT
168 |   bottom: "fc6"
169 |   top: "fc6"
170 |   dropout_param {
171 |     dropout_ratio: 0.5
172 |   }
173 | }
174 | layers {
175 |   name: "fc7"
176 |   type: INNER_PRODUCT
177 |   bottom: "fc6"
178 |   top: "fc7"
179 |   inner_product_param {
180 |     num_output: 4096
181 |   }
182 | }
183 | layers {
184 |   name: "relu7"
185 |   type: RELU
186 |   bottom: "fc7"
187 |   top: "fc7"
188 | }
189 | layers {
190 |   name: "drop7"
191 |   type: DROPOUT
192 |   bottom: "fc7"
193 |   top: "fc7"
194 |   dropout_param {
195 |     dropout_ratio: 0.5
196 |   }
197 | }
198 | layers {
199 |   name: "fc8"
200 |   type: INNER_PRODUCT
201 |   bottom: "fc7"
202 |   top: "fc8"
203 |   inner_product_param {
204 |     num_output: 1000
205 |   }
206 | }
207 | layers {
208 |   name: "prob"
209 |   type: SOFTMAX
210 |   bottom: "fc8"
211 |   top: "prob"
212 | }
213 | 


--------------------------------------------------------------------------------
/cnn_finetuning/caffe_reference/deploy_template.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"  # template of the deployment definition; finetuning.m turns it into deploy_ft.prototxt
  2 | input: "data"  # name of the input blob, consumed by conv1
  3 | input_dim: 11  # batch size -- NOTE(review): unusual value, confirm it matches the batch size the callers feed
  4 | input_dim: 3  # channels
  5 | input_dim: 227  # height
  6 | input_dim: 227  # width
  7 | layers {
  8 |   name: "conv1"
  9 |   type: CONVOLUTION
 10 |   bottom: "data"
 11 |   top: "conv1"
 12 |   convolution_param {
 13 |     num_output: 96
 14 |     kernel_size: 11
 15 |     stride: 4
 16 |   }
 17 | }
 18 | layers {
 19 |   name: "relu1"
 20 |   type: RELU
 21 |   bottom: "conv1"
 22 |   top: "conv1"
 23 | }
 24 | layers {
 25 |   name: "pool1"
 26 |   type: POOLING
 27 |   bottom: "conv1"
 28 |   top: "pool1"
 29 |   pooling_param {
 30 |     pool: MAX
 31 |     kernel_size: 3
 32 |     stride: 2
 33 |   }
 34 | }
 35 | layers {
 36 |   name: "norm1"
 37 |   type: LRN
 38 |   bottom: "pool1"
 39 |   top: "norm1"
 40 |   lrn_param {
 41 |     local_size: 5
 42 |     alpha: 0.0001
 43 |     beta: 0.75
 44 |   }
 45 | }
 46 | layers {
 47 |   name: "conv2"
 48 |   type: CONVOLUTION
 49 |   bottom: "norm1"
 50 |   top: "conv2"
 51 |   convolution_param {
 52 |     num_output: 256
 53 |     pad: 2
 54 |     kernel_size: 5
 55 |     group: 2
 56 |   }
 57 | }
 58 | layers {
 59 |   name: "relu2"
 60 |   type: RELU
 61 |   bottom: "conv2"
 62 |   top: "conv2"
 63 | }
 64 | layers {
 65 |   name: "pool2"
 66 |   type: POOLING
 67 |   bottom: "conv2"
 68 |   top: "pool2"
 69 |   pooling_param {
 70 |     pool: MAX
 71 |     kernel_size: 3
 72 |     stride: 2
 73 |   }
 74 | }
 75 | layers {
 76 |   name: "norm2"
 77 |   type: LRN
 78 |   bottom: "pool2"
 79 |   top: "norm2"
 80 |   lrn_param {
 81 |     local_size: 5
 82 |     alpha: 0.0001
 83 |     beta: 0.75
 84 |   }
 85 | }
 86 | layers {
 87 |   name: "conv3"
 88 |   type: CONVOLUTION
 89 |   bottom: "norm2"
 90 |   top: "conv3"
 91 |   convolution_param {
 92 |     num_output: 384
 93 |     pad: 1
 94 |     kernel_size: 3
 95 |   }
 96 | }
 97 | layers {
 98 |   name: "relu3"
 99 |   type: RELU
100 |   bottom: "conv3"
101 |   top: "conv3"
102 | }
103 | layers {
104 |   name: "conv4"
105 |   type: CONVOLUTION
106 |   bottom: "conv3"
107 |   top: "conv4"
108 |   convolution_param {
109 |     num_output: 384
110 |     pad: 1
111 |     kernel_size: 3
112 |     group: 2
113 |   }
114 | }
115 | layers {
116 |   name: "relu4"
117 |   type: RELU
118 |   bottom: "conv4"
119 |   top: "conv4"
120 | }
121 | layers {
122 |   name: "conv5"
123 |   type: CONVOLUTION
124 |   bottom: "conv4"
125 |   top: "conv5"
126 |   convolution_param {
127 |     num_output: 256
128 |     pad: 1
129 |     kernel_size: 3
130 |     group: 2
131 |   }
132 | }
133 | layers {
134 |   name: "relu5"
135 |   type: RELU
136 |   bottom: "conv5"
137 |   top: "conv5"
138 | }
139 | layers {
140 |   name: "pool5"
141 |   type: POOLING
142 |   bottom: "conv5"
143 |   top: "pool5"
144 |   pooling_param {
145 |     pool: MAX
146 |     kernel_size: 3
147 |     stride: 2
148 |   }
149 | }
150 | layers {
151 |   name: "fc6"
152 |   type: INNER_PRODUCT
153 |   bottom: "pool5"
154 |   top: "fc6"
155 |   inner_product_param {
156 |     num_output: 4096
157 |   }
158 | }
159 | layers {
160 |   name: "relu6"
161 |   type: RELU
162 |   bottom: "fc6"
163 |   top: "fc6"
164 | }
165 | layers {
166 |   name: "drop6"
167 |   type: DROPOUT
168 |   bottom: "fc6"
169 |   top: "fc6"
170 |   dropout_param {
171 |     dropout_ratio: 0.5
172 |   }
173 | }
174 | layers {
175 |   name: "fc7"
176 |   type: INNER_PRODUCT
177 |   bottom: "fc6"
178 |   top: "fc7"
179 |   inner_product_param {
180 |     num_output: 4096
181 |   }
182 | }
183 | layers {
184 |   name: "relu7"
185 |   type: RELU
186 |   bottom: "fc7"
187 |   top: "fc7"
188 | }
189 | layers {
190 |   name: "drop7"
191 |   type: DROPOUT
192 |   bottom: "fc7"
193 |   top: "fc7"
194 |   dropout_param {
195 |     dropout_ratio: 0.5
196 |   }
197 | }
198 | layers {  # classification layer of the fine-tuned network
199 |   name: "fc8_ft"  # differs from "fc8" in deploy.prototxt; presumably so the pretrained 1000-way weights are not loaded into this layer -- confirm
200 |   type: INNER_PRODUCT
201 |   bottom: "fc7"
202 |   top: "fc8_ft"
203 |   inner_product_param {
204 |     num_output: ##NUM_CLASSES##  # placeholder, replaced by finetuning.m with num_classes+1
205 |   }
206 | }
207 | layers {
208 |   name: "prob"
209 |   type: SOFTMAX
210 |   bottom: "fc8_ft"
211 |   top: "prob"
212 | }
213 | 


--------------------------------------------------------------------------------
/part_generation/fitGMMToGradient.m:
--------------------------------------------------------------------------------
  1 | function [ x,y ] = fitGMMToGradient(imagepath, gmap,bbox, num_clusters )
  2 | %fitGMMToGradient:
  3 | %   bbox = [col row width height]
  4 | 
  5 | 
  6 | %     d=load(gradient_path);
  7 | %     gmap=d.gradient_map;
  8 |     img=zeros(227,227,3);%imread(imagepath);
  9 | % %     set gradient outside of bounding box to 0
 10 | %     bbox_orig=bbox;
 11 | %     rect_size=min(size(img(:,:,1)));
 12 | %     ratio = max(227.0 / size(img,1), 227.0 / size(img,2));
 13 | %     bbox=int32(floor(bbox*ratio));
 14 | %     h_offset = ceil(size(img,1)*ratio - 227) / 2;
 15 | %     w_offset = ceil(size(img,2)*ratio - 227) / 2;
 16 | %     bbox(1)=max(w_offset+1,bbox(1));
 17 | %     bbox(2)=max(h_offset+1,bbox(2));
 18 | %     bbox(3)=bbox(3)-max(w_offset-bbox(1),0);
 19 | %     bbox(4)=bbox(4)-max(h_offset-bbox(2),0);
 20 | %     bbox(1)=bbox(1)-w_offset+1;
 21 | %     bbox(2)=bbox(2)-h_offset+1;
 22 | %     bbox(3)=min(227-bbox(1),bbox(3));
 23 | %     bbox(4)=min(227-bbox(2),bbox(4));
 24 | %     mask = ones(size(gmap));
 25 | %     mask(bbox(2):bbox(2)+bbox(4),bbox(1):bbox(1)+bbox(3))=0;
 26 | %     gmap(logical(mask))=0;
 27 | 
 28 |     if (false)
 29 |         % simplify calculation
 30 |         gmap(gmap<quantile(gmap(:),0.9))=0;
 31 |         [rows,cols,vals]=find(gmap);
 32 |         if (size(vals,1)<2)
 33 |             x=NaN;
 34 |             y=NaN;
 35 |             return;
 36 |         end
 37 |         [~,model,~]=weightedemgm([rows cols]',num_clusters,[vals],3,100);
 38 |         % Reorder accoring to weight
 39 |         [~,argidx]=sort(-model.weight);
 40 |         model.mu=model.mu(:,argidx);
 41 |         model.Sigma=model.Sigma(:,:,argidx);
 42 |         model.weight=model.weight(argidx);
 43 | 
 44 |         est_x=model.mu(2,1);
 45 |         est_y=model.mu(1,1);
 46 |     elseif (false)
 47 |         [est_y,est_x]=find(max(gmap(:))==gmap);
 48 |         est_y=est_y(1);
 49 |         est_x=est_x(1);
 50 |     elseif (true)
 51 |         if any(gmap(:)~=0)
 52 |             % Create the gaussian filter with hsize = [5 5] and sigma = 2
 53 |             G = fspecial('gaussian',[20 20],3);
 54 |             % Filter it
 55 |             gmap = imfilter(gmap,G,'same');
 56 |             [est_y,est_x]=find(max(gmap(:))==gmap,1,'last');
 57 |             est_y=est_y(1);
 58 |             est_x=est_x(1);
 59 |         else
 60 |             est_y=-1;
 61 |             est_x=-1;
 62 |         end
 63 |     end
 64 |     
 65 |     
 66 |     
 67 |     x=est_x;
 68 |     y=est_y;
 69 |     % calc ratio 
 70 |     ratio_x = 227.0 / size(img,2);
 71 |     ratio_y = 227.0 / size(img,1);
 72 |     % we add 0.5 to the converted result to avoid numerical problems.
 73 | %     h_offset = (size(img,1)*ratio - 227) / 2;
 74 | %     w_offset = (size(img,2)*ratio - 227) / 2;
 75 | %     x=est_x+w_offset;
 76 | %     y=est_y+h_offset;
 77 |     x=int32(x/ratio_x);
 78 |     y=int32(y/ratio_y);
 79 |     
 80 | %     % Display result
 81 | % %     show assignment to cluster
 82 | %     figure;
 83 | % %     map=double(full(sparse(rows,cols,labels',size(gmap,1),size(gmap,2))));
 84 | %     gmap=gmap/max(gmap(:));
 85 | %     imshow(gmap);
 86 | %     hold on;
 87 | %     plot(est_x,est_y,'x','MarkerSize',20,'LineWidth',3);
 88 | %     hold off;
 89 | %     ginput(1);
 90 | %     close all;
 91 | 
 92 | 
 93 | %     figure;
 94 | %     gmap=gmap/max(gmap(:));
 95 | %     imshow(gmap);%<quantile(gmap(:),0.9)
 96 | %     colors=['b','r','g'];
 97 | %     for i=1:size(model.mu,2)
 98 | %         h=plot_gaussian_ellipsoid(flipud(model.mu(:,i)),rot90(model.Sigma(:,:,i),2));
 99 | %         set(h,'color',colors(i));
100 | %     end
101 | %     ginput(1);
102 | %     close
103 |     
104 |     
105 | %     figure;
106 | %     img_cropped=imread(sprintf('/home/simon/tmp/cub200-maps/%s/inputimage.jpg',imagepath));
107 | %     imshow(img_cropped);
108 | %     hold all;
109 | %     plot(est_x,est_y,'x','MarkerSize',20,'LineWidth',3);
110 | %     figure;
111 | %     imshow(img);
112 | %     hold all;
113 | %     plot(x,y,'x','MarkerSize',20,'LineWidth',3);
114 | end
115 | 
116 | 
function [bbox]=transformBbox(img,bbox,target_size)
    % TRANSFORMBBOX Convert a box from normalized-crop coordinates to img coordinates.
    %
    % The arithmetic undoes the following mapping: scale img so that its
    % shorter side equals target_size, then cut the central
    % target_size x target_size window out of the scaled image.
    %
    %   img         - array; only size(img,1) (rows) and size(img,2) (columns)
    %                 are read.
    %   bbox        - numeric vector. Element 1 is shifted by the horizontal
    %                 crop margin and element 2 by the vertical crop margin;
    %                 afterwards every element is divided by the scale factor.
    %                 Presumably [x y w h] - TODO confirm against the caller.
    %   target_size - optional side length of the normalized image
    %                 (default 227, the value that was hard-coded before).
    %
    % Returns bbox converted to int32; int32() rounds to the nearest integer,
    % so no explicit +0.5 is added before the conversion.
    if nargin < 3 || isempty(target_size)
        target_size = 227.0;
    end

    img_h = size(img,1);
    img_w = size(img,2);

    % The larger of the two factors is the one that brings the shorter image
    % side to target_size.
    ratio = max(target_size / img_h, target_size / img_w);

    % Half of what the scaled image exceeds the target window by, per axis.
    h_offset = (img_h*ratio - target_size) / 2;
    w_offset = (img_w*ratio - target_size) / 2;

    bbox(1)=bbox(1)+w_offset;
    bbox(2)=bbox(2)+h_offset;
    bbox=int32(bbox/ratio);
end
127 | 


--------------------------------------------------------------------------------
/cnn_finetuning/caffe_reference/train_val_template.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"
  2 | layers {
  3 |   name: "data"
  4 |   type: WINDOW_DATA
  5 |   top: "data"
  6 |   top: "label"
  7 |   window_data_param {
  8 |     source: "../../windows_train.txt"
  9 |     batch_size: 256
 10 |     fg_threshold: 0.5
 11 |     bg_threshold: 0.5
 12 |     fg_fraction: 1.00
 13 |     context_pad: 16
 14 |     crop_mode: "square"
 15 |     cache_images: true
 16 |   }
 17 |   transform_param {
 18 |     mirror: true
 19 |     crop_size: 227
 20 |     mean_file: "mean.binaryproto"
 21 |   }
 22 |   include: { phase: TRAIN }
 23 | }
 24 | layers {
 25 |   name: "data"
 26 |   type: WINDOW_DATA
 27 |   top: "data"
 28 |   top: "label"
 29 |   window_data_param {
 30 |     source: "../../windows_val.txt"
 31 |     batch_size: 50
 32 |     fg_threshold: 0.5
 33 |     bg_threshold: 0.5
 34 |     fg_fraction: 1.00
 35 |     context_pad: 16
 36 |     crop_mode: "square"
 37 |   }
 38 |   transform_param {
 39 |     mirror: true
 40 |     crop_size: 227
 41 |     mean_file: "mean.binaryproto"
 42 |   }
 43 |   include: { phase: TEST }
 44 | }
 45 | layers {
 46 |   name: "conv1"
 47 |   type: CONVOLUTION
 48 |   bottom: "data"
 49 |   top: "conv1"
 50 |   blobs_lr: 1
 51 |   blobs_lr: 2
 52 |   weight_decay: 1
 53 |   weight_decay: 0
 54 |   convolution_param {
 55 |     num_output: 96
 56 |     kernel_size: 11
 57 |     stride: 4
 58 |     weight_filler {
 59 |       type: "gaussian"
 60 |       std: 0.01
 61 |     }
 62 |     bias_filler {
 63 |       type: "constant"
 64 |       value: 0
 65 |     }
 66 |   }
 67 | }
 68 | layers {
 69 |   name: "relu1"
 70 |   type: RELU
 71 |   bottom: "conv1"
 72 |   top: "conv1"
 73 | }
 74 | layers {
 75 |   name: "pool1"
 76 |   type: POOLING
 77 |   bottom: "conv1"
 78 |   top: "pool1"
 79 |   pooling_param {
 80 |     pool: MAX
 81 |     kernel_size: 3
 82 |     stride: 2
 83 |   }
 84 | }
 85 | layers {
 86 |   name: "norm1"
 87 |   type: LRN
 88 |   bottom: "pool1"
 89 |   top: "norm1"
 90 |   lrn_param {
 91 |     local_size: 5
 92 |     alpha: 0.0001
 93 |     beta: 0.75
 94 |   }
 95 | }
 96 | layers {
 97 |   name: "conv2"
 98 |   type: CONVOLUTION
 99 |   bottom: "norm1"
100 |   top: "conv2"
101 |   blobs_lr: 1
102 |   blobs_lr: 2
103 |   weight_decay: 1
104 |   weight_decay: 0
105 |   convolution_param {
106 |     num_output: 256
107 |     pad: 2
108 |     kernel_size: 5
109 |     group: 2
110 |     weight_filler {
111 |       type: "gaussian"
112 |       std: 0.01
113 |     }
114 |     bias_filler {
115 |       type: "constant"
116 |       value: 1
117 |     }
118 |   }
119 | }
120 | layers {
121 |   name: "relu2"
122 |   type: RELU
123 |   bottom: "conv2"
124 |   top: "conv2"
125 | }
126 | layers {
127 |   name: "pool2"
128 |   type: POOLING
129 |   bottom: "conv2"
130 |   top: "pool2"
131 |   pooling_param {
132 |     pool: MAX
133 |     kernel_size: 3
134 |     stride: 2
135 |   }
136 | }
137 | layers {
138 |   name: "norm2"
139 |   type: LRN
140 |   bottom: "pool2"
141 |   top: "norm2"
142 |   lrn_param {
143 |     local_size: 5
144 |     alpha: 0.0001
145 |     beta: 0.75
146 |   }
147 | }
148 | layers {
149 |   name: "conv3"
150 |   type: CONVOLUTION
151 |   bottom: "norm2"
152 |   top: "conv3"
153 |   blobs_lr: 1
154 |   blobs_lr: 2
155 |   weight_decay: 1
156 |   weight_decay: 0
157 |   convolution_param {
158 |     num_output: 384
159 |     pad: 1
160 |     kernel_size: 3
161 |     weight_filler {
162 |       type: "gaussian"
163 |       std: 0.01
164 |     }
165 |     bias_filler {
166 |       type: "constant"
167 |       value: 0
168 |     }
169 |   }
170 | }
171 | layers {
172 |   name: "relu3"
173 |   type: RELU
174 |   bottom: "conv3"
175 |   top: "conv3"
176 | }
177 | layers {
178 |   name: "conv4"
179 |   type: CONVOLUTION
180 |   bottom: "conv3"
181 |   top: "conv4"
182 |   blobs_lr: 1
183 |   blobs_lr: 2
184 |   weight_decay: 1
185 |   weight_decay: 0
186 |   convolution_param {
187 |     num_output: 384
188 |     pad: 1
189 |     kernel_size: 3
190 |     group: 2
191 |     weight_filler {
192 |       type: "gaussian"
193 |       std: 0.01
194 |     }
195 |     bias_filler {
196 |       type: "constant"
197 |       value: 1
198 |     }
199 |   }
200 | }
201 | layers {
202 |   name: "relu4"
203 |   type: RELU
204 |   bottom: "conv4"
205 |   top: "conv4"
206 | }
207 | layers {
208 |   name: "conv5"
209 |   type: CONVOLUTION
210 |   bottom: "conv4"
211 |   top: "conv5"
212 |   blobs_lr: 1
213 |   blobs_lr: 2
214 |   weight_decay: 1
215 |   weight_decay: 0
216 |   convolution_param {
217 |     num_output: 256
218 |     pad: 1
219 |     kernel_size: 3
220 |     group: 2
221 |     weight_filler {
222 |       type: "gaussian"
223 |       std: 0.01
224 |     }
225 |     bias_filler {
226 |       type: "constant"
227 |       value: 1
228 |     }
229 |   }
230 | }
231 | layers {
232 |   name: "relu5"
233 |   type: RELU
234 |   bottom: "conv5"
235 |   top: "conv5"
236 | }
237 | layers {
238 |   name: "pool5"
239 |   type: POOLING
240 |   bottom: "conv5"
241 |   top: "pool5"
242 |   pooling_param {
243 |     pool: MAX
244 |     kernel_size: 3
245 |     stride: 2
246 |   }
247 | }
248 | layers {
249 |   name: "fc6"
250 |   type: INNER_PRODUCT
251 |   bottom: "pool5"
252 |   top: "fc6"
253 |   blobs_lr: 1
254 |   blobs_lr: 2
255 |   weight_decay: 1
256 |   weight_decay: 0
257 |   inner_product_param {
258 |     num_output: 4096
259 |     weight_filler {
260 |       type: "gaussian"
261 |       std: 0.005
262 |     }
263 |     bias_filler {
264 |       type: "constant"
265 |       value: 1
266 |     }
267 |   }
268 | }
269 | layers {
270 |   name: "relu6"
271 |   type: RELU
272 |   bottom: "fc6"
273 |   top: "fc6"
274 | }
275 | layers {
276 |   name: "drop6"
277 |   type: DROPOUT
278 |   bottom: "fc6"
279 |   top: "fc6"
280 |   dropout_param {
281 |     dropout_ratio: 0.5
282 |   }
283 | }
284 | layers {
285 |   name: "fc7"
286 |   type: INNER_PRODUCT
287 |   bottom: "fc6"
288 |   top: "fc7"
289 |   blobs_lr: 1
290 |   blobs_lr: 2
291 |   weight_decay: 1
292 |   weight_decay: 0
293 |   inner_product_param {
294 |     num_output: 4096
295 |     weight_filler {
296 |       type: "gaussian"
297 |       std: 0.005
298 |     }
299 |     bias_filler {
300 |       type: "constant"
301 |       value: 1
302 |     }
303 |   }
304 | }
305 | layers {
306 |   name: "relu7"
307 |   type: RELU
308 |   bottom: "fc7"
309 |   top: "fc7"
310 | }
311 | layers {
312 |   name: "drop7"
313 |   type: DROPOUT
314 |   bottom: "fc7"
315 |   top: "fc7"
316 |   dropout_param {
317 |     dropout_ratio: 0.5
318 |   }
319 | }
320 | layers {
321 |   name: "fc8_ft"
322 |   type: INNER_PRODUCT
323 |   bottom: "fc7"
324 |   top: "fc8_ft"
325 |   blobs_lr: 10
326 |   blobs_lr: 20
327 |   weight_decay: 1
328 |   weight_decay: 0
329 |   inner_product_param {
330 |     num_output: ##NUM_CLASSES##
331 |     weight_filler {
332 |       type: "gaussian"
333 |       std: 0.01
334 |     }
335 |     bias_filler {
336 |       type: "constant"
337 |       value: 0
338 |     }
339 |   }
340 | }
341 | layers {
342 |   name: "loss"
343 |   type: SOFTMAX_LOSS
344 |   bottom: "fc8_ft"
345 |   bottom: "label"
346 |   top: "loss"
347 | }
348 | layers {
349 |   name: "accuracy"
350 |   type: ACCURACY
351 |   bottom: "fc8_ft"
352 |   bottom: "label"
353 |   top: "accuracy"
354 |   include { phase: TEST }
355 | }
356 | 


--------------------------------------------------------------------------------
/cnn_finetuning/vgg19/deploy.prototxt:
--------------------------------------------------------------------------------
  1 | name: "VGG_ILSVRC_19_layers"
  2 | input: "data"
  3 | input_dim: 11
  4 | input_dim: 3
  5 | input_dim: 224
  6 | input_dim: 224
  7 | layers {
  8 |   bottom: "data"
  9 |   top: "conv1_1"
 10 |   name: "conv1_1"
 11 |   type: CONVOLUTION
 12 |   convolution_param {
 13 |     num_output: 64
 14 |     pad: 1
 15 |     kernel_size: 3
 16 |   }
 17 | }
 18 | layers {
 19 |   bottom: "conv1_1"
 20 |   top: "conv1_1"
 21 |   name: "relu1_1"
 22 |   type: RELU
 23 | }
 24 | layers {
 25 |   bottom: "conv1_1"
 26 |   top: "conv1_2"
 27 |   name: "conv1_2"
 28 |   type: CONVOLUTION
 29 |   convolution_param {
 30 |     num_output: 64
 31 |     pad: 1
 32 |     kernel_size: 3
 33 |   }
 34 | }
 35 | layers {
 36 |   bottom: "conv1_2"
 37 |   top: "conv1_2"
 38 |   name: "relu1_2"
 39 |   type: RELU
 40 | }
 41 | layers {
 42 |   bottom: "conv1_2"
 43 |   top: "pool1"
 44 |   name: "pool1"
 45 |   type: POOLING
 46 |   pooling_param {
 47 |     pool: MAX
 48 |     kernel_size: 2
 49 |     stride: 2
 50 |   }
 51 | }
 52 | layers {
 53 |   bottom: "pool1"
 54 |   top: "conv2_1"
 55 |   name: "conv2_1"
 56 |   type: CONVOLUTION
 57 |   convolution_param {
 58 |     num_output: 128
 59 |     pad: 1
 60 |     kernel_size: 3
 61 |   }
 62 | }
 63 | layers {
 64 |   bottom: "conv2_1"
 65 |   top: "conv2_1"
 66 |   name: "relu2_1"
 67 |   type: RELU
 68 | }
 69 | layers {
 70 |   bottom: "conv2_1"
 71 |   top: "conv2_2"
 72 |   name: "conv2_2"
 73 |   type: CONVOLUTION
 74 |   convolution_param {
 75 |     num_output: 128
 76 |     pad: 1
 77 |     kernel_size: 3
 78 |   }
 79 | }
 80 | layers {
 81 |   bottom: "conv2_2"
 82 |   top: "conv2_2"
 83 |   name: "relu2_2"
 84 |   type: RELU
 85 | }
 86 | layers {
 87 |   bottom: "conv2_2"
 88 |   top: "pool2"
 89 |   name: "pool2"
 90 |   type: POOLING
 91 |   pooling_param {
 92 |     pool: MAX
 93 |     kernel_size: 2
 94 |     stride: 2
 95 |   }
 96 | }
 97 | layers {
 98 |   bottom: "pool2"
 99 |   top: "conv3_1"
100 |   name: "conv3_1"
101 |   type: CONVOLUTION
102 |   convolution_param {
103 |     num_output: 256
104 |     pad: 1
105 |     kernel_size: 3
106 |   }
107 | }
108 | layers {
109 |   bottom: "conv3_1"
110 |   top: "conv3_1"
111 |   name: "relu3_1"
112 |   type: RELU
113 | }
114 | layers {
115 |   bottom: "conv3_1"
116 |   top: "conv3_2"
117 |   name: "conv3_2"
118 |   type: CONVOLUTION
119 |   convolution_param {
120 |     num_output: 256
121 |     pad: 1
122 |     kernel_size: 3
123 |   }
124 | }
125 | layers {
126 |   bottom: "conv3_2"
127 |   top: "conv3_2"
128 |   name: "relu3_2"
129 |   type: RELU
130 | }
131 | layers {
132 |   bottom: "conv3_2"
133 |   top: "conv3_3"
134 |   name: "conv3_3"
135 |   type: CONVOLUTION
136 |   convolution_param {
137 |     num_output: 256
138 |     pad: 1
139 |     kernel_size: 3
140 |   }
141 | }
142 | layers {
143 |   bottom: "conv3_3"
144 |   top: "conv3_3"
145 |   name: "relu3_3"
146 |   type: RELU
147 | }
148 | layers {
149 |   bottom: "conv3_3"
150 |   top: "conv3_4"
151 |   name: "conv3_4"
152 |   type: CONVOLUTION
153 |   convolution_param {
154 |     num_output: 256
155 |     pad: 1
156 |     kernel_size: 3
157 |   }
158 | }
159 | layers {
160 |   bottom: "conv3_4"
161 |   top: "conv3_4"
162 |   name: "relu3_4"
163 |   type: RELU
164 | }
165 | layers {
166 |   bottom: "conv3_4"
167 |   top: "pool3"
168 |   name: "pool3"
169 |   type: POOLING
170 |   pooling_param {
171 |     pool: MAX
172 |     kernel_size: 2
173 |     stride: 2
174 |   }
175 | }
176 | layers {
177 |   bottom: "pool3"
178 |   top: "conv4_1"
179 |   name: "conv4_1"
180 |   type: CONVOLUTION
181 |   convolution_param {
182 |     num_output: 512
183 |     pad: 1
184 |     kernel_size: 3
185 |   }
186 | }
187 | layers {
188 |   bottom: "conv4_1"
189 |   top: "conv4_1"
190 |   name: "relu4_1"
191 |   type: RELU
192 | }
193 | layers {
194 |   bottom: "conv4_1"
195 |   top: "conv4_2"
196 |   name: "conv4_2"
197 |   type: CONVOLUTION
198 |   convolution_param {
199 |     num_output: 512
200 |     pad: 1
201 |     kernel_size: 3
202 |   }
203 | }
204 | layers {
205 |   bottom: "conv4_2"
206 |   top: "conv4_2"
207 |   name: "relu4_2"
208 |   type: RELU
209 | }
210 | layers {
211 |   bottom: "conv4_2"
212 |   top: "conv4_3"
213 |   name: "conv4_3"
214 |   type: CONVOLUTION
215 |   convolution_param {
216 |     num_output: 512
217 |     pad: 1
218 |     kernel_size: 3
219 |   }
220 | }
221 | layers {
222 |   bottom: "conv4_3"
223 |   top: "conv4_3"
224 |   name: "relu4_3"
225 |   type: RELU
226 | }
227 | layers {
228 |   bottom: "conv4_3"
229 |   top: "conv4_4"
230 |   name: "conv4_4"
231 |   type: CONVOLUTION
232 |   convolution_param {
233 |     num_output: 512
234 |     pad: 1
235 |     kernel_size: 3
236 |   }
237 | }
238 | layers {
239 |   bottom: "conv4_4"
240 |   top: "conv4_4"
241 |   name: "relu4_4"
242 |   type: RELU
243 | }
244 | layers {
245 |   bottom: "conv4_4"
246 |   top: "pool4"
247 |   name: "pool4"
248 |   type: POOLING
249 |   pooling_param {
250 |     pool: MAX
251 |     kernel_size: 2
252 |     stride: 2
253 |   }
254 | }
255 | layers {
256 |   bottom: "pool4"
257 |   top: "conv5_1"
258 |   name: "conv5_1"
259 |   type: CONVOLUTION
260 |   convolution_param {
261 |     num_output: 512
262 |     pad: 1
263 |     kernel_size: 3
264 |   }
265 | }
266 | layers {
267 |   bottom: "conv5_1"
268 |   top: "conv5_1"
269 |   name: "relu5_1"
270 |   type: RELU
271 | }
272 | layers {
273 |   bottom: "conv5_1"
274 |   top: "conv5_2"
275 |   name: "conv5_2"
276 |   type: CONVOLUTION
277 |   convolution_param {
278 |     num_output: 512
279 |     pad: 1
280 |     kernel_size: 3
281 |   }
282 | }
283 | layers {
284 |   bottom: "conv5_2"
285 |   top: "conv5_2"
286 |   name: "relu5_2"
287 |   type: RELU
288 | }
289 | layers {
290 |   bottom: "conv5_2"
291 |   top: "conv5_3"
292 |   name: "conv5_3"
293 |   type: CONVOLUTION
294 |   convolution_param {
295 |     num_output: 512
296 |     pad: 1
297 |     kernel_size: 3
298 |   }
299 | }
300 | layers {
301 |   bottom: "conv5_3"
302 |   top: "conv5_3"
303 |   name: "relu5_3"
304 |   type: RELU
305 | }
306 | layers {
307 |   bottom: "conv5_3"
308 |   top: "conv5_4"
309 |   name: "conv5_4"
310 |   type: CONVOLUTION
311 |   convolution_param {
312 |     num_output: 512
313 |     pad: 1
314 |     kernel_size: 3
315 |   }
316 | }
317 | layers {
318 |   bottom: "conv5_4"
319 |   top: "conv5_4"
320 |   name: "relu5_4"
321 |   type: RELU
322 | }
323 | layers {
324 |   bottom: "conv5_4"
325 |   top: "pool5"
326 |   name: "pool5"
327 |   type: POOLING
328 |   pooling_param {
329 |     pool: MAX
330 |     kernel_size: 2
331 |     stride: 2
332 |   }
333 | }
334 | layers {
335 |   bottom: "pool5"
336 |   top: "fc6"
337 |   name: "fc6"
338 |   type: INNER_PRODUCT
339 |   inner_product_param {
340 |     num_output: 4096
341 |   }
342 | }
343 | layers {
344 |   bottom: "fc6"
345 |   top: "fc6"
346 |   name: "relu6"
347 |   type: RELU
348 | }
349 | layers {
350 |   bottom: "fc6"
351 |   top: "fc6"
352 |   name: "drop6"
353 |   type: DROPOUT
354 |   dropout_param {
355 |     dropout_ratio: 0.5
356 |   }
357 | }
358 | layers {
359 |   bottom: "fc6"
360 |   top: "fc7"
361 |   name: "fc7"
362 |   type: INNER_PRODUCT
363 |   inner_product_param {
364 |     num_output: 4096
365 |   }
366 | }
367 | layers {
368 |   bottom: "fc7"
369 |   top: "fc7"
370 |   name: "relu7"
371 |   type: RELU
372 | }
373 | layers {
374 |   bottom: "fc7"
375 |   top: "fc7"
376 |   name: "drop7"
377 |   type: DROPOUT
378 |   dropout_param {
379 |     dropout_ratio: 0.5
380 |   }
381 | }
382 | layers {
383 |   bottom: "fc7"
384 |   top: "fc8"
385 |   name: "fc8"
386 |   type: INNER_PRODUCT
387 |   inner_product_param {
388 |     num_output: 1000
389 |   }
390 | }
391 | layers {
392 |   bottom: "fc8"
393 |   top: "prob"
394 |   name: "prob"
395 |   type: SOFTMAX
396 | }
397 | 


--------------------------------------------------------------------------------
/cnn_finetuning/vgg19/deploy_template.prototxt:
--------------------------------------------------------------------------------
  1 | name: "VGG_ILSVRC_19_layers"
  2 | input: "data"
  3 | input_dim: 11
  4 | input_dim: 3
  5 | input_dim: 224
  6 | input_dim: 224
  7 | layers {
  8 |   bottom: "data"
  9 |   top: "conv1_1"
 10 |   name: "conv1_1"
 11 |   type: CONVOLUTION
 12 |   convolution_param {
 13 |     num_output: 64
 14 |     pad: 1
 15 |     kernel_size: 3
 16 |   }
 17 | }
 18 | layers {
 19 |   bottom: "conv1_1"
 20 |   top: "conv1_1"
 21 |   name: "relu1_1"
 22 |   type: RELU
 23 | }
 24 | layers {
 25 |   bottom: "conv1_1"
 26 |   top: "conv1_2"
 27 |   name: "conv1_2"
 28 |   type: CONVOLUTION
 29 |   convolution_param {
 30 |     num_output: 64
 31 |     pad: 1
 32 |     kernel_size: 3
 33 |   }
 34 | }
 35 | layers {
 36 |   bottom: "conv1_2"
 37 |   top: "conv1_2"
 38 |   name: "relu1_2"
 39 |   type: RELU
 40 | }
 41 | layers {
 42 |   bottom: "conv1_2"
 43 |   top: "pool1"
 44 |   name: "pool1"
 45 |   type: POOLING
 46 |   pooling_param {
 47 |     pool: MAX
 48 |     kernel_size: 2
 49 |     stride: 2
 50 |   }
 51 | }
 52 | layers {
 53 |   bottom: "pool1"
 54 |   top: "conv2_1"
 55 |   name: "conv2_1"
 56 |   type: CONVOLUTION
 57 |   convolution_param {
 58 |     num_output: 128
 59 |     pad: 1
 60 |     kernel_size: 3
 61 |   }
 62 | }
 63 | layers {
 64 |   bottom: "conv2_1"
 65 |   top: "conv2_1"
 66 |   name: "relu2_1"
 67 |   type: RELU
 68 | }
 69 | layers {
 70 |   bottom: "conv2_1"
 71 |   top: "conv2_2"
 72 |   name: "conv2_2"
 73 |   type: CONVOLUTION
 74 |   convolution_param {
 75 |     num_output: 128
 76 |     pad: 1
 77 |     kernel_size: 3
 78 |   }
 79 | }
 80 | layers {
 81 |   bottom: "conv2_2"
 82 |   top: "conv2_2"
 83 |   name: "relu2_2"
 84 |   type: RELU
 85 | }
 86 | layers {
 87 |   bottom: "conv2_2"
 88 |   top: "pool2"
 89 |   name: "pool2"
 90 |   type: POOLING
 91 |   pooling_param {
 92 |     pool: MAX
 93 |     kernel_size: 2
 94 |     stride: 2
 95 |   }
 96 | }
 97 | layers {
 98 |   bottom: "pool2"
 99 |   top: "conv3_1"
100 |   name: "conv3_1"
101 |   type: CONVOLUTION
102 |   convolution_param {
103 |     num_output: 256
104 |     pad: 1
105 |     kernel_size: 3
106 |   }
107 | }
108 | layers {
109 |   bottom: "conv3_1"
110 |   top: "conv3_1"
111 |   name: "relu3_1"
112 |   type: RELU
113 | }
114 | layers {
115 |   bottom: "conv3_1"
116 |   top: "conv3_2"
117 |   name: "conv3_2"
118 |   type: CONVOLUTION
119 |   convolution_param {
120 |     num_output: 256
121 |     pad: 1
122 |     kernel_size: 3
123 |   }
124 | }
125 | layers {
126 |   bottom: "conv3_2"
127 |   top: "conv3_2"
128 |   name: "relu3_2"
129 |   type: RELU
130 | }
131 | layers {
132 |   bottom: "conv3_2"
133 |   top: "conv3_3"
134 |   name: "conv3_3"
135 |   type: CONVOLUTION
136 |   convolution_param {
137 |     num_output: 256
138 |     pad: 1
139 |     kernel_size: 3
140 |   }
141 | }
142 | layers {
143 |   bottom: "conv3_3"
144 |   top: "conv3_3"
145 |   name: "relu3_3"
146 |   type: RELU
147 | }
148 | layers {
149 |   bottom: "conv3_3"
150 |   top: "conv3_4"
151 |   name: "conv3_4"
152 |   type: CONVOLUTION
153 |   convolution_param {
154 |     num_output: 256
155 |     pad: 1
156 |     kernel_size: 3
157 |   }
158 | }
159 | layers {
160 |   bottom: "conv3_4"
161 |   top: "conv3_4"
162 |   name: "relu3_4"
163 |   type: RELU
164 | }
165 | layers {
166 |   bottom: "conv3_4"
167 |   top: "pool3"
168 |   name: "pool3"
169 |   type: POOLING
170 |   pooling_param {
171 |     pool: MAX
172 |     kernel_size: 2
173 |     stride: 2
174 |   }
175 | }
176 | layers {
177 |   bottom: "pool3"
178 |   top: "conv4_1"
179 |   name: "conv4_1"
180 |   type: CONVOLUTION
181 |   convolution_param {
182 |     num_output: 512
183 |     pad: 1
184 |     kernel_size: 3
185 |   }
186 | }
187 | layers {
188 |   bottom: "conv4_1"
189 |   top: "conv4_1"
190 |   name: "relu4_1"
191 |   type: RELU
192 | }
193 | layers {
194 |   bottom: "conv4_1"
195 |   top: "conv4_2"
196 |   name: "conv4_2"
197 |   type: CONVOLUTION
198 |   convolution_param {
199 |     num_output: 512
200 |     pad: 1
201 |     kernel_size: 3
202 |   }
203 | }
204 | layers {
205 |   bottom: "conv4_2"
206 |   top: "conv4_2"
207 |   name: "relu4_2"
208 |   type: RELU
209 | }
210 | layers {
211 |   bottom: "conv4_2"
212 |   top: "conv4_3"
213 |   name: "conv4_3"
214 |   type: CONVOLUTION
215 |   convolution_param {
216 |     num_output: 512
217 |     pad: 1
218 |     kernel_size: 3
219 |   }
220 | }
221 | layers {
222 |   bottom: "conv4_3"
223 |   top: "conv4_3"
224 |   name: "relu4_3"
225 |   type: RELU
226 | }
227 | layers {
228 |   bottom: "conv4_3"
229 |   top: "conv4_4"
230 |   name: "conv4_4"
231 |   type: CONVOLUTION
232 |   convolution_param {
233 |     num_output: 512
234 |     pad: 1
235 |     kernel_size: 3
236 |   }
237 | }
238 | layers {
239 |   bottom: "conv4_4"
240 |   top: "conv4_4"
241 |   name: "relu4_4"
242 |   type: RELU
243 | }
244 | layers {
245 |   bottom: "conv4_4"
246 |   top: "pool4"
247 |   name: "pool4"
248 |   type: POOLING
249 |   pooling_param {
250 |     pool: MAX
251 |     kernel_size: 2
252 |     stride: 2
253 |   }
254 | }
255 | layers {
256 |   bottom: "pool4"
257 |   top: "conv5_1"
258 |   name: "conv5_1"
259 |   type: CONVOLUTION
260 |   convolution_param {
261 |     num_output: 512
262 |     pad: 1
263 |     kernel_size: 3
264 |   }
265 | }
266 | layers {
267 |   bottom: "conv5_1"
268 |   top: "conv5_1"
269 |   name: "relu5_1"
270 |   type: RELU
271 | }
272 | layers {
273 |   bottom: "conv5_1"
274 |   top: "conv5_2"
275 |   name: "conv5_2"
276 |   type: CONVOLUTION
277 |   convolution_param {
278 |     num_output: 512
279 |     pad: 1
280 |     kernel_size: 3
281 |   }
282 | }
283 | layers {
284 |   bottom: "conv5_2"
285 |   top: "conv5_2"
286 |   name: "relu5_2"
287 |   type: RELU
288 | }
289 | layers {
290 |   bottom: "conv5_2"
291 |   top: "conv5_3"
292 |   name: "conv5_3"
293 |   type: CONVOLUTION
294 |   convolution_param {
295 |     num_output: 512
296 |     pad: 1
297 |     kernel_size: 3
298 |   }
299 | }
300 | layers {
301 |   bottom: "conv5_3"
302 |   top: "conv5_3"
303 |   name: "relu5_3"
304 |   type: RELU
305 | }
306 | layers {
307 |   bottom: "conv5_3"
308 |   top: "conv5_4"
309 |   name: "conv5_4"
310 |   type: CONVOLUTION
311 |   convolution_param {
312 |     num_output: 512
313 |     pad: 1
314 |     kernel_size: 3
315 |   }
316 | }
317 | layers {
318 |   bottom: "conv5_4"
319 |   top: "conv5_4"
320 |   name: "relu5_4"
321 |   type: RELU
322 | }
323 | layers {
324 |   bottom: "conv5_4"
325 |   top: "pool5"
326 |   name: "pool5"
327 |   type: POOLING
328 |   pooling_param {
329 |     pool: MAX
330 |     kernel_size: 2
331 |     stride: 2
332 |   }
333 | }
334 | layers {
335 |   bottom: "pool5"
336 |   top: "fc6"
337 |   name: "fc6"
338 |   type: INNER_PRODUCT
339 |   inner_product_param {
340 |     num_output: 4096
341 |   }
342 | }
343 | layers {
344 |   bottom: "fc6"
345 |   top: "fc6"
346 |   name: "relu6"
347 |   type: RELU
348 | }
349 | layers {
350 |   bottom: "fc6"
351 |   top: "fc6"
352 |   name: "drop6"
353 |   type: DROPOUT
354 |   dropout_param {
355 |     dropout_ratio: 0.5
356 |   }
357 | }
358 | layers {
359 |   bottom: "fc6"
360 |   top: "fc7"
361 |   name: "fc7"
362 |   type: INNER_PRODUCT
363 |   inner_product_param {
364 |     num_output: 4096
365 |   }
366 | }
367 | layers {
368 |   bottom: "fc7"
369 |   top: "fc7"
370 |   name: "relu7"
371 |   type: RELU
372 | }
373 | layers {
374 |   bottom: "fc7"
375 |   top: "fc7"
376 |   name: "drop7"
377 |   type: DROPOUT
378 |   dropout_param {
379 |     dropout_ratio: 0.5
380 |   }
381 | }
382 | layers {
383 |   bottom: "fc7"
384 |   top: "fc8"
385 |   name: "fc8"
386 |   type: INNER_PRODUCT
387 |   inner_product_param {
388 |     num_output: ##NUM_CLASSES##
389 |   }
390 | }
391 | layers {
392 |   bottom: "fc8"
393 |   top: "prob"
394 |   name: "prob"
395 |   type: SOFTMAX
396 | }
397 | 


--------------------------------------------------------------------------------
/start.m:
--------------------------------------------------------------------------------
  1 | function start(varargin)
  2 | %%% Configuration
  3 | %% Dependencies
  4 | opts.caffe_path='./lib/caffe_pp';
  5 | opts.liblinear_path='./lib/liblinear-2.1/matlab/';
  6 | opts.selsearch_path='./lib/SelectiveSearchCodeIJCV/';
  7 | 
  8 | %% Datasets
  9 | % If the imagepaths are relative to some imagedir, specify it here, otherwise leave empty
 10 | opts.imagedir='/';
 11 | % Path to your dataset, where the imagelist, the labels and the train test split is
 12 | opts.basedir = '/path/to/your/dataset/';
 13 | 
 14 | % Cache files
 15 | opts.cache_dir = [pwd '/cache/'];
 16 | opts.store_features = false;
 17 | 
 18 | %% CNN model
 19 | % Here you can choose your CNN model, have a look in the folder cnn_finetuning for possible models
 20 | opts.cnn_dir = [fileparts(mfilename('fullpath')) '/cnn_finetuning/caffe_reference/'];
 21 | % The batch size when calculating features, should match the value in the deploy.prototxt file
 22 | opts.batch_size = 11;
 23 | % The net image input size as specified in the deploy.prototxt file
 24 | opts.crop_size = 227;
 25 | 
 26 | %% Fine-tuning of CNN
 27 | opts.finetuning = true;
 28 | opts.finetuning_iters = 10000;
 29 | opts.finetuning_gpu = 0;
 30 | % Proposal generation 
 31 | % We generate object proposals and filter those for fine-tuning
 32 | % You can specify, if the bounding box should be estimated as well
 33 | % This might take additional time when generating the proposals
 34 | opts.estimate_bbox = true;
 35 | % Calculate the image mean of all proposals
 36 | opts.calculate_mean = true;
 37 | 
 38 | %% Part model hyperparameters
 39 | opts.part_layer = 'pool5';
 40 | opts.part_layer_channel_count = 256;
 41 | % The number of parts per view
 42 | opts.no_selected_parts = 10;
 43 | % Number of visible parts
 44 | opts.no_visible_parts = 5;
 45 | % Number of views per class
 46 | opts.view_count = 10;
 47 | % We initialize the part model randomly and hence we repeat optimization several
 48 | % times and take the best optimum
 49 | opts.iterations = 5;
 50 | % Select whether to compute a part model for each class separately
 51 | opts.class_wise = true;
 52 | 
 53 | %% Classification parameters
 54 | % The CNN layer to take the features from
 55 | opts.feature_global = 'relu7';
 56 | % Use flipped images in training as well
 57 | opts.use_flipped = true;
 58 | % Add features from the estimated bounding box
 59 | opts.use_bounding_box = true;
 60 | % Calculate features on a spatial pyramid of the input image, define the depth here
 61 | opts.pyramid_levels = 0;
 62 | % If you want to use random splits into training and test, specify this here
 63 | % Absolute number of training images per class
 64 | opts.rand_tr_images = -1;
 65 | % Relative part of each class to be used for training
 66 | opts.rand_tr_part = 0;
 67 | % How many splits should be evaluated
 68 | opts.repetitions = 1;
 69 | 
 70 | % Set this if you want to use parts in training
 71 | opts.use_parts = true;
 72 | % At each estimated part location, we extract a patch of size sqrt(part_scale*width*height) to calculate features
 73 | % Multiple scales can be defined like [0.44 0.24]
 74 | opts.part_scales = [0.44 0.24];
 75 | % The CNN layer to take the features for the parts from
 76 | opts.feature_part = 'relu7';
 77 | % Parameters for SVM training; you should estimate these parameters using cross-validation on the training data
 78 | opts.svm_params='-q';% VGG19: -s 2 -c 0.0000432', GoogLeNet: -c 0.0009766  -s 2 -q'
 79 | 
 80 | %% Parallelism
 81 | % Number of parallel workers
 82 | opts.parfor_workers = 2;
 83 | % Specify if you want to use parallelism at all
 84 | opts.use_parfor = true;
 85 | 
 86 | % Set to false if you want to hide almost all unnecessary output
 87 | opts.verbose_output = true;
 88 | 
 89 | %% Dependent paths
 90 | function opts = setDependentPaths(opts)
 91 |     % List of images 
 92 |     opts.imagelist_file = [opts.basedir '/imagelist_absolute.txt'];
 93 |     % The assignment to train and test, mandatory if finetuning is used! 
 94 |     % 1 - train, 0 - test
 95 |     opts.tr_ID_file = [opts.basedir '/tr_ID.txt'];
 96 |     % List of class labels starting from 1
 97 |     opts.labels_file = [opts.basedir '/labels.txt'];
 98 | 
 99 |     % CNN stuff
100 |     opts.deploy = [opts.cnn_dir '/deploy.prototxt'];
101 |     opts.model = [opts.cnn_dir '/model'];
102 |     opts.mean_mat_file = [opts.cnn_dir '/mean.mat'];
103 |     opts.mean_proto_file = [opts.cnn_dir '/mean.binaryproto'];
104 | 
105 |     opts.caffe_executable = [opts.caffe_path '/build/tools/caffe'];
106 | 
107 |     % Finetuning output dir: Derive from last two folders of opts.cnn_dir
108 |     dd = strsplit(opts.cnn_dir,'/');
109 |     dd = dd(~cellfun(@isempty,dd));
110 |     opts.finetuning_dir = [opts.cache_dir '/' strjoin(dd(end-1:end),'/')];
111 | end
112 | 
113 | %% Parse arguments
114 | addpath('lib');
115 | opts = setDependentPaths(opts);
116 | opts = vl_argparse(opts,varargin);
117 | % Again: Parse arguments to allow setting of the dependent paths
118 | opts = setDependentPaths(opts);
119 | opts = vl_argparse(opts,varargin);
120 | 
121 | 
122 | %% Make all paths for finetuning absolute
123 | opts.model = GetFullPath(opts.model);
124 | opts.caffe_executable = GetFullPath(opts.caffe_executable);
125 | opts.cnn_dir = GetFullPath(opts.cnn_dir);
126 | 
127 | %% Post processing of the params
128 | % Preparation
129 | addpath(genpath('./'));
130 | addpath([opts.caffe_path '/matlab/caffe']);
131 | addpath(opts.liblinear_path);
132 | addpath(opts.selsearch_path);
133 | % Cache files
134 | mkdir(opts.cache_dir);
135 | work_dir=pwd;cd(opts.cache_dir);opts.cache_dir=pwd;cd(work_dir);
136 | opts.part_loc_file = [opts.cache_dir '/part_locs.mat'];
137 | opts.est_bbox_file = [opts.cache_dir '/est_bbox.txt'];
138 | opts.caffe_window_file_train = [opts.cache_dir '/windows_train.txt'];
139 | opts.caffe_window_file_val = [opts.cache_dir '/windows_val.txt'];
140 | opts.caffe_part_model = [opts.cache_dir '/part_model.mat'];
141 | 
142 | opts
143 | 
144 | %% The algorithm
145 | if opts.use_parfor
146 |   opts.parfor_arg = opts.parfor_workers;
147 | else
148 |   opts.parfor_arg = 0;
149 | end
150 | opts.gpu_count = gpuDeviceCount;
151 | 
152 | % Extract parts
153 | % Result will be stored in cache file
154 | % IMPORTANT: part locs are always relative to the normalized image of width
155 | % and height 227
156 | if exist(opts.part_loc_file)
157 |     fprintf('Loading part locs from %s\n',opts.part_loc_file);
158 |     load(opts.part_loc_file,'part_locs');
159 | else
160 |     fprintf('Calculating part locations for all channels and all images...\n');
161 |     fprintf('IMPORTANT: This will take quite some time, especially with large nets like VGG19\n' );
162 |     part_locs = parts_locs_from_grads(opts);
163 | end
164 | 
165 | % Learn part model
166 | % This function outputs the whole part model, but we only use the first two
167 | % outputs: the ids of the opts.no_selected_parts most often used parts and the part visibility
168 | if exist(opts.caffe_part_model)
169 |     fprintf('Loading part model from cache...\n');
170 |     load(opts.caffe_part_model,'channel_ids','part_visibility');
171 | else
172 |     fprintf('Calculating part model...\n');
173 |     [ channel_ids, part_visibility] = evaluate_part_locs_anchor_multiview(part_locs, load(opts.tr_ID_file), ...
174 |             load(opts.labels_file), opts.no_selected_parts, opts.no_visible_parts, opts.view_count, opts.iterations);
175 |     save(opts.caffe_part_model,'channel_ids','part_visibility');
176 | end
177 |     
178 | % Part based localization to generate region proposals for fine-tuning as
179 | % well as bounding boxes for each image
180 | if (~opts.estimate_bbox || exist(opts.est_bbox_file)) && exist(opts.caffe_window_file_train) && ...
181 |         exist(opts.caffe_window_file_val) 
182 |     fprintf('Loading estimated bounding boxes and region proposals from disk...\n');
183 | elseif ~opts.use_bounding_box && ~opts.finetuning
184 |     fprintf('Region proposals for fine-tuning or estimated bboxes are not needed, skipping...\n');
185 | else
186 |     fprintf('Generating region proposals and estimated bounding boxes for CNN finetuning...\n');
187 |     selsearch_object_detector( channel_ids(1:opts.no_selected_parts), part_locs, part_visibility, opts );
188 | end
189 | 
190 | % Fine-tuning
191 | if opts.finetuning
192 |     opts.model = [opts.finetuning_dir '/model_ft_iter_' int2str(opts.finetuning_iters) '.caffemodel'];
193 |     opts.deploy = [opts.finetuning_dir '/deploy_ft.prototxt'];
194 |     if exist(opts.model) && exist(opts.deploy)
195 |         fprintf('Using pretrained model...\n');
196 |     else
197 |         fprintf('\nTime for fine-tuning of the CNN! This might take some time...\n');
198 | %         fprintf('\nHit enter to continue\n');
199 | %         pause
200 |         % Make sure labels start at 1 
201 |         labels=load(opts.labels_file);
202 |         labels=labels-min(labels(:))+1;
203 |         finetuning(max(labels(:)),opts);
204 |     end
205 | end
206 | 
207 | % Classification
208 | fprintf('Starting classification...\n');
209 | part_box_classification_multiscale( channel_ids(1:opts.no_selected_parts), part_locs, opts );
210 | end


--------------------------------------------------------------------------------
/part_based_classification/part_box_classification_multiscale.m:
--------------------------------------------------------------------------------
  1 | function [ output_args ] = part_box_classification_multiscale( channel_ids, part_locs, opts )
  2 |    %% Datasets
  3 |    imagedir=opts.imagedir;
  4 |    imagelist_file = opts.imagelist_file;
  5 |    tr_ID_file = opts.tr_ID_file;
  6 |    labels_file = opts.labels_file;
  7 |    bbox_file = opts.est_bbox_file;
  8 | 
  9 |     %% Params
 10 |     layer_parts = opts.feature_part;
 11 |     layer_image = opts.feature_global;
 12 |     use_flipped = opts.use_flipped;
 13 |     use_bounding_box = opts.use_bounding_box;
 14 |     pyramid_levels = opts.pyramid_levels;
 15 |     use_parts = opts.use_parts;
 16 |     rand_tr_images = opts.rand_tr_images;
 17 |     rand_tr_part = opts.rand_tr_part;
 18 |     part_scales = opts.part_scales;%0.31;%
 19 |     params=opts.svm_params;
 20 |     
 21 |     scale_relative_to_bbox = false;
 22 | 
 23 |     parfor_workers = opts.parfor_workers;
 24 |     use_parfor = opts.use_parfor;
 25 |     
 26 |     mean_file = opts.mean_mat_file;
 27 |     batch_size = opts.batch_size;
 28 |     crop_size = opts.crop_size;
 29 |     deploy = opts.deploy;
 30 |     model = opts.model;
 31 | 
 32 |     rng('shuffle');
 33 |     %read image list
 34 |     fid=fopen(imagelist_file,'r');
 35 |     imagelist=textscan(fid,'%s');
 36 |     imagelist=imagelist{1};
 37 |     fclose(fid);
 38 |     % Labels
 39 |     labels=load(labels_file);
 40 |     % load train test split
 41 |     if rand_tr_images>0
 42 |         [ tr_ID ] = createTrainTest( labels, rand_tr_images, opts.rand_tr_part );
 43 |     else
 44 |         train_test=logical(load(tr_ID_file));
 45 |         tr_ID = train_test;%(:,2);
 46 |     end
 47 |     % Bounding boxes 
 48 |     bboxes = [];
 49 |     if use_bounding_box
 50 |     %     bboxes = load([basedir '/bounding_boxes.txt'])+1;
 51 |         bboxes = load(bbox_file);
 52 |     end
 53 |     
 54 |     if use_parts
 55 |         parts = array2table(part_locs,'VariableNames',{'Var1','Var2','Var3','Var4','Var5'});
 56 |     %     parts = readtable('/home/simon/Datasets/CUB_200_2011/parts/part_locs.txt','Delimiter',' ','ReadVariableNames',false);
 57 |     %     parts = readtable('/home/simon/Datasets/CUB_200_2011/parts/est_part_locs.txt','Delimiter',' ','ReadVariableNames',false);
 58 |         parts.Properties.VariableNames{'Var1'} = 'image';
 59 |         parts.Properties.VariableNames{'Var2'} = 'part';
 60 |         parts.Properties.VariableNames{'Var3'} = 'x';
 61 |         parts.Properties.VariableNames{'Var4'} = 'y';
 62 |         parts.Properties.VariableNames{'Var5'} = 'visible';
 63 | 
 64 |         part_ids = unique(parts.part);
 65 |         image_ids = unique(parts.image);
 66 |         part_count = numel(part_ids);
 67 |         image_count = numel(image_ids);
 68 | 
 69 |         parts_x = reshape(parts.x,part_count,image_count);
 70 |         parts_y = reshape(parts.y,part_count,image_count);
 71 |     else
 72 |         image_count = size(imagelist,1);
 73 |         channel_ids = [];
 74 |         parts = [];
 75 |         part_count = [];
 76 |         parts_x = [];
 77 |         parts_y = [];
 78 |     end
 79 |     
 80 |     if use_flipped
 81 |         flipped_image_count = image_count + sum(tr_ID);
 82 |         labels = [labels;labels(tr_ID)];
 83 |         image_idx = [(1:image_count)';find(tr_ID)];
 84 |         tr_ID = [tr_ID;true(sum(tr_ID),1)];
 85 |     else
 86 |         flipped_image_count = image_count;
 87 |         image_idx = (1:image_count)';
 88 |     end
 89 |     
 90 |     matcaffe_init(1,deploy,model,1,0);
 91 |     if opts.use_parts
 92 |         f = caffe_features({[0]},layer_parts,mean_file,batch_size,crop_size);
 93 |     else
 94 |         f = [];
 95 |     end
 96 |     f2 = caffe_features({[0]},layer_image,mean_file,batch_size,crop_size);
 97 |     caffe('reset');
 98 |     
 99 |     if use_parfor
100 |         if ~isempty(gcp('nocreate'))%matlabpool('size')
101 |             pctRunOnAll caffe('reset')
102 |         else
103 |             caffe('reset');
104 |             parpool(parfor_workers);
105 |         end
106 |     end
107 |     parfor (i=1:parfor_workers, opts.parfor_arg)
108 |         matcaffe_init(1,deploy,model,1,mod(i,opts.gpu_count));
109 |     end
110 |     
111 |     num_patches_per_image = 0;
112 |     if use_parts
113 |         num_patches_per_image = num_patches_per_image + numel(part_scales)*numel(channel_ids);
114 |     end
115 |     if use_bounding_box
116 |         num_patches_per_image = num_patches_per_image+1;
117 |     end
118 |     if pyramid_levels>0
119 |         num_patches_per_image = num_patches_per_image+(1-4^(pyramid_levels+1))/-3 - 1;
120 |     end
121 |     
122 |     features = sparse(flipped_image_count, num_patches_per_image*size(f,2)+size(f2,2));
123 | %     visible = true(flipped_image_count, numel(channel_ids));
124 |     feature_count = size(features,2);
125 |     parfor (i=1:flipped_image_count, opts.parfor_arg) % randperm(image_count)%[1:10 11788+(1:10)]%
126 |         cur_image_idx = image_idx(i);
127 |         if opts.verbose_output
128 |             fprintf('Working on %i: %s\n',i,imagelist{cur_image_idx});
129 |         end
130 |         im = imread([imagedir '/' imagelist{cur_image_idx}]);
131 |         if i>image_count
132 |             im = flip(im,2);
133 |         end
134 |         batch_data = {};
135 |         missing_data = false(0,0);
136 |         
137 |         if use_bounding_box
138 |             cur_box = bboxes(cur_image_idx,:);
139 |             cur_box(4) = min(cur_box(4),size(im,2)-cur_box(2)+1);
140 |             cur_box(5) = min(cur_box(5),size(im,1)-cur_box(3)+1);
141 |             batch_data = [batch_data;im(cur_box(3):(cur_box(3)+cur_box(5)-1),cur_box(2):(cur_box(2)+cur_box(4)-1),:)];
142 |             missing_data = [missing_data;false];
143 | %              box_size = 0.5*sqrt(cur_box(4)*cur_box(5));
144 |         end 
145 |         
146 |         if use_parts
147 |             for part_scale = part_scales
148 |                 % Get all relevant and visible part positions
149 |                 selection = parts.visible((cur_image_idx-1)*part_count + channel_ids);
150 |                 visible_channels = channel_ids(logical(selection));
151 |                 cur_locs = [parts_x(channel_ids,cur_image_idx) parts_y(channel_ids,cur_image_idx)];
152 |                 if scale_relative_to_bbox
153 |                     box_size = 0.5*sqrt(cur_box(4)*cur_box(5));
154 |                 else
155 |                     box_size = part_scale*sqrt(size(im,1)*size(im,2));
156 |                 end
157 |                 for c=1:size(cur_locs,1)
158 |                     if parts.visible((cur_image_idx-1)*part_count + channel_ids(c))
159 |                         x=cur_locs(c,1);
160 |                         y=cur_locs(c,2);
161 |                         if i>image_count
162 |                             x=227-x;
163 |                         end
164 |                         ratio_x = 227.0 / size(im,2);
165 |                         ratio_y = 227.0 / size(im,1);
166 |                         x=int32(x/ratio_x);
167 |                         y=int32(y/ratio_y);
168 |                         x_min = max(x-box_size/2, 1);
169 |                         x_max = min(x+box_size/2, size(im,2));
170 |                         y_min = max(y-box_size/2, 1);
171 |                         y_max = min(y+box_size/2, size(im,1));
172 |                         batch_data = [batch_data; im(int32(y_min:y_max),int32(x_min:x_max),:)];
173 |                         missing_data = [missing_data;false];
174 |                     else
175 |                         batch_data = [batch_data; [125]];
176 |                         missing_data = [missing_data;true];
177 |                     end
178 |                 end
179 |             end
180 |         end
181 |         % Add spatial pyramid levels of image
182 |         for l=pyramid_levels:-1:1
183 |             x = fix(size(im,2)/(2^l));
184 |             y = fix(size(im,1)/(2^l));
185 |             if (x==0 || y==0)
186 |                 error('Image too small for spm');
187 |             end
188 |             xx=0;
189 |             yy=0;
190 |             while xx+x<=size(im,2)
191 |                 while yy +y <=size(im,1) 
192 |                     batch_data = [batch_data;im(yy+1:yy+y,xx+1:xx+x,:)];
193 |                     missing_data = [missing_data;false];
194 |                     yy = yy+y;
195 |                 end        
196 |                 yy = 0;
197 |                 xx = xx+x;
198 |             end
199 |         end
200 |         % Add the image
201 |         batch_data = [batch_data; im];
202 |         missing_data = [missing_data;false];
203 |         tmp = caffe_features(batch_data,layer_image,mean_file,batch_size,crop_size)'; 
204 |         features(i,:) = tmp(:);
205 |     end
206 |     
207 |     if opts.store_features
208 |         save([opts.cache_dir '/feats.mat'],'features','labels','tr_ID','-v7.3');    
209 |     end
210 |     
211 |     ORR_total = ones(opts.repetitions,1);
212 |     ARR_total = ones(opts.repetitions,1);
213 |     for i=1:opts.repetitions
214 |         if rand_tr_images>0
215 |             [ tr_ID ] = createTrainTest( labels, rand_tr_images, rand_tr_part );
216 |         end
217 |         % Train and test
218 |         model = train(labels(tr_ID,:),(features(tr_ID,:)),params);
219 |         [pred,acc_cur,~] = predict(labels(~tr_ID,:),(features(~tr_ID,:)),model);
220 | 
221 |         % evaluate
222 |         cm = confusionmat(labels(~tr_ID),pred);
223 |         acc=sum(diag(cm))/sum(cm(:))*100;
224 |         cm = cm./repmat(sum(cm,2),1,size(cm,2));
225 |         map=nanmean(diag(cm)./sum(cm,2))*100;    
226 |         ORR_total(i,1)=acc;
227 |         ARR_total(i,1)=map;
228 |         fprintf('Run %i ORR=%5.2f ARR=%5.2f\n',i, ORR_total(i,1),ARR_total(i,1));
229 |     end
230 |     fprintf('Mean over %i runs:\n',opts.repetitions);
231 |     fprintf('ORR=%f +- %f\n',nanmean(ORR_total), nanstd(ORR_total));
232 |     fprintf('ARR=%f +- %f\n',nanmean(ARR_total), nanstd(ARR_total));
233 | end
234 | 


--------------------------------------------------------------------------------
/patch_filtering/selsearch_object_detector.m:
--------------------------------------------------------------------------------
  1 | function [ output_args ] = selsearch_object_detector( channel_ids, part_locs, part_visibility, opts )
  2 |     output_file_train = opts.caffe_window_file_train;
  3 |     output_file_val = opts.caffe_window_file_val;
  4 |     output_file_bbox = opts.est_bbox_file;
  5 |       
  6 |     imagedir=opts.imagedir;
  7 |     imagelist_file = opts.imagelist_file;
  8 |     tr_ID_file = opts.tr_ID_file;
  9 |     labels_file = opts.labels_file;
 10 |     channels_for_boxes = channel_ids;
 11 |     
 12 |     add_est_bbox = opts.estimate_bbox;
 13 |     add_part_patches = true;
 14 |     add_proposals = true;
 15 |     write_bbox = opts.estimate_bbox;
 16 |     write_proposals = true;
 17 |     
 18 |     part_scales = opts.part_scales;
 19 |     scale_relative_to_bbox = false;    
 20 |     
 21 |     % Init caffe
 22 |     mean_file = opts.mean_mat_file;
 23 |     batch_size = opts.batch_size;
 24 |     crop_size = opts.crop_size;
 25 |     deploy = opts.deploy;
 26 |     model = opts.model;
 27 |     if write_bbox
 28 |     	matcaffe_init(1,deploy,model,1,0);
 29 |     end
 30 |     %read image list
 31 |     fid=fopen(imagelist_file,'r');
 32 |     imagelist=textscan(fid,'%s');
 33 |     imagelist=imagelist{1};
 34 |     fclose(fid);
 35 |     % load train test split
 36 |     tr_ID=logical(load(tr_ID_file));
 37 | %     tr_ID=true(size(imagelist,1),1);
 38 |     % Labels
 39 |     % Make sure labels start at 1
 40 |     labels=load(labels_file);
 41 |     labels=labels-min(labels(:))+1;
 42 |     
 43 |     
 44 |     %% Preparation starts
 45 |     parts = array2table(part_locs,'VariableNames',{'Var1','Var2','Var3','Var4','Var5'});
 46 | %     parts = readtable('/home/simon/Datasets/CUB_200_2011/parts/part_locs.txt','Delimiter',' ','ReadVariableNames',false);
 47 | %     parts = readtable('/home/simon/Datasets/CUB_200_2011/parts/est_part_locs.txt','Delimiter',' ','ReadVariableNames',false);
 48 |     parts.Properties.VariableNames{'Var1'} = 'image';
 49 |     parts.Properties.VariableNames{'Var2'} = 'part';
 50 |     parts.Properties.VariableNames{'Var3'} = 'x';
 51 |     parts.Properties.VariableNames{'Var4'} = 'y';
 52 |     parts.Properties.VariableNames{'Var5'} = 'visible';
 53 |     part_ids = unique(parts.part);
 54 |     image_ids = unique(parts.image);
 55 |     part_count = numel(part_ids);
 56 |     image_count = numel(image_ids);
 57 |     parts_x = reshape(parts.x,part_count,image_count);
 58 |     parts_y = reshape(parts.y,part_count,image_count);
 59 |     
 60 |     
 61 |     %% Calculation starts
 62 |     all_boxes = cell(image_count,1);
 63 |     all_images = cell(image_count,1);
 64 |     
 65 |     if write_proposals
 66 |         fid_train = fopen(output_file_train,'w');
 67 |         fid_val = fopen(output_file_val,'w');
 68 |         i_train = 0;
 69 |         i_test = 0;
 70 |         % Mean image
 71 |         mean_image = uint64(zeros(224,224,3));
 72 |         total_image_count = 0;
 73 |     end
 74 |     if write_bbox
 75 |         bbox_locs = nan(image_count,5);
 76 |     end
 77 |     
 78 |     fprintf('%s\n',datestr(now));
 79 |     for i=1:image_count%randperm(image_count)%
 80 |         if opts.verbose_output
 81 |             fprintf('Working on %i: %s\n',i,imagelist{i});
 82 |         end
 83 | %         if tr_ID(i)
 84 | %             continue
 85 | %         end
 86 | 
 87 |         im = imread([imagedir '/' imagelist{i}]);
 88 |         if size(im,3)==1
 89 |             im=repmat(im,1,1,3);
 90 |         end
 91 |         %% Get the propsals for the image 
 92 |         if add_proposals
 93 |     %         all_images{i} = im;
 94 |             colorTypes = {'Hsv', 'Lab', 'RGI', 'H', 'Intensity'};
 95 |             colorType = colorTypes{1:5}; 
 96 |             % Here you specify which similarity functions to use in merging
 97 |             simFunctionHandles = {@SSSimColourTextureSizeFillOrig, @SSSimTextureSizeFill, @SSSimBoxFillOrig, @SSSimSize};
 98 |             simFunctionHandles = simFunctionHandles(1:4); % Two different merging strategies
 99 |             % Thresholds for the Felzenszwalb and Huttenlocher segmentation algorithm.
100 |             % Note that by default, we set minSize = k, and sigma = 0.8.
101 |             k = 200; % controls size of segments of initial segmentation. 
102 |             minSize = k;
103 |             sigma = 0.8;
104 |             % Selective search start
105 |             [all_boxes{i}] = Image2HierarchicalGrouping(im, sigma, k, minSize, colorType, simFunctionHandles);
106 |             all_boxes{i} = BoxRemoveDuplicates(all_boxes{i});
107 |         else
108 |             all_boxes{i} = [];
109 |         end
110 |         boxes = all_boxes{i};
111 | 
112 |         %% Filter out boxes with zero size or which are too narrow
113 |         if size(boxes,1)<1
114 |             % Always keep the whole image (in case sel search fails)
115 |             boxes = [1 1 size(im,1) size(im,2)];
116 |         else
117 |             box_size_selection = (boxes(:,3)-boxes(:,1)).*(boxes(:,4)-boxes(:,2))>0 & ...
118 |                 (boxes(:,3)-boxes(:,1))>30 & (boxes(:,4)-boxes(:,2))>30;
119 |             boxes = boxes(box_size_selection,:);
120 |         end
121 |         
122 |         %% Transform part locs to actual part locations in the image
123 |         if add_proposals
124 |             % Get all relevant and visible part positions
125 |             channel_ids = find(part_visibility(i,:));
126 |             selection = parts.visible((i-1)*part_count + channel_ids);
127 |             cur_channels = channel_ids(logical(selection));
128 |             cur_locs = [parts_x(cur_channels,i) parts_y(cur_channels,i)];
129 | 
130 |             for k=1:size(cur_locs,1)
131 |                 x=cur_locs(k,1);
132 |                 y=cur_locs(k,2);
133 |                 % calc ratio 
134 |                 ratio_x = opts.crop_size / size(im,2);
135 |                 ratio_y = opts.crop_size / size(im,1);
136 |                 cur_locs(k,1)=int32(x/ratio_x);
137 |                 cur_locs(k,2)=int32(y/ratio_y);
138 |             end
139 |     %             hold off
140 |         else
141 |             cur_locs = [];
142 |         end
143 | 
144 |         %% Add part based boxes
145 |         if add_part_patches
146 |             box_part_selection = false(size(boxes,1),1);
147 |             for part_scale = part_scales
148 |                 % Get visibile parts for the custom channel selection
149 |                 part_based_locs = [parts_x(channels_for_boxes,i) parts_y(channels_for_boxes,i)];
150 |                 if scale_relative_to_bbox
151 |                     box_size = 0.5*sqrt(cur_box(4)*cur_box(5));
152 |                 else
153 |                     box_size = part_scale*sqrt(size(im,1)*size(im,2));
154 |                 end
155 |                 for c=1:size(part_based_locs,1)
156 |                     if parts.visible((i-1)*part_count + channels_for_boxes(c))
157 |                         x=part_based_locs(c,1);
158 |                         y=part_based_locs(c,2);
159 |                         if i>image_count
160 |                             x=opts.crop_size-x;
161 |                         end
162 |                         ratio_x = opts.crop_size / size(im,2);
163 |                         ratio_y = opts.crop_size / size(im,1);
164 |                         x=int32(x/ratio_x);
165 |                         y=int32(y/ratio_y);
166 |                         x_min = max(x-box_size/2, 1);
167 |                         x_max = min(x+box_size/2, size(im,2));
168 |                         y_min = max(y-box_size/2, 1);
169 |                         y_max = min(y+box_size/2, size(im,1));
170 |                         boxes = [boxes;y_min x_min y_max x_max];
171 |                         box_part_selection = [box_part_selection;true];
172 |                     end
173 |                 end
174 |             end
175 |         end
176 | 
177 |         %% Bounding box estimation
178 |         if write_bbox
179 |             % Classify all boxes        
180 |             batch_data = {};
181 |             for b=1:size(boxes,1)
182 |                 batch_data = [batch_data; im(boxes(b,1):boxes(b,3),boxes(b,2):boxes(b,4),:)];
183 |             end
184 |             probs = caffe_features(batch_data,'prob',mean_file,batch_size,crop_size);
185 |             if tr_ID(i)
186 |                 pred_class = labels(i,:);
187 |             else
188 |                 % Predict the class 
189 |                 % Use the most confident classification result as class
190 |                 % pred
191 |                 [val,pred_class]=max(max(probs(:,2:end),[],1),[],2);
192 |             end
193 |             % Take the bbox with the most sure classification
194 |             [~,idx ] = sort(-probs(:,pred_class+1));
195 |             bbox_locs(i,:) = [i, ...
196 |                 boxes(idx(1),2)                ,boxes(idx(1),1),...
197 |                 boxes(idx(1),4)-boxes(idx(1),2),boxes(idx(1),3)-boxes(idx(1),1)];
198 |         end
199 | 
200 |         %% Proposals
201 |         if write_proposals 
202 |             if add_proposals
203 |                 %% Decide foreground and background boxes according to part location
204 |                 % Count how many parts are inside the proposed box
205 |                 fg_bg_selection = zeros(size(boxes,1),1);
206 |                 cur_boxes = [];
207 |                 for loc = cur_locs'
208 |                     % loc has shape [x=col y=row]
209 |                     % Check which boxes contain this part and count 
210 |                     fg_bg_selection = fg_bg_selection + ...
211 |                         (boxes(:,1)<=loc(2) & boxes(:,3)>=loc(2)& ...
212 |                         boxes(:,2)<=loc(1) & boxes(:,4)>=loc(1));
213 |                 end
214 |                 % Take only boxes with three or more part detections
215 |                 fg_bg_selection=fg_bg_selection>numel(cur_channels)-3;
216 | 
217 |                 box_selection = fg_bg_selection | box_part_selection;
218 |                 % Always take the full image
219 |                 box_selection(1) = true;
220 |             elseif add_part_patches
221 |                 box_selection = box_part_selection;
222 |             else
223 |                 box_selection = [];
224 |             end
225 |             % Add the estimated bounding box
226 |             if add_est_bbox
227 |                 boxes = [boxes;[bbox_locs(i,3),bbox_locs(i,2),...
228 |                     bbox_locs(i,5)+bbox_locs(i,3),bbox_locs(i,4)+bbox_locs(i,2)]];
229 |                 box_selection = [box_selection;true];
230 |             end
231 |             
232 |             if opts.verbose_output
233 |                 fprintf('Found %i boxes\n',sum(box_selection));
234 |             end
235 | 
236 |             %% Now store these bboxes in text file
237 |             if tr_ID(i,:)
238 |                 fid = fid_train;
239 |                 fprintf(fid,'# %i\n',i_train); % Image id
240 |                 i_train = i_train + 1;
241 |             else
242 |                 fid = fid_val;
243 |                 fprintf(fid,'# %i\n',i_test); % Image id
244 |                 i_test = i_test + 1;
245 |             end
246 |             fprintf(fid,'%s\n',[imagedir imagelist{i}]); % absolute image path
247 |             fprintf(fid,'%i\n',3); % num channels
248 |             fprintf(fid,'%i\n',size(im,1)); % height
249 |             fprintf(fid,'%i\n',size(im,2)); % width 
250 |             fprintf(fid,'%i\n',size(boxes,1)); % num_windows
251 |             for b = 1:size(boxes,1)
252 |                 fprintf(fid,'%i %i %.0f %.0f %.0f %.0f\n',labels(i,:),box_selection(b,:),...
253 |                     boxes(b,2),boxes(b,1),boxes(b,4),boxes(b,3));
254 |                 if opts.calculate_mean
255 |                     mean_image = mean_image + uint64(imresize(im(boxes(b,1):boxes(b,3),boxes(b,2):boxes(b,4),:),[224 224]));
256 |                     total_image_count = total_image_count + 1;
257 |                 end
258 | %                 if box_selection(b,:)
259 | %                     imshow(im(boxes(b,1):boxes(b,3),boxes(b,2):boxes(b,4),:));
260 | %                     waitforbuttonpress; clf
261 | %                 end
262 |             end
263 |         end
264 |     end
265 |     if write_proposals
266 |         fclose(fid_train);
267 |         fclose(fid_val);
268 |         if opts.calculate_mean
269 |             mean_image = double(mean_image/total_image_count);
270 |             save('tmp_mean.mat','mean_image');
271 |         end
272 |     end
273 |     if write_bbox
274 |         dlmwrite(output_file_bbox,bbox_locs,'Delimiter',' ');
275 |     end
276 | end
277 | 
278 | 


--------------------------------------------------------------------------------
/cnn_finetuning/vgg19/train_val_template.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"
# Training input (phase TRAIN): image windows are read from the window file
# "../../windows_train.txt". In this repository a file of that name is
# written by selsearch_object_detector (opts.caffe_window_file_train).
layers {
  name: "data"
  type: WINDOW_DATA
  top: "data"
  top: "label"
  window_data_param {
    source: "../../windows_train.txt"
    batch_size: 40
    # NOTE(review): the window file carries a 0/1 selection flag in its
    # second column, so with both thresholds at 0.5 and fg_fraction 1.00
    # presumably only the selected windows are sampled - confirm against
    # the semantics of Caffe's WindowData layer.
    fg_threshold: 0.5
    bg_threshold: 0.5
    fg_fraction: 1.00
    context_pad: 16
    # Training uses "warp", the TEST layer below uses "square"
    crop_mode: "warp"
    cache_images: true
  }
  transform_param {
    mirror: true
    crop_size: 224
    mean_file: "mean.binaryproto"
  }
  include: { phase: TRAIN }
}
# Validation input (phase TEST): image windows are read from the window
# file "../../windows_val.txt". In this repository a file of that name is
# written by selsearch_object_detector (opts.caffe_window_file_val).
layers {
  name: "data"
  type: WINDOW_DATA
  top: "data"
  top: "label"
  window_data_param {
    source: "../../windows_val.txt"
    batch_size: 5
    fg_threshold: 0.5
    bg_threshold: 0.5
    fg_fraction: 1.00
    context_pad: 16
    # NOTE(review): "square" here, while the TRAIN layer uses "warp" -
    # confirm that the different crop modes are intended.
    crop_mode: "square"
    cache_images: true
  }
  transform_param {
    # NOTE(review): mirroring is enabled for the TEST phase as well -
    # confirm that this is intended.
    mirror: true
    crop_size: 224
    mean_file: "mean.binaryproto"
  }
  include: { phase: TEST }
}
 46 | 
 47 | layers {
 48 |   bottom: "data"
 49 |   top: "conv1_1"
 50 |   name: "conv1_1"
 51 |   type: CONVOLUTION
 52 |   blobs_lr: 1
 53 |   blobs_lr: 2
 54 |   weight_decay: 1
 55 |   weight_decay: 0
 56 |   convolution_param {
 57 |     num_output: 64
 58 |     pad: 1
 59 |     kernel_size: 3
 60 |     weight_filler {
 61 |       type: "gaussian"
 62 |       std: 0.01
 63 |     }
 64 |     bias_filler {
 65 |       type: "constant"
 66 |       value: 0
 67 |     }
 68 |   }
 69 | }
 70 | layers {
 71 |   bottom: "conv1_1"
 72 |   top: "conv1_1"
 73 |   name: "relu1_1"
 74 |   type: RELU
 75 | }
 76 | layers {
 77 |   bottom: "conv1_1"
 78 |   top: "conv1_2"
 79 |   name: "conv1_2"
 80 |   type: CONVOLUTION
 81 |   blobs_lr: 1
 82 |   blobs_lr: 2
 83 |   weight_decay: 1
 84 |   weight_decay: 0
 85 |   convolution_param {
 86 |     num_output: 64
 87 |     pad: 1
 88 |     kernel_size: 3
 89 |     weight_filler {
 90 |       type: "gaussian"
 91 |       std: 0.01
 92 |     }
 93 |     bias_filler {
 94 |       type: "constant"
 95 |       value: 0
 96 |     }
 97 |   }
 98 | }
 99 | layers {
100 |   bottom: "conv1_2"
101 |   top: "conv1_2"
102 |   name: "relu1_2"
103 |   type: RELU
104 | }
105 | layers {
106 |   bottom: "conv1_2"
107 |   top: "pool1"
108 |   name: "pool1"
109 |   type: POOLING
110 |   pooling_param {
111 |     pool: MAX
112 |     kernel_size: 2
113 |     stride: 2
114 |   }
115 | }
116 | layers {
117 |   bottom: "pool1"
118 |   top: "conv2_1"
119 |   name: "conv2_1"
120 |   type: CONVOLUTION
121 |   blobs_lr: 1
122 |   blobs_lr: 2
123 |   weight_decay: 1
124 |   weight_decay: 0
125 |   convolution_param {
126 |     num_output: 128
127 |     pad: 1
128 |     kernel_size: 3
129 |     weight_filler {
130 |       type: "gaussian"
131 |       std: 0.01
132 |     }
133 |     bias_filler {
134 |       type: "constant"
135 |       value: 0
136 |     }
137 |   }
138 | }
139 | layers {
140 |   bottom: "conv2_1"
141 |   top: "conv2_1"
142 |   name: "relu2_1"
143 |   type: RELU
144 | }
145 | layers {
146 |   bottom: "conv2_1"
147 |   top: "conv2_2"
148 |   name: "conv2_2"
149 |   type: CONVOLUTION
150 |   blobs_lr: 1
151 |   blobs_lr: 2
152 |   weight_decay: 1
153 |   weight_decay: 0
154 |   convolution_param {
155 |     num_output: 128
156 |     pad: 1
157 |     kernel_size: 3
158 |     weight_filler {
159 |       type: "gaussian"
160 |       std: 0.01
161 |     }
162 |     bias_filler {
163 |       type: "constant"
164 |       value: 0
165 |     }
166 |   }
167 | }
168 | layers {
169 |   bottom: "conv2_2"
170 |   top: "conv2_2"
171 |   name: "relu2_2"
172 |   type: RELU
173 | }
174 | layers {
175 |   bottom: "conv2_2"
176 |   top: "pool2"
177 |   name: "pool2"
178 |   type: POOLING
179 |   pooling_param {
180 |     pool: MAX
181 |     kernel_size: 2
182 |     stride: 2
183 |   }
184 | }
185 | layers {
186 |   bottom: "pool2"
187 |   top: "conv3_1"
188 |   name: "conv3_1"
189 |   type: CONVOLUTION
190 |   blobs_lr: 1
191 |   blobs_lr: 2
192 |   weight_decay: 1
193 |   weight_decay: 0
194 |   convolution_param {
195 |     num_output: 256
196 |     pad: 1
197 |     kernel_size: 3
198 |     weight_filler {
199 |       type: "gaussian"
200 |       std: 0.01
201 |     }
202 |     bias_filler {
203 |       type: "constant"
204 |       value: 0
205 |     }
206 |   }
207 | }
208 | layers {
209 |   bottom: "conv3_1"
210 |   top: "conv3_1"
211 |   name: "relu3_1"
212 |   type: RELU
213 | }
214 | layers {
215 |   bottom: "conv3_1"
216 |   top: "conv3_2"
217 |   name: "conv3_2"
218 |   type: CONVOLUTION
219 |   blobs_lr: 1
220 |   blobs_lr: 2
221 |   weight_decay: 1
222 |   weight_decay: 0
223 |   convolution_param {
224 |     num_output: 256
225 |     pad: 1
226 |     kernel_size: 3
227 |     weight_filler {
228 |       type: "gaussian"
229 |       std: 0.01
230 |     }
231 |     bias_filler {
232 |       type: "constant"
233 |       value: 0
234 |     }
235 |   }
236 | }
237 | layers {
238 |   bottom: "conv3_2"
239 |   top: "conv3_2"
240 |   name: "relu3_2"
241 |   type: RELU
242 | }
243 | layers {
244 |   bottom: "conv3_2"
245 |   top: "conv3_3"
246 |   name: "conv3_3"
247 |   type: CONVOLUTION
248 |   blobs_lr: 1
249 |   blobs_lr: 2
250 |   weight_decay: 1
251 |   weight_decay: 0
252 |   convolution_param {
253 |     num_output: 256
254 |     pad: 1
255 |     kernel_size: 3
256 |     weight_filler {
257 |       type: "gaussian"
258 |       std: 0.01
259 |     }
260 |     bias_filler {
261 |       type: "constant"
262 |       value: 0
263 |     }
264 |   }
265 | }
266 | layers {
267 |   bottom: "conv3_3"
268 |   top: "conv3_3"
269 |   name: "relu3_3"
270 |   type: RELU
271 | }
272 | layers {
273 |   bottom: "conv3_3"
274 |   top: "conv3_4"
275 |   name: "conv3_4"
276 |   type: CONVOLUTION
277 |   blobs_lr: 1
278 |   blobs_lr: 2
279 |   weight_decay: 1
280 |   weight_decay: 0
281 |   convolution_param {
282 |     num_output: 256
283 |     pad: 1
284 |     kernel_size: 3
285 |     weight_filler {
286 |       type: "gaussian"
287 |       std: 0.01
288 |     }
289 |     bias_filler {
290 |       type: "constant"
291 |       value: 0
292 |     }
293 |   }
294 | }
295 | layers {
296 |   bottom: "conv3_4"
297 |   top: "conv3_4"
298 |   name: "relu3_4"
299 |   type: RELU
300 | }
301 | layers {
302 |   bottom: "conv3_4"
303 |   top: "pool3"
304 |   name: "pool3"
305 |   type: POOLING
306 |   pooling_param {
307 |     pool: MAX
308 |     kernel_size: 2
309 |     stride: 2
310 |   }
311 | }
312 | layers {
313 |   bottom: "pool3"
314 |   top: "conv4_1"
315 |   name: "conv4_1"
316 |   type: CONVOLUTION
317 |   blobs_lr: 1
318 |   blobs_lr: 2
319 |   weight_decay: 1
320 |   weight_decay: 0
321 |   convolution_param {
322 |     num_output: 512
323 |     pad: 1
324 |     kernel_size: 3
325 |     weight_filler {
326 |       type: "gaussian"
327 |       std: 0.01
328 |     }
329 |     bias_filler {
330 |       type: "constant"
331 |       value: 0
332 |     }
333 |   }
334 | }
335 | layers {
336 |   bottom: "conv4_1"
337 |   top: "conv4_1"
338 |   name: "relu4_1"
339 |   type: RELU
340 | }
341 | layers {
342 |   bottom: "conv4_1"
343 |   top: "conv4_2"
344 |   name: "conv4_2"
345 |   type: CONVOLUTION
346 |   blobs_lr: 1
347 |   blobs_lr: 2
348 |   weight_decay: 1
349 |   weight_decay: 0
350 |   convolution_param {
351 |     num_output: 512
352 |     pad: 1
353 |     kernel_size: 3
354 |     weight_filler {
355 |       type: "gaussian"
356 |       std: 0.01
357 |     }
358 |     bias_filler {
359 |       type: "constant"
360 |       value: 0
361 |     }
362 |   }
363 | }
364 | layers {
365 |   bottom: "conv4_2"
366 |   top: "conv4_2"
367 |   name: "relu4_2"
368 |   type: RELU
369 | }
370 | layers {
371 |   bottom: "conv4_2"
372 |   top: "conv4_3"
373 |   name: "conv4_3"
374 |   type: CONVOLUTION
375 |   blobs_lr: 1
376 |   blobs_lr: 2
377 |   weight_decay: 1
378 |   weight_decay: 0
379 |   convolution_param {
380 |     num_output: 512
381 |     pad: 1
382 |     kernel_size: 3
383 |     weight_filler {
384 |       type: "gaussian"
385 |       std: 0.01
386 |     }
387 |     bias_filler {
388 |       type: "constant"
389 |       value: 0
390 |     }
391 |   }
392 | }
393 | layers {
394 |   bottom: "conv4_3"
395 |   top: "conv4_3"
396 |   name: "relu4_3"
397 |   type: RELU
398 | }
399 | layers {
400 |   bottom: "conv4_3"
401 |   top: "conv4_4"
402 |   name: "conv4_4"
403 |   type: CONVOLUTION
404 |   blobs_lr: 1
405 |   blobs_lr: 2
406 |   weight_decay: 1
407 |   weight_decay: 0
408 |   convolution_param {
409 |     num_output: 512
410 |     pad: 1
411 |     kernel_size: 3
412 |     weight_filler {
413 |       type: "gaussian"
414 |       std: 0.01
415 |     }
416 |     bias_filler {
417 |       type: "constant"
418 |       value: 0
419 |     }
420 |   }
421 | }
422 | layers {
423 |   bottom: "conv4_4"
424 |   top: "conv4_4"
425 |   name: "relu4_4"
426 |   type: RELU
427 | }
428 | layers {
429 |   bottom: "conv4_4"
430 |   top: "pool4"
431 |   name: "pool4"
432 |   type: POOLING
433 |   pooling_param {
434 |     pool: MAX
435 |     kernel_size: 2
436 |     stride: 2
437 |   }
438 | }
439 | layers {
440 |   bottom: "pool4"
441 |   top: "conv5_1"
442 |   name: "conv5_1"
443 |   type: CONVOLUTION
444 |   blobs_lr: 1
445 |   blobs_lr: 2
446 |   weight_decay: 1
447 |   weight_decay: 0
448 |   convolution_param {
449 |     num_output: 512
450 |     pad: 1
451 |     kernel_size: 3
452 |     weight_filler {
453 |       type: "gaussian"
454 |       std: 0.01
455 |     }
456 |     bias_filler {
457 |       type: "constant"
458 |       value: 0
459 |     }
460 |   }
461 | }
462 | layers {
463 |   bottom: "conv5_1"
464 |   top: "conv5_1"
465 |   name: "relu5_1"
466 |   type: RELU
467 | }
468 | layers {
469 |   bottom: "conv5_1"
470 |   top: "conv5_2"
471 |   name: "conv5_2"
472 |   type: CONVOLUTION
473 |   blobs_lr: 1
474 |   blobs_lr: 2
475 |   weight_decay: 1
476 |   weight_decay: 0
477 |   convolution_param {
478 |     num_output: 512
479 |     pad: 1
480 |     kernel_size: 3
481 |     weight_filler {
482 |       type: "gaussian"
483 |       std: 0.01
484 |     }
485 |     bias_filler {
486 |       type: "constant"
487 |       value: 0
488 |     }
489 |   }
490 | }
491 | layers {
492 |   bottom: "conv5_2"
493 |   top: "conv5_2"
494 |   name: "relu5_2"
495 |   type: RELU
496 | }
497 | layers {
498 |   bottom: "conv5_2"
499 |   top: "conv5_3"
500 |   name: "conv5_3"
501 |   type: CONVOLUTION
502 |   blobs_lr: 1
503 |   blobs_lr: 2
504 |   weight_decay: 1
505 |   weight_decay: 0
506 |   convolution_param {
507 |     num_output: 512
508 |     pad: 1
509 |     kernel_size: 3
510 |     weight_filler {
511 |       type: "gaussian"
512 |       std: 0.01
513 |     }
514 |     bias_filler {
515 |       type: "constant"
516 |       value: 0
517 |     }
518 |   }
519 | }
520 | layers {
521 |   bottom: "conv5_3"
522 |   top: "conv5_3"
523 |   name: "relu5_3"
524 |   type: RELU
525 | }
526 | layers {
527 |   bottom: "conv5_3"
528 |   top: "conv5_4"
529 |   name: "conv5_4"
530 |   type: CONVOLUTION
531 |   blobs_lr: 1
532 |   blobs_lr: 2
533 |   weight_decay: 1
534 |   weight_decay: 0
535 |   convolution_param {
536 |     num_output: 512
537 |     pad: 1
538 |     kernel_size: 3
539 |     weight_filler {
540 |       type: "gaussian"
541 |       std: 0.01
542 |     }
543 |     bias_filler {
544 |       type: "constant"
545 |       value: 0
546 |     }
547 |   }
548 | }
549 | layers {
550 |   bottom: "conv5_4"
551 |   top: "conv5_4"
552 |   name: "relu5_4"
553 |   type: RELU
554 | }
555 | layers {
556 |   bottom: "conv5_4"
557 |   top: "pool5"
558 |   name: "pool5"
559 |   type: POOLING
560 |   pooling_param {
561 |     pool: MAX
562 |     kernel_size: 2
563 |     stride: 2
564 |   }
565 | }
566 | layers {
567 |   bottom: "pool5"
568 |   top: "fc6"
569 |   name: "fc6"
570 |   type: INNER_PRODUCT
571 |   blobs_lr: 1
572 |   blobs_lr: 2
573 |   weight_decay: 1
574 |   weight_decay: 0
575 |   inner_product_param {
576 |     num_output: 4096
577 |     weight_filler {
578 |       type: "gaussian"
579 |       std: 0.005
580 |     }
581 |     bias_filler {
582 |       type: "constant"
583 |       value: 1
584 |     }
585 |   }
586 | }
587 | layers {
588 |   bottom: "fc6"
589 |   top: "fc6"
590 |   name: "relu6"
591 |   type: RELU
592 | }
593 | layers {
594 |   bottom: "fc6"
595 |   top: "fc6"
596 |   name: "drop6"
597 |   type: DROPOUT
598 |   dropout_param {
599 |     dropout_ratio: 0.5
600 |   }
601 | }
602 | layers {
603 |   bottom: "fc6"
604 |   top: "fc7"
605 |   name: "fc7"
606 |   type: INNER_PRODUCT
607 |   blobs_lr: 1
608 |   blobs_lr: 2
609 |   weight_decay: 1
610 |   weight_decay: 0
611 |   inner_product_param {
612 |     num_output: 4096
613 |     weight_filler {
614 |       type: "gaussian"
615 |       std: 0.005
616 |     }
617 |     bias_filler {
618 |       type: "constant"
619 |       value: 1
620 |     }
621 |   }
622 | }
623 | layers {
624 |   bottom: "fc7"
625 |   top: "fc7"
626 |   name: "relu7"
627 |   type: RELU
628 | }
629 | layers {
630 |   bottom: "fc7"
631 |   top: "fc7"
632 |   name: "drop7"
633 |   type: DROPOUT
634 |   dropout_param {
635 |     dropout_ratio: 0.5
636 |   }
637 | }
638 | layers {
639 |   bottom: "fc7"
640 |   top: "fc8_ft"
641 |   name: "fc8_ft"
642 |   type: INNER_PRODUCT
643 |   blobs_lr: 1
644 |   blobs_lr: 2
645 |   weight_decay: 1
646 |   weight_decay: 0
647 |   inner_product_param {
648 |     num_output: ##NUM_CLASSES##
649 |     weight_filler {
650 |       type: "gaussian"
651 |       std: 0.01
652 |     }
653 |     bias_filler {
654 |       type: "constant"
655 |       value: 0
656 |     }
657 |   }
658 | }
659 | layers {
660 |   name: "loss"
661 |   type: SOFTMAX_LOSS
662 |   bottom: "fc8_ft"
663 |   bottom: "label"
664 |   top: "loss"
665 | }
666 | layers {
667 |   name: "accuracy"
668 |   type: ACCURACY
669 |   bottom: "fc8_ft"
670 |   bottom: "label"
671 |   top: "accuracy"
672 |   include { phase: TEST }
673 | }
674 | 


--------------------------------------------------------------------------------
/part_selection/evaluate_part_locs_anchor_multiview.m:
--------------------------------------------------------------------------------
  1 | function [ channel_ids, part_visibility, anchor_points, shift_vectors, view_assignment, obj_value, err ] = ...
  2 |     evaluate_part_locs_anchor_multiview(part_locs, tr_ID, labels, no_selected_parts, no_visible_parts, view_count, iterations)
  3 |     % EVALUATE_PART_LOCS_ANCHOR_MULTIVIEW  Fit a multi-view part model for every class.
  4 |     % For each class in labels, do_build_part_models is run `iterations` times on
  5 |     % the training images of that class; the run with the highest objective is kept.
  6 |     %
  7 |     % Inputs:
  8 |     %   part_locs         - N x 5 array with columns image, part, x, y, visible;
  9 |     %                       rows must be ordered by image, then by part
 10 |     %   tr_ID             - logical vector, true for training images
 11 |     %   labels            - class label of every image; used as row index into
 12 |     %                       shift_vectors, so positive integers are expected
 13 |     %   no_selected_parts - number of parts selected per view (default 5)
 14 |     %   no_visible_parts  - number of visible parts per image (default NaN);
 15 |     %                       set to NaN to avoid estimating visible parts
 16 |     %   view_count        - number of views per class (default 3)
 17 |     %   iterations        - random restarts per class (default 1)
 18 |     %
 19 |     % Outputs:
 20 |     %   channel_ids     - part indices sorted by decreasing visibility count
 21 |     %   part_visibility - logical image_count x part_count; all true for
 22 |     %                     non-training images (no proper inference yet)
 23 |     %   anchor_points   - image_count x 2, NaN for non-training images
 24 |     %   shift_vectors   - indexed as (class, part, view, coordinate)
 25 |     %   view_assignment - logical image_count x view_count
 26 |     %   obj_value, err  - results of the last restart of the last class that
 27 |     %                     was processed (NOTE(review): not the best restart)
 28 | 
 29 |     % tr_ID and labels have no usable default, the remaining inputs are optional
 30 |     narginchk(3,7);
 31 |     if nargin<4
 32 |         no_selected_parts = 5;
 33 |     end
 34 |     if nargin<5
 35 |         no_visible_parts = NaN;
 36 |     end
 37 |     if nargin<6
 38 |         view_count = 3;
 39 |     end
 40 |     if nargin<7
 41 |         iterations = 1;
 42 |     end
 43 | 
 44 |     %% Read part locations
 45 |     parts = array2table(part_locs,'VariableNames',{'image','part','x','y','visible'});
 46 |     part_count = numel(unique(parts.part));
 47 |     image_count = numel(unique(parts.image));
 48 |     % Coordinates in shape part_count x image_count x 2
 49 |     parts_x = reshape(parts.x,part_count,image_count);
 50 |     parts_y = reshape(parts.y,part_count,image_count);
 51 |     part_locs = cat(3,parts_x,parts_y);
 52 | 
 53 |     %% Result containers; anchors and views are only filled for training images
 54 |     class_ids = unique(labels(:))';
 55 |     part_visibility = nan(image_count,part_count);
 56 |     anchor_points = nan(image_count,2);
 57 |     shift_vectors = nan(numel(class_ids),part_count,view_count,2);
 58 |     view_assignment = false(image_count,view_count);
 59 |     obj_value = NaN;
 60 |     err = [];
 61 | 
 62 |     %% Build one model per class, perform selection only on train
 63 |     fprintf('Working on class ');
 64 |     for c=class_ids
 65 |         fprintf('%i ',c);
 66 |         class_tr_ID = tr_ID & labels==c;
 67 |         if sum(class_tr_ID)<1
 68 |             continue
 69 |         end
 70 |         best_obj_value = -Inf;
 71 |         for k=1:iterations
 72 |             [ ~, h, a, d, s, obj_value, err ] = do_build_part_models(parts, part_locs, ...
 73 |                 class_tr_ID, no_selected_parts, no_visible_parts, view_count );
 74 |             if obj_value>best_obj_value
 75 |                 best_obj_value = obj_value;
 76 |                 best_h=h; best_a=a; best_d=d; best_s=s;
 77 |             end
 78 |         end
 79 |         part_visibility(class_tr_ID,:)=best_h;
 80 |         anchor_points(class_tr_ID,:) = best_a;
 81 |         shift_vectors(c,:,:,:)=best_d;
 82 |         view_assignment(class_tr_ID,:)=best_s;
 83 |     end
 84 | 
 85 |     %% Inference for test images
 86 |     % TODO: Here should be a proper inference, for now every part counts as visible
 87 |     [~,channel_ids] = sort(-nansum(part_visibility,1));
 88 |     part_visibility(~tr_ID,:)=true;
 89 |     part_visibility = logical(part_visibility);
 90 | end
 91 | 
 92 | function [ idx, part_visibility, anchor_points, shift_vectors, view_assignment, obj_value, err ] = do_build_part_models(parts, part_locs, tr_ID, no_selected_parts, no_visible_parts, view_count)
 93 |     % DO_BUILD_PART_MODELS  One randomly initialised run of the alternating
 94 |     % estimation of anchor points, shift vectors, view assignment and part selection.
 95 |     %
 96 |     % Inputs:
 97 |     %   parts             - table with the columns image, part, x, y, visible
 98 |     %   part_locs         - part coordinates, part_count x image_count x 2
 99 |     %   tr_ID             - logical mask of the images the model is fitted on
100 |     %   no_selected_parts - number of parts selected per view
101 |     %   no_visible_parts  - number of visible parts per image, NaN disables
102 |     %                       the visibility estimation
103 |     %   view_count        - number of views
104 |     %
105 |     % Outputs (image_count is the number of images selected by tr_ID):
106 |     %   idx             - view_count x no_selected_parts, part indices chosen
107 |     %                     in the last part selection step
108 |     %   part_visibility - logical image_count x part_count; estimated
109 |     %                     visibility, or the given visibility flags if
110 |     %                     no_visible_parts is NaN
111 |     %   anchor_points   - image_count x 2
112 |     %   shift_vectors   - part_count x view_count x 2
113 |     %   view_assignment - logical image_count x view_count
114 |     %   obj_value       - negative sum of err
115 |     %   err             - image_count x 1, summed squared error per image
116 | 
117 |     part_count = numel(unique(parts.part));
118 |     total_image_count = numel(unique(parts.image));
119 |     image_count = sum(tr_ID);
120 |     part_locs = part_locs(:,tr_ID,:);
121 |     % part_locs has shape part_count x image_count x coordinates; bring it to
122 |     % image_count x 1 x part_count x coordinates so that bsxfun can expand it
123 |     mu_tmp = permute(part_locs,[2 4 1 3]);
124 | 
125 |     %% Initialization
126 |     % Select a random view for each image
127 |     s = false(image_count,view_count);
128 |     for i=1:image_count
129 |         s(i,randperm(view_count,1))=true;
130 |     end
131 |     % Select no_selected_parts random parts for each view
132 |     b = false(view_count,part_count);
133 |     for v=1:view_count
134 |         b(v,randperm(part_count,no_selected_parts))=true;
135 |     end
136 |     % Set mean position of all visible parts as default anchor point
137 |     is_visible = logical(parts.visible);
138 |     a = repmat(mean([parts.x(is_visible) parts.y(is_visible)],1),image_count,1);
139 |     % Set 0 as default shift vector (part_count x view_count x coordinates)
140 |     d = zeros(part_count,view_count,2);
141 |     % Visibility of each part in each image is taken from parts.visible
142 |     h = reshape(is_visible,part_count,total_image_count)';
143 |     h = h(tr_ID,:);
144 |     % Returned as part_visibility when the visibility is not estimated
145 |     % (no_visible_parts is NaN); without it the return value would be undefined
146 |     est_h = h;
147 | 
148 |     %% Alternating optimization
149 |     % Odd steps update d and a, even steps update h, s and b. Stops as soon as
150 |     % the part selection b does not change anymore, at the latest after step 29.
151 |     i = 0;
152 |     done = false;
153 |     while ~done && ceil(i/2)<15
154 |         i = i+1;
155 |         old_b = b;
156 | 
157 |         % Create singleton dimensions to fit the target shape
158 |         % image_count x view_count x part_count x coordinates
159 |         % a has shape image_count x coordinates
160 |         a_tmp = permute(a,[1 3 4 2]);
161 |         % d has shape part_count x view_count x coordinates
162 |         d_tmp = permute(d,[4 2 1 3]);
163 | 
164 |         if mod(i,2)==1
165 |             % Mask out data that is not visible
166 |             % NOTE(review): the mask does not include the view assignment s,
167 |             % hence every view gets the same shift vectors - confirm intent
168 |             mask = repmat(permute(h,[1 3 2]),1,view_count,1,2);
169 | 
170 |             %% Estimate d
171 |             % d is calculated by mean(mu-a along image_index)
172 |             mu_a = bsxfun(@minus,mu_tmp,a_tmp);
173 |             mu_a = repmat(mu_a,1,view_count,1,1);
174 |             mu_a(~mask) = NaN;
175 |             d = permute(nanmean(mu_a,1),[3 2 4 1]);
176 | 
177 |             %% Estimate a
178 |             % a is calculated by mean(mu-d along part and view index)
179 |             % NOTE(review): d_tmp still holds d from before this step
180 |             mu_d = bsxfun(@minus,mu_tmp,d_tmp);
181 |             mu_d(~mask) = NaN;
182 |             a = permute(nanmean(nanmean(mu_d,3),2),[1 4 2 3]);
183 |         else
184 |             %% Preparations for h, s and b
185 |             % calculate mu - (a + d) using bsxfun to automatically duplicate axis
186 |             mu_a_d = bsxfun(@minus,mu_tmp,bsxfun(@plus,a_tmp,d_tmp));
187 |             % Squared norm along coordinate axis, the result has shape
188 |             % image_count x view_count x part_count
189 |             mu_a_d = sum(mu_a_d.^2,4);
190 | 
191 |             %% Estimate h
192 |             if ~isnan(no_visible_parts)
193 |                 % est_h has shape image_count x part_count
194 |                 est_h = false(size(h));
195 |                 % Only parts selected in the view of the image are candidates
196 |                 mask = bsxfun(@and, s, permute(b,[3 1 2]));
197 |                 mu_a_d_tmp = mu_a_d;
198 |                 mu_a_d_tmp(~mask) = NaN;
199 |                 mu_a_d_tmp = nansum(mu_a_d_tmp,2);
200 |                 % Parts that are not selected sum up to 0, move them to the end
201 |                 mu_a_d_tmp(mu_a_d_tmp == 0) = Inf;
202 |                 [~,idx] = sort(mu_a_d_tmp, 3);
203 |                 idx = permute(idx,[1 3 2]);
204 |                 idx = idx(:,1:no_visible_parts);
205 |                 idx2 = repmat((1:size(idx,1))',1,size(idx,2));
206 |                 est_h(sub2ind(size(h), idx2(:), idx(:))) = true;
207 |             end
208 | 
209 |             %% Estimate s
210 |             % s has shape image_count x view_count, each image gets the view
211 |             % with the smallest error over its visible and selected parts
212 |             s = false(size(s));
213 |             mask = bsxfun(@and, permute(h,[1 3 2]), permute(b,[3 1 2]));
214 |             mu_a_d_tmp = mu_a_d;
215 |             mu_a_d_tmp(~mask) = NaN;
216 |             mu_a_d_tmp = nansum(mu_a_d_tmp,3);
217 |             [~,idx] = sort(mu_a_d_tmp, 2);
218 |             idx = idx(:,1);
219 |             % Index with the size of s; size(h) fails if view_count > part_count
220 |             s(sub2ind(size(s), (1:image_count)', idx)) = true;
221 | 
222 |             %% Estimate b
223 |             % b has shape view_count x part_count, each view gets the
224 |             % no_selected_parts parts with the smallest error over its images
225 |             b = false(size(b));
226 |             mask = repmat(s,1,1,part_count);
227 |             mu_a_d_tmp = mu_a_d;
228 |             mu_a_d_tmp(~mask) = NaN;
229 |             mu_a_d_tmp = nansum(mu_a_d_tmp,1);
230 |             [~,idx] = sort(mu_a_d_tmp, 3);
231 |             idx = permute(idx,[2 3 1]);
232 |             idx = idx(:,1:no_selected_parts);
233 |             idx2 = repmat((1:size(idx,1))',1,size(idx,2));
234 |             b(sub2ind(size(b), idx2(:), idx(:))) = true;
235 | 
236 |             %% Converged if the part selection did not change
237 |             done = isequal(old_b,b);
238 |         end
239 |     end
240 | 
241 |     %% Get the error of each training image
242 |     a_tmp = permute(a,[1 3 4 2]);
243 |     d_tmp = permute(d,[4 2 1 3]);
244 |     % calculate mu - (a + d) using bsxfun to automatically duplicate axis
245 |     mu_a_d = bsxfun(@minus,mu_tmp,bsxfun(@plus,a_tmp,d_tmp));
246 |     % Calculate the quadratic norm^2 along coordinate-axis
247 |     mu_a_d = sum(mu_a_d.^2,4);
248 |     % Only visible parts that are selected in the view of the image count
249 |     mask = bsxfun(@and, s, permute(h,[1 3 2]));
250 |     mask = bsxfun(@and, mask, permute(b,[3 1 2]));
251 |     mu_a_d(~mask) = NaN;
252 |     err = nansum(nansum(mu_a_d,2),3);
253 |     obj_value = -nansum(err);
254 | 
255 |     %% Return values
256 |     part_visibility = est_h;
257 |     anchor_points = a;
258 |     shift_vectors = d;
259 |     view_assignment = s;
260 | end


--------------------------------------------------------------------------------
/cnn_finetuning/googlenet/deploy.prototxt:
--------------------------------------------------------------------------------
   1 | name: "GoogleNet"
   2 | input: "data"
   3 | input_dim: 11
   4 | input_dim: 3
   5 | input_dim: 224
   6 | input_dim: 224
   7 | layer {
   8 |   name: "conv1/7x7_s2"
   9 |   type: "Convolution"
  10 |   bottom: "data"
  11 |   top: "conv1/7x7_s2"
  12 |   param {
  13 |     lr_mult: 1
  14 |     decay_mult: 1
  15 |   }
  16 |   param {
  17 |     lr_mult: 2
  18 |     decay_mult: 0
  19 |   }
  20 |   convolution_param {
  21 |     num_output: 64
  22 |     pad: 3
  23 |     kernel_size: 7
  24 |     stride: 2
  25 |     weight_filler {
  26 |       type: "xavier"
  27 |       std: 0.1
  28 |     }
  29 |     bias_filler {
  30 |       type: "constant"
  31 |       value: 0.2
  32 |     }
  33 |   }
  34 | }
  35 | layer {
  36 |   name: "conv1/relu_7x7"
  37 |   type: "ReLU"
  38 |   bottom: "conv1/7x7_s2"
  39 |   top: "conv1/7x7_s2"
  40 | }
  41 | layer {
  42 |   name: "pool1/3x3_s2"
  43 |   type: "Pooling"
  44 |   bottom: "conv1/7x7_s2"
  45 |   top: "pool1/3x3_s2"
  46 |   pooling_param {
  47 |     pool: MAX
  48 |     kernel_size: 3
  49 |     stride: 2
  50 |   }
  51 | }
  52 | layer {
  53 |   name: "pool1/norm1"
  54 |   type: "LRN"
  55 |   bottom: "pool1/3x3_s2"
  56 |   top: "pool1/norm1"
  57 |   lrn_param {
  58 |     local_size: 5
  59 |     alpha: 0.0001
  60 |     beta: 0.75
  61 |   }
  62 | }
  63 | layer {
  64 |   name: "conv2/3x3_reduce"
  65 |   type: "Convolution"
  66 |   bottom: "pool1/norm1"
  67 |   top: "conv2/3x3_reduce"
  68 |   param {
  69 |     lr_mult: 1
  70 |     decay_mult: 1
  71 |   }
  72 |   param {
  73 |     lr_mult: 2
  74 |     decay_mult: 0
  75 |   }
  76 |   convolution_param {
  77 |     num_output: 64
  78 |     kernel_size: 1
  79 |     weight_filler {
  80 |       type: "xavier"
  81 |       std: 0.1
  82 |     }
  83 |     bias_filler {
  84 |       type: "constant"
  85 |       value: 0.2
  86 |     }
  87 |   }
  88 | }
  89 | layer {
  90 |   name: "conv2/relu_3x3_reduce"
  91 |   type: "ReLU"
  92 |   bottom: "conv2/3x3_reduce"
  93 |   top: "conv2/3x3_reduce"
  94 | }
  95 | layer {
  96 |   name: "conv2/3x3"
  97 |   type: "Convolution"
  98 |   bottom: "conv2/3x3_reduce"
  99 |   top: "conv2/3x3"
 100 |   param {
 101 |     lr_mult: 1
 102 |     decay_mult: 1
 103 |   }
 104 |   param {
 105 |     lr_mult: 2
 106 |     decay_mult: 0
 107 |   }
 108 |   convolution_param {
 109 |     num_output: 192
 110 |     pad: 1
 111 |     kernel_size: 3
 112 |     weight_filler {
 113 |       type: "xavier"
 114 |       std: 0.03
 115 |     }
 116 |     bias_filler {
 117 |       type: "constant"
 118 |       value: 0.2
 119 |     }
 120 |   }
 121 | }
 122 | layer {
 123 |   name: "conv2/relu_3x3"
 124 |   type: "ReLU"
 125 |   bottom: "conv2/3x3"
 126 |   top: "conv2/3x3"
 127 | }
 128 | layer {
 129 |   name: "conv2/norm2"
 130 |   type: "LRN"
 131 |   bottom: "conv2/3x3"
 132 |   top: "conv2/norm2"
 133 |   lrn_param {
 134 |     local_size: 5
 135 |     alpha: 0.0001
 136 |     beta: 0.75
 137 |   }
 138 | }
 139 | layer {
 140 |   name: "pool2/3x3_s2"
 141 |   type: "Pooling"
 142 |   bottom: "conv2/norm2"
 143 |   top: "pool2/3x3_s2"
 144 |   pooling_param {
 145 |     pool: MAX
 146 |     kernel_size: 3
 147 |     stride: 2
 148 |   }
 149 | }
 150 | layer {
 151 |   name: "inception_3a/1x1"
 152 |   type: "Convolution"
 153 |   bottom: "pool2/3x3_s2"
 154 |   top: "inception_3a/1x1"
 155 |   param {
 156 |     lr_mult: 1
 157 |     decay_mult: 1
 158 |   }
 159 |   param {
 160 |     lr_mult: 2
 161 |     decay_mult: 0
 162 |   }
 163 |   convolution_param {
 164 |     num_output: 64
 165 |     kernel_size: 1
 166 |     weight_filler {
 167 |       type: "xavier"
 168 |       std: 0.03
 169 |     }
 170 |     bias_filler {
 171 |       type: "constant"
 172 |       value: 0.2
 173 |     }
 174 |   }
 175 | }
 176 | layer {
 177 |   name: "inception_3a/relu_1x1"
 178 |   type: "ReLU"
 179 |   bottom: "inception_3a/1x1"
 180 |   top: "inception_3a/1x1"
 181 | }
 182 | layer {
 183 |   name: "inception_3a/3x3_reduce"
 184 |   type: "Convolution"
 185 |   bottom: "pool2/3x3_s2"
 186 |   top: "inception_3a/3x3_reduce"
 187 |   param {
 188 |     lr_mult: 1
 189 |     decay_mult: 1
 190 |   }
 191 |   param {
 192 |     lr_mult: 2
 193 |     decay_mult: 0
 194 |   }
 195 |   convolution_param {
 196 |     num_output: 96
 197 |     kernel_size: 1
 198 |     weight_filler {
 199 |       type: "xavier"
 200 |       std: 0.09
 201 |     }
 202 |     bias_filler {
 203 |       type: "constant"
 204 |       value: 0.2
 205 |     }
 206 |   }
 207 | }
 208 | layer {
 209 |   name: "inception_3a/relu_3x3_reduce"
 210 |   type: "ReLU"
 211 |   bottom: "inception_3a/3x3_reduce"
 212 |   top: "inception_3a/3x3_reduce"
 213 | }
 214 | layer {
 215 |   name: "inception_3a/3x3"
 216 |   type: "Convolution"
 217 |   bottom: "inception_3a/3x3_reduce"
 218 |   top: "inception_3a/3x3"
 219 |   param {
 220 |     lr_mult: 1
 221 |     decay_mult: 1
 222 |   }
 223 |   param {
 224 |     lr_mult: 2
 225 |     decay_mult: 0
 226 |   }
 227 |   convolution_param {
 228 |     num_output: 128
 229 |     pad: 1
 230 |     kernel_size: 3
 231 |     weight_filler {
 232 |       type: "xavier"
 233 |       std: 0.03
 234 |     }
 235 |     bias_filler {
 236 |       type: "constant"
 237 |       value: 0.2
 238 |     }
 239 |   }
 240 | }
 241 | layer {
 242 |   name: "inception_3a/relu_3x3"
 243 |   type: "ReLU"
 244 |   bottom: "inception_3a/3x3"
 245 |   top: "inception_3a/3x3"
 246 | }
 247 | layer {
 248 |   name: "inception_3a/5x5_reduce"
 249 |   type: "Convolution"
 250 |   bottom: "pool2/3x3_s2"
 251 |   top: "inception_3a/5x5_reduce"
 252 |   param {
 253 |     lr_mult: 1
 254 |     decay_mult: 1
 255 |   }
 256 |   param {
 257 |     lr_mult: 2
 258 |     decay_mult: 0
 259 |   }
 260 |   convolution_param {
 261 |     num_output: 16
 262 |     kernel_size: 1
 263 |     weight_filler {
 264 |       type: "xavier"
 265 |       std: 0.2
 266 |     }
 267 |     bias_filler {
 268 |       type: "constant"
 269 |       value: 0.2
 270 |     }
 271 |   }
 272 | }
 273 | layer {
 274 |   name: "inception_3a/relu_5x5_reduce"
 275 |   type: "ReLU"
 276 |   bottom: "inception_3a/5x5_reduce"
 277 |   top: "inception_3a/5x5_reduce"
 278 | }
 279 | layer {
 280 |   name: "inception_3a/5x5"
 281 |   type: "Convolution"
 282 |   bottom: "inception_3a/5x5_reduce"
 283 |   top: "inception_3a/5x5"
 284 |   param {
 285 |     lr_mult: 1
 286 |     decay_mult: 1
 287 |   }
 288 |   param {
 289 |     lr_mult: 2
 290 |     decay_mult: 0
 291 |   }
 292 |   convolution_param {
 293 |     num_output: 32
 294 |     pad: 2
 295 |     kernel_size: 5
 296 |     weight_filler {
 297 |       type: "xavier"
 298 |       std: 0.03
 299 |     }
 300 |     bias_filler {
 301 |       type: "constant"
 302 |       value: 0.2
 303 |     }
 304 |   }
 305 | }
 306 | layer {
 307 |   name: "inception_3a/relu_5x5"
 308 |   type: "ReLU"
 309 |   bottom: "inception_3a/5x5"
 310 |   top: "inception_3a/5x5"
 311 | }
 312 | layer {
 313 |   name: "inception_3a/pool"
 314 |   type: "Pooling"
 315 |   bottom: "pool2/3x3_s2"
 316 |   top: "inception_3a/pool"
 317 |   pooling_param {
 318 |     pool: MAX
 319 |     kernel_size: 3
 320 |     stride: 1
 321 |     pad: 1
 322 |   }
 323 | }
 324 | layer {
 325 |   name: "inception_3a/pool_proj"
 326 |   type: "Convolution"
 327 |   bottom: "inception_3a/pool"
 328 |   top: "inception_3a/pool_proj"
 329 |   param {
 330 |     lr_mult: 1
 331 |     decay_mult: 1
 332 |   }
 333 |   param {
 334 |     lr_mult: 2
 335 |     decay_mult: 0
 336 |   }
 337 |   convolution_param {
 338 |     num_output: 32
 339 |     kernel_size: 1
 340 |     weight_filler {
 341 |       type: "xavier"
 342 |       std: 0.1
 343 |     }
 344 |     bias_filler {
 345 |       type: "constant"
 346 |       value: 0.2
 347 |     }
 348 |   }
 349 | }
 350 | layer {
 351 |   name: "inception_3a/relu_pool_proj"
 352 |   type: "ReLU"
 353 |   bottom: "inception_3a/pool_proj"
 354 |   top: "inception_3a/pool_proj"
 355 | }
 356 | layer {
 357 |   name: "inception_3a/output"
 358 |   type: "Concat"
 359 |   bottom: "inception_3a/1x1"
 360 |   bottom: "inception_3a/3x3"
 361 |   bottom: "inception_3a/5x5"
 362 |   bottom: "inception_3a/pool_proj"
 363 |   top: "inception_3a/output"
 364 | }
 365 | layer {  # inception_3b, branch 1: 1x1 conv on inception_3a/output -> 128 channels
 366 |   name: "inception_3b/1x1"
 367 |   type: "Convolution"
 368 |   bottom: "inception_3a/output"
 369 |   top: "inception_3b/1x1"
 370 |   param {  # first param block (presumably the filter weights — confirm): lr x1, weight decay x1; same pattern in every Convolution layer of this module
 371 |     lr_mult: 1
 372 |     decay_mult: 1
 373 |   }
 374 |   param {  # second param block (presumably the bias — confirm): lr x2, no weight decay
 375 |     lr_mult: 2
 376 |     decay_mult: 0
 377 |   }
 378 |   convolution_param {
 379 |     num_output: 128
 380 |     kernel_size: 1
 381 |     weight_filler {
 382 |       type: "xavier"
 383 |       std: 0.03  # NOTE(review): presumably unused by the "xavier" filler — confirm against Caffe's filler docs
 384 |     }
 385 |     bias_filler {
 386 |       type: "constant"
 387 |       value: 0.2
 388 |     }
 389 |   }
 390 | }
 391 | layer {  # in-place ReLU (top == bottom)
 392 |   name: "inception_3b/relu_1x1"
 393 |   type: "ReLU"
 394 |   bottom: "inception_3b/1x1"
 395 |   top: "inception_3b/1x1"
 396 | }
 397 | layer {  # branch 2, step 1: 1x1 conv reducing inception_3a/output to 128 channels
 398 |   name: "inception_3b/3x3_reduce"
 399 |   type: "Convolution"
 400 |   bottom: "inception_3a/output"
 401 |   top: "inception_3b/3x3_reduce"
 402 |   param {
 403 |     lr_mult: 1
 404 |     decay_mult: 1
 405 |   }
 406 |   param {
 407 |     lr_mult: 2
 408 |     decay_mult: 0
 409 |   }
 410 |   convolution_param {
 411 |     num_output: 128
 412 |     kernel_size: 1
 413 |     weight_filler {
 414 |       type: "xavier"
 415 |       std: 0.09
 416 |     }
 417 |     bias_filler {
 418 |       type: "constant"
 419 |       value: 0.2
 420 |     }
 421 |   }
 422 | }
 423 | layer {  # in-place ReLU (top == bottom)
 424 |   name: "inception_3b/relu_3x3_reduce"
 425 |   type: "ReLU"
 426 |   bottom: "inception_3b/3x3_reduce"
 427 |   top: "inception_3b/3x3_reduce"
 428 | }
 429 | layer {  # branch 2, step 2: 3x3 conv (pad 1) on the reduced map -> 192 channels
 430 |   name: "inception_3b/3x3"
 431 |   type: "Convolution"
 432 |   bottom: "inception_3b/3x3_reduce"
 433 |   top: "inception_3b/3x3"
 434 |   param {
 435 |     lr_mult: 1
 436 |     decay_mult: 1
 437 |   }
 438 |   param {
 439 |     lr_mult: 2
 440 |     decay_mult: 0
 441 |   }
 442 |   convolution_param {
 443 |     num_output: 192
 444 |     pad: 1
 445 |     kernel_size: 3
 446 |     weight_filler {
 447 |       type: "xavier"
 448 |       std: 0.03
 449 |     }
 450 |     bias_filler {
 451 |       type: "constant"
 452 |       value: 0.2
 453 |     }
 454 |   }
 455 | }
 456 | layer {  # in-place ReLU (top == bottom)
 457 |   name: "inception_3b/relu_3x3"
 458 |   type: "ReLU"
 459 |   bottom: "inception_3b/3x3"
 460 |   top: "inception_3b/3x3"
 461 | }
 462 | layer {  # branch 3, step 1: 1x1 conv reducing inception_3a/output to 32 channels
 463 |   name: "inception_3b/5x5_reduce"
 464 |   type: "Convolution"
 465 |   bottom: "inception_3a/output"
 466 |   top: "inception_3b/5x5_reduce"
 467 |   param {
 468 |     lr_mult: 1
 469 |     decay_mult: 1
 470 |   }
 471 |   param {
 472 |     lr_mult: 2
 473 |     decay_mult: 0
 474 |   }
 475 |   convolution_param {
 476 |     num_output: 32
 477 |     kernel_size: 1
 478 |     weight_filler {
 479 |       type: "xavier"
 480 |       std: 0.2
 481 |     }
 482 |     bias_filler {
 483 |       type: "constant"
 484 |       value: 0.2
 485 |     }
 486 |   }
 487 | }
 488 | layer {  # in-place ReLU (top == bottom)
 489 |   name: "inception_3b/relu_5x5_reduce"
 490 |   type: "ReLU"
 491 |   bottom: "inception_3b/5x5_reduce"
 492 |   top: "inception_3b/5x5_reduce"
 493 | }
 494 | layer {  # branch 3, step 2: 5x5 conv (pad 2) on the reduced map -> 96 channels
 495 |   name: "inception_3b/5x5"
 496 |   type: "Convolution"
 497 |   bottom: "inception_3b/5x5_reduce"
 498 |   top: "inception_3b/5x5"
 499 |   param {
 500 |     lr_mult: 1
 501 |     decay_mult: 1
 502 |   }
 503 |   param {
 504 |     lr_mult: 2
 505 |     decay_mult: 0
 506 |   }
 507 |   convolution_param {
 508 |     num_output: 96
 509 |     pad: 2
 510 |     kernel_size: 5
 511 |     weight_filler {
 512 |       type: "xavier"
 513 |       std: 0.03
 514 |     }
 515 |     bias_filler {
 516 |       type: "constant"
 517 |       value: 0.2
 518 |     }
 519 |   }
 520 | }
 521 | layer {  # in-place ReLU (top == bottom)
 522 |   name: "inception_3b/relu_5x5"
 523 |   type: "ReLU"
 524 |   bottom: "inception_3b/5x5"
 525 |   top: "inception_3b/5x5"
 526 | }
 527 | layer {  # branch 4, step 1: 3x3 max pool with stride 1 and pad 1, so the spatial size is unchanged
 528 |   name: "inception_3b/pool"
 529 |   type: "Pooling"
 530 |   bottom: "inception_3a/output"
 531 |   top: "inception_3b/pool"
 532 |   pooling_param {
 533 |     pool: MAX
 534 |     kernel_size: 3
 535 |     stride: 1
 536 |     pad: 1
 537 |   }
 538 | }
 539 | layer {  # branch 4, step 2: 1x1 conv on the pooled map -> 64 channels
 540 |   name: "inception_3b/pool_proj"
 541 |   type: "Convolution"
 542 |   bottom: "inception_3b/pool"
 543 |   top: "inception_3b/pool_proj"
 544 |   param {
 545 |     lr_mult: 1
 546 |     decay_mult: 1
 547 |   }
 548 |   param {
 549 |     lr_mult: 2
 550 |     decay_mult: 0
 551 |   }
 552 |   convolution_param {
 553 |     num_output: 64
 554 |     kernel_size: 1
 555 |     weight_filler {
 556 |       type: "xavier"
 557 |       std: 0.1
 558 |     }
 559 |     bias_filler {
 560 |       type: "constant"
 561 |       value: 0.2
 562 |     }
 563 |   }
 564 | }
 565 | layer {  # in-place ReLU (top == bottom)
 566 |   name: "inception_3b/relu_pool_proj"
 567 |   type: "ReLU"
 568 |   bottom: "inception_3b/pool_proj"
 569 |   top: "inception_3b/pool_proj"
 570 | }
 571 | layer {  # merges the four branches; branch widths 128 + 192 + 96 + 64 = 480 (assuming Concat joins along channels — confirm)
 572 |   name: "inception_3b/output"
 573 |   type: "Concat"
 574 |   bottom: "inception_3b/1x1"
 575 |   bottom: "inception_3b/3x3"
 576 |   bottom: "inception_3b/5x5"
 577 |   bottom: "inception_3b/pool_proj"
 578 |   top: "inception_3b/output"
 579 | }
 580 | layer {  # 3x3 max pool with stride 2 (no pad given): downsamples inception_3b/output before the inception_4 modules
 581 |   name: "pool3/3x3_s2"
 582 |   type: "Pooling"
 583 |   bottom: "inception_3b/output"
 584 |   top: "pool3/3x3_s2"
 585 |   pooling_param {
 586 |     pool: MAX
 587 |     kernel_size: 3
 588 |     stride: 2
 589 |   }
 590 | }
 591 | layer {  # inception_4a, branch 1: 1x1 conv on pool3/3x3_s2 -> 192 channels
 592 |   name: "inception_4a/1x1"
 593 |   type: "Convolution"
 594 |   bottom: "pool3/3x3_s2"
 595 |   top: "inception_4a/1x1"
 596 |   param {  # first param block (presumably the filter weights — confirm): lr x1, weight decay x1; same pattern in every Convolution layer of this module
 597 |     lr_mult: 1
 598 |     decay_mult: 1
 599 |   }
 600 |   param {  # second param block (presumably the bias — confirm): lr x2, no weight decay
 601 |     lr_mult: 2
 602 |     decay_mult: 0
 603 |   }
 604 |   convolution_param {
 605 |     num_output: 192
 606 |     kernel_size: 1
 607 |     weight_filler {
 608 |       type: "xavier"
 609 |       std: 0.03  # NOTE(review): presumably unused by the "xavier" filler — confirm against Caffe's filler docs
 610 |     }
 611 |     bias_filler {
 612 |       type: "constant"
 613 |       value: 0.2
 614 |     }
 615 |   }
 616 | }
 617 | layer {  # in-place ReLU (top == bottom)
 618 |   name: "inception_4a/relu_1x1"
 619 |   type: "ReLU"
 620 |   bottom: "inception_4a/1x1"
 621 |   top: "inception_4a/1x1"
 622 | }
 623 | layer {  # branch 2, step 1: 1x1 conv reducing pool3/3x3_s2 to 96 channels
 624 |   name: "inception_4a/3x3_reduce"
 625 |   type: "Convolution"
 626 |   bottom: "pool3/3x3_s2"
 627 |   top: "inception_4a/3x3_reduce"
 628 |   param {
 629 |     lr_mult: 1
 630 |     decay_mult: 1
 631 |   }
 632 |   param {
 633 |     lr_mult: 2
 634 |     decay_mult: 0
 635 |   }
 636 |   convolution_param {
 637 |     num_output: 96
 638 |     kernel_size: 1
 639 |     weight_filler {
 640 |       type: "xavier"
 641 |       std: 0.09
 642 |     }
 643 |     bias_filler {
 644 |       type: "constant"
 645 |       value: 0.2
 646 |     }
 647 |   }
 648 | }
 649 | layer {  # in-place ReLU (top == bottom)
 650 |   name: "inception_4a/relu_3x3_reduce"
 651 |   type: "ReLU"
 652 |   bottom: "inception_4a/3x3_reduce"
 653 |   top: "inception_4a/3x3_reduce"
 654 | }
 655 | layer {  # branch 2, step 2: 3x3 conv (pad 1) on the reduced map -> 208 channels
 656 |   name: "inception_4a/3x3"
 657 |   type: "Convolution"
 658 |   bottom: "inception_4a/3x3_reduce"
 659 |   top: "inception_4a/3x3"
 660 |   param {
 661 |     lr_mult: 1
 662 |     decay_mult: 1
 663 |   }
 664 |   param {
 665 |     lr_mult: 2
 666 |     decay_mult: 0
 667 |   }
 668 |   convolution_param {
 669 |     num_output: 208
 670 |     pad: 1
 671 |     kernel_size: 3
 672 |     weight_filler {
 673 |       type: "xavier"
 674 |       std: 0.03
 675 |     }
 676 |     bias_filler {
 677 |       type: "constant"
 678 |       value: 0.2
 679 |     }
 680 |   }
 681 | }
 682 | layer {  # in-place ReLU (top == bottom)
 683 |   name: "inception_4a/relu_3x3"
 684 |   type: "ReLU"
 685 |   bottom: "inception_4a/3x3"
 686 |   top: "inception_4a/3x3"
 687 | }
 688 | layer {  # branch 3, step 1: 1x1 conv reducing pool3/3x3_s2 to 16 channels
 689 |   name: "inception_4a/5x5_reduce"
 690 |   type: "Convolution"
 691 |   bottom: "pool3/3x3_s2"
 692 |   top: "inception_4a/5x5_reduce"
 693 |   param {
 694 |     lr_mult: 1
 695 |     decay_mult: 1
 696 |   }
 697 |   param {
 698 |     lr_mult: 2
 699 |     decay_mult: 0
 700 |   }
 701 |   convolution_param {
 702 |     num_output: 16
 703 |     kernel_size: 1
 704 |     weight_filler {
 705 |       type: "xavier"
 706 |       std: 0.2
 707 |     }
 708 |     bias_filler {
 709 |       type: "constant"
 710 |       value: 0.2
 711 |     }
 712 |   }
 713 | }
 714 | layer {  # in-place ReLU (top == bottom)
 715 |   name: "inception_4a/relu_5x5_reduce"
 716 |   type: "ReLU"
 717 |   bottom: "inception_4a/5x5_reduce"
 718 |   top: "inception_4a/5x5_reduce"
 719 | }
 720 | layer {  # branch 3, step 2: 5x5 conv (pad 2) on the reduced map -> 48 channels
 721 |   name: "inception_4a/5x5"
 722 |   type: "Convolution"
 723 |   bottom: "inception_4a/5x5_reduce"
 724 |   top: "inception_4a/5x5"
 725 |   param {
 726 |     lr_mult: 1
 727 |     decay_mult: 1
 728 |   }
 729 |   param {
 730 |     lr_mult: 2
 731 |     decay_mult: 0
 732 |   }
 733 |   convolution_param {
 734 |     num_output: 48
 735 |     pad: 2
 736 |     kernel_size: 5
 737 |     weight_filler {
 738 |       type: "xavier"
 739 |       std: 0.03
 740 |     }
 741 |     bias_filler {
 742 |       type: "constant"
 743 |       value: 0.2
 744 |     }
 745 |   }
 746 | }
 747 | layer {  # in-place ReLU (top == bottom)
 748 |   name: "inception_4a/relu_5x5"
 749 |   type: "ReLU"
 750 |   bottom: "inception_4a/5x5"
 751 |   top: "inception_4a/5x5"
 752 | }
 753 | layer {  # branch 4, step 1: 3x3 max pool with stride 1 and pad 1, so the spatial size is unchanged
 754 |   name: "inception_4a/pool"
 755 |   type: "Pooling"
 756 |   bottom: "pool3/3x3_s2"
 757 |   top: "inception_4a/pool"
 758 |   pooling_param {
 759 |     pool: MAX
 760 |     kernel_size: 3
 761 |     stride: 1
 762 |     pad: 1
 763 |   }
 764 | }
 765 | layer {  # branch 4, step 2: 1x1 conv on the pooled map -> 64 channels
 766 |   name: "inception_4a/pool_proj"
 767 |   type: "Convolution"
 768 |   bottom: "inception_4a/pool"
 769 |   top: "inception_4a/pool_proj"
 770 |   param {
 771 |     lr_mult: 1
 772 |     decay_mult: 1
 773 |   }
 774 |   param {
 775 |     lr_mult: 2
 776 |     decay_mult: 0
 777 |   }
 778 |   convolution_param {
 779 |     num_output: 64
 780 |     kernel_size: 1
 781 |     weight_filler {
 782 |       type: "xavier"
 783 |       std: 0.1
 784 |     }
 785 |     bias_filler {
 786 |       type: "constant"
 787 |       value: 0.2
 788 |     }
 789 |   }
 790 | }
 791 | layer {  # in-place ReLU (top == bottom)
 792 |   name: "inception_4a/relu_pool_proj"
 793 |   type: "ReLU"
 794 |   bottom: "inception_4a/pool_proj"
 795 |   top: "inception_4a/pool_proj"
 796 | }
 797 | layer {  # merges the four branches; branch widths 192 + 208 + 48 + 64 = 512 (assuming Concat joins along channels — confirm)
 798 |   name: "inception_4a/output"
 799 |   type: "Concat"
 800 |   bottom: "inception_4a/1x1"
 801 |   bottom: "inception_4a/3x3"
 802 |   bottom: "inception_4a/5x5"
 803 |   bottom: "inception_4a/pool_proj"
 804 |   top: "inception_4a/output"
 805 | }
 806 | layer {  # inception_4b, branch 1: 1x1 conv on inception_4a/output -> 160 channels
 807 |   name: "inception_4b/1x1"
 808 |   type: "Convolution"
 809 |   bottom: "inception_4a/output"
 810 |   top: "inception_4b/1x1"
 811 |   param {  # first param block (presumably the filter weights — confirm): lr x1, weight decay x1; same pattern in every Convolution layer of this module
 812 |     lr_mult: 1
 813 |     decay_mult: 1
 814 |   }
 815 |   param {  # second param block (presumably the bias — confirm): lr x2, no weight decay
 816 |     lr_mult: 2
 817 |     decay_mult: 0
 818 |   }
 819 |   convolution_param {
 820 |     num_output: 160
 821 |     kernel_size: 1
 822 |     weight_filler {
 823 |       type: "xavier"
 824 |       std: 0.03  # NOTE(review): presumably unused by the "xavier" filler — confirm against Caffe's filler docs
 825 |     }
 826 |     bias_filler {
 827 |       type: "constant"
 828 |       value: 0.2
 829 |     }
 830 |   }
 831 | }
 832 | layer {  # in-place ReLU (top == bottom)
 833 |   name: "inception_4b/relu_1x1"
 834 |   type: "ReLU"
 835 |   bottom: "inception_4b/1x1"
 836 |   top: "inception_4b/1x1"
 837 | }
 838 | layer {  # branch 2, step 1: 1x1 conv reducing inception_4a/output to 112 channels
 839 |   name: "inception_4b/3x3_reduce"
 840 |   type: "Convolution"
 841 |   bottom: "inception_4a/output"
 842 |   top: "inception_4b/3x3_reduce"
 843 |   param {
 844 |     lr_mult: 1
 845 |     decay_mult: 1
 846 |   }
 847 |   param {
 848 |     lr_mult: 2
 849 |     decay_mult: 0
 850 |   }
 851 |   convolution_param {
 852 |     num_output: 112
 853 |     kernel_size: 1
 854 |     weight_filler {
 855 |       type: "xavier"
 856 |       std: 0.09
 857 |     }
 858 |     bias_filler {
 859 |       type: "constant"
 860 |       value: 0.2
 861 |     }
 862 |   }
 863 | }
 864 | layer {  # in-place ReLU (top == bottom)
 865 |   name: "inception_4b/relu_3x3_reduce"
 866 |   type: "ReLU"
 867 |   bottom: "inception_4b/3x3_reduce"
 868 |   top: "inception_4b/3x3_reduce"
 869 | }
 870 | layer {  # branch 2, step 2: 3x3 conv (pad 1) on the reduced map -> 224 channels
 871 |   name: "inception_4b/3x3"
 872 |   type: "Convolution"
 873 |   bottom: "inception_4b/3x3_reduce"
 874 |   top: "inception_4b/3x3"
 875 |   param {
 876 |     lr_mult: 1
 877 |     decay_mult: 1
 878 |   }
 879 |   param {
 880 |     lr_mult: 2
 881 |     decay_mult: 0
 882 |   }
 883 |   convolution_param {
 884 |     num_output: 224
 885 |     pad: 1
 886 |     kernel_size: 3
 887 |     weight_filler {
 888 |       type: "xavier"
 889 |       std: 0.03
 890 |     }
 891 |     bias_filler {
 892 |       type: "constant"
 893 |       value: 0.2
 894 |     }
 895 |   }
 896 | }
 897 | layer {  # in-place ReLU (top == bottom)
 898 |   name: "inception_4b/relu_3x3"
 899 |   type: "ReLU"
 900 |   bottom: "inception_4b/3x3"
 901 |   top: "inception_4b/3x3"
 902 | }
 903 | layer {  # branch 3, step 1: 1x1 conv reducing inception_4a/output to 24 channels
 904 |   name: "inception_4b/5x5_reduce"
 905 |   type: "Convolution"
 906 |   bottom: "inception_4a/output"
 907 |   top: "inception_4b/5x5_reduce"
 908 |   param {
 909 |     lr_mult: 1
 910 |     decay_mult: 1
 911 |   }
 912 |   param {
 913 |     lr_mult: 2
 914 |     decay_mult: 0
 915 |   }
 916 |   convolution_param {
 917 |     num_output: 24
 918 |     kernel_size: 1
 919 |     weight_filler {
 920 |       type: "xavier"
 921 |       std: 0.2
 922 |     }
 923 |     bias_filler {
 924 |       type: "constant"
 925 |       value: 0.2
 926 |     }
 927 |   }
 928 | }
 929 | layer {  # in-place ReLU (top == bottom)
 930 |   name: "inception_4b/relu_5x5_reduce"
 931 |   type: "ReLU"
 932 |   bottom: "inception_4b/5x5_reduce"
 933 |   top: "inception_4b/5x5_reduce"
 934 | }
 935 | layer {  # branch 3, step 2: 5x5 conv (pad 2) on the reduced map -> 64 channels
 936 |   name: "inception_4b/5x5"
 937 |   type: "Convolution"
 938 |   bottom: "inception_4b/5x5_reduce"
 939 |   top: "inception_4b/5x5"
 940 |   param {
 941 |     lr_mult: 1
 942 |     decay_mult: 1
 943 |   }
 944 |   param {
 945 |     lr_mult: 2
 946 |     decay_mult: 0
 947 |   }
 948 |   convolution_param {
 949 |     num_output: 64
 950 |     pad: 2
 951 |     kernel_size: 5
 952 |     weight_filler {
 953 |       type: "xavier"
 954 |       std: 0.03
 955 |     }
 956 |     bias_filler {
 957 |       type: "constant"
 958 |       value: 0.2
 959 |     }
 960 |   }
 961 | }
 962 | layer {  # in-place ReLU (top == bottom)
 963 |   name: "inception_4b/relu_5x5"
 964 |   type: "ReLU"
 965 |   bottom: "inception_4b/5x5"
 966 |   top: "inception_4b/5x5"
 967 | }
 968 | layer {  # branch 4, step 1: 3x3 max pool with stride 1 and pad 1, so the spatial size is unchanged
 969 |   name: "inception_4b/pool"
 970 |   type: "Pooling"
 971 |   bottom: "inception_4a/output"
 972 |   top: "inception_4b/pool"
 973 |   pooling_param {
 974 |     pool: MAX
 975 |     kernel_size: 3
 976 |     stride: 1
 977 |     pad: 1
 978 |   }
 979 | }
 980 | layer {  # branch 4, step 2: 1x1 conv on the pooled map -> 64 channels
 981 |   name: "inception_4b/pool_proj"
 982 |   type: "Convolution"
 983 |   bottom: "inception_4b/pool"
 984 |   top: "inception_4b/pool_proj"
 985 |   param {
 986 |     lr_mult: 1
 987 |     decay_mult: 1
 988 |   }
 989 |   param {
 990 |     lr_mult: 2
 991 |     decay_mult: 0
 992 |   }
 993 |   convolution_param {
 994 |     num_output: 64
 995 |     kernel_size: 1
 996 |     weight_filler {
 997 |       type: "xavier"
 998 |       std: 0.1
 999 |     }
1000 |     bias_filler {
1001 |       type: "constant"
1002 |       value: 0.2
1003 |     }
1004 |   }
1005 | }
1006 | layer {  # in-place ReLU (top == bottom)
1007 |   name: "inception_4b/relu_pool_proj"
1008 |   type: "ReLU"
1009 |   bottom: "inception_4b/pool_proj"
1010 |   top: "inception_4b/pool_proj"
1011 | }
1012 | layer {  # merges the four branches; branch widths 160 + 224 + 64 + 64 = 512 (assuming Concat joins along channels — confirm)
1013 |   name: "inception_4b/output"
1014 |   type: "Concat"
1015 |   bottom: "inception_4b/1x1"
1016 |   bottom: "inception_4b/3x3"
1017 |   bottom: "inception_4b/5x5"
1018 |   bottom: "inception_4b/pool_proj"
1019 |   top: "inception_4b/output"
1020 | }
1021 | layer {  # inception_4c, branch 1: 1x1 conv on inception_4b/output -> 128 channels
1022 |   name: "inception_4c/1x1"
1023 |   type: "Convolution"
1024 |   bottom: "inception_4b/output"
1025 |   top: "inception_4c/1x1"
1026 |   param {  # first param block (presumably the filter weights — confirm): lr x1, weight decay x1; same pattern in every Convolution layer of this module
1027 |     lr_mult: 1
1028 |     decay_mult: 1
1029 |   }
1030 |   param {  # second param block (presumably the bias — confirm): lr x2, no weight decay
1031 |     lr_mult: 2
1032 |     decay_mult: 0
1033 |   }
1034 |   convolution_param {
1035 |     num_output: 128
1036 |     kernel_size: 1
1037 |     weight_filler {
1038 |       type: "xavier"
1039 |       std: 0.03  # NOTE(review): presumably unused by the "xavier" filler — confirm against Caffe's filler docs
1040 |     }
1041 |     bias_filler {
1042 |       type: "constant"
1043 |       value: 0.2
1044 |     }
1045 |   }
1046 | }
1047 | layer {  # in-place ReLU (top == bottom)
1048 |   name: "inception_4c/relu_1x1"
1049 |   type: "ReLU"
1050 |   bottom: "inception_4c/1x1"
1051 |   top: "inception_4c/1x1"
1052 | }
1053 | layer {  # branch 2, step 1: 1x1 conv reducing inception_4b/output to 128 channels
1054 |   name: "inception_4c/3x3_reduce"
1055 |   type: "Convolution"
1056 |   bottom: "inception_4b/output"
1057 |   top: "inception_4c/3x3_reduce"
1058 |   param {
1059 |     lr_mult: 1
1060 |     decay_mult: 1
1061 |   }
1062 |   param {
1063 |     lr_mult: 2
1064 |     decay_mult: 0
1065 |   }
1066 |   convolution_param {
1067 |     num_output: 128
1068 |     kernel_size: 1
1069 |     weight_filler {
1070 |       type: "xavier"
1071 |       std: 0.09
1072 |     }
1073 |     bias_filler {
1074 |       type: "constant"
1075 |       value: 0.2
1076 |     }
1077 |   }
1078 | }
1079 | layer {  # in-place ReLU (top == bottom)
1080 |   name: "inception_4c/relu_3x3_reduce"
1081 |   type: "ReLU"
1082 |   bottom: "inception_4c/3x3_reduce"
1083 |   top: "inception_4c/3x3_reduce"
1084 | }
1085 | layer {  # branch 2, step 2: 3x3 conv (pad 1) on the reduced map -> 256 channels
1086 |   name: "inception_4c/3x3"
1087 |   type: "Convolution"
1088 |   bottom: "inception_4c/3x3_reduce"
1089 |   top: "inception_4c/3x3"
1090 |   param {
1091 |     lr_mult: 1
1092 |     decay_mult: 1
1093 |   }
1094 |   param {
1095 |     lr_mult: 2
1096 |     decay_mult: 0
1097 |   }
1098 |   convolution_param {
1099 |     num_output: 256
1100 |     pad: 1
1101 |     kernel_size: 3
1102 |     weight_filler {
1103 |       type: "xavier"
1104 |       std: 0.03
1105 |     }
1106 |     bias_filler {
1107 |       type: "constant"
1108 |       value: 0.2
1109 |     }
1110 |   }
1111 | }
1112 | layer {  # in-place ReLU (top == bottom)
1113 |   name: "inception_4c/relu_3x3"
1114 |   type: "ReLU"
1115 |   bottom: "inception_4c/3x3"
1116 |   top: "inception_4c/3x3"
1117 | }
1118 | layer {  # branch 3, step 1: 1x1 conv reducing inception_4b/output to 24 channels
1119 |   name: "inception_4c/5x5_reduce"
1120 |   type: "Convolution"
1121 |   bottom: "inception_4b/output"
1122 |   top: "inception_4c/5x5_reduce"
1123 |   param {
1124 |     lr_mult: 1
1125 |     decay_mult: 1
1126 |   }
1127 |   param {
1128 |     lr_mult: 2
1129 |     decay_mult: 0
1130 |   }
1131 |   convolution_param {
1132 |     num_output: 24
1133 |     kernel_size: 1
1134 |     weight_filler {
1135 |       type: "xavier"
1136 |       std: 0.2
1137 |     }
1138 |     bias_filler {
1139 |       type: "constant"
1140 |       value: 0.2
1141 |     }
1142 |   }
1143 | }
1144 | layer {  # in-place ReLU (top == bottom)
1145 |   name: "inception_4c/relu_5x5_reduce"
1146 |   type: "ReLU"
1147 |   bottom: "inception_4c/5x5_reduce"
1148 |   top: "inception_4c/5x5_reduce"
1149 | }
1150 | layer {  # branch 3, step 2: 5x5 conv (pad 2) on the reduced map -> 64 channels
1151 |   name: "inception_4c/5x5"
1152 |   type: "Convolution"
1153 |   bottom: "inception_4c/5x5_reduce"
1154 |   top: "inception_4c/5x5"
1155 |   param {
1156 |     lr_mult: 1
1157 |     decay_mult: 1
1158 |   }
1159 |   param {
1160 |     lr_mult: 2
1161 |     decay_mult: 0
1162 |   }
1163 |   convolution_param {
1164 |     num_output: 64
1165 |     pad: 2
1166 |     kernel_size: 5
1167 |     weight_filler {
1168 |       type: "xavier"
1169 |       std: 0.03
1170 |     }
1171 |     bias_filler {
1172 |       type: "constant"
1173 |       value: 0.2
1174 |     }
1175 |   }
1176 | }
1177 | layer {  # in-place ReLU (top == bottom)
1178 |   name: "inception_4c/relu_5x5"
1179 |   type: "ReLU"
1180 |   bottom: "inception_4c/5x5"
1181 |   top: "inception_4c/5x5"
1182 | }
1183 | layer {  # branch 4, step 1: 3x3 max pool with stride 1 and pad 1, so the spatial size is unchanged
1184 |   name: "inception_4c/pool"
1185 |   type: "Pooling"
1186 |   bottom: "inception_4b/output"
1187 |   top: "inception_4c/pool"
1188 |   pooling_param {
1189 |     pool: MAX
1190 |     kernel_size: 3
1191 |     stride: 1
1192 |     pad: 1
1193 |   }
1194 | }
1195 | layer {  # branch 4, step 2: 1x1 conv on the pooled map -> 64 channels
1196 |   name: "inception_4c/pool_proj"
1197 |   type: "Convolution"
1198 |   bottom: "inception_4c/pool"
1199 |   top: "inception_4c/pool_proj"
1200 |   param {
1201 |     lr_mult: 1
1202 |     decay_mult: 1
1203 |   }
1204 |   param {
1205 |     lr_mult: 2
1206 |     decay_mult: 0
1207 |   }
1208 |   convolution_param {
1209 |     num_output: 64
1210 |     kernel_size: 1
1211 |     weight_filler {
1212 |       type: "xavier"
1213 |       std: 0.1
1214 |     }
1215 |     bias_filler {
1216 |       type: "constant"
1217 |       value: 0.2
1218 |     }
1219 |   }
1220 | }
1221 | layer {  # in-place ReLU (top == bottom)
1222 |   name: "inception_4c/relu_pool_proj"
1223 |   type: "ReLU"
1224 |   bottom: "inception_4c/pool_proj"
1225 |   top: "inception_4c/pool_proj"
1226 | }
1227 | layer {  # merges the four branches; branch widths 128 + 256 + 64 + 64 = 512 (assuming Concat joins along channels — confirm)
1228 |   name: "inception_4c/output"
1229 |   type: "Concat"
1230 |   bottom: "inception_4c/1x1"
1231 |   bottom: "inception_4c/3x3"
1232 |   bottom: "inception_4c/5x5"
1233 |   bottom: "inception_4c/pool_proj"
1234 |   top: "inception_4c/output"
1235 | }
1236 | layer {  # inception_4d, branch 1: 1x1 conv on inception_4c/output -> 112 channels
1237 |   name: "inception_4d/1x1"
1238 |   type: "Convolution"
1239 |   bottom: "inception_4c/output"
1240 |   top: "inception_4d/1x1"
1241 |   param {  # first param block (presumably the filter weights — confirm): lr x1, weight decay x1; same pattern in every Convolution layer of this module
1242 |     lr_mult: 1
1243 |     decay_mult: 1
1244 |   }
1245 |   param {  # second param block (presumably the bias — confirm): lr x2, no weight decay
1246 |     lr_mult: 2
1247 |     decay_mult: 0
1248 |   }
1249 |   convolution_param {
1250 |     num_output: 112
1251 |     kernel_size: 1
1252 |     weight_filler {
1253 |       type: "xavier"
1254 |       std: 0.03  # NOTE(review): presumably unused by the "xavier" filler — confirm against Caffe's filler docs
1255 |     }
1256 |     bias_filler {
1257 |       type: "constant"
1258 |       value: 0.2
1259 |     }
1260 |   }
1261 | }
1262 | layer {  # in-place ReLU (top == bottom)
1263 |   name: "inception_4d/relu_1x1"
1264 |   type: "ReLU"
1265 |   bottom: "inception_4d/1x1"
1266 |   top: "inception_4d/1x1"
1267 | }
1268 | layer {  # branch 2, step 1: 1x1 conv reducing inception_4c/output to 144 channels
1269 |   name: "inception_4d/3x3_reduce"
1270 |   type: "Convolution"
1271 |   bottom: "inception_4c/output"
1272 |   top: "inception_4d/3x3_reduce"
1273 |   param {
1274 |     lr_mult: 1
1275 |     decay_mult: 1
1276 |   }
1277 |   param {
1278 |     lr_mult: 2
1279 |     decay_mult: 0
1280 |   }
1281 |   convolution_param {
1282 |     num_output: 144
1283 |     kernel_size: 1
1284 |     weight_filler {
1285 |       type: "xavier"
1286 |       std: 0.09
1287 |     }
1288 |     bias_filler {
1289 |       type: "constant"
1290 |       value: 0.2
1291 |     }
1292 |   }
1293 | }
1294 | layer {  # in-place ReLU (top == bottom)
1295 |   name: "inception_4d/relu_3x3_reduce"
1296 |   type: "ReLU"
1297 |   bottom: "inception_4d/3x3_reduce"
1298 |   top: "inception_4d/3x3_reduce"
1299 | }
1300 | layer {  # branch 2, step 2: 3x3 conv (pad 1) on the reduced map -> 288 channels
1301 |   name: "inception_4d/3x3"
1302 |   type: "Convolution"
1303 |   bottom: "inception_4d/3x3_reduce"
1304 |   top: "inception_4d/3x3"
1305 |   param {
1306 |     lr_mult: 1
1307 |     decay_mult: 1
1308 |   }
1309 |   param {
1310 |     lr_mult: 2
1311 |     decay_mult: 0
1312 |   }
1313 |   convolution_param {
1314 |     num_output: 288
1315 |     pad: 1
1316 |     kernel_size: 3
1317 |     weight_filler {
1318 |       type: "xavier"
1319 |       std: 0.03
1320 |     }
1321 |     bias_filler {
1322 |       type: "constant"
1323 |       value: 0.2
1324 |     }
1325 |   }
1326 | }
1327 | layer {  # in-place ReLU (top == bottom)
1328 |   name: "inception_4d/relu_3x3"
1329 |   type: "ReLU"
1330 |   bottom: "inception_4d/3x3"
1331 |   top: "inception_4d/3x3"
1332 | }
1333 | layer {  # branch 3, step 1: 1x1 conv reducing inception_4c/output to 32 channels
1334 |   name: "inception_4d/5x5_reduce"
1335 |   type: "Convolution"
1336 |   bottom: "inception_4c/output"
1337 |   top: "inception_4d/5x5_reduce"
1338 |   param {
1339 |     lr_mult: 1
1340 |     decay_mult: 1
1341 |   }
1342 |   param {
1343 |     lr_mult: 2
1344 |     decay_mult: 0
1345 |   }
1346 |   convolution_param {
1347 |     num_output: 32
1348 |     kernel_size: 1
1349 |     weight_filler {
1350 |       type: "xavier"
1351 |       std: 0.2
1352 |     }
1353 |     bias_filler {
1354 |       type: "constant"
1355 |       value: 0.2
1356 |     }
1357 |   }
1358 | }
1359 | layer {  # in-place ReLU (top == bottom)
1360 |   name: "inception_4d/relu_5x5_reduce"
1361 |   type: "ReLU"
1362 |   bottom: "inception_4d/5x5_reduce"
1363 |   top: "inception_4d/5x5_reduce"
1364 | }
1365 | layer {  # branch 3, step 2: 5x5 conv (pad 2) on the reduced map -> 64 channels
1366 |   name: "inception_4d/5x5"
1367 |   type: "Convolution"
1368 |   bottom: "inception_4d/5x5_reduce"
1369 |   top: "inception_4d/5x5"
1370 |   param {
1371 |     lr_mult: 1
1372 |     decay_mult: 1
1373 |   }
1374 |   param {
1375 |     lr_mult: 2
1376 |     decay_mult: 0
1377 |   }
1378 |   convolution_param {
1379 |     num_output: 64
1380 |     pad: 2
1381 |     kernel_size: 5
1382 |     weight_filler {
1383 |       type: "xavier"
1384 |       std: 0.03
1385 |     }
1386 |     bias_filler {
1387 |       type: "constant"
1388 |       value: 0.2
1389 |     }
1390 |   }
1391 | }
1392 | layer {  # in-place ReLU (top == bottom)
1393 |   name: "inception_4d/relu_5x5"
1394 |   type: "ReLU"
1395 |   bottom: "inception_4d/5x5"
1396 |   top: "inception_4d/5x5"
1397 | }
1398 | layer {  # branch 4, step 1: 3x3 max pool with stride 1 and pad 1, so the spatial size is unchanged
1399 |   name: "inception_4d/pool"
1400 |   type: "Pooling"
1401 |   bottom: "inception_4c/output"
1402 |   top: "inception_4d/pool"
1403 |   pooling_param {
1404 |     pool: MAX
1405 |     kernel_size: 3
1406 |     stride: 1
1407 |     pad: 1
1408 |   }
1409 | }
1410 | layer {  # branch 4, step 2: 1x1 conv on the pooled map -> 64 channels
1411 |   name: "inception_4d/pool_proj"
1412 |   type: "Convolution"
1413 |   bottom: "inception_4d/pool"
1414 |   top: "inception_4d/pool_proj"
1415 |   param {
1416 |     lr_mult: 1
1417 |     decay_mult: 1
1418 |   }
1419 |   param {
1420 |     lr_mult: 2
1421 |     decay_mult: 0
1422 |   }
1423 |   convolution_param {
1424 |     num_output: 64
1425 |     kernel_size: 1
1426 |     weight_filler {
1427 |       type: "xavier"
1428 |       std: 0.1
1429 |     }
1430 |     bias_filler {
1431 |       type: "constant"
1432 |       value: 0.2
1433 |     }
1434 |   }
1435 | }
1436 | layer {  # in-place ReLU (top == bottom)
1437 |   name: "inception_4d/relu_pool_proj"
1438 |   type: "ReLU"
1439 |   bottom: "inception_4d/pool_proj"
1440 |   top: "inception_4d/pool_proj"
1441 | }
1442 | layer {  # merges the four branches; branch widths 112 + 288 + 64 + 64 = 528 (assuming Concat joins along channels — confirm)
1443 |   name: "inception_4d/output"
1444 |   type: "Concat"
1445 |   bottom: "inception_4d/1x1"
1446 |   bottom: "inception_4d/3x3"
1447 |   bottom: "inception_4d/5x5"
1448 |   bottom: "inception_4d/pool_proj"
1449 |   top: "inception_4d/output"
1450 | }
1451 | layer {  # inception_4e, branch 1: 1x1 conv on inception_4d/output -> 256 channels
1452 |   name: "inception_4e/1x1"
1453 |   type: "Convolution"
1454 |   bottom: "inception_4d/output"
1455 |   top: "inception_4e/1x1"
1456 |   param {  # first param block (presumably the filter weights — confirm): lr x1, weight decay x1; same pattern in every Convolution layer of this module
1457 |     lr_mult: 1
1458 |     decay_mult: 1
1459 |   }
1460 |   param {  # second param block (presumably the bias — confirm): lr x2, no weight decay
1461 |     lr_mult: 2
1462 |     decay_mult: 0
1463 |   }
1464 |   convolution_param {
1465 |     num_output: 256
1466 |     kernel_size: 1
1467 |     weight_filler {
1468 |       type: "xavier"
1469 |       std: 0.03  # NOTE(review): presumably unused by the "xavier" filler — confirm against Caffe's filler docs
1470 |     }
1471 |     bias_filler {
1472 |       type: "constant"
1473 |       value: 0.2
1474 |     }
1475 |   }
1476 | }
1477 | layer {  # in-place ReLU (top == bottom)
1478 |   name: "inception_4e/relu_1x1"
1479 |   type: "ReLU"
1480 |   bottom: "inception_4e/1x1"
1481 |   top: "inception_4e/1x1"
1482 | }
1483 | layer {  # branch 2, step 1: 1x1 conv reducing inception_4d/output to 160 channels
1484 |   name: "inception_4e/3x3_reduce"
1485 |   type: "Convolution"
1486 |   bottom: "inception_4d/output"
1487 |   top: "inception_4e/3x3_reduce"
1488 |   param {
1489 |     lr_mult: 1
1490 |     decay_mult: 1
1491 |   }
1492 |   param {
1493 |     lr_mult: 2
1494 |     decay_mult: 0
1495 |   }
1496 |   convolution_param {
1497 |     num_output: 160
1498 |     kernel_size: 1
1499 |     weight_filler {
1500 |       type: "xavier"
1501 |       std: 0.09
1502 |     }
1503 |     bias_filler {
1504 |       type: "constant"
1505 |       value: 0.2
1506 |     }
1507 |   }
1508 | }
1509 | layer {  # in-place ReLU (top == bottom)
1510 |   name: "inception_4e/relu_3x3_reduce"
1511 |   type: "ReLU"
1512 |   bottom: "inception_4e/3x3_reduce"
1513 |   top: "inception_4e/3x3_reduce"
1514 | }
1515 | layer {  # branch 2, step 2: 3x3 conv (pad 1) on the reduced map -> 320 channels
1516 |   name: "inception_4e/3x3"
1517 |   type: "Convolution"
1518 |   bottom: "inception_4e/3x3_reduce"
1519 |   top: "inception_4e/3x3"
1520 |   param {
1521 |     lr_mult: 1
1522 |     decay_mult: 1
1523 |   }
1524 |   param {
1525 |     lr_mult: 2
1526 |     decay_mult: 0
1527 |   }
1528 |   convolution_param {
1529 |     num_output: 320
1530 |     pad: 1
1531 |     kernel_size: 3
1532 |     weight_filler {
1533 |       type: "xavier"
1534 |       std: 0.03
1535 |     }
1536 |     bias_filler {
1537 |       type: "constant"
1538 |       value: 0.2
1539 |     }
1540 |   }
1541 | }
1542 | layer {  # in-place ReLU (top == bottom)
1543 |   name: "inception_4e/relu_3x3"
1544 |   type: "ReLU"
1545 |   bottom: "inception_4e/3x3"
1546 |   top: "inception_4e/3x3"
1547 | }
1548 | layer {  # branch 3, step 1: 1x1 conv reducing inception_4d/output to 32 channels
1549 |   name: "inception_4e/5x5_reduce"
1550 |   type: "Convolution"
1551 |   bottom: "inception_4d/output"
1552 |   top: "inception_4e/5x5_reduce"
1553 |   param {
1554 |     lr_mult: 1
1555 |     decay_mult: 1
1556 |   }
1557 |   param {
1558 |     lr_mult: 2
1559 |     decay_mult: 0
1560 |   }
1561 |   convolution_param {
1562 |     num_output: 32
1563 |     kernel_size: 1
1564 |     weight_filler {
1565 |       type: "xavier"
1566 |       std: 0.2
1567 |     }
1568 |     bias_filler {
1569 |       type: "constant"
1570 |       value: 0.2
1571 |     }
1572 |   }
1573 | }
1574 | layer {  # in-place ReLU (top == bottom)
1575 |   name: "inception_4e/relu_5x5_reduce"
1576 |   type: "ReLU"
1577 |   bottom: "inception_4e/5x5_reduce"
1578 |   top: "inception_4e/5x5_reduce"
1579 | }
1580 | layer {  # branch 3, step 2: 5x5 conv (pad 2) on the reduced map -> 128 channels
1581 |   name: "inception_4e/5x5"
1582 |   type: "Convolution"
1583 |   bottom: "inception_4e/5x5_reduce"
1584 |   top: "inception_4e/5x5"
1585 |   param {
1586 |     lr_mult: 1
1587 |     decay_mult: 1
1588 |   }
1589 |   param {
1590 |     lr_mult: 2
1591 |     decay_mult: 0
1592 |   }
1593 |   convolution_param {
1594 |     num_output: 128
1595 |     pad: 2
1596 |     kernel_size: 5
1597 |     weight_filler {
1598 |       type: "xavier"
1599 |       std: 0.03
1600 |     }
1601 |     bias_filler {
1602 |       type: "constant"
1603 |       value: 0.2
1604 |     }
1605 |   }
1606 | }
1607 | layer {  # in-place ReLU (top == bottom)
1608 |   name: "inception_4e/relu_5x5"
1609 |   type: "ReLU"
1610 |   bottom: "inception_4e/5x5"
1611 |   top: "inception_4e/5x5"
1612 | }
1613 | layer {  # branch 4, step 1: 3x3 max pool with stride 1 and pad 1, so the spatial size is unchanged
1614 |   name: "inception_4e/pool"
1615 |   type: "Pooling"
1616 |   bottom: "inception_4d/output"
1617 |   top: "inception_4e/pool"
1618 |   pooling_param {
1619 |     pool: MAX
1620 |     kernel_size: 3
1621 |     stride: 1
1622 |     pad: 1
1623 |   }
1624 | }
1625 | layer {  # branch 4, step 2: 1x1 conv on the pooled map -> 128 channels
1626 |   name: "inception_4e/pool_proj"
1627 |   type: "Convolution"
1628 |   bottom: "inception_4e/pool"
1629 |   top: "inception_4e/pool_proj"
1630 |   param {
1631 |     lr_mult: 1
1632 |     decay_mult: 1
1633 |   }
1634 |   param {
1635 |     lr_mult: 2
1636 |     decay_mult: 0
1637 |   }
1638 |   convolution_param {
1639 |     num_output: 128
1640 |     kernel_size: 1
1641 |     weight_filler {
1642 |       type: "xavier"
1643 |       std: 0.1
1644 |     }
1645 |     bias_filler {
1646 |       type: "constant"
1647 |       value: 0.2
1648 |     }
1649 |   }
1650 | }
1651 | layer {  # in-place ReLU (top == bottom)
1652 |   name: "inception_4e/relu_pool_proj"
1653 |   type: "ReLU"
1654 |   bottom: "inception_4e/pool_proj"
1655 |   top: "inception_4e/pool_proj"
1656 | }
1657 | layer {  # merges the four branches; branch widths 256 + 320 + 128 + 128 = 832 (assuming Concat joins along channels — confirm)
1658 |   name: "inception_4e/output"
1659 |   type: "Concat"
1660 |   bottom: "inception_4e/1x1"
1661 |   bottom: "inception_4e/3x3"
1662 |   bottom: "inception_4e/5x5"
1663 |   bottom: "inception_4e/pool_proj"
1664 |   top: "inception_4e/output"
1665 | }
1666 | layer {  # 3x3 max pool with stride 2 (no pad given): downsamples inception_4e/output before the inception_5 modules
1667 |   name: "pool4/3x3_s2"
1668 |   type: "Pooling"
1669 |   bottom: "inception_4e/output"
1670 |   top: "pool4/3x3_s2"
1671 |   pooling_param {
1672 |     pool: MAX
1673 |     kernel_size: 3
1674 |     stride: 2
1675 |   }
1676 | }
1677 | layer {  # inception_5a, 1x1 branch: 1x1 conv on pool4/3x3_s2 -> 256 channels
1678 |   name: "inception_5a/1x1"
1679 |   type: "Convolution"
1680 |   bottom: "pool4/3x3_s2"
1681 |   top: "inception_5a/1x1"
1682 |   param {  # first param block (presumably the filter weights — confirm): lr x1, weight decay x1
1683 |     lr_mult: 1
1684 |     decay_mult: 1
1685 |   }
1686 |   param {  # second param block (presumably the bias — confirm): lr x2, no weight decay
1687 |     lr_mult: 2
1688 |     decay_mult: 0
1689 |   }
1690 |   convolution_param {
1691 |     num_output: 256
1692 |     kernel_size: 1
1693 |     weight_filler {
1694 |       type: "xavier"
1695 |       std: 0.03  # NOTE(review): presumably unused by the "xavier" filler — confirm against Caffe's filler docs
1696 |     }
1697 |     bias_filler {
1698 |       type: "constant"
1699 |       value: 0.2
1700 |     }
1701 |   }
1702 | }
1703 | layer {  # in-place ReLU on the 1x1 branch (top == bottom)
1704 |   name: "inception_5a/relu_1x1"
1705 |   type: "ReLU"
1706 |   bottom: "inception_5a/1x1"
1707 |   top: "inception_5a/1x1"
1708 | }
1709 | layer {  # inception_5a, 3x3 branch, step 1: 1x1 conv reducing pool4/3x3_s2 to 160 channels
1710 |   name: "inception_5a/3x3_reduce"
1711 |   type: "Convolution"
1712 |   bottom: "pool4/3x3_s2"
1713 |   top: "inception_5a/3x3_reduce"
1714 |   param {  # first param block (presumably the filter weights — confirm): lr x1, weight decay x1
1715 |     lr_mult: 1
1716 |     decay_mult: 1
1717 |   }
1718 |   param {  # second param block (presumably the bias — confirm): lr x2, no weight decay
1719 |     lr_mult: 2
1720 |     decay_mult: 0
1721 |   }
1722 |   convolution_param {
1723 |     num_output: 160
1724 |     kernel_size: 1
1725 |     weight_filler {
1726 |       type: "xavier"
1727 |       std: 0.09  # NOTE(review): presumably unused by the "xavier" filler — confirm against Caffe's filler docs
1728 |     }
1729 |     bias_filler {
1730 |       type: "constant"
1731 |       value: 0.2
1732 |     }
1733 |   }
1734 | }
1735 | layer {  # in-place ReLU on the 3x3 reduction (top == bottom)
1736 |   name: "inception_5a/relu_3x3_reduce"
1737 |   type: "ReLU"
1738 |   bottom: "inception_5a/3x3_reduce"
1739 |   top: "inception_5a/3x3_reduce"
1740 | }
1741 | layer {  # inception_5a, 3x3 branch, step 2: 3x3 conv (pad 1) on the reduced map -> 320 channels
1742 |   name: "inception_5a/3x3"
1743 |   type: "Convolution"
1744 |   bottom: "inception_5a/3x3_reduce"
1745 |   top: "inception_5a/3x3"
1746 |   param {  # first param block (presumably the filter weights — confirm): lr x1, weight decay x1
1747 |     lr_mult: 1
1748 |     decay_mult: 1
1749 |   }
1750 |   param {  # second param block (presumably the bias — confirm): lr x2, no weight decay
1751 |     lr_mult: 2
1752 |     decay_mult: 0
1753 |   }
1754 |   convolution_param {
1755 |     num_output: 320
1756 |     pad: 1
1757 |     kernel_size: 3
1758 |     weight_filler {
1759 |       type: "xavier"
1760 |       std: 0.03  # NOTE(review): presumably unused by the "xavier" filler — confirm against Caffe's filler docs
1761 |     }
1762 |     bias_filler {
1763 |       type: "constant"
1764 |       value: 0.2
1765 |     }
1766 |   }
1767 | }
1768 | layer {
1769 |   name: "inception_5a/relu_3x3"
1770 |   type: "ReLU"
1771 |   bottom: "inception_5a/3x3"
1772 |   top: "inception_5a/3x3"
1773 | }
1774 | layer {
1775 |   name: "inception_5a/5x5_reduce"
1776 |   type: "Convolution"
1777 |   bottom: "pool4/3x3_s2"
1778 |   top: "inception_5a/5x5_reduce"
1779 |   param {
1780 |     lr_mult: 1
1781 |     decay_mult: 1
1782 |   }
1783 |   param {
1784 |     lr_mult: 2
1785 |     decay_mult: 0
1786 |   }
1787 |   convolution_param {
1788 |     num_output: 32
1789 |     kernel_size: 1
1790 |     weight_filler {
1791 |       type: "xavier"
1792 |       std: 0.2
1793 |     }
1794 |     bias_filler {
1795 |       type: "constant"
1796 |       value: 0.2
1797 |     }
1798 |   }
1799 | }
1800 | layer {
1801 |   name: "inception_5a/relu_5x5_reduce"
1802 |   type: "ReLU"
1803 |   bottom: "inception_5a/5x5_reduce"
1804 |   top: "inception_5a/5x5_reduce"
1805 | }
1806 | layer {
1807 |   name: "inception_5a/5x5"
1808 |   type: "Convolution"
1809 |   bottom: "inception_5a/5x5_reduce"
1810 |   top: "inception_5a/5x5"
1811 |   param {
1812 |     lr_mult: 1
1813 |     decay_mult: 1
1814 |   }
1815 |   param {
1816 |     lr_mult: 2
1817 |     decay_mult: 0
1818 |   }
1819 |   convolution_param {
1820 |     num_output: 128
1821 |     pad: 2
1822 |     kernel_size: 5
1823 |     weight_filler {
1824 |       type: "xavier"
1825 |       std: 0.03
1826 |     }
1827 |     bias_filler {
1828 |       type: "constant"
1829 |       value: 0.2
1830 |     }
1831 |   }
1832 | }
1833 | layer {
1834 |   name: "inception_5a/relu_5x5"
1835 |   type: "ReLU"
1836 |   bottom: "inception_5a/5x5"
1837 |   top: "inception_5a/5x5"
1838 | }
1839 | layer {
1840 |   name: "inception_5a/pool"
1841 |   type: "Pooling"
1842 |   bottom: "pool4/3x3_s2"
1843 |   top: "inception_5a/pool"
1844 |   pooling_param {
1845 |     pool: MAX
1846 |     kernel_size: 3
1847 |     stride: 1
1848 |     pad: 1
1849 |   }
1850 | }
1851 | layer {
1852 |   name: "inception_5a/pool_proj"
1853 |   type: "Convolution"
1854 |   bottom: "inception_5a/pool"
1855 |   top: "inception_5a/pool_proj"
1856 |   param {
1857 |     lr_mult: 1
1858 |     decay_mult: 1
1859 |   }
1860 |   param {
1861 |     lr_mult: 2
1862 |     decay_mult: 0
1863 |   }
1864 |   convolution_param {
1865 |     num_output: 128
1866 |     kernel_size: 1
1867 |     weight_filler {
1868 |       type: "xavier"
1869 |       std: 0.1
1870 |     }
1871 |     bias_filler {
1872 |       type: "constant"
1873 |       value: 0.2
1874 |     }
1875 |   }
1876 | }
1877 | layer {
1878 |   name: "inception_5a/relu_pool_proj"
1879 |   type: "ReLU"
1880 |   bottom: "inception_5a/pool_proj"
1881 |   top: "inception_5a/pool_proj"
1882 | }
1883 | layer {
1884 |   name: "inception_5a/output"
1885 |   type: "Concat"
1886 |   bottom: "inception_5a/1x1"
1887 |   bottom: "inception_5a/3x3"
1888 |   bottom: "inception_5a/5x5"
1889 |   bottom: "inception_5a/pool_proj"
1890 |   top: "inception_5a/output"
1891 | }
1892 | layer {  # inception_5b: four parallel branches over inception_5a/output, joined by the Concat layer at the end of the module.
1893 |   name: "inception_5b/1x1"
1894 |   type: "Convolution"
1895 |   bottom: "inception_5a/output"
1896 |   top: "inception_5b/1x1"
1897 |   param {  # presumably the weight blob's multipliers (first param entry) - TODO confirm
1898 |     lr_mult: 1
1899 |     decay_mult: 1
1900 |   }
1901 |   param {  # presumably the bias blob's multipliers (second param entry) - TODO confirm
1902 |     lr_mult: 2
1903 |     decay_mult: 0
1904 |   }
1905 |   convolution_param {
1906 |     num_output: 384
1907 |     kernel_size: 1
1908 |     weight_filler {
1909 |       type: "xavier"
1910 |       std: 0.03  # NOTE(review): std is set together with a "xavier" filler - confirm the filler actually reads it
1911 |     }
1912 |     bias_filler {
1913 |       type: "constant"
1914 |       value: 0.2
1915 |     }
1916 |   }
1917 | }
1918 | layer {
1919 |   name: "inception_5b/relu_1x1"
1920 |   type: "ReLU"
1921 |   bottom: "inception_5b/1x1"
1922 |   top: "inception_5b/1x1"  # top reuses the bottom blob name, as for every ReLU in this module
1923 | }
1924 | layer {  # 3x3 branch, step 1: 1x1 "reduce" convolution on the module input
1925 |   name: "inception_5b/3x3_reduce"
1926 |   type: "Convolution"
1927 |   bottom: "inception_5a/output"
1928 |   top: "inception_5b/3x3_reduce"
1929 |   param {
1930 |     lr_mult: 1
1931 |     decay_mult: 1
1932 |   }
1933 |   param {
1934 |     lr_mult: 2
1935 |     decay_mult: 0
1936 |   }
1937 |   convolution_param {
1938 |     num_output: 192
1939 |     kernel_size: 1
1940 |     weight_filler {
1941 |       type: "xavier"
1942 |       std: 0.09
1943 |     }
1944 |     bias_filler {
1945 |       type: "constant"
1946 |       value: 0.2
1947 |     }
1948 |   }
1949 | }
1950 | layer {
1951 |   name: "inception_5b/relu_3x3_reduce"
1952 |   type: "ReLU"
1953 |   bottom: "inception_5b/3x3_reduce"
1954 |   top: "inception_5b/3x3_reduce"
1955 | }
1956 | layer {  # 3x3 branch, step 2: 3x3 convolution on the reduced blob
1957 |   name: "inception_5b/3x3"
1958 |   type: "Convolution"
1959 |   bottom: "inception_5b/3x3_reduce"
1960 |   top: "inception_5b/3x3"
1961 |   param {
1962 |     lr_mult: 1
1963 |     decay_mult: 1
1964 |   }
1965 |   param {
1966 |     lr_mult: 2
1967 |     decay_mult: 0
1968 |   }
1969 |   convolution_param {
1970 |     num_output: 384
1971 |     pad: 1
1972 |     kernel_size: 3
1973 |     weight_filler {
1974 |       type: "xavier"
1975 |       std: 0.03
1976 |     }
1977 |     bias_filler {
1978 |       type: "constant"
1979 |       value: 0.2
1980 |     }
1981 |   }
1982 | }
1983 | layer {
1984 |   name: "inception_5b/relu_3x3"
1985 |   type: "ReLU"
1986 |   bottom: "inception_5b/3x3"
1987 |   top: "inception_5b/3x3"
1988 | }
1989 | layer {  # 5x5 branch, step 1: 1x1 "reduce" convolution on the module input
1990 |   name: "inception_5b/5x5_reduce"
1991 |   type: "Convolution"
1992 |   bottom: "inception_5a/output"
1993 |   top: "inception_5b/5x5_reduce"
1994 |   param {
1995 |     lr_mult: 1
1996 |     decay_mult: 1
1997 |   }
1998 |   param {
1999 |     lr_mult: 2
2000 |     decay_mult: 0
2001 |   }
2002 |   convolution_param {
2003 |     num_output: 48
2004 |     kernel_size: 1
2005 |     weight_filler {
2006 |       type: "xavier"
2007 |       std: 0.2
2008 |     }
2009 |     bias_filler {
2010 |       type: "constant"
2011 |       value: 0.2
2012 |     }
2013 |   }
2014 | }
2015 | layer {
2016 |   name: "inception_5b/relu_5x5_reduce"
2017 |   type: "ReLU"
2018 |   bottom: "inception_5b/5x5_reduce"
2019 |   top: "inception_5b/5x5_reduce"
2020 | }
2021 | layer {  # 5x5 branch, step 2: 5x5 convolution on the reduced blob
2022 |   name: "inception_5b/5x5"
2023 |   type: "Convolution"
2024 |   bottom: "inception_5b/5x5_reduce"
2025 |   top: "inception_5b/5x5"
2026 |   param {
2027 |     lr_mult: 1
2028 |     decay_mult: 1
2029 |   }
2030 |   param {
2031 |     lr_mult: 2
2032 |     decay_mult: 0
2033 |   }
2034 |   convolution_param {
2035 |     num_output: 128
2036 |     pad: 2
2037 |     kernel_size: 5
2038 |     weight_filler {
2039 |       type: "xavier"
2040 |       std: 0.03
2041 |     }
2042 |     bias_filler {
2043 |       type: "constant"
2044 |       value: 0.2
2045 |     }
2046 |   }
2047 | }
2048 | layer {
2049 |   name: "inception_5b/relu_5x5"
2050 |   type: "ReLU"
2051 |   bottom: "inception_5b/5x5"
2052 |   top: "inception_5b/5x5"
2053 | }
2054 | layer {  # pool branch, step 1: 3x3 MAX pooling with stride 1 and pad 1 on the module input
2055 |   name: "inception_5b/pool"
2056 |   type: "Pooling"
2057 |   bottom: "inception_5a/output"
2058 |   top: "inception_5b/pool"
2059 |   pooling_param {
2060 |     pool: MAX
2061 |     kernel_size: 3
2062 |     stride: 1
2063 |     pad: 1
2064 |   }
2065 | }
2066 | layer {  # pool branch, step 2: 1x1 projection convolution on the pooled blob
2067 |   name: "inception_5b/pool_proj"
2068 |   type: "Convolution"
2069 |   bottom: "inception_5b/pool"
2070 |   top: "inception_5b/pool_proj"
2071 |   param {
2072 |     lr_mult: 1
2073 |     decay_mult: 1
2074 |   }
2075 |   param {
2076 |     lr_mult: 2
2077 |     decay_mult: 0
2078 |   }
2079 |   convolution_param {
2080 |     num_output: 128
2081 |     kernel_size: 1
2082 |     weight_filler {
2083 |       type: "xavier"
2084 |       std: 0.1
2085 |     }
2086 |     bias_filler {
2087 |       type: "constant"
2088 |       value: 0.2
2089 |     }
2090 |   }
2091 | }
2092 | layer {
2093 |   name: "inception_5b/relu_pool_proj"
2094 |   type: "ReLU"
2095 |   bottom: "inception_5b/pool_proj"
2096 |   top: "inception_5b/pool_proj"
2097 | }
2098 | layer {  # Joins the four branch outputs in the order listed; branch num_output values sum to 384+384+128+128 = 1024.
2099 |   name: "inception_5b/output"
2100 |   type: "Concat"  # no concat axis is specified here - presumably the layer's default axis is used; TODO confirm
2101 |   bottom: "inception_5b/1x1"
2102 |   bottom: "inception_5b/3x3"
2103 |   bottom: "inception_5b/5x5"
2104 |   bottom: "inception_5b/pool_proj"
2105 |   top: "inception_5b/output"
2106 | }
2107 | layer {  # Classifier head: average pooling -> dropout -> InnerProduct -> Softmax, starting from inception_5b/output.
2108 |   name: "pool5/7x7_s1"
2109 |   type: "Pooling"
2110 |   bottom: "inception_5b/output"
2111 |   top: "pool5/7x7_s1"
2112 |   pooling_param {
2113 |     pool: AVE
2114 |     kernel_size: 7  # presumably spans the whole feature map at the declared input size - TODO confirm
2115 |     stride: 1
2116 |   }
2117 | }
2118 | layer {
2119 |   name: "pool5/drop_7x7_s1"
2120 |   type: "Dropout"
2121 |   bottom: "pool5/7x7_s1"
2122 |   top: "pool5/7x7_s1"  # top reuses the bottom blob name
2123 |   dropout_param {
2124 |     dropout_ratio: 0.4
2125 |   }
2126 | }
2127 | layer {
2128 |   name: "loss3/classifier"
2129 |   type: "InnerProduct"
2130 |   bottom: "pool5/7x7_s1"
2131 |   top: "loss3/classifier"
2132 |   param {
2133 |     lr_mult: 1
2134 |     decay_mult: 1
2135 |   }
2136 |   param {
2137 |     lr_mult: 2
2138 |     decay_mult: 0
2139 |   }
2140 |   inner_product_param {
2141 |     num_output: 1000  # presumably the number of output classes - TODO confirm against the dataset this net is deployed on
2142 |     weight_filler {
2143 |       type: "xavier"  # unlike the convolution layers above, no std is given here
2144 |     }
2145 |     bias_filler {
2146 |       type: "constant"
2147 |       value: 0  # the convolution layers above use 0.2 instead
2148 |     }
2149 |   }
2150 | }
2151 | layer {  # Final layer in this file: Softmax over the classifier output, exposed as blob "prob".
2152 |   name: "prob"
2153 |   type: "Softmax"
2154 |   bottom: "loss3/classifier"
2155 |   top: "prob"
2156 | }
2157 | 


--------------------------------------------------------------------------------
/cnn_finetuning/googlenet/deploy_template.prototxt:
--------------------------------------------------------------------------------
   1 | name: "GoogleNet"  # Net-level header: declares the net name and a single input blob with four dimensions.
   2 | input: "data"  # consumed as the bottom of conv1/7x7_s2 below
   3 | input_dim: 11  # NOTE(review): presumably the batch dimension - confirm that 11 is intended
   4 | input_dim: 3  # presumably channels - TODO confirm
   5 | input_dim: 224  # presumably height - TODO confirm
   6 | input_dim: 224  # presumably width - TODO confirm
   7 | layer {  # Stem: conv1 -> ReLU -> pool1 -> LRN -> conv2 reduce -> ReLU -> conv2 3x3 -> ReLU -> LRN -> pool2.
   8 |   name: "conv1/7x7_s2"
   9 |   type: "Convolution"
  10 |   bottom: "data"
  11 |   top: "conv1/7x7_s2"
  12 |   param {  # presumably the weight blob's multipliers (first param entry) - TODO confirm
  13 |     lr_mult: 1
  14 |     decay_mult: 1
  15 |   }
  16 |   param {  # presumably the bias blob's multipliers (second param entry) - TODO confirm
  17 |     lr_mult: 2
  18 |     decay_mult: 0
  19 |   }
  20 |   convolution_param {
  21 |     num_output: 64
  22 |     pad: 3
  23 |     kernel_size: 7
  24 |     stride: 2  # the "7x7_s2" name suffix mirrors kernel_size 7 / stride 2
  25 |     weight_filler {
  26 |       type: "xavier"
  27 |       std: 0.1  # NOTE(review): std is set together with a "xavier" filler - confirm the filler actually reads it
  28 |     }
  29 |     bias_filler {
  30 |       type: "constant"
  31 |       value: 0.2
  32 |     }
  33 |   }
  34 | }
  35 | layer {
  36 |   name: "conv1/relu_7x7"
  37 |   type: "ReLU"
  38 |   bottom: "conv1/7x7_s2"
  39 |   top: "conv1/7x7_s2"  # top reuses the bottom blob name, as for every ReLU in this section
  40 | }
  41 | layer {  # 3x3 MAX pooling with stride 2
  42 |   name: "pool1/3x3_s2"
  43 |   type: "Pooling"
  44 |   bottom: "conv1/7x7_s2"
  45 |   top: "pool1/3x3_s2"
  46 |   pooling_param {
  47 |     pool: MAX
  48 |     kernel_size: 3
  49 |     stride: 2
  50 |   }
  51 | }
  52 | layer {  # LRN applied after pool1; writes a new blob "pool1/norm1"
  53 |   name: "pool1/norm1"
  54 |   type: "LRN"
  55 |   bottom: "pool1/3x3_s2"
  56 |   top: "pool1/norm1"
  57 |   lrn_param {
  58 |     local_size: 5
  59 |     alpha: 0.0001
  60 |     beta: 0.75
  61 |   }
  62 | }
  63 | layer {  # conv2, step 1: 1x1 "reduce" convolution
  64 |   name: "conv2/3x3_reduce"
  65 |   type: "Convolution"
  66 |   bottom: "pool1/norm1"
  67 |   top: "conv2/3x3_reduce"
  68 |   param {
  69 |     lr_mult: 1
  70 |     decay_mult: 1
  71 |   }
  72 |   param {
  73 |     lr_mult: 2
  74 |     decay_mult: 0
  75 |   }
  76 |   convolution_param {
  77 |     num_output: 64
  78 |     kernel_size: 1
  79 |     weight_filler {
  80 |       type: "xavier"
  81 |       std: 0.1
  82 |     }
  83 |     bias_filler {
  84 |       type: "constant"
  85 |       value: 0.2
  86 |     }
  87 |   }
  88 | }
  89 | layer {
  90 |   name: "conv2/relu_3x3_reduce"
  91 |   type: "ReLU"
  92 |   bottom: "conv2/3x3_reduce"
  93 |   top: "conv2/3x3_reduce"
  94 | }
  95 | layer {  # conv2, step 2: 3x3 convolution on the reduced blob
  96 |   name: "conv2/3x3"
  97 |   type: "Convolution"
  98 |   bottom: "conv2/3x3_reduce"
  99 |   top: "conv2/3x3"
 100 |   param {
 101 |     lr_mult: 1
 102 |     decay_mult: 1
 103 |   }
 104 |   param {
 105 |     lr_mult: 2
 106 |     decay_mult: 0
 107 |   }
 108 |   convolution_param {
 109 |     num_output: 192
 110 |     pad: 1
 111 |     kernel_size: 3
 112 |     weight_filler {
 113 |       type: "xavier"
 114 |       std: 0.03
 115 |     }
 116 |     bias_filler {
 117 |       type: "constant"
 118 |       value: 0.2
 119 |     }
 120 |   }
 121 | }
 122 | layer {
 123 |   name: "conv2/relu_3x3"
 124 |   type: "ReLU"
 125 |   bottom: "conv2/3x3"
 126 |   top: "conv2/3x3"
 127 | }
 128 | layer {  # LRN applied before pool2 (the first LRN above comes after pool1); same lrn_param values as pool1/norm1
 129 |   name: "conv2/norm2"
 130 |   type: "LRN"
 131 |   bottom: "conv2/3x3"
 132 |   top: "conv2/norm2"
 133 |   lrn_param {
 134 |     local_size: 5
 135 |     alpha: 0.0001
 136 |     beta: 0.75
 137 |   }
 138 | }
 139 | layer {  # 3x3 MAX pooling with stride 2; its output feeds every inception_3a branch
 140 |   name: "pool2/3x3_s2"
 141 |   type: "Pooling"
 142 |   bottom: "conv2/norm2"
 143 |   top: "pool2/3x3_s2"
 144 |   pooling_param {
 145 |     pool: MAX
 146 |     kernel_size: 3
 147 |     stride: 2
 148 |   }
 149 | }
 150 | layer {  # inception_3a: four parallel branches over pool2/3x3_s2, joined by the Concat layer at the end of the module.
 151 |   name: "inception_3a/1x1"
 152 |   type: "Convolution"
 153 |   bottom: "pool2/3x3_s2"
 154 |   top: "inception_3a/1x1"
 155 |   param {  # presumably the weight blob's multipliers (first param entry) - TODO confirm
 156 |     lr_mult: 1
 157 |     decay_mult: 1
 158 |   }
 159 |   param {  # presumably the bias blob's multipliers (second param entry) - TODO confirm
 160 |     lr_mult: 2
 161 |     decay_mult: 0
 162 |   }
 163 |   convolution_param {
 164 |     num_output: 64
 165 |     kernel_size: 1
 166 |     weight_filler {
 167 |       type: "xavier"
 168 |       std: 0.03  # NOTE(review): std is set together with a "xavier" filler - confirm the filler actually reads it
 169 |     }
 170 |     bias_filler {
 171 |       type: "constant"
 172 |       value: 0.2
 173 |     }
 174 |   }
 175 | }
 176 | layer {
 177 |   name: "inception_3a/relu_1x1"
 178 |   type: "ReLU"
 179 |   bottom: "inception_3a/1x1"
 180 |   top: "inception_3a/1x1"  # top reuses the bottom blob name, as for every ReLU in this module
 181 | }
 182 | layer {  # 3x3 branch, step 1: 1x1 "reduce" convolution on the module input
 183 |   name: "inception_3a/3x3_reduce"
 184 |   type: "Convolution"
 185 |   bottom: "pool2/3x3_s2"
 186 |   top: "inception_3a/3x3_reduce"
 187 |   param {
 188 |     lr_mult: 1
 189 |     decay_mult: 1
 190 |   }
 191 |   param {
 192 |     lr_mult: 2
 193 |     decay_mult: 0
 194 |   }
 195 |   convolution_param {
 196 |     num_output: 96
 197 |     kernel_size: 1
 198 |     weight_filler {
 199 |       type: "xavier"
 200 |       std: 0.09
 201 |     }
 202 |     bias_filler {
 203 |       type: "constant"
 204 |       value: 0.2
 205 |     }
 206 |   }
 207 | }
 208 | layer {
 209 |   name: "inception_3a/relu_3x3_reduce"
 210 |   type: "ReLU"
 211 |   bottom: "inception_3a/3x3_reduce"
 212 |   top: "inception_3a/3x3_reduce"
 213 | }
 214 | layer {  # 3x3 branch, step 2: 3x3 convolution on the reduced blob
 215 |   name: "inception_3a/3x3"
 216 |   type: "Convolution"
 217 |   bottom: "inception_3a/3x3_reduce"
 218 |   top: "inception_3a/3x3"
 219 |   param {
 220 |     lr_mult: 1
 221 |     decay_mult: 1
 222 |   }
 223 |   param {
 224 |     lr_mult: 2
 225 |     decay_mult: 0
 226 |   }
 227 |   convolution_param {
 228 |     num_output: 128
 229 |     pad: 1
 230 |     kernel_size: 3
 231 |     weight_filler {
 232 |       type: "xavier"
 233 |       std: 0.03
 234 |     }
 235 |     bias_filler {
 236 |       type: "constant"
 237 |       value: 0.2
 238 |     }
 239 |   }
 240 | }
 241 | layer {
 242 |   name: "inception_3a/relu_3x3"
 243 |   type: "ReLU"
 244 |   bottom: "inception_3a/3x3"
 245 |   top: "inception_3a/3x3"
 246 | }
 247 | layer {  # 5x5 branch, step 1: 1x1 "reduce" convolution on the module input
 248 |   name: "inception_3a/5x5_reduce"
 249 |   type: "Convolution"
 250 |   bottom: "pool2/3x3_s2"
 251 |   top: "inception_3a/5x5_reduce"
 252 |   param {
 253 |     lr_mult: 1
 254 |     decay_mult: 1
 255 |   }
 256 |   param {
 257 |     lr_mult: 2
 258 |     decay_mult: 0
 259 |   }
 260 |   convolution_param {
 261 |     num_output: 16
 262 |     kernel_size: 1
 263 |     weight_filler {
 264 |       type: "xavier"
 265 |       std: 0.2
 266 |     }
 267 |     bias_filler {
 268 |       type: "constant"
 269 |       value: 0.2
 270 |     }
 271 |   }
 272 | }
 273 | layer {
 274 |   name: "inception_3a/relu_5x5_reduce"
 275 |   type: "ReLU"
 276 |   bottom: "inception_3a/5x5_reduce"
 277 |   top: "inception_3a/5x5_reduce"
 278 | }
 279 | layer {  # 5x5 branch, step 2: 5x5 convolution on the reduced blob
 280 |   name: "inception_3a/5x5"
 281 |   type: "Convolution"
 282 |   bottom: "inception_3a/5x5_reduce"
 283 |   top: "inception_3a/5x5"
 284 |   param {
 285 |     lr_mult: 1
 286 |     decay_mult: 1
 287 |   }
 288 |   param {
 289 |     lr_mult: 2
 290 |     decay_mult: 0
 291 |   }
 292 |   convolution_param {
 293 |     num_output: 32
 294 |     pad: 2
 295 |     kernel_size: 5
 296 |     weight_filler {
 297 |       type: "xavier"
 298 |       std: 0.03
 299 |     }
 300 |     bias_filler {
 301 |       type: "constant"
 302 |       value: 0.2
 303 |     }
 304 |   }
 305 | }
 306 | layer {
 307 |   name: "inception_3a/relu_5x5"
 308 |   type: "ReLU"
 309 |   bottom: "inception_3a/5x5"
 310 |   top: "inception_3a/5x5"
 311 | }
 312 | layer {  # pool branch, step 1: 3x3 MAX pooling with stride 1 and pad 1 on the module input
 313 |   name: "inception_3a/pool"
 314 |   type: "Pooling"
 315 |   bottom: "pool2/3x3_s2"
 316 |   top: "inception_3a/pool"
 317 |   pooling_param {
 318 |     pool: MAX
 319 |     kernel_size: 3
 320 |     stride: 1
 321 |     pad: 1
 322 |   }
 323 | }
 324 | layer {  # pool branch, step 2: 1x1 projection convolution on the pooled blob
 325 |   name: "inception_3a/pool_proj"
 326 |   type: "Convolution"
 327 |   bottom: "inception_3a/pool"
 328 |   top: "inception_3a/pool_proj"
 329 |   param {
 330 |     lr_mult: 1
 331 |     decay_mult: 1
 332 |   }
 333 |   param {
 334 |     lr_mult: 2
 335 |     decay_mult: 0
 336 |   }
 337 |   convolution_param {
 338 |     num_output: 32
 339 |     kernel_size: 1
 340 |     weight_filler {
 341 |       type: "xavier"
 342 |       std: 0.1
 343 |     }
 344 |     bias_filler {
 345 |       type: "constant"
 346 |       value: 0.2
 347 |     }
 348 |   }
 349 | }
 350 | layer {
 351 |   name: "inception_3a/relu_pool_proj"
 352 |   type: "ReLU"
 353 |   bottom: "inception_3a/pool_proj"
 354 |   top: "inception_3a/pool_proj"
 355 | }
 356 | layer {  # Joins the four branch outputs in the order listed; branch num_output values sum to 64+128+32+32 = 256.
 357 |   name: "inception_3a/output"
 358 |   type: "Concat"  # no concat axis is specified here - presumably the layer's default axis is used; TODO confirm
 359 |   bottom: "inception_3a/1x1"
 360 |   bottom: "inception_3a/3x3"
 361 |   bottom: "inception_3a/5x5"
 362 |   bottom: "inception_3a/pool_proj"
 363 |   top: "inception_3a/output"
 364 | }
 365 | layer {  # inception_3b: four parallel branches over inception_3a/output, joined by the Concat layer at the end of the module.
 366 |   name: "inception_3b/1x1"
 367 |   type: "Convolution"
 368 |   bottom: "inception_3a/output"
 369 |   top: "inception_3b/1x1"
 370 |   param {  # presumably the weight blob's multipliers (first param entry) - TODO confirm
 371 |     lr_mult: 1
 372 |     decay_mult: 1
 373 |   }
 374 |   param {  # presumably the bias blob's multipliers (second param entry) - TODO confirm
 375 |     lr_mult: 2
 376 |     decay_mult: 0
 377 |   }
 378 |   convolution_param {
 379 |     num_output: 128
 380 |     kernel_size: 1
 381 |     weight_filler {
 382 |       type: "xavier"
 383 |       std: 0.03  # NOTE(review): std is set together with a "xavier" filler - confirm the filler actually reads it
 384 |     }
 385 |     bias_filler {
 386 |       type: "constant"
 387 |       value: 0.2
 388 |     }
 389 |   }
 390 | }
 391 | layer {
 392 |   name: "inception_3b/relu_1x1"
 393 |   type: "ReLU"
 394 |   bottom: "inception_3b/1x1"
 395 |   top: "inception_3b/1x1"  # top reuses the bottom blob name, as for every ReLU in this module
 396 | }
 397 | layer {  # 3x3 branch, step 1: 1x1 "reduce" convolution on the module input
 398 |   name: "inception_3b/3x3_reduce"
 399 |   type: "Convolution"
 400 |   bottom: "inception_3a/output"
 401 |   top: "inception_3b/3x3_reduce"
 402 |   param {
 403 |     lr_mult: 1
 404 |     decay_mult: 1
 405 |   }
 406 |   param {
 407 |     lr_mult: 2
 408 |     decay_mult: 0
 409 |   }
 410 |   convolution_param {
 411 |     num_output: 128
 412 |     kernel_size: 1
 413 |     weight_filler {
 414 |       type: "xavier"
 415 |       std: 0.09
 416 |     }
 417 |     bias_filler {
 418 |       type: "constant"
 419 |       value: 0.2
 420 |     }
 421 |   }
 422 | }
 423 | layer {
 424 |   name: "inception_3b/relu_3x3_reduce"
 425 |   type: "ReLU"
 426 |   bottom: "inception_3b/3x3_reduce"
 427 |   top: "inception_3b/3x3_reduce"
 428 | }
 429 | layer {  # 3x3 branch, step 2: 3x3 convolution on the reduced blob
 430 |   name: "inception_3b/3x3"
 431 |   type: "Convolution"
 432 |   bottom: "inception_3b/3x3_reduce"
 433 |   top: "inception_3b/3x3"
 434 |   param {
 435 |     lr_mult: 1
 436 |     decay_mult: 1
 437 |   }
 438 |   param {
 439 |     lr_mult: 2
 440 |     decay_mult: 0
 441 |   }
 442 |   convolution_param {
 443 |     num_output: 192
 444 |     pad: 1
 445 |     kernel_size: 3
 446 |     weight_filler {
 447 |       type: "xavier"
 448 |       std: 0.03
 449 |     }
 450 |     bias_filler {
 451 |       type: "constant"
 452 |       value: 0.2
 453 |     }
 454 |   }
 455 | }
 456 | layer {
 457 |   name: "inception_3b/relu_3x3"
 458 |   type: "ReLU"
 459 |   bottom: "inception_3b/3x3"
 460 |   top: "inception_3b/3x3"
 461 | }
 462 | layer {  # 5x5 branch, step 1: 1x1 "reduce" convolution on the module input
 463 |   name: "inception_3b/5x5_reduce"
 464 |   type: "Convolution"
 465 |   bottom: "inception_3a/output"
 466 |   top: "inception_3b/5x5_reduce"
 467 |   param {
 468 |     lr_mult: 1
 469 |     decay_mult: 1
 470 |   }
 471 |   param {
 472 |     lr_mult: 2
 473 |     decay_mult: 0
 474 |   }
 475 |   convolution_param {
 476 |     num_output: 32
 477 |     kernel_size: 1
 478 |     weight_filler {
 479 |       type: "xavier"
 480 |       std: 0.2
 481 |     }
 482 |     bias_filler {
 483 |       type: "constant"
 484 |       value: 0.2
 485 |     }
 486 |   }
 487 | }
 488 | layer {
 489 |   name: "inception_3b/relu_5x5_reduce"
 490 |   type: "ReLU"
 491 |   bottom: "inception_3b/5x5_reduce"
 492 |   top: "inception_3b/5x5_reduce"
 493 | }
 494 | layer {  # 5x5 branch, step 2: 5x5 convolution on the reduced blob
 495 |   name: "inception_3b/5x5"
 496 |   type: "Convolution"
 497 |   bottom: "inception_3b/5x5_reduce"
 498 |   top: "inception_3b/5x5"
 499 |   param {
 500 |     lr_mult: 1
 501 |     decay_mult: 1
 502 |   }
 503 |   param {
 504 |     lr_mult: 2
 505 |     decay_mult: 0
 506 |   }
 507 |   convolution_param {
 508 |     num_output: 96
 509 |     pad: 2
 510 |     kernel_size: 5
 511 |     weight_filler {
 512 |       type: "xavier"
 513 |       std: 0.03
 514 |     }
 515 |     bias_filler {
 516 |       type: "constant"
 517 |       value: 0.2
 518 |     }
 519 |   }
 520 | }
 521 | layer {
 522 |   name: "inception_3b/relu_5x5"
 523 |   type: "ReLU"
 524 |   bottom: "inception_3b/5x5"
 525 |   top: "inception_3b/5x5"
 526 | }
 527 | layer {  # pool branch, step 1: 3x3 MAX pooling with stride 1 and pad 1 on the module input
 528 |   name: "inception_3b/pool"
 529 |   type: "Pooling"
 530 |   bottom: "inception_3a/output"
 531 |   top: "inception_3b/pool"
 532 |   pooling_param {
 533 |     pool: MAX
 534 |     kernel_size: 3
 535 |     stride: 1
 536 |     pad: 1
 537 |   }
 538 | }
 539 | layer {  # pool branch, step 2: 1x1 projection convolution on the pooled blob
 540 |   name: "inception_3b/pool_proj"
 541 |   type: "Convolution"
 542 |   bottom: "inception_3b/pool"
 543 |   top: "inception_3b/pool_proj"
 544 |   param {
 545 |     lr_mult: 1
 546 |     decay_mult: 1
 547 |   }
 548 |   param {
 549 |     lr_mult: 2
 550 |     decay_mult: 0
 551 |   }
 552 |   convolution_param {
 553 |     num_output: 64
 554 |     kernel_size: 1
 555 |     weight_filler {
 556 |       type: "xavier"
 557 |       std: 0.1
 558 |     }
 559 |     bias_filler {
 560 |       type: "constant"
 561 |       value: 0.2
 562 |     }
 563 |   }
 564 | }
 565 | layer {
 566 |   name: "inception_3b/relu_pool_proj"
 567 |   type: "ReLU"
 568 |   bottom: "inception_3b/pool_proj"
 569 |   top: "inception_3b/pool_proj"
 570 | }
 571 | layer {  # Joins the four branch outputs in the order listed; branch num_output values sum to 128+192+96+64 = 480.
 572 |   name: "inception_3b/output"
 573 |   type: "Concat"  # no concat axis is specified here - presumably the layer's default axis is used; TODO confirm
 574 |   bottom: "inception_3b/1x1"
 575 |   bottom: "inception_3b/3x3"
 576 |   bottom: "inception_3b/5x5"
 577 |   bottom: "inception_3b/pool_proj"
 578 |   top: "inception_3b/output"
 579 | }
 580 | layer {  # Pooling stage: consumes inception_3b/output and feeds every inception_4a branch below.
 581 |   name: "pool3/3x3_s2"
 582 |   type: "Pooling"
 583 |   bottom: "inception_3b/output"
 584 |   top: "pool3/3x3_s2"
 585 |   pooling_param {
 586 |     pool: MAX
 587 |     kernel_size: 3
 588 |     stride: 2  # the "3x3_s2" name suffix mirrors kernel_size 3 / stride 2
 589 |   }
 590 | }
 591 | layer {  # inception_4a: four parallel branches over pool3/3x3_s2, joined by the Concat layer at the end of the module.
 592 |   name: "inception_4a/1x1"
 593 |   type: "Convolution"
 594 |   bottom: "pool3/3x3_s2"
 595 |   top: "inception_4a/1x1"
 596 |   param {  # presumably the weight blob's multipliers (first param entry) - TODO confirm
 597 |     lr_mult: 1
 598 |     decay_mult: 1
 599 |   }
 600 |   param {  # presumably the bias blob's multipliers (second param entry) - TODO confirm
 601 |     lr_mult: 2
 602 |     decay_mult: 0
 603 |   }
 604 |   convolution_param {
 605 |     num_output: 192
 606 |     kernel_size: 1
 607 |     weight_filler {
 608 |       type: "xavier"
 609 |       std: 0.03  # NOTE(review): std is set together with a "xavier" filler - confirm the filler actually reads it
 610 |     }
 611 |     bias_filler {
 612 |       type: "constant"
 613 |       value: 0.2
 614 |     }
 615 |   }
 616 | }
 617 | layer {
 618 |   name: "inception_4a/relu_1x1"
 619 |   type: "ReLU"
 620 |   bottom: "inception_4a/1x1"
 621 |   top: "inception_4a/1x1"  # top reuses the bottom blob name, as for every ReLU in this module
 622 | }
 623 | layer {  # 3x3 branch, step 1: 1x1 "reduce" convolution on the module input
 624 |   name: "inception_4a/3x3_reduce"
 625 |   type: "Convolution"
 626 |   bottom: "pool3/3x3_s2"
 627 |   top: "inception_4a/3x3_reduce"
 628 |   param {
 629 |     lr_mult: 1
 630 |     decay_mult: 1
 631 |   }
 632 |   param {
 633 |     lr_mult: 2
 634 |     decay_mult: 0
 635 |   }
 636 |   convolution_param {
 637 |     num_output: 96
 638 |     kernel_size: 1
 639 |     weight_filler {
 640 |       type: "xavier"
 641 |       std: 0.09
 642 |     }
 643 |     bias_filler {
 644 |       type: "constant"
 645 |       value: 0.2
 646 |     }
 647 |   }
 648 | }
 649 | layer {
 650 |   name: "inception_4a/relu_3x3_reduce"
 651 |   type: "ReLU"
 652 |   bottom: "inception_4a/3x3_reduce"
 653 |   top: "inception_4a/3x3_reduce"
 654 | }
 655 | layer {  # 3x3 branch, step 2: 3x3 convolution on the reduced blob
 656 |   name: "inception_4a/3x3"
 657 |   type: "Convolution"
 658 |   bottom: "inception_4a/3x3_reduce"
 659 |   top: "inception_4a/3x3"
 660 |   param {
 661 |     lr_mult: 1
 662 |     decay_mult: 1
 663 |   }
 664 |   param {
 665 |     lr_mult: 2
 666 |     decay_mult: 0
 667 |   }
 668 |   convolution_param {
 669 |     num_output: 208
 670 |     pad: 1
 671 |     kernel_size: 3
 672 |     weight_filler {
 673 |       type: "xavier"
 674 |       std: 0.03
 675 |     }
 676 |     bias_filler {
 677 |       type: "constant"
 678 |       value: 0.2
 679 |     }
 680 |   }
 681 | }
 682 | layer {
 683 |   name: "inception_4a/relu_3x3"
 684 |   type: "ReLU"
 685 |   bottom: "inception_4a/3x3"
 686 |   top: "inception_4a/3x3"
 687 | }
 688 | layer {  # 5x5 branch, step 1: 1x1 "reduce" convolution on the module input
 689 |   name: "inception_4a/5x5_reduce"
 690 |   type: "Convolution"
 691 |   bottom: "pool3/3x3_s2"
 692 |   top: "inception_4a/5x5_reduce"
 693 |   param {
 694 |     lr_mult: 1
 695 |     decay_mult: 1
 696 |   }
 697 |   param {
 698 |     lr_mult: 2
 699 |     decay_mult: 0
 700 |   }
 701 |   convolution_param {
 702 |     num_output: 16
 703 |     kernel_size: 1
 704 |     weight_filler {
 705 |       type: "xavier"
 706 |       std: 0.2
 707 |     }
 708 |     bias_filler {
 709 |       type: "constant"
 710 |       value: 0.2
 711 |     }
 712 |   }
 713 | }
 714 | layer {
 715 |   name: "inception_4a/relu_5x5_reduce"
 716 |   type: "ReLU"
 717 |   bottom: "inception_4a/5x5_reduce"
 718 |   top: "inception_4a/5x5_reduce"
 719 | }
 720 | layer {  # 5x5 branch, step 2: 5x5 convolution on the reduced blob
 721 |   name: "inception_4a/5x5"
 722 |   type: "Convolution"
 723 |   bottom: "inception_4a/5x5_reduce"
 724 |   top: "inception_4a/5x5"
 725 |   param {
 726 |     lr_mult: 1
 727 |     decay_mult: 1
 728 |   }
 729 |   param {
 730 |     lr_mult: 2
 731 |     decay_mult: 0
 732 |   }
 733 |   convolution_param {
 734 |     num_output: 48
 735 |     pad: 2
 736 |     kernel_size: 5
 737 |     weight_filler {
 738 |       type: "xavier"
 739 |       std: 0.03
 740 |     }
 741 |     bias_filler {
 742 |       type: "constant"
 743 |       value: 0.2
 744 |     }
 745 |   }
 746 | }
 747 | layer {
 748 |   name: "inception_4a/relu_5x5"
 749 |   type: "ReLU"
 750 |   bottom: "inception_4a/5x5"
 751 |   top: "inception_4a/5x5"
 752 | }
 753 | layer {  # pool branch, step 1: 3x3 MAX pooling with stride 1 and pad 1 on the module input
 754 |   name: "inception_4a/pool"
 755 |   type: "Pooling"
 756 |   bottom: "pool3/3x3_s2"
 757 |   top: "inception_4a/pool"
 758 |   pooling_param {
 759 |     pool: MAX
 760 |     kernel_size: 3
 761 |     stride: 1
 762 |     pad: 1
 763 |   }
 764 | }
 765 | layer {  # pool branch, step 2: 1x1 projection convolution on the pooled blob
 766 |   name: "inception_4a/pool_proj"
 767 |   type: "Convolution"
 768 |   bottom: "inception_4a/pool"
 769 |   top: "inception_4a/pool_proj"
 770 |   param {
 771 |     lr_mult: 1
 772 |     decay_mult: 1
 773 |   }
 774 |   param {
 775 |     lr_mult: 2
 776 |     decay_mult: 0
 777 |   }
 778 |   convolution_param {
 779 |     num_output: 64
 780 |     kernel_size: 1
 781 |     weight_filler {
 782 |       type: "xavier"
 783 |       std: 0.1
 784 |     }
 785 |     bias_filler {
 786 |       type: "constant"
 787 |       value: 0.2
 788 |     }
 789 |   }
 790 | }
 791 | layer {
 792 |   name: "inception_4a/relu_pool_proj"
 793 |   type: "ReLU"
 794 |   bottom: "inception_4a/pool_proj"
 795 |   top: "inception_4a/pool_proj"
 796 | }
 797 | layer {  # Joins the four branch outputs in the order listed; branch num_output values sum to 192+208+48+64 = 512.
 798 |   name: "inception_4a/output"
 799 |   type: "Concat"  # no concat axis is specified here - presumably the layer's default axis is used; TODO confirm
 800 |   bottom: "inception_4a/1x1"
 801 |   bottom: "inception_4a/3x3"
 802 |   bottom: "inception_4a/5x5"
 803 |   bottom: "inception_4a/pool_proj"
 804 |   top: "inception_4a/output"
 805 | }
 806 | layer {  # inception_4b: four parallel branches over inception_4a/output, joined by the Concat layer at the end of the module.
 807 |   name: "inception_4b/1x1"
 808 |   type: "Convolution"
 809 |   bottom: "inception_4a/output"
 810 |   top: "inception_4b/1x1"
 811 |   param {  # presumably the weight blob's multipliers (first param entry) - TODO confirm
 812 |     lr_mult: 1
 813 |     decay_mult: 1
 814 |   }
 815 |   param {  # presumably the bias blob's multipliers (second param entry) - TODO confirm
 816 |     lr_mult: 2
 817 |     decay_mult: 0
 818 |   }
 819 |   convolution_param {
 820 |     num_output: 160
 821 |     kernel_size: 1
 822 |     weight_filler {
 823 |       type: "xavier"
 824 |       std: 0.03  # NOTE(review): std is set together with a "xavier" filler - confirm the filler actually reads it
 825 |     }
 826 |     bias_filler {
 827 |       type: "constant"
 828 |       value: 0.2
 829 |     }
 830 |   }
 831 | }
 832 | layer {
 833 |   name: "inception_4b/relu_1x1"
 834 |   type: "ReLU"
 835 |   bottom: "inception_4b/1x1"
 836 |   top: "inception_4b/1x1"  # top reuses the bottom blob name, as for every ReLU in this module
 837 | }
 838 | layer {  # 3x3 branch, step 1: 1x1 "reduce" convolution on the module input
 839 |   name: "inception_4b/3x3_reduce"
 840 |   type: "Convolution"
 841 |   bottom: "inception_4a/output"
 842 |   top: "inception_4b/3x3_reduce"
 843 |   param {
 844 |     lr_mult: 1
 845 |     decay_mult: 1
 846 |   }
 847 |   param {
 848 |     lr_mult: 2
 849 |     decay_mult: 0
 850 |   }
 851 |   convolution_param {
 852 |     num_output: 112
 853 |     kernel_size: 1
 854 |     weight_filler {
 855 |       type: "xavier"
 856 |       std: 0.09
 857 |     }
 858 |     bias_filler {
 859 |       type: "constant"
 860 |       value: 0.2
 861 |     }
 862 |   }
 863 | }
 864 | layer {
 865 |   name: "inception_4b/relu_3x3_reduce"
 866 |   type: "ReLU"
 867 |   bottom: "inception_4b/3x3_reduce"
 868 |   top: "inception_4b/3x3_reduce"
 869 | }
 870 | layer {  # 3x3 branch, step 2: 3x3 convolution on the reduced blob
 871 |   name: "inception_4b/3x3"
 872 |   type: "Convolution"
 873 |   bottom: "inception_4b/3x3_reduce"
 874 |   top: "inception_4b/3x3"
 875 |   param {
 876 |     lr_mult: 1
 877 |     decay_mult: 1
 878 |   }
 879 |   param {
 880 |     lr_mult: 2
 881 |     decay_mult: 0
 882 |   }
 883 |   convolution_param {
 884 |     num_output: 224
 885 |     pad: 1
 886 |     kernel_size: 3
 887 |     weight_filler {
 888 |       type: "xavier"
 889 |       std: 0.03
 890 |     }
 891 |     bias_filler {
 892 |       type: "constant"
 893 |       value: 0.2
 894 |     }
 895 |   }
 896 | }
 897 | layer {
 898 |   name: "inception_4b/relu_3x3"
 899 |   type: "ReLU"
 900 |   bottom: "inception_4b/3x3"
 901 |   top: "inception_4b/3x3"
 902 | }
 903 | layer {  # 5x5 branch, step 1: 1x1 "reduce" convolution on the module input
 904 |   name: "inception_4b/5x5_reduce"
 905 |   type: "Convolution"
 906 |   bottom: "inception_4a/output"
 907 |   top: "inception_4b/5x5_reduce"
 908 |   param {
 909 |     lr_mult: 1
 910 |     decay_mult: 1
 911 |   }
 912 |   param {
 913 |     lr_mult: 2
 914 |     decay_mult: 0
 915 |   }
 916 |   convolution_param {
 917 |     num_output: 24
 918 |     kernel_size: 1
 919 |     weight_filler {
 920 |       type: "xavier"
 921 |       std: 0.2
 922 |     }
 923 |     bias_filler {
 924 |       type: "constant"
 925 |       value: 0.2
 926 |     }
 927 |   }
 928 | }
 929 | layer {
 930 |   name: "inception_4b/relu_5x5_reduce"
 931 |   type: "ReLU"
 932 |   bottom: "inception_4b/5x5_reduce"
 933 |   top: "inception_4b/5x5_reduce"
 934 | }
 935 | layer {  # 5x5 branch, step 2: 5x5 convolution on the reduced blob
 936 |   name: "inception_4b/5x5"
 937 |   type: "Convolution"
 938 |   bottom: "inception_4b/5x5_reduce"
 939 |   top: "inception_4b/5x5"
 940 |   param {
 941 |     lr_mult: 1
 942 |     decay_mult: 1
 943 |   }
 944 |   param {
 945 |     lr_mult: 2
 946 |     decay_mult: 0
 947 |   }
 948 |   convolution_param {
 949 |     num_output: 64
 950 |     pad: 2
 951 |     kernel_size: 5
 952 |     weight_filler {
 953 |       type: "xavier"
 954 |       std: 0.03
 955 |     }
 956 |     bias_filler {
 957 |       type: "constant"
 958 |       value: 0.2
 959 |     }
 960 |   }
 961 | }
 962 | layer {
 963 |   name: "inception_4b/relu_5x5"
 964 |   type: "ReLU"
 965 |   bottom: "inception_4b/5x5"
 966 |   top: "inception_4b/5x5"
 967 | }
 968 | layer {  # pool branch, step 1: 3x3 MAX pooling with stride 1 and pad 1 on the module input
 969 |   name: "inception_4b/pool"
 970 |   type: "Pooling"
 971 |   bottom: "inception_4a/output"
 972 |   top: "inception_4b/pool"
 973 |   pooling_param {
 974 |     pool: MAX
 975 |     kernel_size: 3
 976 |     stride: 1
 977 |     pad: 1
 978 |   }
 979 | }
 980 | layer {  # pool branch, step 2: 1x1 projection convolution on the pooled blob
 981 |   name: "inception_4b/pool_proj"
 982 |   type: "Convolution"
 983 |   bottom: "inception_4b/pool"
 984 |   top: "inception_4b/pool_proj"
 985 |   param {
 986 |     lr_mult: 1
 987 |     decay_mult: 1
 988 |   }
 989 |   param {
 990 |     lr_mult: 2
 991 |     decay_mult: 0
 992 |   }
 993 |   convolution_param {
 994 |     num_output: 64
 995 |     kernel_size: 1
 996 |     weight_filler {
 997 |       type: "xavier"
 998 |       std: 0.1
 999 |     }
1000 |     bias_filler {
1001 |       type: "constant"
1002 |       value: 0.2
1003 |     }
1004 |   }
1005 | }
1006 | layer {
1007 |   name: "inception_4b/relu_pool_proj"
1008 |   type: "ReLU"
1009 |   bottom: "inception_4b/pool_proj"
1010 |   top: "inception_4b/pool_proj"
1011 | }
1012 | layer {  # Joins the four branch outputs in the order listed; branch num_output values sum to 160+224+64+64 = 512.
1013 |   name: "inception_4b/output"
1014 |   type: "Concat"  # no concat axis is specified here - presumably the layer's default axis is used; TODO confirm
1015 |   bottom: "inception_4b/1x1"
1016 |   bottom: "inception_4b/3x3"
1017 |   bottom: "inception_4b/5x5"
1018 |   bottom: "inception_4b/pool_proj"
1019 |   top: "inception_4b/output"
1020 | }
1021 | layer {  # inception_4c, 1x1 branch: 1x1 convolution with 128 outputs on inception_4b/output
1022 |   name: "inception_4c/1x1"
1023 |   type: "Convolution"
1024 |   bottom: "inception_4b/output"
1025 |   top: "inception_4c/1x1"
1026 |   param {  # first param entry; presumably the weights -- confirm
1027 |     lr_mult: 1
1028 |     decay_mult: 1
1029 |   }
1030 |   param {  # second param entry; presumably the bias (lr_mult 2, no decay) -- confirm
1031 |     lr_mult: 2
1032 |     decay_mult: 0
1033 |   }
1034 |   convolution_param {
1035 |     num_output: 128
1036 |     kernel_size: 1
1037 |     weight_filler {
1038 |       type: "xavier"
1039 |       std: 0.03  # NOTE(review): std is given next to type "xavier"; confirm whether that filler reads it
1040 |     }
1041 |     bias_filler {
1042 |       type: "constant"
1043 |       value: 0.2
1044 |     }
1045 |   }
1046 | }
1047 | layer {  # in-place ReLU: bottom and top are both inception_4c/1x1
1048 |   name: "inception_4c/relu_1x1"
1049 |   type: "ReLU"
1050 |   bottom: "inception_4c/1x1"
1051 |   top: "inception_4c/1x1"
1052 | }
1053 | layer {  # 3x3 branch, step 1: 1x1 convolution with 128 outputs on inception_4b/output
1054 |   name: "inception_4c/3x3_reduce"
1055 |   type: "Convolution"
1056 |   bottom: "inception_4b/output"
1057 |   top: "inception_4c/3x3_reduce"
1058 |   param {
1059 |     lr_mult: 1
1060 |     decay_mult: 1
1061 |   }
1062 |   param {
1063 |     lr_mult: 2
1064 |     decay_mult: 0
1065 |   }
1066 |   convolution_param {
1067 |     num_output: 128
1068 |     kernel_size: 1
1069 |     weight_filler {
1070 |       type: "xavier"
1071 |       std: 0.09
1072 |     }
1073 |     bias_filler {
1074 |       type: "constant"
1075 |       value: 0.2
1076 |     }
1077 |   }
1078 | }
1079 | layer {  # in-place ReLU on inception_4c/3x3_reduce
1080 |   name: "inception_4c/relu_3x3_reduce"
1081 |   type: "ReLU"
1082 |   bottom: "inception_4c/3x3_reduce"
1083 |   top: "inception_4c/3x3_reduce"
1084 | }
1085 | layer {  # 3x3 branch, step 2: 3x3 convolution (pad 1) with 256 outputs
1086 |   name: "inception_4c/3x3"
1087 |   type: "Convolution"
1088 |   bottom: "inception_4c/3x3_reduce"
1089 |   top: "inception_4c/3x3"
1090 |   param {
1091 |     lr_mult: 1
1092 |     decay_mult: 1
1093 |   }
1094 |   param {
1095 |     lr_mult: 2
1096 |     decay_mult: 0
1097 |   }
1098 |   convolution_param {
1099 |     num_output: 256
1100 |     pad: 1
1101 |     kernel_size: 3
1102 |     weight_filler {
1103 |       type: "xavier"
1104 |       std: 0.03
1105 |     }
1106 |     bias_filler {
1107 |       type: "constant"
1108 |       value: 0.2
1109 |     }
1110 |   }
1111 | }
1112 | layer {  # in-place ReLU on inception_4c/3x3
1113 |   name: "inception_4c/relu_3x3"
1114 |   type: "ReLU"
1115 |   bottom: "inception_4c/3x3"
1116 |   top: "inception_4c/3x3"
1117 | }
1118 | layer {  # 5x5 branch, step 1: 1x1 convolution with 24 outputs on inception_4b/output
1119 |   name: "inception_4c/5x5_reduce"
1120 |   type: "Convolution"
1121 |   bottom: "inception_4b/output"
1122 |   top: "inception_4c/5x5_reduce"
1123 |   param {
1124 |     lr_mult: 1
1125 |     decay_mult: 1
1126 |   }
1127 |   param {
1128 |     lr_mult: 2
1129 |     decay_mult: 0
1130 |   }
1131 |   convolution_param {
1132 |     num_output: 24
1133 |     kernel_size: 1
1134 |     weight_filler {
1135 |       type: "xavier"
1136 |       std: 0.2
1137 |     }
1138 |     bias_filler {
1139 |       type: "constant"
1140 |       value: 0.2
1141 |     }
1142 |   }
1143 | }
1144 | layer {  # in-place ReLU on inception_4c/5x5_reduce
1145 |   name: "inception_4c/relu_5x5_reduce"
1146 |   type: "ReLU"
1147 |   bottom: "inception_4c/5x5_reduce"
1148 |   top: "inception_4c/5x5_reduce"
1149 | }
1150 | layer {  # 5x5 branch, step 2: 5x5 convolution (pad 2) with 64 outputs
1151 |   name: "inception_4c/5x5"
1152 |   type: "Convolution"
1153 |   bottom: "inception_4c/5x5_reduce"
1154 |   top: "inception_4c/5x5"
1155 |   param {
1156 |     lr_mult: 1
1157 |     decay_mult: 1
1158 |   }
1159 |   param {
1160 |     lr_mult: 2
1161 |     decay_mult: 0
1162 |   }
1163 |   convolution_param {
1164 |     num_output: 64
1165 |     pad: 2
1166 |     kernel_size: 5
1167 |     weight_filler {
1168 |       type: "xavier"
1169 |       std: 0.03
1170 |     }
1171 |     bias_filler {
1172 |       type: "constant"
1173 |       value: 0.2
1174 |     }
1175 |   }
1176 | }
1177 | layer {  # in-place ReLU on inception_4c/5x5
1178 |   name: "inception_4c/relu_5x5"
1179 |   type: "ReLU"
1180 |   bottom: "inception_4c/5x5"
1181 |   top: "inception_4c/5x5"
1182 | }
1183 | layer {  # pool branch, step 1: 3x3 max pooling (stride 1, pad 1) on inception_4b/output
1184 |   name: "inception_4c/pool"
1185 |   type: "Pooling"
1186 |   bottom: "inception_4b/output"
1187 |   top: "inception_4c/pool"
1188 |   pooling_param {
1189 |     pool: MAX
1190 |     kernel_size: 3
1191 |     stride: 1
1192 |     pad: 1
1193 |   }
1194 | }
1195 | layer {  # pool branch, step 2: 1x1 convolution with 64 outputs on the pooled blob
1196 |   name: "inception_4c/pool_proj"
1197 |   type: "Convolution"
1198 |   bottom: "inception_4c/pool"
1199 |   top: "inception_4c/pool_proj"
1200 |   param {
1201 |     lr_mult: 1
1202 |     decay_mult: 1
1203 |   }
1204 |   param {
1205 |     lr_mult: 2
1206 |     decay_mult: 0
1207 |   }
1208 |   convolution_param {
1209 |     num_output: 64
1210 |     kernel_size: 1
1211 |     weight_filler {
1212 |       type: "xavier"
1213 |       std: 0.1
1214 |     }
1215 |     bias_filler {
1216 |       type: "constant"
1217 |       value: 0.2
1218 |     }
1219 |   }
1220 | }
1221 | layer {  # in-place ReLU on inception_4c/pool_proj
1222 |   name: "inception_4c/relu_pool_proj"
1223 |   type: "ReLU"
1224 |   bottom: "inception_4c/pool_proj"
1225 |   top: "inception_4c/pool_proj"
1226 | }
1227 | layer {  # joins the four branches; their num_output values sum to 128+256+64+64 = 512 (no axis given; presumably channels -- confirm)
1228 |   name: "inception_4c/output"
1229 |   type: "Concat"
1230 |   bottom: "inception_4c/1x1"
1231 |   bottom: "inception_4c/3x3"
1232 |   bottom: "inception_4c/5x5"
1233 |   bottom: "inception_4c/pool_proj"
1234 |   top: "inception_4c/output"
1235 | }
1236 | layer {  # inception_4d, 1x1 branch: 1x1 convolution with 112 outputs on inception_4c/output
1237 |   name: "inception_4d/1x1"
1238 |   type: "Convolution"
1239 |   bottom: "inception_4c/output"
1240 |   top: "inception_4d/1x1"
1241 |   param {  # first param entry; presumably the weights -- confirm
1242 |     lr_mult: 1
1243 |     decay_mult: 1
1244 |   }
1245 |   param {  # second param entry; presumably the bias (lr_mult 2, no decay) -- confirm
1246 |     lr_mult: 2
1247 |     decay_mult: 0
1248 |   }
1249 |   convolution_param {
1250 |     num_output: 112
1251 |     kernel_size: 1
1252 |     weight_filler {
1253 |       type: "xavier"
1254 |       std: 0.03  # NOTE(review): std is given next to type "xavier"; confirm whether that filler reads it
1255 |     }
1256 |     bias_filler {
1257 |       type: "constant"
1258 |       value: 0.2
1259 |     }
1260 |   }
1261 | }
1262 | layer {  # in-place ReLU: bottom and top are both inception_4d/1x1
1263 |   name: "inception_4d/relu_1x1"
1264 |   type: "ReLU"
1265 |   bottom: "inception_4d/1x1"
1266 |   top: "inception_4d/1x1"
1267 | }
1268 | layer {  # 3x3 branch, step 1: 1x1 convolution with 144 outputs on inception_4c/output
1269 |   name: "inception_4d/3x3_reduce"
1270 |   type: "Convolution"
1271 |   bottom: "inception_4c/output"
1272 |   top: "inception_4d/3x3_reduce"
1273 |   param {
1274 |     lr_mult: 1
1275 |     decay_mult: 1
1276 |   }
1277 |   param {
1278 |     lr_mult: 2
1279 |     decay_mult: 0
1280 |   }
1281 |   convolution_param {
1282 |     num_output: 144
1283 |     kernel_size: 1
1284 |     weight_filler {
1285 |       type: "xavier"
1286 |       std: 0.09
1287 |     }
1288 |     bias_filler {
1289 |       type: "constant"
1290 |       value: 0.2
1291 |     }
1292 |   }
1293 | }
1294 | layer {  # in-place ReLU on inception_4d/3x3_reduce
1295 |   name: "inception_4d/relu_3x3_reduce"
1296 |   type: "ReLU"
1297 |   bottom: "inception_4d/3x3_reduce"
1298 |   top: "inception_4d/3x3_reduce"
1299 | }
1300 | layer {  # 3x3 branch, step 2: 3x3 convolution (pad 1) with 288 outputs
1301 |   name: "inception_4d/3x3"
1302 |   type: "Convolution"
1303 |   bottom: "inception_4d/3x3_reduce"
1304 |   top: "inception_4d/3x3"
1305 |   param {
1306 |     lr_mult: 1
1307 |     decay_mult: 1
1308 |   }
1309 |   param {
1310 |     lr_mult: 2
1311 |     decay_mult: 0
1312 |   }
1313 |   convolution_param {
1314 |     num_output: 288
1315 |     pad: 1
1316 |     kernel_size: 3
1317 |     weight_filler {
1318 |       type: "xavier"
1319 |       std: 0.03
1320 |     }
1321 |     bias_filler {
1322 |       type: "constant"
1323 |       value: 0.2
1324 |     }
1325 |   }
1326 | }
1327 | layer {  # in-place ReLU on inception_4d/3x3
1328 |   name: "inception_4d/relu_3x3"
1329 |   type: "ReLU"
1330 |   bottom: "inception_4d/3x3"
1331 |   top: "inception_4d/3x3"
1332 | }
1333 | layer {  # 5x5 branch, step 1: 1x1 convolution with 32 outputs on inception_4c/output
1334 |   name: "inception_4d/5x5_reduce"
1335 |   type: "Convolution"
1336 |   bottom: "inception_4c/output"
1337 |   top: "inception_4d/5x5_reduce"
1338 |   param {
1339 |     lr_mult: 1
1340 |     decay_mult: 1
1341 |   }
1342 |   param {
1343 |     lr_mult: 2
1344 |     decay_mult: 0
1345 |   }
1346 |   convolution_param {
1347 |     num_output: 32
1348 |     kernel_size: 1
1349 |     weight_filler {
1350 |       type: "xavier"
1351 |       std: 0.2
1352 |     }
1353 |     bias_filler {
1354 |       type: "constant"
1355 |       value: 0.2
1356 |     }
1357 |   }
1358 | }
1359 | layer {  # in-place ReLU on inception_4d/5x5_reduce
1360 |   name: "inception_4d/relu_5x5_reduce"
1361 |   type: "ReLU"
1362 |   bottom: "inception_4d/5x5_reduce"
1363 |   top: "inception_4d/5x5_reduce"
1364 | }
1365 | layer {  # 5x5 branch, step 2: 5x5 convolution (pad 2) with 64 outputs
1366 |   name: "inception_4d/5x5"
1367 |   type: "Convolution"
1368 |   bottom: "inception_4d/5x5_reduce"
1369 |   top: "inception_4d/5x5"
1370 |   param {
1371 |     lr_mult: 1
1372 |     decay_mult: 1
1373 |   }
1374 |   param {
1375 |     lr_mult: 2
1376 |     decay_mult: 0
1377 |   }
1378 |   convolution_param {
1379 |     num_output: 64
1380 |     pad: 2
1381 |     kernel_size: 5
1382 |     weight_filler {
1383 |       type: "xavier"
1384 |       std: 0.03
1385 |     }
1386 |     bias_filler {
1387 |       type: "constant"
1388 |       value: 0.2
1389 |     }
1390 |   }
1391 | }
1392 | layer {  # in-place ReLU on inception_4d/5x5
1393 |   name: "inception_4d/relu_5x5"
1394 |   type: "ReLU"
1395 |   bottom: "inception_4d/5x5"
1396 |   top: "inception_4d/5x5"
1397 | }
1398 | layer {  # pool branch, step 1: 3x3 max pooling (stride 1, pad 1) on inception_4c/output
1399 |   name: "inception_4d/pool"
1400 |   type: "Pooling"
1401 |   bottom: "inception_4c/output"
1402 |   top: "inception_4d/pool"
1403 |   pooling_param {
1404 |     pool: MAX
1405 |     kernel_size: 3
1406 |     stride: 1
1407 |     pad: 1
1408 |   }
1409 | }
1410 | layer {  # pool branch, step 2: 1x1 convolution with 64 outputs on the pooled blob
1411 |   name: "inception_4d/pool_proj"
1412 |   type: "Convolution"
1413 |   bottom: "inception_4d/pool"
1414 |   top: "inception_4d/pool_proj"
1415 |   param {
1416 |     lr_mult: 1
1417 |     decay_mult: 1
1418 |   }
1419 |   param {
1420 |     lr_mult: 2
1421 |     decay_mult: 0
1422 |   }
1423 |   convolution_param {
1424 |     num_output: 64
1425 |     kernel_size: 1
1426 |     weight_filler {
1427 |       type: "xavier"
1428 |       std: 0.1
1429 |     }
1430 |     bias_filler {
1431 |       type: "constant"
1432 |       value: 0.2
1433 |     }
1434 |   }
1435 | }
1436 | layer {  # in-place ReLU on inception_4d/pool_proj
1437 |   name: "inception_4d/relu_pool_proj"
1438 |   type: "ReLU"
1439 |   bottom: "inception_4d/pool_proj"
1440 |   top: "inception_4d/pool_proj"
1441 | }
1442 | layer {  # joins the four branches; their num_output values sum to 112+288+64+64 = 528 (no axis given; presumably channels -- confirm)
1443 |   name: "inception_4d/output"
1444 |   type: "Concat"
1445 |   bottom: "inception_4d/1x1"
1446 |   bottom: "inception_4d/3x3"
1447 |   bottom: "inception_4d/5x5"
1448 |   bottom: "inception_4d/pool_proj"
1449 |   top: "inception_4d/output"
1450 | }
1451 | layer {  # inception_4e, 1x1 branch: 1x1 convolution with 256 outputs on inception_4d/output
1452 |   name: "inception_4e/1x1"
1453 |   type: "Convolution"
1454 |   bottom: "inception_4d/output"
1455 |   top: "inception_4e/1x1"
1456 |   param {  # first param entry; presumably the weights -- confirm
1457 |     lr_mult: 1
1458 |     decay_mult: 1
1459 |   }
1460 |   param {  # second param entry; presumably the bias (lr_mult 2, no decay) -- confirm
1461 |     lr_mult: 2
1462 |     decay_mult: 0
1463 |   }
1464 |   convolution_param {
1465 |     num_output: 256
1466 |     kernel_size: 1
1467 |     weight_filler {
1468 |       type: "xavier"
1469 |       std: 0.03  # NOTE(review): std is given next to type "xavier"; confirm whether that filler reads it
1470 |     }
1471 |     bias_filler {
1472 |       type: "constant"
1473 |       value: 0.2
1474 |     }
1475 |   }
1476 | }
1477 | layer {  # in-place ReLU: bottom and top are both inception_4e/1x1
1478 |   name: "inception_4e/relu_1x1"
1479 |   type: "ReLU"
1480 |   bottom: "inception_4e/1x1"
1481 |   top: "inception_4e/1x1"
1482 | }
1483 | layer {  # 3x3 branch, step 1: 1x1 convolution with 160 outputs on inception_4d/output
1484 |   name: "inception_4e/3x3_reduce"
1485 |   type: "Convolution"
1486 |   bottom: "inception_4d/output"
1487 |   top: "inception_4e/3x3_reduce"
1488 |   param {
1489 |     lr_mult: 1
1490 |     decay_mult: 1
1491 |   }
1492 |   param {
1493 |     lr_mult: 2
1494 |     decay_mult: 0
1495 |   }
1496 |   convolution_param {
1497 |     num_output: 160
1498 |     kernel_size: 1
1499 |     weight_filler {
1500 |       type: "xavier"
1501 |       std: 0.09
1502 |     }
1503 |     bias_filler {
1504 |       type: "constant"
1505 |       value: 0.2
1506 |     }
1507 |   }
1508 | }
1509 | layer {  # in-place ReLU on inception_4e/3x3_reduce
1510 |   name: "inception_4e/relu_3x3_reduce"
1511 |   type: "ReLU"
1512 |   bottom: "inception_4e/3x3_reduce"
1513 |   top: "inception_4e/3x3_reduce"
1514 | }
1515 | layer {  # 3x3 branch, step 2: 3x3 convolution (pad 1) with 320 outputs
1516 |   name: "inception_4e/3x3"
1517 |   type: "Convolution"
1518 |   bottom: "inception_4e/3x3_reduce"
1519 |   top: "inception_4e/3x3"
1520 |   param {
1521 |     lr_mult: 1
1522 |     decay_mult: 1
1523 |   }
1524 |   param {
1525 |     lr_mult: 2
1526 |     decay_mult: 0
1527 |   }
1528 |   convolution_param {
1529 |     num_output: 320
1530 |     pad: 1
1531 |     kernel_size: 3
1532 |     weight_filler {
1533 |       type: "xavier"
1534 |       std: 0.03
1535 |     }
1536 |     bias_filler {
1537 |       type: "constant"
1538 |       value: 0.2
1539 |     }
1540 |   }
1541 | }
1542 | layer {  # in-place ReLU on inception_4e/3x3
1543 |   name: "inception_4e/relu_3x3"
1544 |   type: "ReLU"
1545 |   bottom: "inception_4e/3x3"
1546 |   top: "inception_4e/3x3"
1547 | }
1548 | layer {  # 5x5 branch, step 1: 1x1 convolution with 32 outputs on inception_4d/output
1549 |   name: "inception_4e/5x5_reduce"
1550 |   type: "Convolution"
1551 |   bottom: "inception_4d/output"
1552 |   top: "inception_4e/5x5_reduce"
1553 |   param {
1554 |     lr_mult: 1
1555 |     decay_mult: 1
1556 |   }
1557 |   param {
1558 |     lr_mult: 2
1559 |     decay_mult: 0
1560 |   }
1561 |   convolution_param {
1562 |     num_output: 32
1563 |     kernel_size: 1
1564 |     weight_filler {
1565 |       type: "xavier"
1566 |       std: 0.2
1567 |     }
1568 |     bias_filler {
1569 |       type: "constant"
1570 |       value: 0.2
1571 |     }
1572 |   }
1573 | }
1574 | layer {  # in-place ReLU on inception_4e/5x5_reduce
1575 |   name: "inception_4e/relu_5x5_reduce"
1576 |   type: "ReLU"
1577 |   bottom: "inception_4e/5x5_reduce"
1578 |   top: "inception_4e/5x5_reduce"
1579 | }
1580 | layer {  # 5x5 branch, step 2: 5x5 convolution (pad 2) with 128 outputs
1581 |   name: "inception_4e/5x5"
1582 |   type: "Convolution"
1583 |   bottom: "inception_4e/5x5_reduce"
1584 |   top: "inception_4e/5x5"
1585 |   param {
1586 |     lr_mult: 1
1587 |     decay_mult: 1
1588 |   }
1589 |   param {
1590 |     lr_mult: 2
1591 |     decay_mult: 0
1592 |   }
1593 |   convolution_param {
1594 |     num_output: 128
1595 |     pad: 2
1596 |     kernel_size: 5
1597 |     weight_filler {
1598 |       type: "xavier"
1599 |       std: 0.03
1600 |     }
1601 |     bias_filler {
1602 |       type: "constant"
1603 |       value: 0.2
1604 |     }
1605 |   }
1606 | }
1607 | layer {  # in-place ReLU on inception_4e/5x5
1608 |   name: "inception_4e/relu_5x5"
1609 |   type: "ReLU"
1610 |   bottom: "inception_4e/5x5"
1611 |   top: "inception_4e/5x5"
1612 | }
1613 | layer {  # pool branch, step 1: 3x3 max pooling (stride 1, pad 1) on inception_4d/output
1614 |   name: "inception_4e/pool"
1615 |   type: "Pooling"
1616 |   bottom: "inception_4d/output"
1617 |   top: "inception_4e/pool"
1618 |   pooling_param {
1619 |     pool: MAX
1620 |     kernel_size: 3
1621 |     stride: 1
1622 |     pad: 1
1623 |   }
1624 | }
1625 | layer {  # pool branch, step 2: 1x1 convolution with 128 outputs on the pooled blob
1626 |   name: "inception_4e/pool_proj"
1627 |   type: "Convolution"
1628 |   bottom: "inception_4e/pool"
1629 |   top: "inception_4e/pool_proj"
1630 |   param {
1631 |     lr_mult: 1
1632 |     decay_mult: 1
1633 |   }
1634 |   param {
1635 |     lr_mult: 2
1636 |     decay_mult: 0
1637 |   }
1638 |   convolution_param {
1639 |     num_output: 128
1640 |     kernel_size: 1
1641 |     weight_filler {
1642 |       type: "xavier"
1643 |       std: 0.1
1644 |     }
1645 |     bias_filler {
1646 |       type: "constant"
1647 |       value: 0.2
1648 |     }
1649 |   }
1650 | }
1651 | layer {  # in-place ReLU on inception_4e/pool_proj
1652 |   name: "inception_4e/relu_pool_proj"
1653 |   type: "ReLU"
1654 |   bottom: "inception_4e/pool_proj"
1655 |   top: "inception_4e/pool_proj"
1656 | }
1657 | layer {  # joins the four branches; their num_output values sum to 256+320+128+128 = 832 (no axis given; presumably channels -- confirm)
1658 |   name: "inception_4e/output"
1659 |   type: "Concat"
1660 |   bottom: "inception_4e/1x1"
1661 |   bottom: "inception_4e/3x3"
1662 |   bottom: "inception_4e/5x5"
1663 |   bottom: "inception_4e/pool_proj"
1664 |   top: "inception_4e/output"
1665 | }
1666 | layer {  # 3x3 max pooling with stride 2 between inception_4e and inception_5a; no pad is set here, unlike the in-module pools
1667 |   name: "pool4/3x3_s2"
1668 |   type: "Pooling"
1669 |   bottom: "inception_4e/output"
1670 |   top: "pool4/3x3_s2"
1671 |   pooling_param {
1672 |     pool: MAX
1673 |     kernel_size: 3
1674 |     stride: 2
1675 |   }
1676 | }
1677 | layer {  # inception_5a, 1x1 branch: 1x1 convolution with 256 outputs on pool4/3x3_s2
1678 |   name: "inception_5a/1x1"
1679 |   type: "Convolution"
1680 |   bottom: "pool4/3x3_s2"
1681 |   top: "inception_5a/1x1"
1682 |   param {  # first param entry; presumably the weights -- confirm
1683 |     lr_mult: 1
1684 |     decay_mult: 1
1685 |   }
1686 |   param {  # second param entry; presumably the bias (lr_mult 2, no decay) -- confirm
1687 |     lr_mult: 2
1688 |     decay_mult: 0
1689 |   }
1690 |   convolution_param {
1691 |     num_output: 256
1692 |     kernel_size: 1
1693 |     weight_filler {
1694 |       type: "xavier"
1695 |       std: 0.03  # NOTE(review): std is given next to type "xavier"; confirm whether that filler reads it
1696 |     }
1697 |     bias_filler {
1698 |       type: "constant"
1699 |       value: 0.2
1700 |     }
1701 |   }
1702 | }
1703 | layer {  # in-place ReLU: bottom and top are both inception_5a/1x1
1704 |   name: "inception_5a/relu_1x1"
1705 |   type: "ReLU"
1706 |   bottom: "inception_5a/1x1"
1707 |   top: "inception_5a/1x1"
1708 | }
1709 | layer {  # 3x3 branch, step 1: 1x1 convolution with 160 outputs on pool4/3x3_s2
1710 |   name: "inception_5a/3x3_reduce"
1711 |   type: "Convolution"
1712 |   bottom: "pool4/3x3_s2"
1713 |   top: "inception_5a/3x3_reduce"
1714 |   param {
1715 |     lr_mult: 1
1716 |     decay_mult: 1
1717 |   }
1718 |   param {
1719 |     lr_mult: 2
1720 |     decay_mult: 0
1721 |   }
1722 |   convolution_param {
1723 |     num_output: 160
1724 |     kernel_size: 1
1725 |     weight_filler {
1726 |       type: "xavier"
1727 |       std: 0.09
1728 |     }
1729 |     bias_filler {
1730 |       type: "constant"
1731 |       value: 0.2
1732 |     }
1733 |   }
1734 | }
1735 | layer {  # in-place ReLU on inception_5a/3x3_reduce
1736 |   name: "inception_5a/relu_3x3_reduce"
1737 |   type: "ReLU"
1738 |   bottom: "inception_5a/3x3_reduce"
1739 |   top: "inception_5a/3x3_reduce"
1740 | }
1741 | layer {  # 3x3 branch, step 2: 3x3 convolution (pad 1) with 320 outputs
1742 |   name: "inception_5a/3x3"
1743 |   type: "Convolution"
1744 |   bottom: "inception_5a/3x3_reduce"
1745 |   top: "inception_5a/3x3"
1746 |   param {
1747 |     lr_mult: 1
1748 |     decay_mult: 1
1749 |   }
1750 |   param {
1751 |     lr_mult: 2
1752 |     decay_mult: 0
1753 |   }
1754 |   convolution_param {
1755 |     num_output: 320
1756 |     pad: 1
1757 |     kernel_size: 3
1758 |     weight_filler {
1759 |       type: "xavier"
1760 |       std: 0.03
1761 |     }
1762 |     bias_filler {
1763 |       type: "constant"
1764 |       value: 0.2
1765 |     }
1766 |   }
1767 | }
1768 | layer {  # in-place ReLU on inception_5a/3x3
1769 |   name: "inception_5a/relu_3x3"
1770 |   type: "ReLU"
1771 |   bottom: "inception_5a/3x3"
1772 |   top: "inception_5a/3x3"
1773 | }
1774 | layer {  # 5x5 branch, step 1: 1x1 convolution with 32 outputs on pool4/3x3_s2
1775 |   name: "inception_5a/5x5_reduce"
1776 |   type: "Convolution"
1777 |   bottom: "pool4/3x3_s2"
1778 |   top: "inception_5a/5x5_reduce"
1779 |   param {
1780 |     lr_mult: 1
1781 |     decay_mult: 1
1782 |   }
1783 |   param {
1784 |     lr_mult: 2
1785 |     decay_mult: 0
1786 |   }
1787 |   convolution_param {
1788 |     num_output: 32
1789 |     kernel_size: 1
1790 |     weight_filler {
1791 |       type: "xavier"
1792 |       std: 0.2
1793 |     }
1794 |     bias_filler {
1795 |       type: "constant"
1796 |       value: 0.2
1797 |     }
1798 |   }
1799 | }
1800 | layer {  # in-place ReLU on inception_5a/5x5_reduce
1801 |   name: "inception_5a/relu_5x5_reduce"
1802 |   type: "ReLU"
1803 |   bottom: "inception_5a/5x5_reduce"
1804 |   top: "inception_5a/5x5_reduce"
1805 | }
1806 | layer {  # 5x5 branch, step 2: 5x5 convolution (pad 2) with 128 outputs
1807 |   name: "inception_5a/5x5"
1808 |   type: "Convolution"
1809 |   bottom: "inception_5a/5x5_reduce"
1810 |   top: "inception_5a/5x5"
1811 |   param {
1812 |     lr_mult: 1
1813 |     decay_mult: 1
1814 |   }
1815 |   param {
1816 |     lr_mult: 2
1817 |     decay_mult: 0
1818 |   }
1819 |   convolution_param {
1820 |     num_output: 128
1821 |     pad: 2
1822 |     kernel_size: 5
1823 |     weight_filler {
1824 |       type: "xavier"
1825 |       std: 0.03
1826 |     }
1827 |     bias_filler {
1828 |       type: "constant"
1829 |       value: 0.2
1830 |     }
1831 |   }
1832 | }
1833 | layer {  # in-place ReLU on inception_5a/5x5
1834 |   name: "inception_5a/relu_5x5"
1835 |   type: "ReLU"
1836 |   bottom: "inception_5a/5x5"
1837 |   top: "inception_5a/5x5"
1838 | }
1839 | layer {  # pool branch, step 1: 3x3 max pooling (stride 1, pad 1) on pool4/3x3_s2
1840 |   name: "inception_5a/pool"
1841 |   type: "Pooling"
1842 |   bottom: "pool4/3x3_s2"
1843 |   top: "inception_5a/pool"
1844 |   pooling_param {
1845 |     pool: MAX
1846 |     kernel_size: 3
1847 |     stride: 1
1848 |     pad: 1
1849 |   }
1850 | }
1851 | layer {  # pool branch, step 2: 1x1 convolution with 128 outputs on the pooled blob
1852 |   name: "inception_5a/pool_proj"
1853 |   type: "Convolution"
1854 |   bottom: "inception_5a/pool"
1855 |   top: "inception_5a/pool_proj"
1856 |   param {
1857 |     lr_mult: 1
1858 |     decay_mult: 1
1859 |   }
1860 |   param {
1861 |     lr_mult: 2
1862 |     decay_mult: 0
1863 |   }
1864 |   convolution_param {
1865 |     num_output: 128
1866 |     kernel_size: 1
1867 |     weight_filler {
1868 |       type: "xavier"
1869 |       std: 0.1
1870 |     }
1871 |     bias_filler {
1872 |       type: "constant"
1873 |       value: 0.2
1874 |     }
1875 |   }
1876 | }
1877 | layer {  # in-place ReLU on inception_5a/pool_proj
1878 |   name: "inception_5a/relu_pool_proj"
1879 |   type: "ReLU"
1880 |   bottom: "inception_5a/pool_proj"
1881 |   top: "inception_5a/pool_proj"
1882 | }
1883 | layer {  # joins the four branches; their num_output values sum to 256+320+128+128 = 832 (no axis given; presumably channels -- confirm)
1884 |   name: "inception_5a/output"
1885 |   type: "Concat"
1886 |   bottom: "inception_5a/1x1"
1887 |   bottom: "inception_5a/3x3"
1888 |   bottom: "inception_5a/5x5"
1889 |   bottom: "inception_5a/pool_proj"
1890 |   top: "inception_5a/output"
1891 | }
1892 | layer {  # inception_5b, 1x1 branch: 1x1 convolution with 384 outputs on inception_5a/output
1893 |   name: "inception_5b/1x1"
1894 |   type: "Convolution"
1895 |   bottom: "inception_5a/output"
1896 |   top: "inception_5b/1x1"
1897 |   param {  # first param entry; presumably the weights -- confirm
1898 |     lr_mult: 1
1899 |     decay_mult: 1
1900 |   }
1901 |   param {  # second param entry; presumably the bias (lr_mult 2, no decay) -- confirm
1902 |     lr_mult: 2
1903 |     decay_mult: 0
1904 |   }
1905 |   convolution_param {
1906 |     num_output: 384
1907 |     kernel_size: 1
1908 |     weight_filler {
1909 |       type: "xavier"
1910 |       std: 0.03  # NOTE(review): std is given next to type "xavier"; confirm whether that filler reads it
1911 |     }
1912 |     bias_filler {
1913 |       type: "constant"
1914 |       value: 0.2
1915 |     }
1916 |   }
1917 | }
1918 | layer {  # in-place ReLU: bottom and top are both inception_5b/1x1
1919 |   name: "inception_5b/relu_1x1"
1920 |   type: "ReLU"
1921 |   bottom: "inception_5b/1x1"
1922 |   top: "inception_5b/1x1"
1923 | }
1924 | layer {  # 3x3 branch, step 1: 1x1 convolution with 192 outputs on inception_5a/output
1925 |   name: "inception_5b/3x3_reduce"
1926 |   type: "Convolution"
1927 |   bottom: "inception_5a/output"
1928 |   top: "inception_5b/3x3_reduce"
1929 |   param {
1930 |     lr_mult: 1
1931 |     decay_mult: 1
1932 |   }
1933 |   param {
1934 |     lr_mult: 2
1935 |     decay_mult: 0
1936 |   }
1937 |   convolution_param {
1938 |     num_output: 192
1939 |     kernel_size: 1
1940 |     weight_filler {
1941 |       type: "xavier"
1942 |       std: 0.09
1943 |     }
1944 |     bias_filler {
1945 |       type: "constant"
1946 |       value: 0.2
1947 |     }
1948 |   }
1949 | }
1950 | layer {  # in-place ReLU on inception_5b/3x3_reduce
1951 |   name: "inception_5b/relu_3x3_reduce"
1952 |   type: "ReLU"
1953 |   bottom: "inception_5b/3x3_reduce"
1954 |   top: "inception_5b/3x3_reduce"
1955 | }
1956 | layer {  # 3x3 branch, step 2: 3x3 convolution (pad 1) with 384 outputs
1957 |   name: "inception_5b/3x3"
1958 |   type: "Convolution"
1959 |   bottom: "inception_5b/3x3_reduce"
1960 |   top: "inception_5b/3x3"
1961 |   param {
1962 |     lr_mult: 1
1963 |     decay_mult: 1
1964 |   }
1965 |   param {
1966 |     lr_mult: 2
1967 |     decay_mult: 0
1968 |   }
1969 |   convolution_param {
1970 |     num_output: 384
1971 |     pad: 1
1972 |     kernel_size: 3
1973 |     weight_filler {
1974 |       type: "xavier"
1975 |       std: 0.03
1976 |     }
1977 |     bias_filler {
1978 |       type: "constant"
1979 |       value: 0.2
1980 |     }
1981 |   }
1982 | }
1983 | layer {  # in-place ReLU on inception_5b/3x3
1984 |   name: "inception_5b/relu_3x3"
1985 |   type: "ReLU"
1986 |   bottom: "inception_5b/3x3"
1987 |   top: "inception_5b/3x3"
1988 | }
1989 | layer {  # 5x5 branch, step 1: 1x1 convolution with 48 outputs on inception_5a/output
1990 |   name: "inception_5b/5x5_reduce"
1991 |   type: "Convolution"
1992 |   bottom: "inception_5a/output"
1993 |   top: "inception_5b/5x5_reduce"
1994 |   param {
1995 |     lr_mult: 1
1996 |     decay_mult: 1
1997 |   }
1998 |   param {
1999 |     lr_mult: 2
2000 |     decay_mult: 0
2001 |   }
2002 |   convolution_param {
2003 |     num_output: 48
2004 |     kernel_size: 1
2005 |     weight_filler {
2006 |       type: "xavier"
2007 |       std: 0.2
2008 |     }
2009 |     bias_filler {
2010 |       type: "constant"
2011 |       value: 0.2
2012 |     }
2013 |   }
2014 | }
2015 | layer {  # in-place ReLU on inception_5b/5x5_reduce
2016 |   name: "inception_5b/relu_5x5_reduce"
2017 |   type: "ReLU"
2018 |   bottom: "inception_5b/5x5_reduce"
2019 |   top: "inception_5b/5x5_reduce"
2020 | }
2021 | layer {  # 5x5 branch, step 2: 5x5 convolution (pad 2) with 128 outputs
2022 |   name: "inception_5b/5x5"
2023 |   type: "Convolution"
2024 |   bottom: "inception_5b/5x5_reduce"
2025 |   top: "inception_5b/5x5"
2026 |   param {
2027 |     lr_mult: 1
2028 |     decay_mult: 1
2029 |   }
2030 |   param {
2031 |     lr_mult: 2
2032 |     decay_mult: 0
2033 |   }
2034 |   convolution_param {
2035 |     num_output: 128
2036 |     pad: 2
2037 |     kernel_size: 5
2038 |     weight_filler {
2039 |       type: "xavier"
2040 |       std: 0.03
2041 |     }
2042 |     bias_filler {
2043 |       type: "constant"
2044 |       value: 0.2
2045 |     }
2046 |   }
2047 | }
2048 | layer {  # in-place ReLU on inception_5b/5x5
2049 |   name: "inception_5b/relu_5x5"
2050 |   type: "ReLU"
2051 |   bottom: "inception_5b/5x5"
2052 |   top: "inception_5b/5x5"
2053 | }
2054 | layer {  # pool branch, step 1: 3x3 max pooling (stride 1, pad 1) on inception_5a/output
2055 |   name: "inception_5b/pool"
2056 |   type: "Pooling"
2057 |   bottom: "inception_5a/output"
2058 |   top: "inception_5b/pool"
2059 |   pooling_param {
2060 |     pool: MAX
2061 |     kernel_size: 3
2062 |     stride: 1
2063 |     pad: 1
2064 |   }
2065 | }
2066 | layer {  # pool branch, step 2: 1x1 convolution with 128 outputs on the pooled blob
2067 |   name: "inception_5b/pool_proj"
2068 |   type: "Convolution"
2069 |   bottom: "inception_5b/pool"
2070 |   top: "inception_5b/pool_proj"
2071 |   param {
2072 |     lr_mult: 1
2073 |     decay_mult: 1
2074 |   }
2075 |   param {
2076 |     lr_mult: 2
2077 |     decay_mult: 0
2078 |   }
2079 |   convolution_param {
2080 |     num_output: 128
2081 |     kernel_size: 1
2082 |     weight_filler {
2083 |       type: "xavier"
2084 |       std: 0.1
2085 |     }
2086 |     bias_filler {
2087 |       type: "constant"
2088 |       value: 0.2
2089 |     }
2090 |   }
2091 | }
2092 | layer {  # in-place ReLU on inception_5b/pool_proj
2093 |   name: "inception_5b/relu_pool_proj"
2094 |   type: "ReLU"
2095 |   bottom: "inception_5b/pool_proj"
2096 |   top: "inception_5b/pool_proj"
2097 | }
2098 | layer {  # joins the four branches; their num_output values sum to 384+384+128+128 = 1024 (no axis given; presumably channels -- confirm)
2099 |   name: "inception_5b/output"
2100 |   type: "Concat"
2101 |   bottom: "inception_5b/1x1"
2102 |   bottom: "inception_5b/3x3"
2103 |   bottom: "inception_5b/5x5"
2104 |   bottom: "inception_5b/pool_proj"
2105 |   top: "inception_5b/output"
2106 | }
2107 | layer {  # 7x7 average pooling with stride 1 over inception_5b/output
2108 |   name: "pool5/7x7_s1"
2109 |   type: "Pooling"
2110 |   bottom: "inception_5b/output"
2111 |   top: "pool5/7x7_s1"
2112 |   pooling_param {
2113 |     pool: AVE
2114 |     kernel_size: 7  # NOTE(review): fixed window; presumably sized for a 7x7 input map -- confirm against the input resolution
2115 |     stride: 1
2116 |   }
2117 | }
2118 | layer {  # dropout with ratio 0.4, applied in place on pool5/7x7_s1 (bottom and top are the same blob)
2119 |   name: "pool5/drop_7x7_s1"
2120 |   type: "Dropout"
2121 |   bottom: "pool5/7x7_s1"
2122 |   top: "pool5/7x7_s1"
2123 |   dropout_param {
2124 |     dropout_ratio: 0.4
2125 |   }
2126 | }
2127 | layer {  # final fully connected (InnerProduct) layer on the pooled features; its output feeds the "prob" Softmax
2128 |   name: "loss3/classifier"
2129 |   type: "InnerProduct"
2130 |   bottom: "pool5/7x7_s1"
2131 |   top: "loss3/classifier"
2132 |   param {  # first param entry; presumably the weights -- confirm
2133 |     lr_mult: 1
2134 |     decay_mult: 1
2135 |   }
2136 |   param {  # second param entry; presumably the bias (lr_mult 2, no decay) -- confirm
2137 |     lr_mult: 2
2138 |     decay_mult: 0
2139 |   }
2140 |   inner_product_param {  # num_output below is a template placeholder, not an integer: it must be substituted with the class count before this file is loaded
2141 |     num_output: ##NUM_CLASSES##
2142 |     weight_filler {
2143 |       type: "xavier"
2144 |     }
2145 |     bias_filler {
2146 |       type: "constant"
2147 |       value: 0  # bias starts at 0 here, whereas the convolution layers in this chunk use 0.2
2148 |     }
2149 |   }
2150 | }
2151 | layer {  # Softmax over loss3/classifier; "prob" is the last blob produced in this file
2152 |   name: "prob"
2153 |   type: "Softmax"
2154 |   bottom: "loss3/classifier"
2155 |   top: "prob"
2156 | }
2157 | 


--------------------------------------------------------------------------------