├── .gitignore ├── ILSVRC_evaluate_bbox.m ├── ILSVRC_generate_heatmap.m ├── README.md ├── bboxgenerator ├── Makefile ├── cut ├── dt.c ├── dt.h ├── dt_box ├── dt_box.cpp ├── gc.cpp ├── heatmap_6.jpg ├── heatmap_6.txt └── sample_6.jpg ├── categories1000.mat ├── data_img1.mat ├── data_img2.mat ├── data_net.mat ├── demo.m ├── generate_bbox.m ├── ilsvrc_2012_mean.mat ├── img1.jpg ├── img2.jpg ├── map2jpg.m ├── mergeTenCrop.m ├── models ├── categoriesImageNet.mat ├── categories_places205.mat ├── deploy_alexnetplusCAM_imagenet.prototxt ├── deploy_alexnetplusCAM_places205.prototxt ├── deploy_googlenetCAM.prototxt ├── deploy_googlenetCAM_places205.prototxt ├── deploy_vgg16CAM.prototxt └── download.sh ├── prepare_image.m ├── py_demo.py ├── py_generate_bbox.py ├── py_map2jpg.py ├── py_returnCAMmap.py └── returnCAMmap.m /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | models/*.caffemodel 3 | -------------------------------------------------------------------------------- /ILSVRC_evaluate_bbox.m: -------------------------------------------------------------------------------- 1 | 2 | 3 | datasetName = 'ILSVRCvalSet'; 4 | load('imagenet_toolkit/ILSVRC2014_devkit/evaluation/cache_groundtruth.mat'); 5 | load('imagenet_toolkit/ILSVRC2014_devkit/data/meta_clsloc.mat'); 6 | datasetPath = 'dataset/ILSVRC2012'; 7 | load([datasetPath '/imageListVal.mat']); 8 | load('sizeImg_ILSVRC2014.mat'); 9 | 10 | % datasetName = 'ILSVRCtestSet'; 11 | % datasetPath = '/data/vision/torralba/deeplearning/imagenet_toolkit'; 12 | % load([datasetPath '/imageListTest.mat']); 13 | 14 | 15 | nImgs = size(imageList,1); 16 | 17 | ground_truth_file='imagenet_toolkit/ILSVRC2014_devkit/data/ILSVRC2014_clsloc_validation_ground_truth.txt'; 18 | gt_labels = dlmread(ground_truth_file); 19 | 20 | categories_gt = []; 21 | categoryIDMap = containers.Map(); 22 | for i=1:numel(synsets) 23 | categories_gt{synsets(i).ILSVRC2014_ID,1} = synsets(i).words; 24 | categories_gt{synsets(i).ILSVRC2014_ID,2} = synsets(i).WNID; 25 | categoryIDMap(synsets(i).WNID) = i; 26 | end 27 | 28 | 29 | 30 | %% network to evaluate 31 | % backpropa-heatmap 32 | %netName = 'caffeNet_imagenet'; 33 | %netName = 'googlenetBVLC_imagenet'; 34 | %netName = 'VGG16_imagenet'; 35 | 36 | % CAM-based network 37 | %netName = 'NIN'; 38 | %netName = 'CAM_imagenetCNNaveSumDeep'; 39 | %netName = 'CAM_googlenetBVLC_imagenet';% the direct output 40 | netName = 'CAM_googlenetBVLCshrink_imagenet'; 41 | %netName = 'CAM_googlenetBVLCshrink_imagenet_maxpool'; 42 | %netName = 'CAM_VGG16_imagenet'; 43 | %netName = 'CAM_alexnet'; 44 | 45 | load('categoriesImageNet.mat'); 46 | 47 | visualizationPointer = 0; 48 | 49 | topCategoryNum = 5; 50 | predictionResult_bbox1 = zeros(nImgs, topCategoryNum*5); 51 | predictionResult_bbox2 = zeros(nImgs, topCategoryNum*5); 52 | predictionResult_bboxCombine = zeros(nImgs, topCategoryNum*5); 53 | 54 | if matlabpool('size')==0 55 | try 56 | matlabpool 57 | catch e 58 | end 59 | end 60 | 61 | heatMapFolder = ['heatMap-' datasetName '-' netName]; 62 | bbox_threshold = [20, 100, 110]; 63 | curParaThreshold = [num2str(bbox_threshold(1)) ' ' num2str(bbox_threshold(2)) ' ' num2str(bbox_threshold(3))]; 64 | parfor i=1:size(imageList,1) 65 | curImgIDX = i; 66 | 67 | height_original = sizeFull_imageList(curImgIDX,1);%tmp.Height; 68 | weight_original = sizeFull_imageList(curImgIDX,2);%tmp.Width; 69 | 70 | [a b c] = fileparts(imageList{curImgIDX,1}); 71 | curPath_fullSizeImg = 
['/data/vision/torralba/deeplearning/imagenet_toolkit/ILSVRC2012_img_val/' b c]; 72 | curMatFile = [heatMapFolder '/' b '.mat']; 73 | [heatMapSet, value_category, IDX_category] = loadHeatMap( curMatFile); 74 | 75 | curResult_bbox1 = []; 76 | curResult_bbox2 = []; 77 | curResult_bboxCombine = []; 78 | for j=1:5 79 | curHeatMapFile = [heatMapFolder '/top' num2str(j) '/' b '.jpg']; 80 | 81 | curBBoxFile = [heatMapFolder '/top' num2str(j) '/' b '_default.txt']; 82 | %curBBoxFileGraphcut = [heatMapFolder '/top' num2str(j) '/' b '_graphcut.txt']; 83 | curCategory = categories{IDX_category(j),1}; 84 | %imwrite(curHeatMap, ['result_bbox/heatmap_tmp' b randString '.jpg']); 85 | if ~exist(curBBoxFile) 86 | %system(['/data/vision/torralba/deeplearning/package/bbox_hui/final ' curHeatMapFile ' ' curBBoxFile]); 87 | 88 | system(['/data/vision/torralba/deeplearning/package/bbox_hui_new/./dt_box ' curHeatMapFile ' ' curParaThreshold ' ' curBBoxFile]); 89 | end 90 | curPredictCategory = categories{IDX_category(j),1}; 91 | curPredictCategoryID = categories{IDX_category(j),1}(1:9); 92 | curPredictCategoryGTID = categoryIDMap(curPredictCategoryID); 93 | 94 | 95 | boxData = dlmread(curBBoxFile); 96 | boxData_formulate = [boxData(1:4:end)' boxData(2:4:end)' boxData(1:4:end)'+boxData(3:4:end)' boxData(2:4:end)'+boxData(4:4:end)']; 97 | boxData_formulate = [min(boxData_formulate(:,1),boxData_formulate(:,3)),min(boxData_formulate(:,2),boxData_formulate(:,4)),max(boxData_formulate(:,1),boxData_formulate(:,3)),max(boxData_formulate(:,2),boxData_formulate(:,4))]; 98 | 99 | % try 100 | % boxDataGraphcut = dlmread(curBBoxFileGraphcut); 101 | % boxData_formulateGraphcut = [boxDataGraphcut(1:4:end)' boxDataGraphcut(2:4:end)' boxDataGraphcut(1:4:end)'+boxDataGraphcut(3:4:end)' boxDataGraphcut(2:4:end)'+boxDataGraphcut(4:4:end)']; 102 | % catch exception 103 | % boxDataGraphcut = dlmread(curBBoxFile); 104 | % boxData_formulateGraphcut = [boxDataGraphcut(1:4:end)' boxDataGraphcut(2:4:end)' boxDataGraphcut(1:4:end)'+boxDataGraphcut(3:4:end)' boxDataGraphcut(2:4:end)'+boxDataGraphcut(4:4:end)']; 105 | % boxData_formulateGraphcut = boxData_formulateGraphcut(1,:); 106 | % end 107 | 108 | bbox = boxData_formulate(1,:); 109 | curPredictTuple = [curPredictCategoryGTID bbox(1) bbox(2) bbox(3) bbox(4)]; 110 | curResult_bbox1 = [curResult_bbox1 curPredictTuple]; 111 | curResult_bboxCombine = [curResult_bboxCombine curPredictTuple]; 112 | 113 | bbox = boxData_formulate(2,:); 114 | %bbox = boxData_formulateGraphcut(1,:); 115 | curPredictTuple = [curPredictCategoryGTID bbox(1) bbox(2) bbox(3) bbox(4)]; 116 | curResult_bbox2 = [curResult_bbox2 curPredictTuple]; 117 | 118 | curResult_bboxCombine = [curResult_bboxCombine curPredictTuple]; 119 | if visualizationPointer == 1 120 | 121 | curHeatMap = imread(curHeatMapFile); 122 | curHeatMap = imresize(curHeatMap,[height_original weight_original]); 123 | 124 | subplot(1,2,1),hold off, imshow(curPath_fullSizeImg); 125 | hold on 126 | curBox = boxData_formulate(1,:); 127 | rectangle('Position',[curBox(1) curBox(2) curBox(3)-curBox(1) curBox(4)-curBox(2)],'EdgeColor',[1 0 0]); 128 | subplot(1,2,2),imagesc(curHeatMap); 129 | title(curCategory); 130 | waitforbuttonpress 131 | end 132 | end 133 | 134 | predictionResult_bbox1(i, :) = curResult_bbox1; 135 | predictionResult_bbox2(i, :) = curResult_bbox2; 136 | predictionResult_bboxCombine(i,:) = curResult_bboxCombine(1:topCategoryNum*5); 137 | disp([netName ' processing ' b]) 138 | end 139 | 140 | 141 | 
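%% each row of predictionResult_* holds five [categoryID x1 y1 x2 y2] tuples (one per top-5 guess), the flat format expected by the devkit's simpleEvaluation below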
addpath('imagenet_toolkit/ILSVRC2014_devkit/evaluation'); 142 | disp([netName '--------bbox1' ]); 143 | [cls_error, clsloc_error] = simpleEvaluation(predictionResult_bbox1); 144 | disp([(1:5)',clsloc_error,cls_error]); 145 | 146 | disp([netName '--------bbox2' ]); 147 | [cls_error, clsloc_error] = simpleEvaluation(predictionResult_bbox2); 148 | disp([(1:5)',clsloc_error,cls_error]); 149 | 150 | disp([netName '--------bboxCombine' ]); 151 | [cls_error, clsloc_error] = simpleEvaluation(predictionResult_bboxCombine); 152 | disp([(1:5)',clsloc_error,cls_error]); 153 | -------------------------------------------------------------------------------- /ILSVRC_generate_heatmap.m: -------------------------------------------------------------------------------- 1 | % raw script used to generate heatmaps for the ILSVRC localization experiment 2 | % please load the necessary packages (e.g., matcaffe and the ILSVRC toolbox) correctly; some functions in matcaffe may already be deprecated. 3 | % take it as an example of how to reproduce the ILSVRC localization experiment. 4 | % 5 | % Bolei Zhou. 6 | 7 | addpath('caffeCPU2/matlab/caffe'); 8 | 9 | modelSetFolder = 'CAMnet'; 10 | 11 | %% CAMnet 12 | 13 | 14 | % netName = 'CAM_googlenetBVLC_imagenet'; 15 | % model_file = [modelSetFolder '/googlenet_imagenet/bvlc_googlenet.caffemodel']; 16 | % model_def_file = [modelSetFolder '/googlenet_imagenet/deploy.prototxt']; 17 | 18 | % netName = 'CAM_alexnet'; 19 | % model_file = [modelSetFolder '/alexnet/CAMmodels/caffeNetCAM_imagenet_train_iter_100000.caffemodel']; 20 | % model_def_file = [modelSetFolder '/alexnet/deploy_caffeNetCAM.prototxt']; 21 | 22 | netName = 'CAM_googlenetBVLCshrink_imagenet'; 23 | model_file = [modelSetFolder '/googlenet_imagenet/CAMmodels/imagenet_googleletCAM_train_iter_80000.caffemodel']; 24 | model_def_file = [modelSetFolder '/googlenet_imagenet/deploy_googlenetCAM.prototxt']; 25 | 26 | 27 | % netName = 'CAM_VGG16_imagenet'; 28 | % model_file = [modelSetFolder '/VGGnet/models/vgg16CAM_train_iter_50000.caffemodel']; 29 | % model_def_file = [modelSetFolder '/VGGnet/deploy_vgg16CAM.prototxt']; 30 | 31 | 32 | %% loading the network 33 | caffe('init', model_def_file, model_file,'test'); 34 | caffe('set_mode_gpu'); 35 | caffe('set_device',0); 36 | 37 | %% inspect the network weights and layers 38 | 39 | weights = caffe('get_weights'); 40 | weights_LR = squeeze(weights(end,1).weights{1,1}); 41 | bias_LR = weights(end,1).weights{2,1}; 42 | layernames = caffe('get_names'); 43 | response = caffe('get_all_layers'); 44 | netInfo = cell(size(layernames,1),3); 45 | for i=1:size(layernames,1) 46 | netInfo{i,1} = layernames{i}; 47 | netInfo{i,2} = i; 48 | netInfo{i,3} = size(response{i,1}); 49 | end 50 | 51 | load('categoriesImageNet.mat'); 52 | d = load('/data/vision/torralba/small-projects/bolei_deep/caffe/ilsvrc_2012_mean.mat'); 53 | IMAGE_MEAN = d.image_mean; 54 | IMAGE_DIM = 256; 55 | CROPPED_DIM = netInfo{1,3}(1); 56 | 57 | weightInfo = cell(size(weights,1),1); 58 | for i=1:size(weights,1) 59 | weightInfo{i,1} = weights(i,1).layer_names; 60 | weightInfo{i,2} = weights(i,1).weights{1,1}; 61 | weightInfo{i,3} = size(weights(i,1).weights{1,1}); 62 | end 63 | 64 | %% testing to predict some image 65 | 66 | datasetName = 'ILSVRCvalSet'; 67 | datasetPath = '/data/vision/torralba/gigaSUN/deeplearning/dataset/ILSVRC2012'; 68 | load([datasetPath '/imageListVal.mat']); 69 | load('sizeImg_ILSVRC2014.mat'); 70 | % datasetName = 'ILSVRCtestSet'; 71 | % datasetPath = 
'/data/vision/torralba/deeplearning/imagenet_toolkit'; 72 | % load([datasetPath '/imageListTest.mat']); 73 | 74 | 75 | 76 | saveFolder = ['heatMap-' datasetName '-' netName]; 77 | if ~exist(saveFolder) 78 | mkdir(saveFolder); 79 | end 80 | for i=1:5 81 | if ~exist([saveFolder '/top' num2str(i)]) 82 | mkdir([saveFolder '/top' num2str(i)]); 83 | end 84 | end 85 | 86 | for i = 1:size(imageList,1) 87 | curImgIDX = i; 88 | [a b c] = fileparts(imageList{curImgIDX,1}); 89 | saveMatFile = [saveFolder '/' b '.mat']; 90 | if ~exist(saveMatFile) 91 | height_original = sizeFull_imageList(curImgIDX,1);%tmp.Height; 92 | weight_original = sizeFull_imageList(curImgIDX,2);%tmp.Width; 93 | 94 | 95 | curImg = imread(imageList{curImgIDX,1}); 96 | 97 | if size(curImg,3)==1 98 | curImg = repmat(curImg,[1 1 3]); 99 | end 100 | 101 | 102 | scores = caffe('forward', {prepare_img(curImg, IMAGE_MEAN, CROPPED_DIM)}); 103 | response = caffe('get_all_layers'); 104 | scoresMean = mean(squeeze(scores{1}),2); 105 | [value_category, IDX_category] = sort(scoresMean,'descend'); 106 | 107 | 108 | featureObjectSwitchSpatial = squeeze(response{end-3,1}); 109 | [curColumnMap] = returnColumnMap(featureObjectSwitchSpatial, weights_LR(:,IDX_category(1:5))); 110 | 111 | 112 | 113 | for j=1:5 114 | curFeatureMap = squeeze(curColumnMap(:,:,j,:)); 115 | curFeatureMap_crop = imresize(curFeatureMap,[netInfo{1,3}(1) netInfo{1,3}(2)]); 116 | gradients = zeros([netInfo{1,3}(1) netInfo{1,3}(2) 3 10]); 117 | gradients(:,:,1,:) = curFeatureMap_crop; 118 | gradients(:,:,2,:) = curFeatureMap_crop; 119 | gradients(:,:,3,:) = curFeatureMap_crop; 120 | 121 | [alignImgMean alignImgSet] = crop2img(gradients); 122 | alignImgMean = single(alignImgMean); 123 | alignImgMean = imresize(alignImgMean, [height_original weight_original]); 124 | alignImgMean = alignImgMean./max(alignImgMean(:)); 125 | 126 | 127 | imwrite(alignImgMean, [saveFolder '/top' num2str(j) '/' b '.jpg']); 128 | 129 | end 130 | value_category = single(value_category); 131 | IDX_category = single(IDX_category); 132 | save(saveMatFile,'value_category','IDX_category'); 133 | disp([netName ' processing ' b]); 134 | end 135 | end 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Class Activation Mapping for Python 2 | I have rewritten the files demo.m and generate_bbox.m in Python so that the scripts can be used without Matlab. To run them in Python, one just needs to run 3 | ``` 4 | python py_demo.py 5 | ``` 6 | and 7 | ``` 8 | python py_generate_bbox.py 9 | ``` 10 | # Sample code for the Class Activation Mapping 11 | We propose a simple technique to expose the implicit attention of Convolutional Neural Networks on the image. It highlights the most informative image regions relevant to the predicted class. You can get an attention-based model instantly by tweaking your own CNN a little. The paper is published at [CVPR'16](http://arxiv.org/pdf/1512.04150.pdf). 
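For a class c, the class activation map is just the class-weighted sum of the last convolutional feature maps, M_c(x, y) = sum_k w_k^c * f_k(x, y). Below is a minimal numpy sketch of that computation (the operation implemented by returnCAMmap.m / py_returnCAMmap.py); the names `conv_feats` and `fc_weights` are illustrative, not this repo's API:
```
import numpy as np

def compute_cam(conv_feats, fc_weights, class_idx):
    # conv_feats: (h, w, k) activations of the last conv layer (e.g. CAM_conv)
    # fc_weights: (k, n_classes) weights of the final FC layer (e.g. CAM_fc)
    h, w, k = conv_feats.shape
    cam = conv_feats.reshape(h * w, k).dot(fc_weights[:, class_idx])
    cam = cam.reshape(h, w)
    cam = cam - cam.min()
    return cam / cam.max()  # normalize to [0, 1] before resizing/colormapping
```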
12 | 13 | The framework of Class Activation Mapping is shown below: 14 | ![Framework](http://cnnlocalization.csail.mit.edu/framework.jpg) 15 | 16 | Some predicted class activation maps are: 17 | ![Results](http://cnnlocalization.csail.mit.edu/example.jpg) 18 | 19 | ### Pre-trained models: 20 | * GoogLeNet-CAM model on ImageNet: ```models/deploy_googlenetCAM.prototxt``` weights:[http://cnnlocalization.csail.mit.edu/demoCAM/models/imagenet_googleletCAM_train_iter_120000.caffemodel] 21 | * VGG16-CAM model on ImageNet: ```models/deploy_vgg16CAM.prototxt``` weights:[http://cnnlocalization.csail.mit.edu/demoCAM/models/vgg16CAM_train_iter_90000.caffemodel] 22 | * GoogLeNet-CAM model on Places205: ```models/deploy_googlenetCAM_places205.prototxt``` weights:[http://cnnlocalization.csail.mit.edu/demoCAM/models/places_googleletCAM_train_iter_120000.caffemodel] 23 | * AlexNet+-CAM on ImageNet: ```models/deploy_alexnetplusCAM_imagenet.prototxt``` weights:[http://cnnlocalization.csail.mit.edu/demoCAM/models/alexnetplusCAM_imagenet.caffemodel] 24 | * AlexNet+-CAM on Places205 (used in the [online demo](http://places.csail.mit.edu/demo.html)): ```models/deploy_alexnetplusCAM_places205.prototxt``` weights:[http://cnnlocalization.csail.mit.edu/demoCAM/models/alexnetplusCAM_places205.caffemodel] 25 | 26 | ### Usage Instructions: 27 | * Install [caffe](https://github.com/BVLC/caffe), compile matcaffe (the matlab wrapper for caffe), and make sure you can run the prediction example code classification.m. 28 | * Clone the code from Github: 29 | ``` 30 | git clone https://github.com/metalbubble/CAM.git 31 | cd CAM 32 | ``` 33 | * Download the pretrained networks: 34 | ``` 35 | sh models/download.sh 36 | ``` 37 | * Run the demo code to generate the heatmap: in the matlab terminal, 38 | ``` 39 | demo 40 | ``` 41 | * Run the demo code to generate bounding boxes from the heatmap: in the matlab terminal, 42 | ``` 43 | generate_bbox 44 | ``` 45 | 46 | A demo video of what the CNN is looking at is [here](https://www.youtube.com/watch?v=fZvOy0VXWAI). A reimplementation in TensorFlow is [here](https://github.com/jazzsaxmafia/Weakly_detector). 47 | 48 | ### Reference: 49 | ``` 50 | @inproceedings{zhou2016cvpr, 51 | author = {Zhou, Bolei and Khosla, Aditya and Lapedriza, Agata and Oliva, Aude and Torralba, Antonio}, 52 | title = {Learning Deep Features for Discriminative Localization}, 53 | booktitle = {Computer Vision and Pattern Recognition}, 54 | year = {2016} 55 | } 56 | ``` 57 | ### License: 58 | The pre-trained models and the CAM technique are released for unrestricted use. 59 | 60 | Contact [Bolei Zhou](http://people.csail.mit.edu/bzhou/) if you have questions. 
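### Bounding box output format:
```bboxgenerator/dt_box``` writes all detected boxes to a single line of space-separated ```x y width height``` quadruples (see ```bboxgenerator/heatmap_6.txt``` and the output() function in dt_box.cpp). Here is a small Python sketch of the parsing and corner-format conversion that generate_bbox.m does with dlmread:
```
def read_boxes(path):
    # each box is an "x y width height" quadruple on one whitespace-separated line
    vals = [int(v) for v in open(path).read().split()]
    boxes = [vals[i:i + 4] for i in range(0, len(vals), 4)]
    # convert [x, y, w, h] to corner format [x1, y1, x2, y2]
    return [[x, y, x + w, y + h] for (x, y, w, h) in boxes]

print(read_boxes('bboxgenerator/heatmap_6.txt'))
# -> [[41, 145, 403, 347], [102, 188, 344, 314]]
```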
61 | 62 | -------------------------------------------------------------------------------- /bboxgenerator/Makefile: -------------------------------------------------------------------------------- 1 | all: cut dt_box 2 | 3 | 4 | cut: 5 | g++ -g -O3 gc.cpp -o cut `pkg-config --libs opencv` -lm 6 | 7 | dt_box: 8 | g++ -g -O3 dt.c dt_box.cpp -o dt_box `pkg-config --libs opencv` -lm 9 | 10 | .PHONY: clean 11 | clean: 12 | rm cut dt_box 13 | -------------------------------------------------------------------------------- /bboxgenerator/cut: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/bboxgenerator/cut -------------------------------------------------------------------------------- /bboxgenerator/dt.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Distance transform for binary image or gray-scale image. 3 | * @param 4 | * @return 5 | */ 6 | 7 | #include <stdlib.h> 8 | #include <math.h> 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | #define INF 1E20 15 | 16 | #define SQUARE(q) ((q)*(q)) 17 | #define MAX(a,b) ((a) > (b) ? (a) : (b)) 18 | #define ROUND(t) ((int)((t) + 0.5)) 19 | #define BOUND_8U(t) ((t) < 0 ? 0 : (t) > 255 ? 255 : (t)) 20 | 21 | static void dt_row(const double *f, int n, double *d, double *z, int *v) { 22 | int k, q; 23 | 24 | v[0] = 0; 25 | z[0] = -INF; 26 | z[1] = +INF; 27 | 28 | k = 0; 29 | for (q = 1; q < n; ++q) { 30 | double s = ((f[q]+SQUARE(q))-(f[v[k]]+SQUARE(v[k])))/(double)(2*q-2*v[k]); 31 | while (s <= z[k]) { 32 | k--; 33 | s = ((f[q]+SQUARE(q))-(f[v[k]]+SQUARE(v[k])))/(double)(2*q-2*v[k]); 34 | } 35 | k++; 36 | v[k] = q; 37 | z[k] = s; 38 | z[k+1] = +INF; 39 | } 40 | 41 | k = 0; 42 | for (q = 0; q < n; ++q) { 43 | while (z[k+1] < q) 44 | k++; 45 | d[q] = SQUARE(q-v[k]) + f[v[k]]; 46 | } 47 | } 48 | 49 | void 50 | dt(double *m, int rows, int cols) { 51 | const int n = MAX(rows, cols); 52 | double *f = (double *)malloc(sizeof(f[0]) * n); 53 | double *d = (double *)malloc(sizeof(d[0]) * n); 54 | double *z = (double *)malloc(sizeof(z[0]) * (n+ 1)); 55 | int *v = (int *)malloc(sizeof(v[0]) * n); 56 | int x, y; 57 | 58 | for (x = 0; x < cols; ++x) { 59 | for (y = 0; y < rows; ++y) { 60 | f[y] = m[y*cols + x]; 61 | } 62 | dt_row(f, rows, d, z, v); 63 | for (y = 0; y < rows; ++y) { 64 | m[y*cols + x] = d[y]; 65 | } 66 | } 67 | 68 | for (y = 0; y < rows; ++y) { 69 | for (x = 0; x < cols; ++x) { 70 | f[x] = m[y*cols + x]; 71 | } 72 | dt_row(f, cols, d, z, v); 73 | for (x = 0; x < cols; ++x) { 74 | m[y*cols + x] = d[x]; 75 | } 76 | } 77 | 78 | free(f); 79 | free(d); 80 | free(z); 81 | free(v); 82 | } 83 | 84 | static void 85 | min_max(const double *m, int sz, double *min, double *max) { 86 | double mi = m[0], ma = m[0]; 87 | int i = 1; 88 | for (; i < sz; ++i) { 89 | if (m[i] > ma) { 90 | ma = m[i]; 91 | } 92 | else if (m[i] < mi) { 93 | mi = m[i]; 94 | } 95 | } 96 | *min = mi; 97 | *max = ma; 98 | } 99 | 100 | static void 101 | double_to_image(const double *m, int rows, int cols, unsigned char *data, int step) { 102 | int i, j; 103 | double mi, ma, scale; 104 | min_max(m, rows * cols, &mi, &ma); 105 | 106 | if (mi == ma) { 107 | return ; 108 | } 109 | 110 | scale = 255.0 / (ma - mi); 111 | 112 | for (i = 0; i < rows; ++i) { 113 | for (j = 0; j < cols; ++j) { 114 | const double s = m[i*cols + j] * scale; 115 | const int t = ROUND(s); 116 | data[i*step + j] = BOUND_8U(t); 117 | } 118 | } 119 | } 120
| 121 | static void 122 | sqrt_m(double *m, int sz) { 123 | int i = 0; 124 | for (; i < sz; ++i) { 125 | m[i] = sqrt(m[i]); 126 | } 127 | } 128 | 129 | void 130 | dt_gray(unsigned char *gray, int rows, int cols, int step) { 131 | double *m = (double *)malloc(sizeof(m[0]) * rows * cols); 132 | int i, j; 133 | const double vstep = 100.0; /* big enough to transform the distance... */ 134 | for (i = 0; i < rows; ++i) { 135 | for (j = 0; j < cols; ++j) { 136 | m[i*cols + j] = vstep * (double)gray[i*step + j]; 137 | } 138 | } 139 | 140 | dt(m, rows, cols); 141 | sqrt_m(m, rows * cols); 142 | double_to_image(m, rows, cols, gray, step); 143 | free(m); 144 | } 145 | 146 | void 147 | dt_binary(unsigned char *bimg, int rows, int cols, int step) { 148 | double *m = (double *)malloc(sizeof(m[0]) * rows * cols); 149 | int i, j; 150 | for (i = 0; i < rows; ++i) { 151 | for (j = 0; j < cols; ++j) { 152 | m[i*cols + j] = bimg[i*step + j] > 0 ? +INF : 0.0; 153 | } 154 | } 155 | 156 | dt(m, rows, cols); 157 | sqrt_m(m, rows*cols); 158 | double_to_image(m, rows, cols, bimg, step); 159 | free(m); 160 | } 161 | 162 | 163 | #ifdef __cplusplus 164 | } 165 | #endif 166 | -------------------------------------------------------------------------------- /bboxgenerator/dt.h: -------------------------------------------------------------------------------- 1 | #ifndef DT_H 2 | #define DT_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void dt(double *m, int rows, int cols); 9 | void dt_binary(unsigned char *bimg, int rows, int cols, int step); 10 | void dt_gray(unsigned char *gray, int rows, int cols, int step); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /bboxgenerator/dt_box: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/bboxgenerator/dt_box -------------------------------------------------------------------------------- /bboxgenerator/dt_box.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ---------------------------------------- 3 | Given a heatmap, output the bboxes. 4 | 5 | 0. Get the DT-ed images 6 | 1. detect all contours in the thresholded maps 7 | 2. merge based on some rules. 8 | 3. 
output the bboxes 9 | ---------------------------------------- 10 | */ 11 | #include <cv.h> 12 | #include "dt.h" 13 | #include <highgui.h> 14 | #include <stdio.h> 15 | #include <vector> 16 | #include <algorithm> 17 | 18 | using namespace cv; 19 | using std::vector; 20 | 21 | #define SCALE_NUM 3 22 | 23 | struct Data 24 | { 25 | Data() : size(SCALE_NUM) 26 | { 27 | for (int i = 0; i < SCALE_NUM; ++i) 28 | { 29 | images[i] = NULL; 30 | } 31 | } 32 | 33 | ~Data() 34 | { 35 | for (int i = 0; i < SCALE_NUM; ++i) 36 | { 37 | if (images[i]) 38 | { 39 | cvReleaseImage(&(images[i])); 40 | images[i] = NULL; 41 | } 42 | } 43 | } 44 | int size; 45 | IplImage *images[SCALE_NUM]; 46 | }; 47 | 48 | 49 | static int g_Ths[SCALE_NUM] = {30, 90, 150}; 50 | 51 | static Data * 52 | fromDT(const IplImage *gray) 53 | { 54 | Data *data = new Data; 55 | for (int i = 0; i < data->size; ++i) 56 | { 57 | data->images[i] = cvCreateImage(cvGetSize(gray), 8, 1); 58 | cvThreshold(gray, data->images[i], g_Ths[i], 255, CV_THRESH_BINARY); 59 | dt_binary((unsigned char*)data->images[i]->imageData, data->images[i]->height, data->images[i]->width, data->images[i]->widthStep); 60 | cvThreshold(data->images[i], data->images[i], 10, 255, CV_THRESH_BINARY); 61 | } 62 | return data; 63 | } 64 | 65 | 66 | static int 67 | LIMIT(int v, int L, int R) 68 | { 69 | return v < L ? L : (v > R ? R : v); 70 | } 71 | 72 | static vector<CvRect> 73 | getBBox(struct Data *data) 74 | { 75 | vector<CvRect> bboxes; 76 | const int W = data->images[0]->width; 77 | const int H = data->images[0]->height; 78 | 79 | for (int i = 0; i < data->size; ++i) 80 | { 81 | cv::Mat a = cv::cvarrToMat(data->images[i]); 82 | vector< vector<Point> > contours; 83 | vector<Vec4i> hie; 84 | cv::findContours(a, contours, hie, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE); 85 | for (int j = 0; j < contours.size(); ++j) 86 | { 87 | cv::Rect bb = cv::boundingRect( contours[j] ); 88 | CvRect cr; 89 | cr.x = LIMIT(bb.x, 0, W-5); 90 | cr.y = LIMIT(bb.y, 0, H-5); 91 | cr.width = LIMIT(bb.width, 0, W - bb.x-5); 92 | cr.height = LIMIT(bb.height, 0, H-bb.y-5); 93 | //printf("%d, %d, %d, %d\n", W, H, cr.width, cr.height); 94 | bboxes.push_back(cr); 95 | } 96 | } 97 | return bboxes; 98 | } 99 | 100 | 101 | /* 102 | ---------------------------------------- 103 | x_overlap = Math.max(0, Math.min(x12,x22) - Math.max(x11,x21)); 104 | y_overlap = Math.max(0, Math.min(y12,y22) - Math.max(y11,y21)); 105 | overlapArea = x_overlap * y_overlap; 106 | ---------------------------------------- 107 | */ 108 | static bool 109 | big_overlap(const CvRect &a, const CvRect &b) 110 | { 111 | int t = (double)std::max(a.width * a.height, b.width * b.height) * 0.5; 112 | int x11, y11, x12, y12, x21, y21, x22, y22; 113 | x11 = a.x; 114 | y11 = a.y; 115 | x12 = a.x + a.width; 116 | y12 = a.y + a.height; 117 | x21 = b.x; 118 | y21 = b.y; 119 | x22 = b.x + b.width; 120 | y22 = b.y + b.height; 121 | int x_overlap = std::max(0, std::min(x12,x22) - std::max(x11,x21)); 122 | int y_overlap = std::max(0, std::min(y12,y22) - std::max(y11,y21)); 123 | int overlapArea = x_overlap * y_overlap; 124 | return overlapArea > t; 125 | } 126 | 127 | /* 128 | ---------------------------------------- 129 | 1. Overlap > max(area(A), area(B)) * 0.5 130 | 131 | 0. rank BB 132 | 1. 
from big to small: 133 | 134 | ---------------------------------------- 135 | */ 136 | static void 137 | mergeBBox(vector<CvRect> &bboxes) 138 | { 139 | for (int i = 0; i < bboxes.size(); ++i) 140 | { 141 | for (int j = i + 1; j < bboxes.size(); ++j) 142 | { 143 | if (big_overlap(bboxes[i], bboxes[j])) 144 | { 145 | // remove small one 146 | bboxes.erase(bboxes.begin() + j); --j; /* stay at this index: the next box shifted into slot j */ 147 | } 148 | } 149 | } 150 | return ; 151 | } 152 | 153 | static bool 154 | my_cmp(const CvRect& a, const CvRect& b) 155 | { 156 | return a.width * a.height > b.width * b.height; 157 | } 158 | 159 | 160 | static void 161 | rankBBox(vector<CvRect> &bboxes) 162 | { 163 | std::sort(bboxes.begin(), bboxes.end(), my_cmp); 164 | } 165 | 166 | 167 | static void 168 | draw(const vector<CvRect> &rects, const char *iname) 169 | { 170 | IplImage *img = cvLoadImage(iname, 1); 171 | const CvScalar color = cvScalar(0,0,255,0); 172 | 173 | for (int i = 0; i < rects.size(); ++i) 174 | { 175 | CvRect r = rects[i]; 176 | cvRectangle(img, cvPoint(r.x, r.y), cvPoint(r.x + r.width, r.y + r.height), color, 3, 8, 0); 177 | } 178 | cvNamedWindow("draw", 1); 179 | cvShowImage("draw", img); 180 | cvWaitKey(0); 181 | cvReleaseImage(&img); 182 | } 183 | 184 | 185 | static void 186 | output(const vector<CvRect> &rects, const char *filen) 187 | { 188 | FILE *fp = fopen(filen, "w"); 189 | assert(fp != NULL); 190 | for (int i = 0; i < rects.size(); ++i) 191 | { 192 | fprintf(fp, "%d %d %d %d ", rects[i].x, rects[i].y, rects[i].width, rects[i].height); 193 | } 194 | fclose(fp); 195 | return ; 196 | } 197 | 198 | static void 199 | output(const vector<CvRect> &rects) 200 | { 201 | for (int i = 0; i < rects.size(); ++i) 202 | { 203 | printf("%d %d %d %d ", rects[i].x, rects[i].y, rects[i].width, rects[i].height); 204 | } 205 | printf("\n"); 206 | } 207 | 208 | int 209 | main(int argc, char *argv[]) 210 | { 211 | if (argc != 5 && argc != 6) 212 | { 213 | puts(">>>./program image.jpg th0 th1 th2\nor"); 214 | puts(">>>./program image.jpg th0 th1 th2 output.txt"); 215 | return -1; 216 | } 217 | 218 | IplImage *gray = cvLoadImage(argv[1], 0); 219 | if (!gray) 220 | { 221 | puts("Can not open image, dude!\n"); 222 | } 223 | 224 | // set the thresholds 225 | { 226 | int t0, t1, t2; 227 | t0 = atoi(argv[2]); 228 | t1 = atoi(argv[3]); 229 | t2 = atoi(argv[4]); 230 | if (0 < t0 && t0 < t1 && t1 < t2 && t2 < 255) 231 | { 232 | g_Ths[0] = t0; 233 | g_Ths[1] = t1; 234 | g_Ths[2] = t2; 235 | } 236 | } 237 | 238 | 239 | Data *data = fromDT(gray); 240 | vector<CvRect> rects = getBBox(data); 241 | rankBBox(rects); 242 | mergeBBox(rects); 243 | 244 | 245 | //if (argc == 4) 246 | // draw(rects, argv[3]); 247 | 248 | 249 | 250 | if (argc == 6) 251 | output(rects, argv[5]); 252 | else 253 | output(rects); 254 | 255 | delete data; 256 | cvReleaseImage(&gray); 257 | return 0; 258 | } 259 | -------------------------------------------------------------------------------- /bboxgenerator/gc.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ---------------------------------------- 3 | Using the heat map as the foreground input of 4 | the grabcut. 5 | 6 | Update: 7 | 0. output the biggest bounding box 8 | 1. expose the two thresholding values to the command line. 9 | ---------------------------------------- 10 | */ 11 | #include <opencv2/opencv.hpp> 12 | #include <stdio.h> 13 | #include <vector> 14 | 15 | using namespace cv; 16 | using std::vector; 17 | 18 | static int g_th0 = 10; 19 | static int g_th1 = 40; 20 | 21 | /* 22 | ---------------------------------------- 23 | Using the simplest thresholding to get the foreground. 
24 | ---------------------------------------- 25 | */ 26 | static Mat 27 | foreground(const Mat &heatmap) 28 | { 29 | Mat bm; 30 | Mat re = heatmap.clone(); 31 | re.setTo(GC_BGD); 32 | 33 | threshold(heatmap, bm, g_th0, 255, THRESH_BINARY); 34 | re.setTo(GC_PR_BGD, bm); 35 | threshold(heatmap, bm, g_th1, 255, THRESH_BINARY); 36 | re.setTo(GC_PR_FGD, bm); 37 | 38 | return re; 39 | } 40 | 41 | 42 | static Mat 43 | cut(const Mat &src, const Mat &heatmap) 44 | { 45 | Mat mask = foreground(heatmap); 46 | Mat bgModel,fgModel; 47 | grabCut(src, mask, Rect(), bgModel,fgModel, 1, cv::GC_INIT_WITH_MASK); 48 | Mat1b mask_fgpf = ( mask == cv::GC_FGD) | (mask == cv::GC_PR_FGD); 49 | Mat3b tmp = Mat3b::zeros(src.rows, src.cols); 50 | src.copyTo(tmp, mask_fgpf); 51 | return tmp; 52 | } 53 | 54 | /* 55 | ---------------------------------------- 56 | The same as cut_mask, but saves the segmented image 57 | ---------------------------------------- 58 | */ 59 | static Mat 60 | cut_mask_save(const Mat &src, const Mat &heatmap, const char *dstname) 61 | { 62 | Mat mask = foreground(heatmap); 63 | Mat bgModel,fgModel; 64 | grabCut(src, mask, Rect(), bgModel,fgModel, 1, cv::GC_INIT_WITH_MASK); 65 | Mat mask_fgpf = (mask == cv::GC_FGD) | (mask == cv::GC_PR_FGD); 66 | Mat tmp = Mat3b::zeros(src.rows, src.cols); 67 | src.copyTo(tmp, mask_fgpf); 68 | imwrite(dstname, tmp); 69 | return mask_fgpf; 70 | } 71 | 72 | 73 | /* 74 | ---------------------------------------- 75 | cut, return the mask. 76 | ---------------------------------------- 77 | */ 78 | static Mat 79 | cut_mask(const Mat &src, const Mat &heatmap) 80 | { 81 | Mat mask = foreground(heatmap); 82 | Mat bgModel,fgModel; 83 | grabCut(src, mask, Rect(), bgModel,fgModel, 1, cv::GC_INIT_WITH_MASK); 84 | return ( mask == cv::GC_FGD) | (mask == cv::GC_PR_FGD); 85 | } 86 | 87 | 88 | static bool 89 | rect_cmp(const Rect& a, const Rect& b) 90 | { 91 | return a.area()> b.area(); 92 | } 93 | 94 | 95 | static vector<Rect> 96 | bbox(Mat &mask) 97 | { 98 | vector<Rect> rs; 99 | vector< vector<Point> > contours; 100 | vector<Vec4i> hie; 101 | findContours(mask, contours, hie, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE); 102 | for (int j = 0; j < contours.size(); ++j) 103 | { 104 | const Rect bb = boundingRect( contours[j] ); 105 | if (bb.area() > 10) 106 | { 107 | rs.push_back(bb); 108 | } 109 | 110 | } 111 | 112 | if (rs.size() == 0) 113 | { 114 | rs.push_back(Rect(0,0,mask.cols, mask.rows)); 115 | return rs; 116 | } 117 | 118 | sort(rs.begin(), rs.end(), rect_cmp); 119 | return rs; 120 | } 121 | 122 | 123 | static void 124 | output(const vector<Rect> &rs, const char *filen) 125 | { 126 | FILE *fp = fopen(filen, "w"); 127 | assert(fp != NULL); 128 | for (int i = 0; i < rs.size(); ++i) 129 | { 130 | fprintf(fp, "%d %d %d %d ", rs[i].x, rs[i].y, rs[i].width, rs[i].height); 131 | } 132 | 133 | fclose(fp); 134 | return ; 135 | } 136 | 137 | int 138 | main(int argc, char *argv[]) 139 | { 140 | if (argc != 4 && argc != 6 && argc != 7) 141 | { 142 | puts(">>./cut sample.jpg heat.jpg output.txt\nor"); 143 | puts(">>./cut sample.jpg heat.jpg output.txt th1[=10] th2[=40]\nor"); 144 | puts(">>./cut sample.jpg heat.jpg output.txt th1[=10] th2[=40] save_image_name.jpg"); 145 | return 0; 146 | } 147 | 148 | if (argc == 6) 149 | { 150 | int t0 = atoi(argv[4]); 151 | int t1 = atoi(argv[5]); 152 | if (0 <= t0 && t0 < t1 && t1 <= 255) 153 | { 154 | g_th0 = t0; 155 | g_th1 = t1; 156 | } 157 | } 158 | 159 | Mat src = imread(argv[1], 1); 160 | Mat heat = imread(argv[2], 0); 161 | Mat m; 162 | 163 | if (argc == 7) 164 | m = 
cut_mask_save(src, heat, argv[6]); 165 | else 166 | m = cut_mask(src, heat); 167 | 168 | vector<Rect> bbs = bbox(m); 169 | output(bbs, argv[3]); 170 | //rectangle(src, box, Scalar(0,0,255)); 171 | //imwrite(argv[3], src); 172 | //imshow("result", src); 173 | //waitKey(0); 174 | return 0; 175 | } 176 | -------------------------------------------------------------------------------- /bboxgenerator/heatmap_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/bboxgenerator/heatmap_6.jpg -------------------------------------------------------------------------------- /bboxgenerator/heatmap_6.txt: -------------------------------------------------------------------------------- 1 | 41 145 362 202 102 188 242 126 -------------------------------------------------------------------------------- /bboxgenerator/sample_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/bboxgenerator/sample_6.jpg -------------------------------------------------------------------------------- /categories1000.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/categories1000.mat -------------------------------------------------------------------------------- /data_img1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/data_img1.mat -------------------------------------------------------------------------------- /data_img2.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/data_img2.mat -------------------------------------------------------------------------------- /data_net.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/data_net.mat -------------------------------------------------------------------------------- /demo.m: -------------------------------------------------------------------------------- 1 | % Sample code to generate class activation map from 10 crops of activations 2 | % Bolei Zhou, March 15, 2016 3 | % for the online prediction, make sure you have compiled matcaffe 4 | 5 | clear 6 | addpath('/xxx/yyy/caffe/matlab'); 7 | 8 | imgID = 2; % 1 or 2 9 | img = imread(['img' num2str(imgID) '.jpg']); 10 | img = imresize(img, [256 256]); 11 | online = 0; % whether to extract features online or load pre-extracted features 12 | 13 | load('categories1000.mat'); 14 | if online == 1 15 | % load the CAM model and extract features 16 | 17 | net_weights = ['models/imagenet_googleletCAM_train_iter_120000.caffemodel']; 18 | net_model = ['models/deploy_googlenetCAM.prototxt']; 19 | net = caffe.Net(net_model, net_weights, 'test'); 20 | 21 | weights_LR = net.params('CAM_fc',1).get_data();% get the softmax layer of the network 22 | 23 | scores = net.forward({prepare_image(img)});% extract conv features online 24 | activation_lastconv = net.blobs('CAM_conv').get_data(); 25 | scores = scores{1}; 26 | else 27 | % use the extracted features and softmax parameters 
cached beforehand 28 | load('data_net.mat'); % it contains the softmax weights and the category names of the network 29 | load(['data_img' num2str(imgID) '.mat']); % it contains the pre-extracted conv features 30 | end 31 | 32 | 33 | 34 | 35 | %% Class Activation Mapping 36 | 37 | topNum = 5; % generate heatmap for top X prediction results 38 | scoresMean = mean(scores,2); 39 | [value_category, IDX_category] = sort(scoresMean,'descend'); 40 | [curCAMmapAll] = returnCAMmap(activation_lastconv, weights_LR(:,IDX_category(1:topNum))); 41 | 42 | curResult = im2double(img); 43 | curPrediction = ''; 44 | 45 | for j=1:topNum 46 | curCAMmap_crops = squeeze(curCAMmapAll(:,:,j,:)); 47 | curCAMmapLarge_crops = imresize(curCAMmap_crops,[256 256]); 48 | curCAMLarge = mergeTenCrop(curCAMmapLarge_crops); 49 | curHeatMap = imresize(im2double(curCAMLarge),[256 256]); 50 | curHeatMap = im2double(curHeatMap); 51 | 52 | curHeatMap = map2jpg(curHeatMap,[], 'jet'); 53 | curHeatMap = im2double(img)*0.2+curHeatMap*0.7; 54 | curResult = [curResult ones(size(curHeatMap,1),8,3) curHeatMap]; 55 | curPrediction = [curPrediction ' --top' num2str(j) ':' categories{IDX_category(j)}]; 56 | 57 | end 58 | figure,imshow(curResult);title(curPrediction) 59 | 60 | if online==1 61 | caffe.reset_all(); 62 | end 63 | 64 | -------------------------------------------------------------------------------- /generate_bbox.m: -------------------------------------------------------------------------------- 1 | %% Here is the code to generate the bounding box from the heatmap 2 | % 3 | % to reproduce the ILSVRC localization result, you need to first generate 4 | % the heatmap for each testing image by merging the heatmaps from the 5 | % 10 crops (exactly what the demo code does), then resize the merged heatmap back to the original size of 6 | % that image. Then use this bbox generator to generate the bbox from the resized heatmap. 7 | % 8 | % The source code of the bbox generator is also released. You will probably need 9 | % to install the correct version of OpenCV to compile it. 10 | % 11 | % Special thanks to Hui Li for helping on this code. 
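% Note: dt_box writes all boxes to one line of space-separated "x y width height"
% quadruples; dlmread below reads that line, and the two boxData_formulate lines
% convert each box to [x1 y1 x2 y2] corner format for plotting.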
12 | % 13 | % Bolei Zhou, April 19, 2016 14 | 15 | bbox_threshold = [20, 100, 110]; % parameters for the bbox generator 16 | curParaThreshold = [num2str(bbox_threshold(1)) ' ' num2str(bbox_threshold(2)) ' ' num2str(bbox_threshold(3))]; 17 | curHeatMapFile = 'bboxgenerator/heatmap_6.jpg'; 18 | curImgFile = 'bboxgenerator/sample_6.jpg'; 19 | curBBoxFile = 'bboxgenerator/heatmap_6.txt'; 20 | system(['bboxgenerator/./dt_box ' curHeatMapFile ' ' curParaThreshold ' ' curBBoxFile]); 21 | 22 | boxData = dlmread(curBBoxFile); 23 | boxData_formulate = [boxData(1:4:end)' boxData(2:4:end)' boxData(1:4:end)'+boxData(3:4:end)' boxData(2:4:end)'+boxData(4:4:end)']; 24 | boxData_formulate = [min(boxData_formulate(:,1),boxData_formulate(:,3)),min(boxData_formulate(:,2),boxData_formulate(:,4)),max(boxData_formulate(:,1),boxData_formulate(:,3)),max(boxData_formulate(:,2),boxData_formulate(:,4))]; 25 | 26 | curHeatMap = imread(curHeatMapFile); 27 | %curHeatMap = imresize(curHeatMap,[height_original weight_original]); 28 | 29 | subplot(1,2,1),hold off, imshow(curImgFile); 30 | hold on 31 | for i=1:size(boxData_formulate,1) 32 | curBox = boxData_formulate(i,:); 33 | rectangle('Position',[curBox(1) curBox(2) curBox(3)-curBox(1) curBox(4)-curBox(2)],'EdgeColor',[1 0 0]); 34 | end 35 | subplot(1,2,2),imagesc(curHeatMap); -------------------------------------------------------------------------------- /ilsvrc_2012_mean.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/ilsvrc_2012_mean.mat -------------------------------------------------------------------------------- /img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/img1.jpg -------------------------------------------------------------------------------- /img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/img2.jpg -------------------------------------------------------------------------------- /map2jpg.m: -------------------------------------------------------------------------------- 1 | function [img] = map2jpg(imgmap, range, colorMap) 2 | imgmap = double(imgmap); 3 | if(~exist('range', 'var') || isempty(range)), range = [min(imgmap(:)) max(imgmap(:))]; end 4 | 5 | heatmap_gray = mat2gray(imgmap, range); 6 | heatmap_x = gray2ind(heatmap_gray, 256); 7 | heatmap_x(isnan(imgmap)) = 0; 8 | 9 | if(~exist('colorMap', 'var')) 10 | img = ind2rgb(heatmap_x, jet(256)); 11 | else 12 | img = ind2rgb(heatmap_x, eval([colorMap '(256)'])); 13 | end 14 | 15 | -------------------------------------------------------------------------------- /mergeTenCrop.m: -------------------------------------------------------------------------------- 1 | function alignImgMean = mergeTenCrop( CAMmap_crops) 2 | % align the ten crops of CAMmaps back to one image (take a look at caffe 3 | % matlab wrapper about how ten crops are generated) 4 | cropImgSet = zeros([256 256 3 10]); 5 | cropImgSet(:,:,1,:) = CAMmap_crops; 6 | cropImgSet(:,:,2,:) = CAMmap_crops; 7 | cropImgSet(:,:,3,:) = CAMmap_crops; 8 | 9 | 10 | squareSize = 256; 11 | cropSize = size(cropImgSet,1); 12 | indices = [0 squareSize-cropSize] + 1; 13 | 14 | alignImgSet = zeros(256,256,size(cropImgSet,3),'single'); 15 | 16 | 17 | curr = 1; 18 
| for i = indices 19 | for j = indices 20 | 21 | curCrop1 = permute(cropImgSet(:,:,:,curr),[2 1 3 4]); 22 | curCrop2 = permute(cropImgSet(end:-1:1,:,:,curr+5),[2 1 3 4]); 23 | 24 | 25 | alignImgSet(i:i+cropSize-1, j:j+cropSize-1,:,curr) = curCrop1; 26 | alignImgSet(i:i+cropSize-1, j:j+cropSize-1,:, curr+5) = curCrop2; 27 | 28 | curr = curr + 1; 29 | 30 | end 31 | end 32 | center = floor(indices(2) / 2)+1; 33 | curCrop1 = permute(cropImgSet(:,:,:,5),[2 1 3 4]); 34 | curCrop2 = permute(cropImgSet(end:-1:1,:,:,10),[2 1 3 4]); 35 | alignImgSet(center:center+cropSize-1, center:center+cropSize-1,:,5) = curCrop1; 36 | alignImgSet(center:center+cropSize-1, center:center+cropSize-1,:, 10) = curCrop2; 37 | alignImgMean = squeeze(sum(sum(abs(alignImgSet),3),4)); 38 | 39 | end 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /models/categoriesImageNet.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/models/categoriesImageNet.mat -------------------------------------------------------------------------------- /models/categories_places205.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gcucurull/CAM-Python/f1f83e8433c0c34d532b4878adc9d7b69948c2d7/models/categories_places205.mat -------------------------------------------------------------------------------- /models/deploy_alexnetplusCAM_imagenet.prototxt: -------------------------------------------------------------------------------- 1 | name: "imagenetCNN_alexnetdeep" 2 | input: "data" 3 | input_dim: 10 4 | input_dim: 3 5 | input_dim: 227 6 | input_dim: 227 7 | layers { 8 | name: "conv1" 9 | type: CONVOLUTION 10 | bottom: "data" 11 | top: "conv1" 12 | blobs_lr: 1 13 | blobs_lr: 2 14 | weight_decay: 1 15 | weight_decay: 0 16 | convolution_param { 17 | num_output: 96 18 | kernel_size: 11 19 | stride: 4 20 | weight_filler { 21 | type: "gaussian" 22 | std: 0.01 23 | } 24 | bias_filler { 25 | type: "constant" 26 | value: 0 27 | } 28 | } 29 | } 30 | layers { 31 | name: "relu1" 32 | type: RELU 33 | bottom: "conv1" 34 | top: "conv1" 35 | } 36 | layers { 37 | name: "pool1" 38 | type: POOLING 39 | bottom: "conv1" 40 | top: "pool1" 41 | pooling_param { 42 | pool: MAX 43 | kernel_size: 3 44 | stride: 2 45 | } 46 | } 47 | layers { 48 | name: "norm1" 49 | type: LRN 50 | bottom: "pool1" 51 | top: "norm1" 52 | lrn_param { 53 | local_size: 5 54 | alpha: 0.0001 55 | beta: 0.75 56 | } 57 | } 58 | layers { 59 | name: "conv2" 60 | type: CONVOLUTION 61 | bottom: "norm1" 62 | top: "conv2" 63 | blobs_lr: 1 64 | blobs_lr: 2 65 | weight_decay: 1 66 | weight_decay: 0 67 | convolution_param { 68 | num_output: 256 69 | pad: 2 70 | kernel_size: 5 71 | group: 2 72 | weight_filler { 73 | type: "gaussian" 74 | std: 0.01 75 | } 76 | bias_filler { 77 | type: "constant" 78 | value: 1 79 | } 80 | } 81 | } 82 | layers { 83 | name: "relu2" 84 | type: RELU 85 | bottom: "conv2" 86 | top: "conv2" 87 | } 88 | layers { 89 | name: "pool2" 90 | type: POOLING 91 | bottom: "conv2" 92 | top: "pool2" 93 | pooling_param { 94 | pool: MAX 95 | kernel_size: 3 96 | stride: 2 97 | } 98 | } 99 | layers { 100 | name: "norm2" 101 | type: LRN 102 | bottom: "pool2" 103 | top: "norm2" 104 | lrn_param { 105 | local_size: 5 106 | alpha: 0.0001 107 | beta: 0.75 108 | } 109 | } 110 | layers { 111 | name: "conv3" 112 | type: CONVOLUTION 113 | bottom: "norm2" 114 | 
top: "conv3" 115 | blobs_lr: 1 116 | blobs_lr: 2 117 | weight_decay: 1 118 | weight_decay: 0 119 | convolution_param { 120 | num_output: 384 121 | pad: 1 122 | kernel_size: 3 123 | weight_filler { 124 | type: "gaussian" 125 | std: 0.01 126 | } 127 | bias_filler { 128 | type: "constant" 129 | value: 0 130 | } 131 | } 132 | } 133 | layers { 134 | name: "relu3" 135 | type: RELU 136 | bottom: "conv3" 137 | top: "conv3" 138 | } 139 | layers { 140 | name: "conv4" 141 | type: CONVOLUTION 142 | bottom: "conv3" 143 | top: "conv4" 144 | blobs_lr: 1 145 | blobs_lr: 2 146 | weight_decay: 1 147 | weight_decay: 0 148 | convolution_param { 149 | num_output: 384 150 | pad: 1 151 | kernel_size: 3 152 | group: 2 153 | weight_filler { 154 | type: "gaussian" 155 | std: 0.01 156 | } 157 | bias_filler { 158 | type: "constant" 159 | value: 1 160 | } 161 | } 162 | } 163 | layers { 164 | name: "relu4" 165 | type: RELU 166 | bottom: "conv4" 167 | top: "conv4" 168 | } 169 | layers { 170 | name: "conv5" 171 | type: CONVOLUTION 172 | bottom: "conv4" 173 | top: "conv5" 174 | blobs_lr: 1 175 | blobs_lr: 2 176 | weight_decay: 1 177 | weight_decay: 0 178 | convolution_param { 179 | num_output: 384 180 | pad: 1 181 | kernel_size: 3 182 | group: 2 183 | weight_filler { 184 | type: "gaussian" 185 | std: 0.01 186 | } 187 | bias_filler { 188 | type: "constant" 189 | value: 1 190 | } 191 | } 192 | } 193 | layers { 194 | name: "relu5" 195 | type: RELU 196 | bottom: "conv5" 197 | top: "conv5" 198 | } 199 | layers { 200 | name: "pool5" 201 | type: POOLING 202 | bottom: "conv5" 203 | top: "pool5" 204 | pooling_param { 205 | pool: MAX 206 | kernel_size: 3 207 | stride: 1 208 | } 209 | } 210 | layers { 211 | name: "conv6" 212 | type: CONVOLUTION 213 | bottom: "pool5" 214 | top: "conv6" 215 | blobs_lr: 1 216 | blobs_lr: 2 217 | weight_decay: 1 218 | weight_decay: 0 219 | convolution_param { 220 | num_output: 512 221 | pad: 1 222 | kernel_size: 3 223 | group: 2 224 | weight_filler { 225 | type: "gaussian" 226 | std: 0.01 227 | } 228 | bias_filler { 229 | type: "constant" 230 | value: 1 231 | } 232 | } 233 | } 234 | layers { 235 | name: "relu6" 236 | type: RELU 237 | bottom: "conv6" 238 | top: "conv6" 239 | } 240 | layers { 241 | name: "conv7" 242 | type: CONVOLUTION 243 | bottom: "conv6" 244 | top: "conv7" 245 | blobs_lr: 1 246 | blobs_lr: 2 247 | weight_decay: 1 248 | weight_decay: 0 249 | convolution_param { 250 | num_output: 512 251 | pad: 1 252 | kernel_size: 3 253 | group: 2 254 | weight_filler { 255 | type: "gaussian" 256 | std: 0.01 257 | } 258 | bias_filler { 259 | type: "constant" 260 | value: 1 261 | } 262 | } 263 | } 264 | layers { 265 | name: "relu7" 266 | type: RELU 267 | bottom: "conv7" 268 | top: "conv7" 269 | } 270 | layers { 271 | name: "pool8_global" 272 | type: POOLING 273 | bottom: "conv7" 274 | top: "pool8_global" 275 | pooling_param { 276 | pool: AVE 277 | kernel_size: 11 278 | stride: 11 279 | } 280 | } 281 | layers { 282 | name: "drop8" 283 | type: DROPOUT 284 | bottom: "pool8_global" 285 | top: "pool8_global" 286 | dropout_param { 287 | dropout_ratio: 0.5 288 | } 289 | } 290 | layers { 291 | name: "fc9" 292 | type: INNER_PRODUCT 293 | bottom: "pool8_global" 294 | top: "fc9" 295 | blobs_lr: 1 296 | blobs_lr: 2 297 | weight_decay: 1 298 | weight_decay: 0 299 | inner_product_param { 300 | num_output: 1000 301 | weight_filler { 302 | type: "gaussian" 303 | std: 0.01 304 | } 305 | bias_filler { 306 | type: "constant" 307 | value: 0 308 | } 309 | } 310 | } 311 | layers { 312 | bottom: "fc9" 313 | top: "prob" 314 | 
name: "prob" 315 | type: SOFTMAX 316 | } 317 | 318 | -------------------------------------------------------------------------------- /models/deploy_alexnetplusCAM_places205.prototxt: -------------------------------------------------------------------------------- 1 | name: "placesCNNobjectdiscoveryAverageSumDeepNoDropout" 2 | input: "data" 3 | input_dim: 10 4 | input_dim: 3 5 | input_dim: 227 6 | input_dim: 227 7 | layers { 8 | name: "conv1" 9 | type: CONVOLUTION 10 | bottom: "data" 11 | top: "conv1" 12 | blobs_lr: 1 13 | blobs_lr: 2 14 | weight_decay: 1 15 | weight_decay: 0 16 | convolution_param { 17 | num_output: 96 18 | kernel_size: 11 19 | stride: 4 20 | weight_filler { 21 | type: "gaussian" 22 | std: 0.01 23 | } 24 | bias_filler { 25 | type: "constant" 26 | value: 0 27 | } 28 | } 29 | } 30 | layers { 31 | name: "relu1" 32 | type: RELU 33 | bottom: "conv1" 34 | top: "conv1" 35 | } 36 | layers { 37 | name: "pool1" 38 | type: POOLING 39 | bottom: "conv1" 40 | top: "pool1" 41 | pooling_param { 42 | pool: MAX 43 | kernel_size: 3 44 | stride: 2 45 | } 46 | } 47 | layers { 48 | name: "norm1" 49 | type: LRN 50 | bottom: "pool1" 51 | top: "norm1" 52 | lrn_param { 53 | local_size: 5 54 | alpha: 0.0001 55 | beta: 0.75 56 | } 57 | } 58 | layers { 59 | name: "conv2" 60 | type: CONVOLUTION 61 | bottom: "norm1" 62 | top: "conv2" 63 | blobs_lr: 1 64 | blobs_lr: 2 65 | weight_decay: 1 66 | weight_decay: 0 67 | convolution_param { 68 | num_output: 256 69 | pad: 2 70 | kernel_size: 5 71 | group: 2 72 | weight_filler { 73 | type: "gaussian" 74 | std: 0.01 75 | } 76 | bias_filler { 77 | type: "constant" 78 | value: 1 79 | } 80 | } 81 | } 82 | layers { 83 | name: "relu2" 84 | type: RELU 85 | bottom: "conv2" 86 | top: "conv2" 87 | } 88 | layers { 89 | name: "pool2" 90 | type: POOLING 91 | bottom: "conv2" 92 | top: "pool2" 93 | pooling_param { 94 | pool: MAX 95 | kernel_size: 3 96 | stride: 2 97 | } 98 | } 99 | layers { 100 | name: "norm2" 101 | type: LRN 102 | bottom: "pool2" 103 | top: "norm2" 104 | lrn_param { 105 | local_size: 5 106 | alpha: 0.0001 107 | beta: 0.75 108 | } 109 | } 110 | layers { 111 | name: "conv3" 112 | type: CONVOLUTION 113 | bottom: "norm2" 114 | top: "conv3" 115 | blobs_lr: 1 116 | blobs_lr: 2 117 | weight_decay: 1 118 | weight_decay: 0 119 | convolution_param { 120 | num_output: 384 121 | pad: 1 122 | kernel_size: 3 123 | weight_filler { 124 | type: "gaussian" 125 | std: 0.01 126 | } 127 | bias_filler { 128 | type: "constant" 129 | value: 0 130 | } 131 | } 132 | } 133 | layers { 134 | name: "relu3" 135 | type: RELU 136 | bottom: "conv3" 137 | top: "conv3" 138 | } 139 | layers { 140 | name: "conv4" 141 | type: CONVOLUTION 142 | bottom: "conv3" 143 | top: "conv4" 144 | blobs_lr: 1 145 | blobs_lr: 2 146 | weight_decay: 1 147 | weight_decay: 0 148 | convolution_param { 149 | num_output: 384 150 | pad: 1 151 | kernel_size: 3 152 | group: 2 153 | weight_filler { 154 | type: "gaussian" 155 | std: 0.01 156 | } 157 | bias_filler { 158 | type: "constant" 159 | value: 1 160 | } 161 | } 162 | } 163 | layers { 164 | name: "relu4" 165 | type: RELU 166 | bottom: "conv4" 167 | top: "conv4" 168 | } 169 | layers { 170 | name: "conv5" 171 | type: CONVOLUTION 172 | bottom: "conv4" 173 | top: "conv5" 174 | blobs_lr: 1 175 | blobs_lr: 2 176 | weight_decay: 1 177 | weight_decay: 0 178 | convolution_param { 179 | num_output: 384 180 | pad: 1 181 | kernel_size: 3 182 | group: 2 183 | weight_filler { 184 | type: "gaussian" 185 | std: 0.01 186 | } 187 | bias_filler { 188 | type: "constant" 189 | value: 1 
190 | } 191 | } 192 | } 193 | layers { 194 | name: "relu5" 195 | type: RELU 196 | bottom: "conv5" 197 | top: "conv5" 198 | } 199 | layers { 200 | name: "pool5" 201 | type: POOLING 202 | bottom: "conv5" 203 | top: "pool5" 204 | pooling_param { 205 | pool: MAX 206 | kernel_size: 3 207 | stride: 1 208 | } 209 | } 210 | layers { 211 | name: "conv6" 212 | type: CONVOLUTION 213 | bottom: "pool5" 214 | top: "conv6" 215 | blobs_lr: 1 216 | blobs_lr: 2 217 | weight_decay: 1 218 | weight_decay: 0 219 | convolution_param { 220 | num_output: 512 221 | pad: 1 222 | kernel_size: 3 223 | group: 2 224 | weight_filler { 225 | type: "gaussian" 226 | std: 0.01 227 | } 228 | bias_filler { 229 | type: "constant" 230 | value: 1 231 | } 232 | } 233 | } 234 | layers { 235 | name: "relu6" 236 | type: RELU 237 | bottom: "conv6" 238 | top: "conv6" 239 | } 240 | layers { 241 | name: "conv7" 242 | type: CONVOLUTION 243 | bottom: "conv6" 244 | top: "conv7" 245 | blobs_lr: 1 246 | blobs_lr: 2 247 | weight_decay: 1 248 | weight_decay: 0 249 | convolution_param { 250 | num_output: 512 251 | pad: 1 252 | kernel_size: 3 253 | group: 2 254 | weight_filler { 255 | type: "gaussian" 256 | std: 0.01 257 | } 258 | bias_filler { 259 | type: "constant" 260 | value: 1 261 | } 262 | } 263 | } 264 | layers { 265 | name: "relu7" 266 | type: RELU 267 | bottom: "conv7" 268 | top: "conv7" 269 | } 270 | layers { 271 | name: "pool8_global" 272 | type: POOLING 273 | bottom: "conv7" 274 | top: "pool8_global" 275 | pooling_param { 276 | pool: AVE 277 | kernel_size: 11 278 | stride: 11 279 | } 280 | } 281 | layers { 282 | name: "fc9" 283 | type: INNER_PRODUCT 284 | bottom: "pool8_global" 285 | top: "fc9" 286 | blobs_lr: 1 287 | blobs_lr: 2 288 | weight_decay: 1 289 | weight_decay: 0 290 | inner_product_param { 291 | num_output: 205 292 | weight_filler { 293 | type: "gaussian" 294 | std: 0.01 295 | } 296 | bias_filler { 297 | type: "constant" 298 | value: 0 299 | } 300 | } 301 | } 302 | layers { 303 | name: "prob" 304 | type: SOFTMAX 305 | bottom: "fc9" 306 | top: "prob" 307 | } 308 | 309 | -------------------------------------------------------------------------------- /models/deploy_googlenetCAM.prototxt: -------------------------------------------------------------------------------- 1 | name: "GoogleNet" 2 | input: "data" 3 | input_dim: 10 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | force_backward: true 8 | layer { 9 | name: "conv1/7x7_s2" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1/7x7_s2" 13 | param { 14 | lr_mult: 1 15 | decay_mult: 1 16 | } 17 | param { 18 | lr_mult: 2 19 | decay_mult: 0 20 | } 21 | convolution_param { 22 | num_output: 64 23 | pad: 3 24 | kernel_size: 7 25 | stride: 2 26 | weight_filler { 27 | type: "xavier" 28 | std: 0.1 29 | } 30 | bias_filler { 31 | type: "constant" 32 | value: 0.2 33 | } 34 | } 35 | } 36 | layer { 37 | name: "conv1/relu_7x7" 38 | type: "ReLU" 39 | bottom: "conv1/7x7_s2" 40 | top: "conv1/7x7_s2" 41 | } 42 | layer { 43 | name: "pool1/3x3_s2" 44 | type: "Pooling" 45 | bottom: "conv1/7x7_s2" 46 | top: "pool1/3x3_s2" 47 | pooling_param { 48 | pool: MAX 49 | kernel_size: 3 50 | stride: 2 51 | } 52 | } 53 | layer { 54 | name: "pool1/norm1" 55 | type: "LRN" 56 | bottom: "pool1/3x3_s2" 57 | top: "pool1/norm1" 58 | lrn_param { 59 | local_size: 5 60 | alpha: 0.0001 61 | beta: 0.75 62 | } 63 | } 64 | layer { 65 | name: "conv2/3x3_reduce" 66 | type: "Convolution" 67 | bottom: "pool1/norm1" 68 | top: "conv2/3x3_reduce" 69 | param { 70 | lr_mult: 1 71 | decay_mult: 1 72 | } 73 | 
param { 74 | lr_mult: 2 75 | decay_mult: 0 76 | } 77 | convolution_param { 78 | num_output: 64 79 | kernel_size: 1 80 | weight_filler { 81 | type: "xavier" 82 | std: 0.1 83 | } 84 | bias_filler { 85 | type: "constant" 86 | value: 0.2 87 | } 88 | } 89 | } 90 | layer { 91 | name: "conv2/relu_3x3_reduce" 92 | type: "ReLU" 93 | bottom: "conv2/3x3_reduce" 94 | top: "conv2/3x3_reduce" 95 | } 96 | layer { 97 | name: "conv2/3x3" 98 | type: "Convolution" 99 | bottom: "conv2/3x3_reduce" 100 | top: "conv2/3x3" 101 | param { 102 | lr_mult: 1 103 | decay_mult: 1 104 | } 105 | param { 106 | lr_mult: 2 107 | decay_mult: 0 108 | } 109 | convolution_param { 110 | num_output: 192 111 | pad: 1 112 | kernel_size: 3 113 | weight_filler { 114 | type: "xavier" 115 | std: 0.03 116 | } 117 | bias_filler { 118 | type: "constant" 119 | value: 0.2 120 | } 121 | } 122 | } 123 | layer { 124 | name: "conv2/relu_3x3" 125 | type: "ReLU" 126 | bottom: "conv2/3x3" 127 | top: "conv2/3x3" 128 | } 129 | layer { 130 | name: "conv2/norm2" 131 | type: "LRN" 132 | bottom: "conv2/3x3" 133 | top: "conv2/norm2" 134 | lrn_param { 135 | local_size: 5 136 | alpha: 0.0001 137 | beta: 0.75 138 | } 139 | } 140 | layer { 141 | name: "pool2/3x3_s2" 142 | type: "Pooling" 143 | bottom: "conv2/norm2" 144 | top: "pool2/3x3_s2" 145 | pooling_param { 146 | pool: MAX 147 | kernel_size: 3 148 | stride: 2 149 | } 150 | } 151 | layer { 152 | name: "inception_3a/1x1" 153 | type: "Convolution" 154 | bottom: "pool2/3x3_s2" 155 | top: "inception_3a/1x1" 156 | param { 157 | lr_mult: 1 158 | decay_mult: 1 159 | } 160 | param { 161 | lr_mult: 2 162 | decay_mult: 0 163 | } 164 | convolution_param { 165 | num_output: 64 166 | kernel_size: 1 167 | weight_filler { 168 | type: "xavier" 169 | std: 0.03 170 | } 171 | bias_filler { 172 | type: "constant" 173 | value: 0.2 174 | } 175 | } 176 | } 177 | layer { 178 | name: "inception_3a/relu_1x1" 179 | type: "ReLU" 180 | bottom: "inception_3a/1x1" 181 | top: "inception_3a/1x1" 182 | } 183 | layer { 184 | name: "inception_3a/3x3_reduce" 185 | type: "Convolution" 186 | bottom: "pool2/3x3_s2" 187 | top: "inception_3a/3x3_reduce" 188 | param { 189 | lr_mult: 1 190 | decay_mult: 1 191 | } 192 | param { 193 | lr_mult: 2 194 | decay_mult: 0 195 | } 196 | convolution_param { 197 | num_output: 96 198 | kernel_size: 1 199 | weight_filler { 200 | type: "xavier" 201 | std: 0.09 202 | } 203 | bias_filler { 204 | type: "constant" 205 | value: 0.2 206 | } 207 | } 208 | } 209 | layer { 210 | name: "inception_3a/relu_3x3_reduce" 211 | type: "ReLU" 212 | bottom: "inception_3a/3x3_reduce" 213 | top: "inception_3a/3x3_reduce" 214 | } 215 | layer { 216 | name: "inception_3a/3x3" 217 | type: "Convolution" 218 | bottom: "inception_3a/3x3_reduce" 219 | top: "inception_3a/3x3" 220 | param { 221 | lr_mult: 1 222 | decay_mult: 1 223 | } 224 | param { 225 | lr_mult: 2 226 | decay_mult: 0 227 | } 228 | convolution_param { 229 | num_output: 128 230 | pad: 1 231 | kernel_size: 3 232 | weight_filler { 233 | type: "xavier" 234 | std: 0.03 235 | } 236 | bias_filler { 237 | type: "constant" 238 | value: 0.2 239 | } 240 | } 241 | } 242 | layer { 243 | name: "inception_3a/relu_3x3" 244 | type: "ReLU" 245 | bottom: "inception_3a/3x3" 246 | top: "inception_3a/3x3" 247 | } 248 | layer { 249 | name: "inception_3a/5x5_reduce" 250 | type: "Convolution" 251 | bottom: "pool2/3x3_s2" 252 | top: "inception_3a/5x5_reduce" 253 | param { 254 | lr_mult: 1 255 | decay_mult: 1 256 | } 257 | param { 258 | lr_mult: 2 259 | decay_mult: 0 260 | } 261 | convolution_param { 
262 | num_output: 16 263 | kernel_size: 1 264 | weight_filler { 265 | type: "xavier" 266 | std: 0.2 267 | } 268 | bias_filler { 269 | type: "constant" 270 | value: 0.2 271 | } 272 | } 273 | } 274 | layer { 275 | name: "inception_3a/relu_5x5_reduce" 276 | type: "ReLU" 277 | bottom: "inception_3a/5x5_reduce" 278 | top: "inception_3a/5x5_reduce" 279 | } 280 | layer { 281 | name: "inception_3a/5x5" 282 | type: "Convolution" 283 | bottom: "inception_3a/5x5_reduce" 284 | top: "inception_3a/5x5" 285 | param { 286 | lr_mult: 1 287 | decay_mult: 1 288 | } 289 | param { 290 | lr_mult: 2 291 | decay_mult: 0 292 | } 293 | convolution_param { 294 | num_output: 32 295 | pad: 2 296 | kernel_size: 5 297 | weight_filler { 298 | type: "xavier" 299 | std: 0.03 300 | } 301 | bias_filler { 302 | type: "constant" 303 | value: 0.2 304 | } 305 | } 306 | } 307 | layer { 308 | name: "inception_3a/relu_5x5" 309 | type: "ReLU" 310 | bottom: "inception_3a/5x5" 311 | top: "inception_3a/5x5" 312 | } 313 | layer { 314 | name: "inception_3a/pool" 315 | type: "Pooling" 316 | bottom: "pool2/3x3_s2" 317 | top: "inception_3a/pool" 318 | pooling_param { 319 | pool: MAX 320 | kernel_size: 3 321 | stride: 1 322 | pad: 1 323 | } 324 | } 325 | layer { 326 | name: "inception_3a/pool_proj" 327 | type: "Convolution" 328 | bottom: "inception_3a/pool" 329 | top: "inception_3a/pool_proj" 330 | param { 331 | lr_mult: 1 332 | decay_mult: 1 333 | } 334 | param { 335 | lr_mult: 2 336 | decay_mult: 0 337 | } 338 | convolution_param { 339 | num_output: 32 340 | kernel_size: 1 341 | weight_filler { 342 | type: "xavier" 343 | std: 0.1 344 | } 345 | bias_filler { 346 | type: "constant" 347 | value: 0.2 348 | } 349 | } 350 | } 351 | layer { 352 | name: "inception_3a/relu_pool_proj" 353 | type: "ReLU" 354 | bottom: "inception_3a/pool_proj" 355 | top: "inception_3a/pool_proj" 356 | } 357 | layer { 358 | name: "inception_3a/output" 359 | type: "Concat" 360 | bottom: "inception_3a/1x1" 361 | bottom: "inception_3a/3x3" 362 | bottom: "inception_3a/5x5" 363 | bottom: "inception_3a/pool_proj" 364 | top: "inception_3a/output" 365 | } 366 | layer { 367 | name: "inception_3b/1x1" 368 | type: "Convolution" 369 | bottom: "inception_3a/output" 370 | top: "inception_3b/1x1" 371 | param { 372 | lr_mult: 1 373 | decay_mult: 1 374 | } 375 | param { 376 | lr_mult: 2 377 | decay_mult: 0 378 | } 379 | convolution_param { 380 | num_output: 128 381 | kernel_size: 1 382 | weight_filler { 383 | type: "xavier" 384 | std: 0.03 385 | } 386 | bias_filler { 387 | type: "constant" 388 | value: 0.2 389 | } 390 | } 391 | } 392 | layer { 393 | name: "inception_3b/relu_1x1" 394 | type: "ReLU" 395 | bottom: "inception_3b/1x1" 396 | top: "inception_3b/1x1" 397 | } 398 | layer { 399 | name: "inception_3b/3x3_reduce" 400 | type: "Convolution" 401 | bottom: "inception_3a/output" 402 | top: "inception_3b/3x3_reduce" 403 | param { 404 | lr_mult: 1 405 | decay_mult: 1 406 | } 407 | param { 408 | lr_mult: 2 409 | decay_mult: 0 410 | } 411 | convolution_param { 412 | num_output: 128 413 | kernel_size: 1 414 | weight_filler { 415 | type: "xavier" 416 | std: 0.09 417 | } 418 | bias_filler { 419 | type: "constant" 420 | value: 0.2 421 | } 422 | } 423 | } 424 | layer { 425 | name: "inception_3b/relu_3x3_reduce" 426 | type: "ReLU" 427 | bottom: "inception_3b/3x3_reduce" 428 | top: "inception_3b/3x3_reduce" 429 | } 430 | layer { 431 | name: "inception_3b/3x3" 432 | type: "Convolution" 433 | bottom: "inception_3b/3x3_reduce" 434 | top: "inception_3b/3x3" 435 | param { 436 | lr_mult: 1 437 | 
decay_mult: 1 438 | } 439 | param { 440 | lr_mult: 2 441 | decay_mult: 0 442 | } 443 | convolution_param { 444 | num_output: 192 445 | pad: 1 446 | kernel_size: 3 447 | weight_filler { 448 | type: "xavier" 449 | std: 0.03 450 | } 451 | bias_filler { 452 | type: "constant" 453 | value: 0.2 454 | } 455 | } 456 | } 457 | layer { 458 | name: "inception_3b/relu_3x3" 459 | type: "ReLU" 460 | bottom: "inception_3b/3x3" 461 | top: "inception_3b/3x3" 462 | } 463 | layer { 464 | name: "inception_3b/5x5_reduce" 465 | type: "Convolution" 466 | bottom: "inception_3a/output" 467 | top: "inception_3b/5x5_reduce" 468 | param { 469 | lr_mult: 1 470 | decay_mult: 1 471 | } 472 | param { 473 | lr_mult: 2 474 | decay_mult: 0 475 | } 476 | convolution_param { 477 | num_output: 32 478 | kernel_size: 1 479 | weight_filler { 480 | type: "xavier" 481 | std: 0.2 482 | } 483 | bias_filler { 484 | type: "constant" 485 | value: 0.2 486 | } 487 | } 488 | } 489 | layer { 490 | name: "inception_3b/relu_5x5_reduce" 491 | type: "ReLU" 492 | bottom: "inception_3b/5x5_reduce" 493 | top: "inception_3b/5x5_reduce" 494 | } 495 | layer { 496 | name: "inception_3b/5x5" 497 | type: "Convolution" 498 | bottom: "inception_3b/5x5_reduce" 499 | top: "inception_3b/5x5" 500 | param { 501 | lr_mult: 1 502 | decay_mult: 1 503 | } 504 | param { 505 | lr_mult: 2 506 | decay_mult: 0 507 | } 508 | convolution_param { 509 | num_output: 96 510 | pad: 2 511 | kernel_size: 5 512 | weight_filler { 513 | type: "xavier" 514 | std: 0.03 515 | } 516 | bias_filler { 517 | type: "constant" 518 | value: 0.2 519 | } 520 | } 521 | } 522 | layer { 523 | name: "inception_3b/relu_5x5" 524 | type: "ReLU" 525 | bottom: "inception_3b/5x5" 526 | top: "inception_3b/5x5" 527 | } 528 | layer { 529 | name: "inception_3b/pool" 530 | type: "Pooling" 531 | bottom: "inception_3a/output" 532 | top: "inception_3b/pool" 533 | pooling_param { 534 | pool: MAX 535 | kernel_size: 3 536 | stride: 1 537 | pad: 1 538 | } 539 | } 540 | layer { 541 | name: "inception_3b/pool_proj" 542 | type: "Convolution" 543 | bottom: "inception_3b/pool" 544 | top: "inception_3b/pool_proj" 545 | param { 546 | lr_mult: 1 547 | decay_mult: 1 548 | } 549 | param { 550 | lr_mult: 2 551 | decay_mult: 0 552 | } 553 | convolution_param { 554 | num_output: 64 555 | kernel_size: 1 556 | weight_filler { 557 | type: "xavier" 558 | std: 0.1 559 | } 560 | bias_filler { 561 | type: "constant" 562 | value: 0.2 563 | } 564 | } 565 | } 566 | layer { 567 | name: "inception_3b/relu_pool_proj" 568 | type: "ReLU" 569 | bottom: "inception_3b/pool_proj" 570 | top: "inception_3b/pool_proj" 571 | } 572 | layer { 573 | name: "inception_3b/output" 574 | type: "Concat" 575 | bottom: "inception_3b/1x1" 576 | bottom: "inception_3b/3x3" 577 | bottom: "inception_3b/5x5" 578 | bottom: "inception_3b/pool_proj" 579 | top: "inception_3b/output" 580 | } 581 | layer { 582 | name: "pool3/3x3_s2" 583 | type: "Pooling" 584 | bottom: "inception_3b/output" 585 | top: "pool3/3x3_s2" 586 | pooling_param { 587 | pool: MAX 588 | kernel_size: 3 589 | stride: 2 590 | } 591 | } 592 | layer { 593 | name: "inception_4a/1x1" 594 | type: "Convolution" 595 | bottom: "pool3/3x3_s2" 596 | top: "inception_4a/1x1" 597 | param { 598 | lr_mult: 1 599 | decay_mult: 1 600 | } 601 | param { 602 | lr_mult: 2 603 | decay_mult: 0 604 | } 605 | convolution_param { 606 | num_output: 192 607 | kernel_size: 1 608 | weight_filler { 609 | type: "xavier" 610 | std: 0.03 611 | } 612 | bias_filler { 613 | type: "constant" 614 | value: 0.2 615 | } 616 | } 617 | } 618 | 
layer { 619 | name: "inception_4a/relu_1x1" 620 | type: "ReLU" 621 | bottom: "inception_4a/1x1" 622 | top: "inception_4a/1x1" 623 | } 624 | layer { 625 | name: "inception_4a/3x3_reduce" 626 | type: "Convolution" 627 | bottom: "pool3/3x3_s2" 628 | top: "inception_4a/3x3_reduce" 629 | param { 630 | lr_mult: 1 631 | decay_mult: 1 632 | } 633 | param { 634 | lr_mult: 2 635 | decay_mult: 0 636 | } 637 | convolution_param { 638 | num_output: 96 639 | kernel_size: 1 640 | weight_filler { 641 | type: "xavier" 642 | std: 0.09 643 | } 644 | bias_filler { 645 | type: "constant" 646 | value: 0.2 647 | } 648 | } 649 | } 650 | layer { 651 | name: "inception_4a/relu_3x3_reduce" 652 | type: "ReLU" 653 | bottom: "inception_4a/3x3_reduce" 654 | top: "inception_4a/3x3_reduce" 655 | } 656 | layer { 657 | name: "inception_4a/3x3" 658 | type: "Convolution" 659 | bottom: "inception_4a/3x3_reduce" 660 | top: "inception_4a/3x3" 661 | param { 662 | lr_mult: 1 663 | decay_mult: 1 664 | } 665 | param { 666 | lr_mult: 2 667 | decay_mult: 0 668 | } 669 | convolution_param { 670 | num_output: 208 671 | pad: 1 672 | kernel_size: 3 673 | weight_filler { 674 | type: "xavier" 675 | std: 0.03 676 | } 677 | bias_filler { 678 | type: "constant" 679 | value: 0.2 680 | } 681 | } 682 | } 683 | layer { 684 | name: "inception_4a/relu_3x3" 685 | type: "ReLU" 686 | bottom: "inception_4a/3x3" 687 | top: "inception_4a/3x3" 688 | } 689 | layer { 690 | name: "inception_4a/5x5_reduce" 691 | type: "Convolution" 692 | bottom: "pool3/3x3_s2" 693 | top: "inception_4a/5x5_reduce" 694 | param { 695 | lr_mult: 1 696 | decay_mult: 1 697 | } 698 | param { 699 | lr_mult: 2 700 | decay_mult: 0 701 | } 702 | convolution_param { 703 | num_output: 16 704 | kernel_size: 1 705 | weight_filler { 706 | type: "xavier" 707 | std: 0.2 708 | } 709 | bias_filler { 710 | type: "constant" 711 | value: 0.2 712 | } 713 | } 714 | } 715 | layer { 716 | name: "inception_4a/relu_5x5_reduce" 717 | type: "ReLU" 718 | bottom: "inception_4a/5x5_reduce" 719 | top: "inception_4a/5x5_reduce" 720 | } 721 | layer { 722 | name: "inception_4a/5x5" 723 | type: "Convolution" 724 | bottom: "inception_4a/5x5_reduce" 725 | top: "inception_4a/5x5" 726 | param { 727 | lr_mult: 1 728 | decay_mult: 1 729 | } 730 | param { 731 | lr_mult: 2 732 | decay_mult: 0 733 | } 734 | convolution_param { 735 | num_output: 48 736 | pad: 2 737 | kernel_size: 5 738 | weight_filler { 739 | type: "xavier" 740 | std: 0.03 741 | } 742 | bias_filler { 743 | type: "constant" 744 | value: 0.2 745 | } 746 | } 747 | } 748 | layer { 749 | name: "inception_4a/relu_5x5" 750 | type: "ReLU" 751 | bottom: "inception_4a/5x5" 752 | top: "inception_4a/5x5" 753 | } 754 | layer { 755 | name: "inception_4a/pool" 756 | type: "Pooling" 757 | bottom: "pool3/3x3_s2" 758 | top: "inception_4a/pool" 759 | pooling_param { 760 | pool: MAX 761 | kernel_size: 3 762 | stride: 1 763 | pad: 1 764 | } 765 | } 766 | layer { 767 | name: "inception_4a/pool_proj" 768 | type: "Convolution" 769 | bottom: "inception_4a/pool" 770 | top: "inception_4a/pool_proj" 771 | param { 772 | lr_mult: 1 773 | decay_mult: 1 774 | } 775 | param { 776 | lr_mult: 2 777 | decay_mult: 0 778 | } 779 | convolution_param { 780 | num_output: 64 781 | kernel_size: 1 782 | weight_filler { 783 | type: "xavier" 784 | std: 0.1 785 | } 786 | bias_filler { 787 | type: "constant" 788 | value: 0.2 789 | } 790 | } 791 | } 792 | layer { 793 | name: "inception_4a/relu_pool_proj" 794 | type: "ReLU" 795 | bottom: "inception_4a/pool_proj" 796 | top: "inception_4a/pool_proj" 797 | } 
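# [Annotation; not part of the original prototxt.] The Concat layer below
# stitches the four inception_4a branches (1x1: 192, 3x3: 208, 5x5: 48,
# pool_proj: 64 channels) along the channel axis into a 512-channel output.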
798 | layer { 799 | name: "inception_4a/output" 800 | type: "Concat" 801 | bottom: "inception_4a/1x1" 802 | bottom: "inception_4a/3x3" 803 | bottom: "inception_4a/5x5" 804 | bottom: "inception_4a/pool_proj" 805 | top: "inception_4a/output" 806 | } 807 | layer { 808 | name: "inception_4b/1x1" 809 | type: "Convolution" 810 | bottom: "inception_4a/output" 811 | top: "inception_4b/1x1" 812 | param { 813 | lr_mult: 1 814 | decay_mult: 1 815 | } 816 | param { 817 | lr_mult: 2 818 | decay_mult: 0 819 | } 820 | convolution_param { 821 | num_output: 160 822 | kernel_size: 1 823 | weight_filler { 824 | type: "xavier" 825 | std: 0.03 826 | } 827 | bias_filler { 828 | type: "constant" 829 | value: 0.2 830 | } 831 | } 832 | } 833 | layer { 834 | name: "inception_4b/relu_1x1" 835 | type: "ReLU" 836 | bottom: "inception_4b/1x1" 837 | top: "inception_4b/1x1" 838 | } 839 | layer { 840 | name: "inception_4b/3x3_reduce" 841 | type: "Convolution" 842 | bottom: "inception_4a/output" 843 | top: "inception_4b/3x3_reduce" 844 | param { 845 | lr_mult: 1 846 | decay_mult: 1 847 | } 848 | param { 849 | lr_mult: 2 850 | decay_mult: 0 851 | } 852 | convolution_param { 853 | num_output: 112 854 | kernel_size: 1 855 | weight_filler { 856 | type: "xavier" 857 | std: 0.09 858 | } 859 | bias_filler { 860 | type: "constant" 861 | value: 0.2 862 | } 863 | } 864 | } 865 | layer { 866 | name: "inception_4b/relu_3x3_reduce" 867 | type: "ReLU" 868 | bottom: "inception_4b/3x3_reduce" 869 | top: "inception_4b/3x3_reduce" 870 | } 871 | layer { 872 | name: "inception_4b/3x3" 873 | type: "Convolution" 874 | bottom: "inception_4b/3x3_reduce" 875 | top: "inception_4b/3x3" 876 | param { 877 | lr_mult: 1 878 | decay_mult: 1 879 | } 880 | param { 881 | lr_mult: 2 882 | decay_mult: 0 883 | } 884 | convolution_param { 885 | num_output: 224 886 | pad: 1 887 | kernel_size: 3 888 | weight_filler { 889 | type: "xavier" 890 | std: 0.03 891 | } 892 | bias_filler { 893 | type: "constant" 894 | value: 0.2 895 | } 896 | } 897 | } 898 | layer { 899 | name: "inception_4b/relu_3x3" 900 | type: "ReLU" 901 | bottom: "inception_4b/3x3" 902 | top: "inception_4b/3x3" 903 | } 904 | layer { 905 | name: "inception_4b/5x5_reduce" 906 | type: "Convolution" 907 | bottom: "inception_4a/output" 908 | top: "inception_4b/5x5_reduce" 909 | param { 910 | lr_mult: 1 911 | decay_mult: 1 912 | } 913 | param { 914 | lr_mult: 2 915 | decay_mult: 0 916 | } 917 | convolution_param { 918 | num_output: 24 919 | kernel_size: 1 920 | weight_filler { 921 | type: "xavier" 922 | std: 0.2 923 | } 924 | bias_filler { 925 | type: "constant" 926 | value: 0.2 927 | } 928 | } 929 | } 930 | layer { 931 | name: "inception_4b/relu_5x5_reduce" 932 | type: "ReLU" 933 | bottom: "inception_4b/5x5_reduce" 934 | top: "inception_4b/5x5_reduce" 935 | } 936 | layer { 937 | name: "inception_4b/5x5" 938 | type: "Convolution" 939 | bottom: "inception_4b/5x5_reduce" 940 | top: "inception_4b/5x5" 941 | param { 942 | lr_mult: 1 943 | decay_mult: 1 944 | } 945 | param { 946 | lr_mult: 2 947 | decay_mult: 0 948 | } 949 | convolution_param { 950 | num_output: 64 951 | pad: 2 952 | kernel_size: 5 953 | weight_filler { 954 | type: "xavier" 955 | std: 0.03 956 | } 957 | bias_filler { 958 | type: "constant" 959 | value: 0.2 960 | } 961 | } 962 | } 963 | layer { 964 | name: "inception_4b/relu_5x5" 965 | type: "ReLU" 966 | bottom: "inception_4b/5x5" 967 | top: "inception_4b/5x5" 968 | } 969 | layer { 970 | name: "inception_4b/pool" 971 | type: "Pooling" 972 | bottom: "inception_4a/output" 973 | top: 
"inception_4b/pool" 974 | pooling_param { 975 | pool: MAX 976 | kernel_size: 3 977 | stride: 1 978 | pad: 1 979 | } 980 | } 981 | layer { 982 | name: "inception_4b/pool_proj" 983 | type: "Convolution" 984 | bottom: "inception_4b/pool" 985 | top: "inception_4b/pool_proj" 986 | param { 987 | lr_mult: 1 988 | decay_mult: 1 989 | } 990 | param { 991 | lr_mult: 2 992 | decay_mult: 0 993 | } 994 | convolution_param { 995 | num_output: 64 996 | kernel_size: 1 997 | weight_filler { 998 | type: "xavier" 999 | std: 0.1 1000 | } 1001 | bias_filler { 1002 | type: "constant" 1003 | value: 0.2 1004 | } 1005 | } 1006 | } 1007 | layer { 1008 | name: "inception_4b/relu_pool_proj" 1009 | type: "ReLU" 1010 | bottom: "inception_4b/pool_proj" 1011 | top: "inception_4b/pool_proj" 1012 | } 1013 | layer { 1014 | name: "inception_4b/output" 1015 | type: "Concat" 1016 | bottom: "inception_4b/1x1" 1017 | bottom: "inception_4b/3x3" 1018 | bottom: "inception_4b/5x5" 1019 | bottom: "inception_4b/pool_proj" 1020 | top: "inception_4b/output" 1021 | } 1022 | layer { 1023 | name: "inception_4c/1x1" 1024 | type: "Convolution" 1025 | bottom: "inception_4b/output" 1026 | top: "inception_4c/1x1" 1027 | param { 1028 | lr_mult: 1 1029 | decay_mult: 1 1030 | } 1031 | param { 1032 | lr_mult: 2 1033 | decay_mult: 0 1034 | } 1035 | convolution_param { 1036 | num_output: 128 1037 | kernel_size: 1 1038 | weight_filler { 1039 | type: "xavier" 1040 | std: 0.03 1041 | } 1042 | bias_filler { 1043 | type: "constant" 1044 | value: 0.2 1045 | } 1046 | } 1047 | } 1048 | layer { 1049 | name: "inception_4c/relu_1x1" 1050 | type: "ReLU" 1051 | bottom: "inception_4c/1x1" 1052 | top: "inception_4c/1x1" 1053 | } 1054 | layer { 1055 | name: "inception_4c/3x3_reduce" 1056 | type: "Convolution" 1057 | bottom: "inception_4b/output" 1058 | top: "inception_4c/3x3_reduce" 1059 | param { 1060 | lr_mult: 1 1061 | decay_mult: 1 1062 | } 1063 | param { 1064 | lr_mult: 2 1065 | decay_mult: 0 1066 | } 1067 | convolution_param { 1068 | num_output: 128 1069 | kernel_size: 1 1070 | weight_filler { 1071 | type: "xavier" 1072 | std: 0.09 1073 | } 1074 | bias_filler { 1075 | type: "constant" 1076 | value: 0.2 1077 | } 1078 | } 1079 | } 1080 | layer { 1081 | name: "inception_4c/relu_3x3_reduce" 1082 | type: "ReLU" 1083 | bottom: "inception_4c/3x3_reduce" 1084 | top: "inception_4c/3x3_reduce" 1085 | } 1086 | layer { 1087 | name: "inception_4c/3x3" 1088 | type: "Convolution" 1089 | bottom: "inception_4c/3x3_reduce" 1090 | top: "inception_4c/3x3" 1091 | param { 1092 | lr_mult: 1 1093 | decay_mult: 1 1094 | } 1095 | param { 1096 | lr_mult: 2 1097 | decay_mult: 0 1098 | } 1099 | convolution_param { 1100 | num_output: 256 1101 | pad: 1 1102 | kernel_size: 3 1103 | weight_filler { 1104 | type: "xavier" 1105 | std: 0.03 1106 | } 1107 | bias_filler { 1108 | type: "constant" 1109 | value: 0.2 1110 | } 1111 | } 1112 | } 1113 | layer { 1114 | name: "inception_4c/relu_3x3" 1115 | type: "ReLU" 1116 | bottom: "inception_4c/3x3" 1117 | top: "inception_4c/3x3" 1118 | } 1119 | layer { 1120 | name: "inception_4c/5x5_reduce" 1121 | type: "Convolution" 1122 | bottom: "inception_4b/output" 1123 | top: "inception_4c/5x5_reduce" 1124 | param { 1125 | lr_mult: 1 1126 | decay_mult: 1 1127 | } 1128 | param { 1129 | lr_mult: 2 1130 | decay_mult: 0 1131 | } 1132 | convolution_param { 1133 | num_output: 24 1134 | kernel_size: 1 1135 | weight_filler { 1136 | type: "xavier" 1137 | std: 0.2 1138 | } 1139 | bias_filler { 1140 | type: "constant" 1141 | value: 0.2 1142 | } 1143 | } 1144 | } 1145 | layer 
{ 1146 | name: "inception_4c/relu_5x5_reduce" 1147 | type: "ReLU" 1148 | bottom: "inception_4c/5x5_reduce" 1149 | top: "inception_4c/5x5_reduce" 1150 | } 1151 | layer { 1152 | name: "inception_4c/5x5" 1153 | type: "Convolution" 1154 | bottom: "inception_4c/5x5_reduce" 1155 | top: "inception_4c/5x5" 1156 | param { 1157 | lr_mult: 1 1158 | decay_mult: 1 1159 | } 1160 | param { 1161 | lr_mult: 2 1162 | decay_mult: 0 1163 | } 1164 | convolution_param { 1165 | num_output: 64 1166 | pad: 2 1167 | kernel_size: 5 1168 | weight_filler { 1169 | type: "xavier" 1170 | std: 0.03 1171 | } 1172 | bias_filler { 1173 | type: "constant" 1174 | value: 0.2 1175 | } 1176 | } 1177 | } 1178 | layer { 1179 | name: "inception_4c/relu_5x5" 1180 | type: "ReLU" 1181 | bottom: "inception_4c/5x5" 1182 | top: "inception_4c/5x5" 1183 | } 1184 | layer { 1185 | name: "inception_4c/pool" 1186 | type: "Pooling" 1187 | bottom: "inception_4b/output" 1188 | top: "inception_4c/pool" 1189 | pooling_param { 1190 | pool: MAX 1191 | kernel_size: 3 1192 | stride: 1 1193 | pad: 1 1194 | } 1195 | } 1196 | layer { 1197 | name: "inception_4c/pool_proj" 1198 | type: "Convolution" 1199 | bottom: "inception_4c/pool" 1200 | top: "inception_4c/pool_proj" 1201 | param { 1202 | lr_mult: 1 1203 | decay_mult: 1 1204 | } 1205 | param { 1206 | lr_mult: 2 1207 | decay_mult: 0 1208 | } 1209 | convolution_param { 1210 | num_output: 64 1211 | kernel_size: 1 1212 | weight_filler { 1213 | type: "xavier" 1214 | std: 0.1 1215 | } 1216 | bias_filler { 1217 | type: "constant" 1218 | value: 0.2 1219 | } 1220 | } 1221 | } 1222 | layer { 1223 | name: "inception_4c/relu_pool_proj" 1224 | type: "ReLU" 1225 | bottom: "inception_4c/pool_proj" 1226 | top: "inception_4c/pool_proj" 1227 | } 1228 | layer { 1229 | name: "inception_4c/output" 1230 | type: "Concat" 1231 | bottom: "inception_4c/1x1" 1232 | bottom: "inception_4c/3x3" 1233 | bottom: "inception_4c/5x5" 1234 | bottom: "inception_4c/pool_proj" 1235 | top: "inception_4c/output" 1236 | } 1237 | layer { 1238 | name: "inception_4d/1x1" 1239 | type: "Convolution" 1240 | bottom: "inception_4c/output" 1241 | top: "inception_4d/1x1" 1242 | param { 1243 | lr_mult: 1 1244 | decay_mult: 1 1245 | } 1246 | param { 1247 | lr_mult: 2 1248 | decay_mult: 0 1249 | } 1250 | convolution_param { 1251 | num_output: 112 1252 | kernel_size: 1 1253 | weight_filler { 1254 | type: "xavier" 1255 | std: 0.03 1256 | } 1257 | bias_filler { 1258 | type: "constant" 1259 | value: 0.2 1260 | } 1261 | } 1262 | } 1263 | layer { 1264 | name: "inception_4d/relu_1x1" 1265 | type: "ReLU" 1266 | bottom: "inception_4d/1x1" 1267 | top: "inception_4d/1x1" 1268 | } 1269 | layer { 1270 | name: "inception_4d/3x3_reduce" 1271 | type: "Convolution" 1272 | bottom: "inception_4c/output" 1273 | top: "inception_4d/3x3_reduce" 1274 | param { 1275 | lr_mult: 1 1276 | decay_mult: 1 1277 | } 1278 | param { 1279 | lr_mult: 2 1280 | decay_mult: 0 1281 | } 1282 | convolution_param { 1283 | num_output: 144 1284 | kernel_size: 1 1285 | weight_filler { 1286 | type: "xavier" 1287 | std: 0.09 1288 | } 1289 | bias_filler { 1290 | type: "constant" 1291 | value: 0.2 1292 | } 1293 | } 1294 | } 1295 | layer { 1296 | name: "inception_4d/relu_3x3_reduce" 1297 | type: "ReLU" 1298 | bottom: "inception_4d/3x3_reduce" 1299 | top: "inception_4d/3x3_reduce" 1300 | } 1301 | layer { 1302 | name: "inception_4d/3x3" 1303 | type: "Convolution" 1304 | bottom: "inception_4d/3x3_reduce" 1305 | top: "inception_4d/3x3" 1306 | param { 1307 | lr_mult: 1 1308 | decay_mult: 1 1309 | } 1310 | param { 
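# [Annotation; not part of the original prototxt.] By Caffe convention the
# first param block above configures the weights (lr_mult: 1, decay_mult: 1)
# and this second block the bias (lr_mult: 2, decay_mult: 0), i.e. biases
# learn at twice the base rate and are exempt from weight decay.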
1311 | lr_mult: 2 1312 | decay_mult: 0 1313 | } 1314 | convolution_param { 1315 | num_output: 288 1316 | pad: 1 1317 | kernel_size: 3 1318 | weight_filler { 1319 | type: "xavier" 1320 | std: 0.03 1321 | } 1322 | bias_filler { 1323 | type: "constant" 1324 | value: 0.2 1325 | } 1326 | } 1327 | } 1328 | layer { 1329 | name: "inception_4d/relu_3x3" 1330 | type: "ReLU" 1331 | bottom: "inception_4d/3x3" 1332 | top: "inception_4d/3x3" 1333 | } 1334 | layer { 1335 | name: "inception_4d/5x5_reduce" 1336 | type: "Convolution" 1337 | bottom: "inception_4c/output" 1338 | top: "inception_4d/5x5_reduce" 1339 | param { 1340 | lr_mult: 1 1341 | decay_mult: 1 1342 | } 1343 | param { 1344 | lr_mult: 2 1345 | decay_mult: 0 1346 | } 1347 | convolution_param { 1348 | num_output: 32 1349 | kernel_size: 1 1350 | weight_filler { 1351 | type: "xavier" 1352 | std: 0.2 1353 | } 1354 | bias_filler { 1355 | type: "constant" 1356 | value: 0.2 1357 | } 1358 | } 1359 | } 1360 | layer { 1361 | name: "inception_4d/relu_5x5_reduce" 1362 | type: "ReLU" 1363 | bottom: "inception_4d/5x5_reduce" 1364 | top: "inception_4d/5x5_reduce" 1365 | } 1366 | layer { 1367 | name: "inception_4d/5x5" 1368 | type: "Convolution" 1369 | bottom: "inception_4d/5x5_reduce" 1370 | top: "inception_4d/5x5" 1371 | param { 1372 | lr_mult: 1 1373 | decay_mult: 1 1374 | } 1375 | param { 1376 | lr_mult: 2 1377 | decay_mult: 0 1378 | } 1379 | convolution_param { 1380 | num_output: 64 1381 | pad: 2 1382 | kernel_size: 5 1383 | weight_filler { 1384 | type: "xavier" 1385 | std: 0.03 1386 | } 1387 | bias_filler { 1388 | type: "constant" 1389 | value: 0.2 1390 | } 1391 | } 1392 | } 1393 | layer { 1394 | name: "inception_4d/relu_5x5" 1395 | type: "ReLU" 1396 | bottom: "inception_4d/5x5" 1397 | top: "inception_4d/5x5" 1398 | } 1399 | layer { 1400 | name: "inception_4d/pool" 1401 | type: "Pooling" 1402 | bottom: "inception_4c/output" 1403 | top: "inception_4d/pool" 1404 | pooling_param { 1405 | pool: MAX 1406 | kernel_size: 3 1407 | stride: 1 1408 | pad: 1 1409 | } 1410 | } 1411 | layer { 1412 | name: "inception_4d/pool_proj" 1413 | type: "Convolution" 1414 | bottom: "inception_4d/pool" 1415 | top: "inception_4d/pool_proj" 1416 | param { 1417 | lr_mult: 1 1418 | decay_mult: 1 1419 | } 1420 | param { 1421 | lr_mult: 2 1422 | decay_mult: 0 1423 | } 1424 | convolution_param { 1425 | num_output: 64 1426 | kernel_size: 1 1427 | weight_filler { 1428 | type: "xavier" 1429 | std: 0.1 1430 | } 1431 | bias_filler { 1432 | type: "constant" 1433 | value: 0.2 1434 | } 1435 | } 1436 | } 1437 | layer { 1438 | name: "inception_4d/relu_pool_proj" 1439 | type: "ReLU" 1440 | bottom: "inception_4d/pool_proj" 1441 | top: "inception_4d/pool_proj" 1442 | } 1443 | layer { 1444 | name: "inception_4d/output" 1445 | type: "Concat" 1446 | bottom: "inception_4d/1x1" 1447 | bottom: "inception_4d/3x3" 1448 | bottom: "inception_4d/5x5" 1449 | bottom: "inception_4d/pool_proj" 1450 | top: "inception_4d/output" 1451 | } 1452 | layer { 1453 | name: "inception_4e/1x1" 1454 | type: "Convolution" 1455 | bottom: "inception_4d/output" 1456 | top: "inception_4e/1x1" 1457 | param { 1458 | lr_mult: 1 1459 | decay_mult: 1 1460 | } 1461 | param { 1462 | lr_mult: 2 1463 | decay_mult: 0 1464 | } 1465 | convolution_param { 1466 | num_output: 256 1467 | kernel_size: 1 1468 | weight_filler { 1469 | type: "xavier" 1470 | std: 0.03 1471 | } 1472 | bias_filler { 1473 | type: "constant" 1474 | value: 0.2 1475 | } 1476 | } 1477 | } 1478 | layer { 1479 | name: "inception_4e/relu_1x1" 1480 | type: "ReLU" 1481 | bottom: 
"inception_4e/1x1" 1482 | top: "inception_4e/1x1" 1483 | } 1484 | layer { 1485 | name: "inception_4e/3x3_reduce" 1486 | type: "Convolution" 1487 | bottom: "inception_4d/output" 1488 | top: "inception_4e/3x3_reduce" 1489 | param { 1490 | lr_mult: 1 1491 | decay_mult: 1 1492 | } 1493 | param { 1494 | lr_mult: 2 1495 | decay_mult: 0 1496 | } 1497 | convolution_param { 1498 | num_output: 160 1499 | kernel_size: 1 1500 | weight_filler { 1501 | type: "xavier" 1502 | std: 0.09 1503 | } 1504 | bias_filler { 1505 | type: "constant" 1506 | value: 0.2 1507 | } 1508 | } 1509 | } 1510 | layer { 1511 | name: "inception_4e/relu_3x3_reduce" 1512 | type: "ReLU" 1513 | bottom: "inception_4e/3x3_reduce" 1514 | top: "inception_4e/3x3_reduce" 1515 | } 1516 | layer { 1517 | name: "inception_4e/3x3" 1518 | type: "Convolution" 1519 | bottom: "inception_4e/3x3_reduce" 1520 | top: "inception_4e/3x3" 1521 | param { 1522 | lr_mult: 1 1523 | decay_mult: 1 1524 | } 1525 | param { 1526 | lr_mult: 2 1527 | decay_mult: 0 1528 | } 1529 | convolution_param { 1530 | num_output: 320 1531 | pad: 1 1532 | kernel_size: 3 1533 | weight_filler { 1534 | type: "xavier" 1535 | std: 0.03 1536 | } 1537 | bias_filler { 1538 | type: "constant" 1539 | value: 0.2 1540 | } 1541 | } 1542 | } 1543 | layer { 1544 | name: "inception_4e/relu_3x3" 1545 | type: "ReLU" 1546 | bottom: "inception_4e/3x3" 1547 | top: "inception_4e/3x3" 1548 | } 1549 | layer { 1550 | name: "inception_4e/5x5_reduce" 1551 | type: "Convolution" 1552 | bottom: "inception_4d/output" 1553 | top: "inception_4e/5x5_reduce" 1554 | param { 1555 | lr_mult: 1 1556 | decay_mult: 1 1557 | } 1558 | param { 1559 | lr_mult: 2 1560 | decay_mult: 0 1561 | } 1562 | convolution_param { 1563 | num_output: 32 1564 | kernel_size: 1 1565 | weight_filler { 1566 | type: "xavier" 1567 | std: 0.2 1568 | } 1569 | bias_filler { 1570 | type: "constant" 1571 | value: 0.2 1572 | } 1573 | } 1574 | } 1575 | layer { 1576 | name: "inception_4e/relu_5x5_reduce" 1577 | type: "ReLU" 1578 | bottom: "inception_4e/5x5_reduce" 1579 | top: "inception_4e/5x5_reduce" 1580 | } 1581 | layer { 1582 | name: "inception_4e/5x5" 1583 | type: "Convolution" 1584 | bottom: "inception_4e/5x5_reduce" 1585 | top: "inception_4e/5x5" 1586 | param { 1587 | lr_mult: 1 1588 | decay_mult: 1 1589 | } 1590 | param { 1591 | lr_mult: 2 1592 | decay_mult: 0 1593 | } 1594 | convolution_param { 1595 | num_output: 128 1596 | pad: 2 1597 | kernel_size: 5 1598 | weight_filler { 1599 | type: "xavier" 1600 | std: 0.03 1601 | } 1602 | bias_filler { 1603 | type: "constant" 1604 | value: 0.2 1605 | } 1606 | } 1607 | } 1608 | layer { 1609 | name: "inception_4e/relu_5x5" 1610 | type: "ReLU" 1611 | bottom: "inception_4e/5x5" 1612 | top: "inception_4e/5x5" 1613 | } 1614 | layer { 1615 | name: "inception_4e/pool" 1616 | type: "Pooling" 1617 | bottom: "inception_4d/output" 1618 | top: "inception_4e/pool" 1619 | pooling_param { 1620 | pool: MAX 1621 | kernel_size: 3 1622 | stride: 1 1623 | pad: 1 1624 | } 1625 | } 1626 | layer { 1627 | name: "inception_4e/pool_proj" 1628 | type: "Convolution" 1629 | bottom: "inception_4e/pool" 1630 | top: "inception_4e/pool_proj" 1631 | param { 1632 | lr_mult: 1 1633 | decay_mult: 1 1634 | } 1635 | param { 1636 | lr_mult: 2 1637 | decay_mult: 0 1638 | } 1639 | convolution_param { 1640 | num_output: 128 1641 | kernel_size: 1 1642 | weight_filler { 1643 | type: "xavier" 1644 | std: 0.1 1645 | } 1646 | bias_filler { 1647 | type: "constant" 1648 | value: 0.2 1649 | } 1650 | } 1651 | } 1652 | layer { 1653 | name: 
"inception_4e/relu_pool_proj" 1654 | type: "ReLU" 1655 | bottom: "inception_4e/pool_proj" 1656 | top: "inception_4e/pool_proj" 1657 | } 1658 | layer { 1659 | name: "inception_4e/output" 1660 | type: "Concat" 1661 | bottom: "inception_4e/1x1" 1662 | bottom: "inception_4e/3x3" 1663 | bottom: "inception_4e/5x5" 1664 | bottom: "inception_4e/pool_proj" 1665 | top: "inception_4e/output" 1666 | } 1667 | layer { 1668 | name: "CAM_conv" 1669 | type: "Convolution" 1670 | bottom: "inception_4e/output" 1671 | top: "CAM_conv" 1672 | param { 1673 | lr_mult: 1 1674 | decay_mult: 1 1675 | } 1676 | param { 1677 | lr_mult: 2 1678 | decay_mult: 0 1679 | } 1680 | convolution_param { 1681 | num_output: 1024 1682 | pad: 1 1683 | kernel_size: 3 1684 | group: 2 1685 | weight_filler { 1686 | type: "gaussian" 1687 | std: 0.01 1688 | } 1689 | bias_filler { 1690 | type: "constant" 1691 | value: 1 1692 | } 1693 | } 1694 | } 1695 | layer { 1696 | name: "CAM_relu" 1697 | type: "ReLU" 1698 | bottom: "CAM_conv" 1699 | top: "CAM_conv" 1700 | } 1701 | layer { 1702 | name: "CAM_pool" 1703 | type: "Pooling" 1704 | bottom: "CAM_conv" 1705 | top: "CAM_pool" 1706 | pooling_param { 1707 | pool: AVE 1708 | kernel_size: 14 1709 | stride: 14 1710 | } 1711 | } 1712 | layer { 1713 | name: "CAM_fc" 1714 | type: "InnerProduct" 1715 | bottom: "CAM_pool" 1716 | top: "CAM_fc" 1717 | param { 1718 | lr_mult: 1 1719 | decay_mult: 1 1720 | } 1721 | param { 1722 | lr_mult: 2 1723 | decay_mult: 0 1724 | } 1725 | inner_product_param { 1726 | num_output: 1000 1727 | weight_filler { 1728 | type: "xavier" 1729 | } 1730 | bias_filler { 1731 | type: "constant" 1732 | value: 0 1733 | } 1734 | } 1735 | } 1736 | layer { 1737 | name: "prob" 1738 | type: "Softmax" 1739 | bottom: "CAM_fc" 1740 | top: "prob" 1741 | } 1742 | 1743 | -------------------------------------------------------------------------------- /models/deploy_googlenetCAM_places205.prototxt: -------------------------------------------------------------------------------- 1 | name: "GoogleNet" 2 | input: "data" 3 | input_dim: 10 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | force_backward: true 8 | layer { 9 | name: "conv1/7x7_s2" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1/7x7_s2" 13 | param { 14 | lr_mult: 1 15 | decay_mult: 1 16 | } 17 | param { 18 | lr_mult: 2 19 | decay_mult: 0 20 | } 21 | convolution_param { 22 | num_output: 64 23 | pad: 3 24 | kernel_size: 7 25 | stride: 2 26 | weight_filler { 27 | type: "xavier" 28 | std: 0.1 29 | } 30 | bias_filler { 31 | type: "constant" 32 | value: 0.2 33 | } 34 | } 35 | } 36 | layer { 37 | name: "conv1/relu_7x7" 38 | type: "ReLU" 39 | bottom: "conv1/7x7_s2" 40 | top: "conv1/7x7_s2" 41 | } 42 | layer { 43 | name: "pool1/3x3_s2" 44 | type: "Pooling" 45 | bottom: "conv1/7x7_s2" 46 | top: "pool1/3x3_s2" 47 | pooling_param { 48 | pool: MAX 49 | kernel_size: 3 50 | stride: 2 51 | } 52 | } 53 | layer { 54 | name: "pool1/norm1" 55 | type: "LRN" 56 | bottom: "pool1/3x3_s2" 57 | top: "pool1/norm1" 58 | lrn_param { 59 | local_size: 5 60 | alpha: 0.0001 61 | beta: 0.75 62 | } 63 | } 64 | layer { 65 | name: "conv2/3x3_reduce" 66 | type: "Convolution" 67 | bottom: "pool1/norm1" 68 | top: "conv2/3x3_reduce" 69 | param { 70 | lr_mult: 1 71 | decay_mult: 1 72 | } 73 | param { 74 | lr_mult: 2 75 | decay_mult: 0 76 | } 77 | convolution_param { 78 | num_output: 64 79 | kernel_size: 1 80 | weight_filler { 81 | type: "xavier" 82 | std: 0.1 83 | } 84 | bias_filler { 85 | type: "constant" 86 | value: 0.2 87 | } 88 | } 89 | } 90 | layer { 91 
| name: "conv2/relu_3x3_reduce" 92 | type: "ReLU" 93 | bottom: "conv2/3x3_reduce" 94 | top: "conv2/3x3_reduce" 95 | } 96 | layer { 97 | name: "conv2/3x3" 98 | type: "Convolution" 99 | bottom: "conv2/3x3_reduce" 100 | top: "conv2/3x3" 101 | param { 102 | lr_mult: 1 103 | decay_mult: 1 104 | } 105 | param { 106 | lr_mult: 2 107 | decay_mult: 0 108 | } 109 | convolution_param { 110 | num_output: 192 111 | pad: 1 112 | kernel_size: 3 113 | weight_filler { 114 | type: "xavier" 115 | std: 0.03 116 | } 117 | bias_filler { 118 | type: "constant" 119 | value: 0.2 120 | } 121 | } 122 | } 123 | layer { 124 | name: "conv2/relu_3x3" 125 | type: "ReLU" 126 | bottom: "conv2/3x3" 127 | top: "conv2/3x3" 128 | } 129 | layer { 130 | name: "conv2/norm2" 131 | type: "LRN" 132 | bottom: "conv2/3x3" 133 | top: "conv2/norm2" 134 | lrn_param { 135 | local_size: 5 136 | alpha: 0.0001 137 | beta: 0.75 138 | } 139 | } 140 | layer { 141 | name: "pool2/3x3_s2" 142 | type: "Pooling" 143 | bottom: "conv2/norm2" 144 | top: "pool2/3x3_s2" 145 | pooling_param { 146 | pool: MAX 147 | kernel_size: 3 148 | stride: 2 149 | } 150 | } 151 | layer { 152 | name: "inception_3a/1x1" 153 | type: "Convolution" 154 | bottom: "pool2/3x3_s2" 155 | top: "inception_3a/1x1" 156 | param { 157 | lr_mult: 1 158 | decay_mult: 1 159 | } 160 | param { 161 | lr_mult: 2 162 | decay_mult: 0 163 | } 164 | convolution_param { 165 | num_output: 64 166 | kernel_size: 1 167 | weight_filler { 168 | type: "xavier" 169 | std: 0.03 170 | } 171 | bias_filler { 172 | type: "constant" 173 | value: 0.2 174 | } 175 | } 176 | } 177 | layer { 178 | name: "inception_3a/relu_1x1" 179 | type: "ReLU" 180 | bottom: "inception_3a/1x1" 181 | top: "inception_3a/1x1" 182 | } 183 | layer { 184 | name: "inception_3a/3x3_reduce" 185 | type: "Convolution" 186 | bottom: "pool2/3x3_s2" 187 | top: "inception_3a/3x3_reduce" 188 | param { 189 | lr_mult: 1 190 | decay_mult: 1 191 | } 192 | param { 193 | lr_mult: 2 194 | decay_mult: 0 195 | } 196 | convolution_param { 197 | num_output: 96 198 | kernel_size: 1 199 | weight_filler { 200 | type: "xavier" 201 | std: 0.09 202 | } 203 | bias_filler { 204 | type: "constant" 205 | value: 0.2 206 | } 207 | } 208 | } 209 | layer { 210 | name: "inception_3a/relu_3x3_reduce" 211 | type: "ReLU" 212 | bottom: "inception_3a/3x3_reduce" 213 | top: "inception_3a/3x3_reduce" 214 | } 215 | layer { 216 | name: "inception_3a/3x3" 217 | type: "Convolution" 218 | bottom: "inception_3a/3x3_reduce" 219 | top: "inception_3a/3x3" 220 | param { 221 | lr_mult: 1 222 | decay_mult: 1 223 | } 224 | param { 225 | lr_mult: 2 226 | decay_mult: 0 227 | } 228 | convolution_param { 229 | num_output: 128 230 | pad: 1 231 | kernel_size: 3 232 | weight_filler { 233 | type: "xavier" 234 | std: 0.03 235 | } 236 | bias_filler { 237 | type: "constant" 238 | value: 0.2 239 | } 240 | } 241 | } 242 | layer { 243 | name: "inception_3a/relu_3x3" 244 | type: "ReLU" 245 | bottom: "inception_3a/3x3" 246 | top: "inception_3a/3x3" 247 | } 248 | layer { 249 | name: "inception_3a/5x5_reduce" 250 | type: "Convolution" 251 | bottom: "pool2/3x3_s2" 252 | top: "inception_3a/5x5_reduce" 253 | param { 254 | lr_mult: 1 255 | decay_mult: 1 256 | } 257 | param { 258 | lr_mult: 2 259 | decay_mult: 0 260 | } 261 | convolution_param { 262 | num_output: 16 263 | kernel_size: 1 264 | weight_filler { 265 | type: "xavier" 266 | std: 0.2 267 | } 268 | bias_filler { 269 | type: "constant" 270 | value: 0.2 271 | } 272 | } 273 | } 274 | layer { 275 | name: "inception_3a/relu_5x5_reduce" 276 | type: "ReLU" 277 | 
bottom: "inception_3a/5x5_reduce" 278 | top: "inception_3a/5x5_reduce" 279 | } 280 | layer { 281 | name: "inception_3a/5x5" 282 | type: "Convolution" 283 | bottom: "inception_3a/5x5_reduce" 284 | top: "inception_3a/5x5" 285 | param { 286 | lr_mult: 1 287 | decay_mult: 1 288 | } 289 | param { 290 | lr_mult: 2 291 | decay_mult: 0 292 | } 293 | convolution_param { 294 | num_output: 32 295 | pad: 2 296 | kernel_size: 5 297 | weight_filler { 298 | type: "xavier" 299 | std: 0.03 300 | } 301 | bias_filler { 302 | type: "constant" 303 | value: 0.2 304 | } 305 | } 306 | } 307 | layer { 308 | name: "inception_3a/relu_5x5" 309 | type: "ReLU" 310 | bottom: "inception_3a/5x5" 311 | top: "inception_3a/5x5" 312 | } 313 | layer { 314 | name: "inception_3a/pool" 315 | type: "Pooling" 316 | bottom: "pool2/3x3_s2" 317 | top: "inception_3a/pool" 318 | pooling_param { 319 | pool: MAX 320 | kernel_size: 3 321 | stride: 1 322 | pad: 1 323 | } 324 | } 325 | layer { 326 | name: "inception_3a/pool_proj" 327 | type: "Convolution" 328 | bottom: "inception_3a/pool" 329 | top: "inception_3a/pool_proj" 330 | param { 331 | lr_mult: 1 332 | decay_mult: 1 333 | } 334 | param { 335 | lr_mult: 2 336 | decay_mult: 0 337 | } 338 | convolution_param { 339 | num_output: 32 340 | kernel_size: 1 341 | weight_filler { 342 | type: "xavier" 343 | std: 0.1 344 | } 345 | bias_filler { 346 | type: "constant" 347 | value: 0.2 348 | } 349 | } 350 | } 351 | layer { 352 | name: "inception_3a/relu_pool_proj" 353 | type: "ReLU" 354 | bottom: "inception_3a/pool_proj" 355 | top: "inception_3a/pool_proj" 356 | } 357 | layer { 358 | name: "inception_3a/output" 359 | type: "Concat" 360 | bottom: "inception_3a/1x1" 361 | bottom: "inception_3a/3x3" 362 | bottom: "inception_3a/5x5" 363 | bottom: "inception_3a/pool_proj" 364 | top: "inception_3a/output" 365 | } 366 | layer { 367 | name: "inception_3b/1x1" 368 | type: "Convolution" 369 | bottom: "inception_3a/output" 370 | top: "inception_3b/1x1" 371 | param { 372 | lr_mult: 1 373 | decay_mult: 1 374 | } 375 | param { 376 | lr_mult: 2 377 | decay_mult: 0 378 | } 379 | convolution_param { 380 | num_output: 128 381 | kernel_size: 1 382 | weight_filler { 383 | type: "xavier" 384 | std: 0.03 385 | } 386 | bias_filler { 387 | type: "constant" 388 | value: 0.2 389 | } 390 | } 391 | } 392 | layer { 393 | name: "inception_3b/relu_1x1" 394 | type: "ReLU" 395 | bottom: "inception_3b/1x1" 396 | top: "inception_3b/1x1" 397 | } 398 | layer { 399 | name: "inception_3b/3x3_reduce" 400 | type: "Convolution" 401 | bottom: "inception_3a/output" 402 | top: "inception_3b/3x3_reduce" 403 | param { 404 | lr_mult: 1 405 | decay_mult: 1 406 | } 407 | param { 408 | lr_mult: 2 409 | decay_mult: 0 410 | } 411 | convolution_param { 412 | num_output: 128 413 | kernel_size: 1 414 | weight_filler { 415 | type: "xavier" 416 | std: 0.09 417 | } 418 | bias_filler { 419 | type: "constant" 420 | value: 0.2 421 | } 422 | } 423 | } 424 | layer { 425 | name: "inception_3b/relu_3x3_reduce" 426 | type: "ReLU" 427 | bottom: "inception_3b/3x3_reduce" 428 | top: "inception_3b/3x3_reduce" 429 | } 430 | layer { 431 | name: "inception_3b/3x3" 432 | type: "Convolution" 433 | bottom: "inception_3b/3x3_reduce" 434 | top: "inception_3b/3x3" 435 | param { 436 | lr_mult: 1 437 | decay_mult: 1 438 | } 439 | param { 440 | lr_mult: 2 441 | decay_mult: 0 442 | } 443 | convolution_param { 444 | num_output: 192 445 | pad: 1 446 | kernel_size: 3 447 | weight_filler { 448 | type: "xavier" 449 | std: 0.03 450 | } 451 | bias_filler { 452 | type: "constant" 453 | 
value: 0.2 454 | } 455 | } 456 | } 457 | layer { 458 | name: "inception_3b/relu_3x3" 459 | type: "ReLU" 460 | bottom: "inception_3b/3x3" 461 | top: "inception_3b/3x3" 462 | } 463 | layer { 464 | name: "inception_3b/5x5_reduce" 465 | type: "Convolution" 466 | bottom: "inception_3a/output" 467 | top: "inception_3b/5x5_reduce" 468 | param { 469 | lr_mult: 1 470 | decay_mult: 1 471 | } 472 | param { 473 | lr_mult: 2 474 | decay_mult: 0 475 | } 476 | convolution_param { 477 | num_output: 32 478 | kernel_size: 1 479 | weight_filler { 480 | type: "xavier" 481 | std: 0.2 482 | } 483 | bias_filler { 484 | type: "constant" 485 | value: 0.2 486 | } 487 | } 488 | } 489 | layer { 490 | name: "inception_3b/relu_5x5_reduce" 491 | type: "ReLU" 492 | bottom: "inception_3b/5x5_reduce" 493 | top: "inception_3b/5x5_reduce" 494 | } 495 | layer { 496 | name: "inception_3b/5x5" 497 | type: "Convolution" 498 | bottom: "inception_3b/5x5_reduce" 499 | top: "inception_3b/5x5" 500 | param { 501 | lr_mult: 1 502 | decay_mult: 1 503 | } 504 | param { 505 | lr_mult: 2 506 | decay_mult: 0 507 | } 508 | convolution_param { 509 | num_output: 96 510 | pad: 2 511 | kernel_size: 5 512 | weight_filler { 513 | type: "xavier" 514 | std: 0.03 515 | } 516 | bias_filler { 517 | type: "constant" 518 | value: 0.2 519 | } 520 | } 521 | } 522 | layer { 523 | name: "inception_3b/relu_5x5" 524 | type: "ReLU" 525 | bottom: "inception_3b/5x5" 526 | top: "inception_3b/5x5" 527 | } 528 | layer { 529 | name: "inception_3b/pool" 530 | type: "Pooling" 531 | bottom: "inception_3a/output" 532 | top: "inception_3b/pool" 533 | pooling_param { 534 | pool: MAX 535 | kernel_size: 3 536 | stride: 1 537 | pad: 1 538 | } 539 | } 540 | layer { 541 | name: "inception_3b/pool_proj" 542 | type: "Convolution" 543 | bottom: "inception_3b/pool" 544 | top: "inception_3b/pool_proj" 545 | param { 546 | lr_mult: 1 547 | decay_mult: 1 548 | } 549 | param { 550 | lr_mult: 2 551 | decay_mult: 0 552 | } 553 | convolution_param { 554 | num_output: 64 555 | kernel_size: 1 556 | weight_filler { 557 | type: "xavier" 558 | std: 0.1 559 | } 560 | bias_filler { 561 | type: "constant" 562 | value: 0.2 563 | } 564 | } 565 | } 566 | layer { 567 | name: "inception_3b/relu_pool_proj" 568 | type: "ReLU" 569 | bottom: "inception_3b/pool_proj" 570 | top: "inception_3b/pool_proj" 571 | } 572 | layer { 573 | name: "inception_3b/output" 574 | type: "Concat" 575 | bottom: "inception_3b/1x1" 576 | bottom: "inception_3b/3x3" 577 | bottom: "inception_3b/5x5" 578 | bottom: "inception_3b/pool_proj" 579 | top: "inception_3b/output" 580 | } 581 | layer { 582 | name: "pool3/3x3_s2" 583 | type: "Pooling" 584 | bottom: "inception_3b/output" 585 | top: "pool3/3x3_s2" 586 | pooling_param { 587 | pool: MAX 588 | kernel_size: 3 589 | stride: 2 590 | } 591 | } 592 | layer { 593 | name: "inception_4a/1x1" 594 | type: "Convolution" 595 | bottom: "pool3/3x3_s2" 596 | top: "inception_4a/1x1" 597 | param { 598 | lr_mult: 1 599 | decay_mult: 1 600 | } 601 | param { 602 | lr_mult: 2 603 | decay_mult: 0 604 | } 605 | convolution_param { 606 | num_output: 192 607 | kernel_size: 1 608 | weight_filler { 609 | type: "xavier" 610 | std: 0.03 611 | } 612 | bias_filler { 613 | type: "constant" 614 | value: 0.2 615 | } 616 | } 617 | } 618 | layer { 619 | name: "inception_4a/relu_1x1" 620 | type: "ReLU" 621 | bottom: "inception_4a/1x1" 622 | top: "inception_4a/1x1" 623 | } 624 | layer { 625 | name: "inception_4a/3x3_reduce" 626 | type: "Convolution" 627 | bottom: "pool3/3x3_s2" 628 | top: "inception_4a/3x3_reduce" 629 
| param { 630 | lr_mult: 1 631 | decay_mult: 1 632 | } 633 | param { 634 | lr_mult: 2 635 | decay_mult: 0 636 | } 637 | convolution_param { 638 | num_output: 96 639 | kernel_size: 1 640 | weight_filler { 641 | type: "xavier" 642 | std: 0.09 643 | } 644 | bias_filler { 645 | type: "constant" 646 | value: 0.2 647 | } 648 | } 649 | } 650 | layer { 651 | name: "inception_4a/relu_3x3_reduce" 652 | type: "ReLU" 653 | bottom: "inception_4a/3x3_reduce" 654 | top: "inception_4a/3x3_reduce" 655 | } 656 | layer { 657 | name: "inception_4a/3x3" 658 | type: "Convolution" 659 | bottom: "inception_4a/3x3_reduce" 660 | top: "inception_4a/3x3" 661 | param { 662 | lr_mult: 1 663 | decay_mult: 1 664 | } 665 | param { 666 | lr_mult: 2 667 | decay_mult: 0 668 | } 669 | convolution_param { 670 | num_output: 208 671 | pad: 1 672 | kernel_size: 3 673 | weight_filler { 674 | type: "xavier" 675 | std: 0.03 676 | } 677 | bias_filler { 678 | type: "constant" 679 | value: 0.2 680 | } 681 | } 682 | } 683 | layer { 684 | name: "inception_4a/relu_3x3" 685 | type: "ReLU" 686 | bottom: "inception_4a/3x3" 687 | top: "inception_4a/3x3" 688 | } 689 | layer { 690 | name: "inception_4a/5x5_reduce" 691 | type: "Convolution" 692 | bottom: "pool3/3x3_s2" 693 | top: "inception_4a/5x5_reduce" 694 | param { 695 | lr_mult: 1 696 | decay_mult: 1 697 | } 698 | param { 699 | lr_mult: 2 700 | decay_mult: 0 701 | } 702 | convolution_param { 703 | num_output: 16 704 | kernel_size: 1 705 | weight_filler { 706 | type: "xavier" 707 | std: 0.2 708 | } 709 | bias_filler { 710 | type: "constant" 711 | value: 0.2 712 | } 713 | } 714 | } 715 | layer { 716 | name: "inception_4a/relu_5x5_reduce" 717 | type: "ReLU" 718 | bottom: "inception_4a/5x5_reduce" 719 | top: "inception_4a/5x5_reduce" 720 | } 721 | layer { 722 | name: "inception_4a/5x5" 723 | type: "Convolution" 724 | bottom: "inception_4a/5x5_reduce" 725 | top: "inception_4a/5x5" 726 | param { 727 | lr_mult: 1 728 | decay_mult: 1 729 | } 730 | param { 731 | lr_mult: 2 732 | decay_mult: 0 733 | } 734 | convolution_param { 735 | num_output: 48 736 | pad: 2 737 | kernel_size: 5 738 | weight_filler { 739 | type: "xavier" 740 | std: 0.03 741 | } 742 | bias_filler { 743 | type: "constant" 744 | value: 0.2 745 | } 746 | } 747 | } 748 | layer { 749 | name: "inception_4a/relu_5x5" 750 | type: "ReLU" 751 | bottom: "inception_4a/5x5" 752 | top: "inception_4a/5x5" 753 | } 754 | layer { 755 | name: "inception_4a/pool" 756 | type: "Pooling" 757 | bottom: "pool3/3x3_s2" 758 | top: "inception_4a/pool" 759 | pooling_param { 760 | pool: MAX 761 | kernel_size: 3 762 | stride: 1 763 | pad: 1 764 | } 765 | } 766 | layer { 767 | name: "inception_4a/pool_proj" 768 | type: "Convolution" 769 | bottom: "inception_4a/pool" 770 | top: "inception_4a/pool_proj" 771 | param { 772 | lr_mult: 1 773 | decay_mult: 1 774 | } 775 | param { 776 | lr_mult: 2 777 | decay_mult: 0 778 | } 779 | convolution_param { 780 | num_output: 64 781 | kernel_size: 1 782 | weight_filler { 783 | type: "xavier" 784 | std: 0.1 785 | } 786 | bias_filler { 787 | type: "constant" 788 | value: 0.2 789 | } 790 | } 791 | } 792 | layer { 793 | name: "inception_4a/relu_pool_proj" 794 | type: "ReLU" 795 | bottom: "inception_4a/pool_proj" 796 | top: "inception_4a/pool_proj" 797 | } 798 | layer { 799 | name: "inception_4a/output" 800 | type: "Concat" 801 | bottom: "inception_4a/1x1" 802 | bottom: "inception_4a/3x3" 803 | bottom: "inception_4a/5x5" 804 | bottom: "inception_4a/pool_proj" 805 | top: "inception_4a/output" 806 | } 807 | layer { 808 | name: 
"inception_4b/1x1" 809 | type: "Convolution" 810 | bottom: "inception_4a/output" 811 | top: "inception_4b/1x1" 812 | param { 813 | lr_mult: 1 814 | decay_mult: 1 815 | } 816 | param { 817 | lr_mult: 2 818 | decay_mult: 0 819 | } 820 | convolution_param { 821 | num_output: 160 822 | kernel_size: 1 823 | weight_filler { 824 | type: "xavier" 825 | std: 0.03 826 | } 827 | bias_filler { 828 | type: "constant" 829 | value: 0.2 830 | } 831 | } 832 | } 833 | layer { 834 | name: "inception_4b/relu_1x1" 835 | type: "ReLU" 836 | bottom: "inception_4b/1x1" 837 | top: "inception_4b/1x1" 838 | } 839 | layer { 840 | name: "inception_4b/3x3_reduce" 841 | type: "Convolution" 842 | bottom: "inception_4a/output" 843 | top: "inception_4b/3x3_reduce" 844 | param { 845 | lr_mult: 1 846 | decay_mult: 1 847 | } 848 | param { 849 | lr_mult: 2 850 | decay_mult: 0 851 | } 852 | convolution_param { 853 | num_output: 112 854 | kernel_size: 1 855 | weight_filler { 856 | type: "xavier" 857 | std: 0.09 858 | } 859 | bias_filler { 860 | type: "constant" 861 | value: 0.2 862 | } 863 | } 864 | } 865 | layer { 866 | name: "inception_4b/relu_3x3_reduce" 867 | type: "ReLU" 868 | bottom: "inception_4b/3x3_reduce" 869 | top: "inception_4b/3x3_reduce" 870 | } 871 | layer { 872 | name: "inception_4b/3x3" 873 | type: "Convolution" 874 | bottom: "inception_4b/3x3_reduce" 875 | top: "inception_4b/3x3" 876 | param { 877 | lr_mult: 1 878 | decay_mult: 1 879 | } 880 | param { 881 | lr_mult: 2 882 | decay_mult: 0 883 | } 884 | convolution_param { 885 | num_output: 224 886 | pad: 1 887 | kernel_size: 3 888 | weight_filler { 889 | type: "xavier" 890 | std: 0.03 891 | } 892 | bias_filler { 893 | type: "constant" 894 | value: 0.2 895 | } 896 | } 897 | } 898 | layer { 899 | name: "inception_4b/relu_3x3" 900 | type: "ReLU" 901 | bottom: "inception_4b/3x3" 902 | top: "inception_4b/3x3" 903 | } 904 | layer { 905 | name: "inception_4b/5x5_reduce" 906 | type: "Convolution" 907 | bottom: "inception_4a/output" 908 | top: "inception_4b/5x5_reduce" 909 | param { 910 | lr_mult: 1 911 | decay_mult: 1 912 | } 913 | param { 914 | lr_mult: 2 915 | decay_mult: 0 916 | } 917 | convolution_param { 918 | num_output: 24 919 | kernel_size: 1 920 | weight_filler { 921 | type: "xavier" 922 | std: 0.2 923 | } 924 | bias_filler { 925 | type: "constant" 926 | value: 0.2 927 | } 928 | } 929 | } 930 | layer { 931 | name: "inception_4b/relu_5x5_reduce" 932 | type: "ReLU" 933 | bottom: "inception_4b/5x5_reduce" 934 | top: "inception_4b/5x5_reduce" 935 | } 936 | layer { 937 | name: "inception_4b/5x5" 938 | type: "Convolution" 939 | bottom: "inception_4b/5x5_reduce" 940 | top: "inception_4b/5x5" 941 | param { 942 | lr_mult: 1 943 | decay_mult: 1 944 | } 945 | param { 946 | lr_mult: 2 947 | decay_mult: 0 948 | } 949 | convolution_param { 950 | num_output: 64 951 | pad: 2 952 | kernel_size: 5 953 | weight_filler { 954 | type: "xavier" 955 | std: 0.03 956 | } 957 | bias_filler { 958 | type: "constant" 959 | value: 0.2 960 | } 961 | } 962 | } 963 | layer { 964 | name: "inception_4b/relu_5x5" 965 | type: "ReLU" 966 | bottom: "inception_4b/5x5" 967 | top: "inception_4b/5x5" 968 | } 969 | layer { 970 | name: "inception_4b/pool" 971 | type: "Pooling" 972 | bottom: "inception_4a/output" 973 | top: "inception_4b/pool" 974 | pooling_param { 975 | pool: MAX 976 | kernel_size: 3 977 | stride: 1 978 | pad: 1 979 | } 980 | } 981 | layer { 982 | name: "inception_4b/pool_proj" 983 | type: "Convolution" 984 | bottom: "inception_4b/pool" 985 | top: "inception_4b/pool_proj" 986 | param { 987 
| lr_mult: 1 988 | decay_mult: 1 989 | } 990 | param { 991 | lr_mult: 2 992 | decay_mult: 0 993 | } 994 | convolution_param { 995 | num_output: 64 996 | kernel_size: 1 997 | weight_filler { 998 | type: "xavier" 999 | std: 0.1 1000 | } 1001 | bias_filler { 1002 | type: "constant" 1003 | value: 0.2 1004 | } 1005 | } 1006 | } 1007 | layer { 1008 | name: "inception_4b/relu_pool_proj" 1009 | type: "ReLU" 1010 | bottom: "inception_4b/pool_proj" 1011 | top: "inception_4b/pool_proj" 1012 | } 1013 | layer { 1014 | name: "inception_4b/output" 1015 | type: "Concat" 1016 | bottom: "inception_4b/1x1" 1017 | bottom: "inception_4b/3x3" 1018 | bottom: "inception_4b/5x5" 1019 | bottom: "inception_4b/pool_proj" 1020 | top: "inception_4b/output" 1021 | } 1022 | layer { 1023 | name: "inception_4c/1x1" 1024 | type: "Convolution" 1025 | bottom: "inception_4b/output" 1026 | top: "inception_4c/1x1" 1027 | param { 1028 | lr_mult: 1 1029 | decay_mult: 1 1030 | } 1031 | param { 1032 | lr_mult: 2 1033 | decay_mult: 0 1034 | } 1035 | convolution_param { 1036 | num_output: 128 1037 | kernel_size: 1 1038 | weight_filler { 1039 | type: "xavier" 1040 | std: 0.03 1041 | } 1042 | bias_filler { 1043 | type: "constant" 1044 | value: 0.2 1045 | } 1046 | } 1047 | } 1048 | layer { 1049 | name: "inception_4c/relu_1x1" 1050 | type: "ReLU" 1051 | bottom: "inception_4c/1x1" 1052 | top: "inception_4c/1x1" 1053 | } 1054 | layer { 1055 | name: "inception_4c/3x3_reduce" 1056 | type: "Convolution" 1057 | bottom: "inception_4b/output" 1058 | top: "inception_4c/3x3_reduce" 1059 | param { 1060 | lr_mult: 1 1061 | decay_mult: 1 1062 | } 1063 | param { 1064 | lr_mult: 2 1065 | decay_mult: 0 1066 | } 1067 | convolution_param { 1068 | num_output: 128 1069 | kernel_size: 1 1070 | weight_filler { 1071 | type: "xavier" 1072 | std: 0.09 1073 | } 1074 | bias_filler { 1075 | type: "constant" 1076 | value: 0.2 1077 | } 1078 | } 1079 | } 1080 | layer { 1081 | name: "inception_4c/relu_3x3_reduce" 1082 | type: "ReLU" 1083 | bottom: "inception_4c/3x3_reduce" 1084 | top: "inception_4c/3x3_reduce" 1085 | } 1086 | layer { 1087 | name: "inception_4c/3x3" 1088 | type: "Convolution" 1089 | bottom: "inception_4c/3x3_reduce" 1090 | top: "inception_4c/3x3" 1091 | param { 1092 | lr_mult: 1 1093 | decay_mult: 1 1094 | } 1095 | param { 1096 | lr_mult: 2 1097 | decay_mult: 0 1098 | } 1099 | convolution_param { 1100 | num_output: 256 1101 | pad: 1 1102 | kernel_size: 3 1103 | weight_filler { 1104 | type: "xavier" 1105 | std: 0.03 1106 | } 1107 | bias_filler { 1108 | type: "constant" 1109 | value: 0.2 1110 | } 1111 | } 1112 | } 1113 | layer { 1114 | name: "inception_4c/relu_3x3" 1115 | type: "ReLU" 1116 | bottom: "inception_4c/3x3" 1117 | top: "inception_4c/3x3" 1118 | } 1119 | layer { 1120 | name: "inception_4c/5x5_reduce" 1121 | type: "Convolution" 1122 | bottom: "inception_4b/output" 1123 | top: "inception_4c/5x5_reduce" 1124 | param { 1125 | lr_mult: 1 1126 | decay_mult: 1 1127 | } 1128 | param { 1129 | lr_mult: 2 1130 | decay_mult: 0 1131 | } 1132 | convolution_param { 1133 | num_output: 24 1134 | kernel_size: 1 1135 | weight_filler { 1136 | type: "xavier" 1137 | std: 0.2 1138 | } 1139 | bias_filler { 1140 | type: "constant" 1141 | value: 0.2 1142 | } 1143 | } 1144 | } 1145 | layer { 1146 | name: "inception_4c/relu_5x5_reduce" 1147 | type: "ReLU" 1148 | bottom: "inception_4c/5x5_reduce" 1149 | top: "inception_4c/5x5_reduce" 1150 | } 1151 | layer { 1152 | name: "inception_4c/5x5" 1153 | type: "Convolution" 1154 | bottom: "inception_4c/5x5_reduce" 1155 | top: 
"inception_4c/5x5" 1156 | param { 1157 | lr_mult: 1 1158 | decay_mult: 1 1159 | } 1160 | param { 1161 | lr_mult: 2 1162 | decay_mult: 0 1163 | } 1164 | convolution_param { 1165 | num_output: 64 1166 | pad: 2 1167 | kernel_size: 5 1168 | weight_filler { 1169 | type: "xavier" 1170 | std: 0.03 1171 | } 1172 | bias_filler { 1173 | type: "constant" 1174 | value: 0.2 1175 | } 1176 | } 1177 | } 1178 | layer { 1179 | name: "inception_4c/relu_5x5" 1180 | type: "ReLU" 1181 | bottom: "inception_4c/5x5" 1182 | top: "inception_4c/5x5" 1183 | } 1184 | layer { 1185 | name: "inception_4c/pool" 1186 | type: "Pooling" 1187 | bottom: "inception_4b/output" 1188 | top: "inception_4c/pool" 1189 | pooling_param { 1190 | pool: MAX 1191 | kernel_size: 3 1192 | stride: 1 1193 | pad: 1 1194 | } 1195 | } 1196 | layer { 1197 | name: "inception_4c/pool_proj" 1198 | type: "Convolution" 1199 | bottom: "inception_4c/pool" 1200 | top: "inception_4c/pool_proj" 1201 | param { 1202 | lr_mult: 1 1203 | decay_mult: 1 1204 | } 1205 | param { 1206 | lr_mult: 2 1207 | decay_mult: 0 1208 | } 1209 | convolution_param { 1210 | num_output: 64 1211 | kernel_size: 1 1212 | weight_filler { 1213 | type: "xavier" 1214 | std: 0.1 1215 | } 1216 | bias_filler { 1217 | type: "constant" 1218 | value: 0.2 1219 | } 1220 | } 1221 | } 1222 | layer { 1223 | name: "inception_4c/relu_pool_proj" 1224 | type: "ReLU" 1225 | bottom: "inception_4c/pool_proj" 1226 | top: "inception_4c/pool_proj" 1227 | } 1228 | layer { 1229 | name: "inception_4c/output" 1230 | type: "Concat" 1231 | bottom: "inception_4c/1x1" 1232 | bottom: "inception_4c/3x3" 1233 | bottom: "inception_4c/5x5" 1234 | bottom: "inception_4c/pool_proj" 1235 | top: "inception_4c/output" 1236 | } 1237 | layer { 1238 | name: "inception_4d/1x1" 1239 | type: "Convolution" 1240 | bottom: "inception_4c/output" 1241 | top: "inception_4d/1x1" 1242 | param { 1243 | lr_mult: 1 1244 | decay_mult: 1 1245 | } 1246 | param { 1247 | lr_mult: 2 1248 | decay_mult: 0 1249 | } 1250 | convolution_param { 1251 | num_output: 112 1252 | kernel_size: 1 1253 | weight_filler { 1254 | type: "xavier" 1255 | std: 0.03 1256 | } 1257 | bias_filler { 1258 | type: "constant" 1259 | value: 0.2 1260 | } 1261 | } 1262 | } 1263 | layer { 1264 | name: "inception_4d/relu_1x1" 1265 | type: "ReLU" 1266 | bottom: "inception_4d/1x1" 1267 | top: "inception_4d/1x1" 1268 | } 1269 | layer { 1270 | name: "inception_4d/3x3_reduce" 1271 | type: "Convolution" 1272 | bottom: "inception_4c/output" 1273 | top: "inception_4d/3x3_reduce" 1274 | param { 1275 | lr_mult: 1 1276 | decay_mult: 1 1277 | } 1278 | param { 1279 | lr_mult: 2 1280 | decay_mult: 0 1281 | } 1282 | convolution_param { 1283 | num_output: 144 1284 | kernel_size: 1 1285 | weight_filler { 1286 | type: "xavier" 1287 | std: 0.09 1288 | } 1289 | bias_filler { 1290 | type: "constant" 1291 | value: 0.2 1292 | } 1293 | } 1294 | } 1295 | layer { 1296 | name: "inception_4d/relu_3x3_reduce" 1297 | type: "ReLU" 1298 | bottom: "inception_4d/3x3_reduce" 1299 | top: "inception_4d/3x3_reduce" 1300 | } 1301 | layer { 1302 | name: "inception_4d/3x3" 1303 | type: "Convolution" 1304 | bottom: "inception_4d/3x3_reduce" 1305 | top: "inception_4d/3x3" 1306 | param { 1307 | lr_mult: 1 1308 | decay_mult: 1 1309 | } 1310 | param { 1311 | lr_mult: 2 1312 | decay_mult: 0 1313 | } 1314 | convolution_param { 1315 | num_output: 288 1316 | pad: 1 1317 | kernel_size: 3 1318 | weight_filler { 1319 | type: "xavier" 1320 | std: 0.03 1321 | } 1322 | bias_filler { 1323 | type: "constant" 1324 | value: 0.2 1325 | } 1326 | 
} 1327 | } 1328 | layer { 1329 | name: "inception_4d/relu_3x3" 1330 | type: "ReLU" 1331 | bottom: "inception_4d/3x3" 1332 | top: "inception_4d/3x3" 1333 | } 1334 | layer { 1335 | name: "inception_4d/5x5_reduce" 1336 | type: "Convolution" 1337 | bottom: "inception_4c/output" 1338 | top: "inception_4d/5x5_reduce" 1339 | param { 1340 | lr_mult: 1 1341 | decay_mult: 1 1342 | } 1343 | param { 1344 | lr_mult: 2 1345 | decay_mult: 0 1346 | } 1347 | convolution_param { 1348 | num_output: 32 1349 | kernel_size: 1 1350 | weight_filler { 1351 | type: "xavier" 1352 | std: 0.2 1353 | } 1354 | bias_filler { 1355 | type: "constant" 1356 | value: 0.2 1357 | } 1358 | } 1359 | } 1360 | layer { 1361 | name: "inception_4d/relu_5x5_reduce" 1362 | type: "ReLU" 1363 | bottom: "inception_4d/5x5_reduce" 1364 | top: "inception_4d/5x5_reduce" 1365 | } 1366 | layer { 1367 | name: "inception_4d/5x5" 1368 | type: "Convolution" 1369 | bottom: "inception_4d/5x5_reduce" 1370 | top: "inception_4d/5x5" 1371 | param { 1372 | lr_mult: 1 1373 | decay_mult: 1 1374 | } 1375 | param { 1376 | lr_mult: 2 1377 | decay_mult: 0 1378 | } 1379 | convolution_param { 1380 | num_output: 64 1381 | pad: 2 1382 | kernel_size: 5 1383 | weight_filler { 1384 | type: "xavier" 1385 | std: 0.03 1386 | } 1387 | bias_filler { 1388 | type: "constant" 1389 | value: 0.2 1390 | } 1391 | } 1392 | } 1393 | layer { 1394 | name: "inception_4d/relu_5x5" 1395 | type: "ReLU" 1396 | bottom: "inception_4d/5x5" 1397 | top: "inception_4d/5x5" 1398 | } 1399 | layer { 1400 | name: "inception_4d/pool" 1401 | type: "Pooling" 1402 | bottom: "inception_4c/output" 1403 | top: "inception_4d/pool" 1404 | pooling_param { 1405 | pool: MAX 1406 | kernel_size: 3 1407 | stride: 1 1408 | pad: 1 1409 | } 1410 | } 1411 | layer { 1412 | name: "inception_4d/pool_proj" 1413 | type: "Convolution" 1414 | bottom: "inception_4d/pool" 1415 | top: "inception_4d/pool_proj" 1416 | param { 1417 | lr_mult: 1 1418 | decay_mult: 1 1419 | } 1420 | param { 1421 | lr_mult: 2 1422 | decay_mult: 0 1423 | } 1424 | convolution_param { 1425 | num_output: 64 1426 | kernel_size: 1 1427 | weight_filler { 1428 | type: "xavier" 1429 | std: 0.1 1430 | } 1431 | bias_filler { 1432 | type: "constant" 1433 | value: 0.2 1434 | } 1435 | } 1436 | } 1437 | layer { 1438 | name: "inception_4d/relu_pool_proj" 1439 | type: "ReLU" 1440 | bottom: "inception_4d/pool_proj" 1441 | top: "inception_4d/pool_proj" 1442 | } 1443 | layer { 1444 | name: "inception_4d/output" 1445 | type: "Concat" 1446 | bottom: "inception_4d/1x1" 1447 | bottom: "inception_4d/3x3" 1448 | bottom: "inception_4d/5x5" 1449 | bottom: "inception_4d/pool_proj" 1450 | top: "inception_4d/output" 1451 | } 1452 | layer { 1453 | name: "inception_4e/1x1" 1454 | type: "Convolution" 1455 | bottom: "inception_4d/output" 1456 | top: "inception_4e/1x1" 1457 | param { 1458 | lr_mult: 1 1459 | decay_mult: 1 1460 | } 1461 | param { 1462 | lr_mult: 2 1463 | decay_mult: 0 1464 | } 1465 | convolution_param { 1466 | num_output: 256 1467 | kernel_size: 1 1468 | weight_filler { 1469 | type: "xavier" 1470 | std: 0.03 1471 | } 1472 | bias_filler { 1473 | type: "constant" 1474 | value: 0.2 1475 | } 1476 | } 1477 | } 1478 | layer { 1479 | name: "inception_4e/relu_1x1" 1480 | type: "ReLU" 1481 | bottom: "inception_4e/1x1" 1482 | top: "inception_4e/1x1" 1483 | } 1484 | layer { 1485 | name: "inception_4e/3x3_reduce" 1486 | type: "Convolution" 1487 | bottom: "inception_4d/output" 1488 | top: "inception_4e/3x3_reduce" 1489 | param { 1490 | lr_mult: 1 1491 | decay_mult: 1 1492 | } 1493 | 
1494 |     lr_mult: 2
1495 |     decay_mult: 0
1496 |   }
1497 |   convolution_param {
1498 |     num_output: 160
1499 |     kernel_size: 1
1500 |     weight_filler {
1501 |       type: "xavier"
1502 |       std: 0.09
1503 |     }
1504 |     bias_filler {
1505 |       type: "constant"
1506 |       value: 0.2
1507 |     }
1508 |   }
1509 | }
1510 | layer {
1511 |   name: "inception_4e/relu_3x3_reduce"
1512 |   type: "ReLU"
1513 |   bottom: "inception_4e/3x3_reduce"
1514 |   top: "inception_4e/3x3_reduce"
1515 | }
1516 | layer {
1517 |   name: "inception_4e/3x3"
1518 |   type: "Convolution"
1519 |   bottom: "inception_4e/3x3_reduce"
1520 |   top: "inception_4e/3x3"
1521 |   param {
1522 |     lr_mult: 1
1523 |     decay_mult: 1
1524 |   }
1525 |   param {
1526 |     lr_mult: 2
1527 |     decay_mult: 0
1528 |   }
1529 |   convolution_param {
1530 |     num_output: 320
1531 |     pad: 1
1532 |     kernel_size: 3
1533 |     weight_filler {
1534 |       type: "xavier"
1535 |       std: 0.03
1536 |     }
1537 |     bias_filler {
1538 |       type: "constant"
1539 |       value: 0.2
1540 |     }
1541 |   }
1542 | }
1543 | layer {
1544 |   name: "inception_4e/relu_3x3"
1545 |   type: "ReLU"
1546 |   bottom: "inception_4e/3x3"
1547 |   top: "inception_4e/3x3"
1548 | }
1549 | layer {
1550 |   name: "inception_4e/5x5_reduce"
1551 |   type: "Convolution"
1552 |   bottom: "inception_4d/output"
1553 |   top: "inception_4e/5x5_reduce"
1554 |   param {
1555 |     lr_mult: 1
1556 |     decay_mult: 1
1557 |   }
1558 |   param {
1559 |     lr_mult: 2
1560 |     decay_mult: 0
1561 |   }
1562 |   convolution_param {
1563 |     num_output: 32
1564 |     kernel_size: 1
1565 |     weight_filler {
1566 |       type: "xavier"
1567 |       std: 0.2
1568 |     }
1569 |     bias_filler {
1570 |       type: "constant"
1571 |       value: 0.2
1572 |     }
1573 |   }
1574 | }
1575 | layer {
1576 |   name: "inception_4e/relu_5x5_reduce"
1577 |   type: "ReLU"
1578 |   bottom: "inception_4e/5x5_reduce"
1579 |   top: "inception_4e/5x5_reduce"
1580 | }
1581 | layer {
1582 |   name: "inception_4e/5x5"
1583 |   type: "Convolution"
1584 |   bottom: "inception_4e/5x5_reduce"
1585 |   top: "inception_4e/5x5"
1586 |   param {
1587 |     lr_mult: 1
1588 |     decay_mult: 1
1589 |   }
1590 |   param {
1591 |     lr_mult: 2
1592 |     decay_mult: 0
1593 |   }
1594 |   convolution_param {
1595 |     num_output: 128
1596 |     pad: 2
1597 |     kernel_size: 5
1598 |     weight_filler {
1599 |       type: "xavier"
1600 |       std: 0.03
1601 |     }
1602 |     bias_filler {
1603 |       type: "constant"
1604 |       value: 0.2
1605 |     }
1606 |   }
1607 | }
1608 | layer {
1609 |   name: "inception_4e/relu_5x5"
1610 |   type: "ReLU"
1611 |   bottom: "inception_4e/5x5"
1612 |   top: "inception_4e/5x5"
1613 | }
1614 | layer {
1615 |   name: "inception_4e/pool"
1616 |   type: "Pooling"
1617 |   bottom: "inception_4d/output"
1618 |   top: "inception_4e/pool"
1619 |   pooling_param {
1620 |     pool: MAX
1621 |     kernel_size: 3
1622 |     stride: 1
1623 |     pad: 1
1624 |   }
1625 | }
1626 | layer {
1627 |   name: "inception_4e/pool_proj"
1628 |   type: "Convolution"
1629 |   bottom: "inception_4e/pool"
1630 |   top: "inception_4e/pool_proj"
1631 |   param {
1632 |     lr_mult: 1
1633 |     decay_mult: 1
1634 |   }
1635 |   param {
1636 |     lr_mult: 2
1637 |     decay_mult: 0
1638 |   }
1639 |   convolution_param {
1640 |     num_output: 128
1641 |     kernel_size: 1
1642 |     weight_filler {
1643 |       type: "xavier"
1644 |       std: 0.1
1645 |     }
1646 |     bias_filler {
1647 |       type: "constant"
1648 |       value: 0.2
1649 |     }
1650 |   }
1651 | }
1652 | layer {
1653 |   name: "inception_4e/relu_pool_proj"
1654 |   type: "ReLU"
1655 |   bottom: "inception_4e/pool_proj"
1656 |   top: "inception_4e/pool_proj"
1657 | }
1658 | layer {
1659 |   name: "inception_4e/output"
1660 |   type: "Concat"
1661 |   bottom: "inception_4e/1x1"
1662 |   bottom: "inception_4e/3x3"
1663 |   bottom: "inception_4e/5x5"
"inception_4e/5x5" 1664 | bottom: "inception_4e/pool_proj" 1665 | top: "inception_4e/output" 1666 | } 1667 | layer { 1668 | name: "CAM_conv" 1669 | type: "Convolution" 1670 | bottom: "inception_4e/output" 1671 | top: "CAM_conv" 1672 | param { 1673 | lr_mult: 1 1674 | decay_mult: 1 1675 | } 1676 | param { 1677 | lr_mult: 2 1678 | decay_mult: 0 1679 | } 1680 | convolution_param { 1681 | num_output: 1024 1682 | pad: 1 1683 | kernel_size: 3 1684 | group: 2 1685 | weight_filler { 1686 | type: "gaussian" 1687 | std: 0.01 1688 | } 1689 | bias_filler { 1690 | type: "constant" 1691 | value: 1 1692 | } 1693 | } 1694 | } 1695 | layer { 1696 | name: "CAM_relu" 1697 | type: "ReLU" 1698 | bottom: "CAM_conv" 1699 | top: "CAM_conv" 1700 | } 1701 | layer { 1702 | name: "CAM_pool" 1703 | type: "Pooling" 1704 | bottom: "CAM_conv" 1705 | top: "CAM_pool" 1706 | pooling_param { 1707 | pool: AVE 1708 | kernel_size: 14 1709 | stride: 14 1710 | } 1711 | } 1712 | layer { 1713 | name: "CAM_fc" 1714 | type: "InnerProduct" 1715 | bottom: "CAM_pool" 1716 | top: "CAM_fc" 1717 | param { 1718 | lr_mult: 1 1719 | decay_mult: 1 1720 | } 1721 | param { 1722 | lr_mult: 2 1723 | decay_mult: 0 1724 | } 1725 | inner_product_param { 1726 | num_output: 205 1727 | weight_filler { 1728 | type: "xavier" 1729 | } 1730 | bias_filler { 1731 | type: "constant" 1732 | value: 0 1733 | } 1734 | } 1735 | } 1736 | layer { 1737 | name: "prob" 1738 | type: "Softmax" 1739 | bottom: "CAM_fc" 1740 | top: "prob" 1741 | } 1742 | 1743 | -------------------------------------------------------------------------------- /models/deploy_vgg16CAM.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | input: "data" 3 | input_dim: 10 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | layers { 8 | bottom: "data" 9 | top: "conv1_1" 10 | name: "conv1_1" 11 | type: CONVOLUTION 12 | convolution_param { 13 | num_output: 64 14 | pad: 1 15 | kernel_size: 3 16 | } 17 | blobs_lr: 1 18 | blobs_lr: 2 19 | weight_decay: 1 20 | weight_decay: 0 21 | } 22 | layers { 23 | bottom: "conv1_1" 24 | top: "conv1_1" 25 | name: "relu1_1" 26 | type: RELU 27 | } 28 | layers { 29 | bottom: "conv1_1" 30 | top: "conv1_2" 31 | name: "conv1_2" 32 | type: CONVOLUTION 33 | convolution_param { 34 | num_output: 64 35 | pad: 1 36 | kernel_size: 3 37 | } 38 | blobs_lr: 1 39 | blobs_lr: 2 40 | weight_decay: 1 41 | weight_decay: 0 42 | } 43 | layers { 44 | bottom: "conv1_2" 45 | top: "conv1_2" 46 | name: "relu1_2" 47 | type: RELU 48 | } 49 | layers { 50 | bottom: "conv1_2" 51 | top: "pool1" 52 | name: "pool1" 53 | type: POOLING 54 | pooling_param { 55 | pool: MAX 56 | kernel_size: 2 57 | stride: 2 58 | } 59 | } 60 | layers { 61 | bottom: "pool1" 62 | top: "conv2_1" 63 | name: "conv2_1" 64 | type: CONVOLUTION 65 | convolution_param { 66 | num_output: 128 67 | pad: 1 68 | kernel_size: 3 69 | } 70 | blobs_lr: 1 71 | blobs_lr: 2 72 | weight_decay: 1 73 | weight_decay: 0 74 | } 75 | layers { 76 | bottom: "conv2_1" 77 | top: "conv2_1" 78 | name: "relu2_1" 79 | type: RELU 80 | } 81 | layers { 82 | bottom: "conv2_1" 83 | top: "conv2_2" 84 | name: "conv2_2" 85 | type: CONVOLUTION 86 | convolution_param { 87 | num_output: 128 88 | pad: 1 89 | kernel_size: 3 90 | } 91 | blobs_lr: 1 92 | blobs_lr: 2 93 | weight_decay: 1 94 | weight_decay: 0 95 | } 96 | layers { 97 | bottom: "conv2_2" 98 | top: "conv2_2" 99 | name: "relu2_2" 100 | type: RELU 101 | } 102 | layers { 103 | bottom: "conv2_2" 104 | top: "pool2" 105 | name: "pool2" 106 | 
--------------------------------------------------------------------------------
/models/deploy_vgg16CAM.prototxt:
--------------------------------------------------------------------------------
1 | name: "VGG_ILSVRC_16_layers"
2 | input: "data"
3 | input_dim: 10
4 | input_dim: 3
5 | input_dim: 224
6 | input_dim: 224
7 | layers {
8 |   bottom: "data"
9 |   top: "conv1_1"
10 |   name: "conv1_1"
11 |   type: CONVOLUTION
12 |   convolution_param {
13 |     num_output: 64
14 |     pad: 1
15 |     kernel_size: 3
16 |   }
17 |   blobs_lr: 1
18 |   blobs_lr: 2
19 |   weight_decay: 1
20 |   weight_decay: 0
21 | }
22 | layers {
23 |   bottom: "conv1_1"
24 |   top: "conv1_1"
25 |   name: "relu1_1"
26 |   type: RELU
27 | }
28 | layers {
29 |   bottom: "conv1_1"
30 |   top: "conv1_2"
31 |   name: "conv1_2"
32 |   type: CONVOLUTION
33 |   convolution_param {
34 |     num_output: 64
35 |     pad: 1
36 |     kernel_size: 3
37 |   }
38 |   blobs_lr: 1
39 |   blobs_lr: 2
40 |   weight_decay: 1
41 |   weight_decay: 0
42 | }
43 | layers {
44 |   bottom: "conv1_2"
45 |   top: "conv1_2"
46 |   name: "relu1_2"
47 |   type: RELU
48 | }
49 | layers {
50 |   bottom: "conv1_2"
51 |   top: "pool1"
52 |   name: "pool1"
53 |   type: POOLING
54 |   pooling_param {
55 |     pool: MAX
56 |     kernel_size: 2
57 |     stride: 2
58 |   }
59 | }
60 | layers {
61 |   bottom: "pool1"
62 |   top: "conv2_1"
63 |   name: "conv2_1"
64 |   type: CONVOLUTION
65 |   convolution_param {
66 |     num_output: 128
67 |     pad: 1
68 |     kernel_size: 3
69 |   }
70 |   blobs_lr: 1
71 |   blobs_lr: 2
72 |   weight_decay: 1
73 |   weight_decay: 0
74 | }
75 | layers {
76 |   bottom: "conv2_1"
77 |   top: "conv2_1"
78 |   name: "relu2_1"
79 |   type: RELU
80 | }
81 | layers {
82 |   bottom: "conv2_1"
83 |   top: "conv2_2"
84 |   name: "conv2_2"
85 |   type: CONVOLUTION
86 |   convolution_param {
87 |     num_output: 128
88 |     pad: 1
89 |     kernel_size: 3
90 |   }
91 |   blobs_lr: 1
92 |   blobs_lr: 2
93 |   weight_decay: 1
94 |   weight_decay: 0
95 | }
96 | layers {
97 |   bottom: "conv2_2"
98 |   top: "conv2_2"
99 |   name: "relu2_2"
100 |   type: RELU
101 | }
102 | layers {
103 |   bottom: "conv2_2"
104 |   top: "pool2"
105 |   name: "pool2"
106 |   type: POOLING
107 |   pooling_param {
108 |     pool: MAX
109 |     kernel_size: 2
110 |     stride: 2
111 |   }
112 | }
113 | layers {
114 |   bottom: "pool2"
115 |   top: "conv3_1"
116 |   name: "conv3_1"
117 |   type: CONVOLUTION
118 |   convolution_param {
119 |     num_output: 256
120 |     pad: 1
121 |     kernel_size: 3
122 |   }
123 |   blobs_lr: 1
124 |   blobs_lr: 2
125 |   weight_decay: 1
126 |   weight_decay: 0
127 | }
128 | layers {
129 |   bottom: "conv3_1"
130 |   top: "conv3_1"
131 |   name: "relu3_1"
132 |   type: RELU
133 | }
134 | layers {
135 |   bottom: "conv3_1"
136 |   top: "conv3_2"
137 |   name: "conv3_2"
138 |   type: CONVOLUTION
139 |   convolution_param {
140 |     num_output: 256
141 |     pad: 1
142 |     kernel_size: 3
143 |   }
144 |   blobs_lr: 1
145 |   blobs_lr: 2
146 |   weight_decay: 1
147 |   weight_decay: 0
148 | }
149 | layers {
150 |   bottom: "conv3_2"
151 |   top: "conv3_2"
152 |   name: "relu3_2"
153 |   type: RELU
154 | }
155 | layers {
156 |   bottom: "conv3_2"
157 |   top: "conv3_3"
158 |   name: "conv3_3"
159 |   type: CONVOLUTION
160 |   convolution_param {
161 |     num_output: 256
162 |     pad: 1
163 |     kernel_size: 3
164 |   }
165 |   blobs_lr: 1
166 |   blobs_lr: 2
167 |   weight_decay: 1
168 |   weight_decay: 0
169 | }
170 | layers {
171 |   bottom: "conv3_3"
172 |   top: "conv3_3"
173 |   name: "relu3_3"
174 |   type: RELU
175 | }
176 | layers {
177 |   bottom: "conv3_3"
178 |   top: "pool3"
179 |   name: "pool3"
180 |   type: POOLING
181 |   pooling_param {
182 |     pool: MAX
183 |     kernel_size: 2
184 |     stride: 2
185 |   }
186 | }
187 | layers {
188 |   bottom: "pool3"
189 |   top: "conv4_1"
190 |   name: "conv4_1"
191 |   type: CONVOLUTION
192 |   convolution_param {
193 |     num_output: 512
194 |     pad: 1
195 |     kernel_size: 3
196 |   }
197 |   blobs_lr: 1
198 |   blobs_lr: 2
199 |   weight_decay: 1
200 |   weight_decay: 0
201 | }
202 | layers {
203 |   bottom: "conv4_1"
204 |   top: "conv4_1"
205 |   name: "relu4_1"
206 |   type: RELU
207 | }
208 | layers {
209 |   bottom: "conv4_1"
210 |   top: "conv4_2"
211 |   name: "conv4_2"
212 |   type: CONVOLUTION
213 |   convolution_param {
214 |     num_output: 512
215 |     pad: 1
216 |     kernel_size: 3
217 |   }
218 |   blobs_lr: 1
219 |   blobs_lr: 2
220 |   weight_decay: 1
221 |   weight_decay: 0
222 | }
223 | layers {
224 |   bottom: "conv4_2"
225 |   top: "conv4_2"
226 |   name: "relu4_2"
227 |   type: RELU
228 | }
229 | layers {
230 |   bottom: "conv4_2"
231 |   top: "conv4_3"
232 |   name: "conv4_3"
233 |   type: CONVOLUTION
234 |   convolution_param {
235 |     num_output: 512
236 |     pad: 1
237 |     kernel_size: 3
238 |   }
239 |   blobs_lr: 1
240 |   blobs_lr: 2
241 |   weight_decay: 1
242 |   weight_decay: 0
243 | }
244 | layers {
245 |   bottom: "conv4_3"
246 |   top: "conv4_3"
247 |   name: "relu4_3"
248 |   type: RELU
249 | }
250 | layers {
251 |   bottom: "conv4_3"
252 |   top: "pool4"
253 |   name: "pool4"
254 |   type: POOLING
255 |   pooling_param {
256 |     pool: MAX
257 |     kernel_size: 2
258 |     stride: 2
259 |   }
260 | }
261 | layers {
262 |   bottom: "pool4"
263 |   top: "conv5_1"
264 |   name: "conv5_1"
265 |   type: CONVOLUTION
266 |   convolution_param {
267 |     num_output: 512
268 |     pad: 1
269 |     kernel_size: 3
270 |   }
271 |   blobs_lr: 1
272 |   blobs_lr: 2
273 |   weight_decay: 1
274 |   weight_decay: 0
275 | }
276 | layers {
277 |   bottom: "conv5_1"
278 |   top: "conv5_1"
279 |   name: "relu5_1"
280 |   type: RELU
281 | }
282 | layers {
283 |   bottom: "conv5_1"
284 |   top: "conv5_2"
285 |   name: "conv5_2"
286 |   type: CONVOLUTION
287 |   convolution_param {
288 |     num_output: 512
289 |     pad: 1
290 |     kernel_size: 3
291 |   }
292 |   blobs_lr: 1
293 |   blobs_lr: 2
294 |   weight_decay: 1
295 |   weight_decay: 0
296 | }
297 | layers {
298 |   bottom: "conv5_2"
299 |   top: "conv5_2"
"conv5_2" 300 | name: "relu5_2" 301 | type: RELU 302 | } 303 | layers { 304 | bottom: "conv5_2" 305 | top: "conv5_3" 306 | name: "conv5_3" 307 | type: CONVOLUTION 308 | convolution_param { 309 | num_output: 512 310 | pad: 1 311 | kernel_size: 3 312 | } 313 | blobs_lr: 1 314 | blobs_lr: 2 315 | weight_decay: 1 316 | weight_decay: 0 317 | } 318 | layers { 319 | bottom: "conv5_3" 320 | top: "conv5_3" 321 | name: "relu5_3" 322 | type: RELU 323 | } 324 | layers { 325 | bottom: "conv5_3" 326 | top: "CAM_conv" 327 | name: "CAM_conv" 328 | type: CONVOLUTION 329 | convolution_param { 330 | num_output: 1024 331 | pad: 1 332 | kernel_size: 3 333 | group: 2 334 | weight_filler { 335 | type: "gaussian" 336 | std: 0.01 337 | } 338 | bias_filler { 339 | type: "constant" 340 | value: 0 341 | } 342 | } 343 | blobs_lr: 1 344 | blobs_lr: 2 345 | weight_decay: 1 346 | weight_decay: 0 347 | } 348 | layers { 349 | bottom: "CAM_conv" 350 | top: "CAM_conv" 351 | name: "CAM_relu" 352 | type: RELU 353 | } 354 | layers { 355 | name: "CAM_pool" 356 | type: POOLING 357 | bottom: "CAM_conv" 358 | top: "CAM_pool" 359 | pooling_param { 360 | pool: AVE 361 | kernel_size: 14 362 | stride: 14 363 | } 364 | } 365 | layers { 366 | bottom: "CAM_pool" 367 | top: "CAM_pool" 368 | name: "CAM_dropout" 369 | type: DROPOUT 370 | dropout_param { 371 | dropout_ratio: 0.5 372 | } 373 | } 374 | layers { 375 | name: "CAM_fc" 376 | bottom: "CAM_pool" 377 | top: "CAM_fc" 378 | type: INNER_PRODUCT 379 | inner_product_param { 380 | num_output: 1000 381 | } 382 | blobs_lr: 1 383 | weight_decay: 1 384 | 385 | } 386 | layers { 387 | bottom: "CAM_fc" 388 | top: "prob" 389 | name: "prob" 390 | type: SOFTMAX 391 | } 392 | 393 | -------------------------------------------------------------------------------- /models/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd $(dirname $0) 4 | curl -O http://cnnlocalization.csail.mit.edu/demoCAM/models/imagenet_googlenetCAM_train_iter_120000.caffemodel 5 | -------------------------------------------------------------------------------- /prepare_image.m: -------------------------------------------------------------------------------- 1 | function crops_data = prepare_image(im) 2 | % ------------------------------------------------------------------------ 3 | % caffe/matlab/+caffe/imagenet/ilsvrc_2012_mean.mat contains mean_data that 4 | % is already in W x H x C with BGR channels 5 | d = load('ilsvrc_2012_mean.mat'); 6 | mean_data = d.mean_data; 7 | IMAGE_DIM = 256; 8 | CROPPED_DIM = 224; % 224 for googLeNet , 227 for VGG and AlexNet 9 | 10 | % Convert an image returned by Matlab's imread to im_data in caffe's data 11 | % format: W x H x C with BGR channels 12 | im_data = im(:, :, [3, 2, 1]); % permute channels from RGB to BGR 13 | im_data = permute(im_data, [2, 1, 3]); % flip width and height 14 | im_data = single(im_data); % convert from uint8 to single 15 | im_data = imresize(im_data, [IMAGE_DIM IMAGE_DIM], 'bilinear'); % resize im_data 16 | im_data = im_data - mean_data; % subtract mean_data (already in W x H x C, BGR) 17 | 18 | % oversample (4 corners, center, and their x-axis flips) 19 | crops_data = zeros(CROPPED_DIM, CROPPED_DIM, 3, 10, 'single'); 20 | indices = [0 IMAGE_DIM-CROPPED_DIM] + 1; 21 | n = 1; 22 | for i = indices 23 | for j = indices 24 | crops_data(:, :, :, n) = im_data(i:i+CROPPED_DIM-1, j:j+CROPPED_DIM-1, :); 25 | crops_data(:, :, :, n+5) = crops_data(end:-1:1, :, :, n); 26 | n = n + 1; 27 | end 28 | end 29 | 
29 | center = floor(indices(2) / 2) + 1;
30 | crops_data(:,:,:,5) = ...
31 |     im_data(center:center+CROPPED_DIM-1,center:center+CROPPED_DIM-1,:);
32 | crops_data(:,:,:,10) = crops_data(end:-1:1, :, :, 5);
33 | 
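For the Python demos, the same 10-crop preparation can be sketched with NumPy/OpenCV. This mirrors prepare_image.m step by step but is only an illustration: prepare_image_py is a made-up name, and mean_bgr is assumed to be a full 256x256x3 BGR mean image (the .m file gets its mean from ilsvrc_2012_mean.mat).

import numpy as np
import cv2

def prepare_image_py(im_bgr, mean_bgr, image_dim=256, crop_dim=224):
    # Resize, subtract the mean, then take the 4 corner crops, the center
    # crop, and the horizontal flips of all five.
    im = cv2.resize(im_bgr.astype(np.float32), (image_dim, image_dim)) - mean_bgr
    crops = np.zeros((10, crop_dim, crop_dim, 3), dtype=np.float32)
    n = 0
    for i in (0, image_dim - crop_dim):
        for j in (0, image_dim - crop_dim):
            crops[n] = im[i:i+crop_dim, j:j+crop_dim]
            crops[n+5] = crops[n][:, ::-1]  # mirror of the same crop
            n += 1
    c = (image_dim - crop_dim) // 2
    crops[4] = im[c:c+crop_dim, c:c+crop_dim]
    crops[9] = crops[4][:, ::-1]
    return crops  # (10, H, W, C); transpose to (N, C, H, W) before feeding caffe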
--------------------------------------------------------------------------------
/py_demo.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys
3 | import os
4 | try:
5 |     caffe_root = os.environ['CAFFE_ROOT'] + '/'
6 | except KeyError:
7 |     raise KeyError("Define CAFFE_ROOT in ~/.bashrc")
8 | 
9 | sys.path.insert(1, caffe_root+'python/')
10 | import caffe
11 | import cv2
12 | from py_returnCAMmap import py_returnCAMmap
13 | from py_map2jpg import py_map2jpg
14 | import scipy.io
15 | 
16 | def im2double(im):
17 |     return cv2.normalize(im.astype('float'), None, 0.0, 1.0, cv2.NORM_MINMAX)
18 | 
19 | ## Be aware that since Matlab is 1-indexed and column-major,
20 | ## the usual 4 blob dimensions in Matlab are [width, height, channels, num]
21 | 
22 | ## In Python the dimensions are [num, channels, height, width]
23 | 
24 | model = 'googlenet'
25 | if model == 'alexnet':
26 |     net_weights = 'models/alexnetplusCAM_imagenet.caffemodel'
27 |     net_model = 'models/deploy_alexnetplusCAM_imagenet.prototxt'
28 |     out_layer = 'fc9'
29 |     last_conv = 'conv7'
30 |     crop_size = 227
31 | elif model == 'googlenet':
32 |     net_weights = 'models/imagenet_googlenetCAM_train_iter_120000.caffemodel'
33 |     net_model = 'models/deploy_googlenetCAM.prototxt'
34 |     out_layer = 'CAM_fc'
35 |     crop_size = 224
36 |     last_conv = 'CAM_conv'
37 | else:
38 |     raise Exception('This model is not defined')
39 | 
40 | categories = scipy.io.loadmat('categories1000.mat')
41 | 
42 | # load CAM model and extract features
43 | net = caffe.Net(net_model, net_weights, caffe.TEST)
44 | 
45 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
46 | transformer.set_transpose('data', (2,0,1))
47 | transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1))
48 | #transformer.set_channel_swap('data', (2,1,0)) # not needed here: cv2.imread already returns BGR, which is what the reference model expects
49 | 
50 | weights_LR = net.params[out_layer][0].data # weights of the final FC layer that feeds the softmax
51 | # shape: [1000, N] N-> depends on the network
52 | 
53 | image = cv2.imread('img2.jpg')
54 | image = cv2.resize(image, (256, 256))
55 | 
56 | # Take center crop.
57 | center = np.array(image.shape[:2]) / 2.0
58 | crop = np.tile(center, (1, 2))[0] + np.concatenate([
59 |     -np.array([crop_size, crop_size]) / 2.0,
60 |     np.array([crop_size, crop_size]) / 2.0
61 | ])
62 | crop = crop.astype(int)
63 | input_ = image[crop[0]:crop[2], crop[1]:crop[3], :]
64 | 
65 | # extract conv features
66 | net.blobs['data'].reshape(1, 3, crop_size, crop_size) # run only one image
67 | net.blobs['data'].data[0] = transformer.preprocess('data', input_)
68 | out = net.forward()
69 | scores = out['prob']
70 | activation_lastconv = net.blobs[last_conv].data
71 | 
72 | 
73 | 
74 | 
75 | ## Class Activation Mapping
76 | 
77 | topNum = 5 # generate heatmap for top X prediction results
78 | scoresMean = np.mean(scores, axis=0)
79 | ascending_order = np.argsort(scoresMean)
80 | IDX_category = ascending_order[::-1] # [::-1] to sort in descending order
81 | 
82 | curCAMmapAll = py_returnCAMmap(activation_lastconv, weights_LR[IDX_category[:topNum],:])
83 | 
84 | curResult = im2double(image)
85 | 
86 | for j in range(topNum):
87 |     # for one image
88 |     curCAMmap_crops = curCAMmapAll[:,:,j]
89 |     curCAMmapLarge_crops = cv2.resize(curCAMmap_crops, (256,256))
90 |     curHeatMap = im2double(curCAMmapLarge_crops) # rescale the upsampled CAM to [0, 1]
91 | 
92 | 
93 |     curHeatMap = py_map2jpg(curHeatMap, None, 'jet')
94 |     curHeatMap = im2double(image)*0.2+im2double(curHeatMap)*0.7
95 | 
96 |     cv2.imshow(categories['categories'][IDX_category[j]][0][0], curHeatMap)
97 |     cv2.waitKey(0)
98 | 
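The bbox code below expects one heatmap per image, so the per-crop CAMs have to be merged back into full-image coordinates first; mergeTenCrop.m does this for the Matlab demo. A NumPy sketch of the idea (merge_ten_crop is a made-up name, and it assumes the crop order produced by the preparation code above: 4 corners, center, then their horizontal flips):

import numpy as np

def merge_ten_crop(cams, image_dim=256, crop_dim=224):
    # cams: (10, crop_dim, crop_dim) array of per-crop CAMs
    acc = np.zeros((image_dim, image_dim), dtype=np.float32)
    cnt = np.zeros((image_dim, image_dim), dtype=np.float32)
    c = (image_dim - crop_dim) // 2
    origins = [(i, j) for i in (0, image_dim - crop_dim)
                      for j in (0, image_dim - crop_dim)] + [(c, c)]
    for n, (i, j) in enumerate(origins):
        acc[i:i+crop_dim, j:j+crop_dim] += cams[n]
        acc[i:i+crop_dim, j:j+crop_dim] += cams[n+5][:, ::-1]  # undo the mirror
        cnt[i:i+crop_dim, j:j+crop_dim] += 2
    return acc / cnt  # every pixel is covered by at least one crop

The merged map is then resized to the original image size before running the bbox generator, as the comments below describe.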
--------------------------------------------------------------------------------
/py_generate_bbox.py:
--------------------------------------------------------------------------------
1 | ## Here is the code to generate the bounding box from the heatmap
2 | #
3 | # To reproduce the ILSVRC localization result, first generate the heatmap for
4 | # each testing image by merging the heatmaps from the 10 crops (exactly what
5 | # the demo code does), then resize the merged heatmap back to the original
6 | # size of that image. Then run this bbox generator on the resized heatmap.
7 | #
8 | # The source code of the bbox generator is also released. You may need to
9 | # install a compatible version of OpenCV to compile it.
10 | #
11 | # Special thanks to Hui Li for helping on this code.
12 | #
13 | # Bolei Zhou, April 19, 2016
14 | 
15 | import os
16 | import numpy as np
17 | import cv2
18 | from py_map2jpg import py_map2jpg
19 | 
20 | def im2double(im):
21 |     return cv2.normalize(im.astype('float'), None, 0.0, 1.0, cv2.NORM_MINMAX)
22 | 
23 | bbox_threshold = [20, 100, 110] # parameters for the bbox generator
24 | curParaThreshold = str(bbox_threshold[0])+' '+str(bbox_threshold[1])+' '+str(bbox_threshold[2])+' '
25 | curHeatMapFile = 'bboxgenerator/heatmap_6.jpg'
26 | curImgFile = 'bboxgenerator/sample_6.jpg'
27 | curBBoxFile = 'bboxgenerator/heatmap_6.txt'
28 | 
29 | os.system("bboxgenerator/./dt_box "+curHeatMapFile+' '+curParaThreshold+' '+curBBoxFile)
30 | 
31 | with open(curBBoxFile) as f:
32 |     for line in f:
33 |         items = [int(x) for x in line.strip().split()]  # dt_box writes all boxes as "x y w h" quadruples on a single line
34 | 
35 | boxData1 = np.array(items[0::4]).T
36 | boxData2 = np.array(items[1::4]).T
37 | boxData3 = np.array(items[2::4]).T
38 | boxData4 = np.array(items[3::4]).T
39 | 
40 | boxData_formulate = np.array([boxData1, boxData2, boxData1+boxData3, boxData2+boxData4]).T
41 | 
42 | col1 = np.min(np.array([boxData_formulate[:,0], boxData_formulate[:,2]]), axis=0)
43 | col2 = np.min(np.array([boxData_formulate[:,1], boxData_formulate[:,3]]), axis=0)
44 | col3 = np.max(np.array([boxData_formulate[:,0], boxData_formulate[:,2]]), axis=0)
45 | col4 = np.max(np.array([boxData_formulate[:,1], boxData_formulate[:,3]]), axis=0)
46 | 
47 | boxData_formulate = np.array([col1, col2, col3, col4]).T
48 | 
49 | curHeatMap = cv2.imread(curHeatMapFile)
50 | curImg = cv2.imread(curImgFile)
51 | 
52 | curHeatMap = im2double(curHeatMap)
53 | curHeatMap = py_map2jpg(curHeatMap, None, 'jet')
54 | curHeatMap = im2double(curImg)*0.2+im2double(curHeatMap)*0.7
55 | 
56 | for i in range(boxData_formulate.shape[0]): # for each bbox
57 |     print(boxData_formulate[i][:2])
58 |     print(boxData_formulate[i][2:])
59 |     cv2.rectangle(curHeatMap, tuple(boxData_formulate[i][:2]), tuple(boxData_formulate[i][2:]), (255,0,0), 3)
60 | cv2.imshow('bbox', curHeatMap)
61 | cv2.waitKey(0)
--------------------------------------------------------------------------------
/py_map2jpg.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | 
4 | def py_map2jpg(imgmap, rang, colorMap):
5 |     if rang is None:
6 |         rang = [np.min(imgmap), np.max(imgmap)]  # kept for parity with map2jpg.m; imgmap is assumed pre-scaled to [0, 1]
7 | 
8 |     heatmap_x = np.round(imgmap*255).astype(np.uint8)
9 | 
10 |     return cv2.applyColorMap(heatmap_x, cv2.COLORMAP_JET)  # the colorMap argument is ignored; JET is always applied
--------------------------------------------------------------------------------
/py_returnCAMmap.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def py_returnCAMmap(activation, weights_LR):
4 |     print(activation.shape)
5 | 
6 |     if activation.shape[0] == 1: # only one image
7 |         n_feat, w, h = activation[0].shape  # caffe layout is (channels, height, width), so 'w' here is actually height
8 |         act_vec = np.reshape(activation[0], [n_feat, w*h])
9 |         n_top = weights_LR.shape[0]
10 |         out = np.zeros([w, h, n_top])
11 | 
12 |         for t in range(n_top):
13 |             weights_vec = np.reshape(weights_LR[t], [1, weights_LR[t].shape[0]])
14 |             heatmap_vec = np.dot(weights_vec,act_vec)
15 |             heatmap = np.reshape( np.squeeze(heatmap_vec) , [w, h])
16 |             out[:,:,t] = heatmap
17 |     else: # 10 images (over-sampling)
18 |         raise Exception('Not implemented')
19 | 
20 |     return out
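A quick shape check for py_returnCAMmap with random stand-in arrays (illustrative only; the dimensions match the GoogLeNet-CAM setup used in py_demo.py):

import numpy as np
from py_returnCAMmap import py_returnCAMmap

activation = np.random.rand(1, 1024, 14, 14)  # CAM_conv activations for one image
weights_LR = np.random.rand(5, 1024)          # CAM_fc rows for the top-5 classes
cams = py_returnCAMmap(activation, weights_LR)
print(cams.shape)  # (14, 14, 5): one coarse activation map per class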
--------------------------------------------------------------------------------
/returnCAMmap.m:
--------------------------------------------------------------------------------
1 | function [curColumnMap] = returnCAMmap( featureObjectSwitchSpatial, weights_LR)
2 | %RETURNCAMMAP Compute class activation maps by projecting the class weights
3 | %   in weights_LR onto the spatial feature maps; handles a single image or a 4-D batch (e.g. the 10 oversampled crops).
4 | 
5 | if size(featureObjectSwitchSpatial,4) ==1
6 | 
7 |     featureObjectSwitchSpatial_vectorized = reshape(featureObjectSwitchSpatial,[size(featureObjectSwitchSpatial,1)*size(featureObjectSwitchSpatial,2) size(featureObjectSwitchSpatial,3)]);
8 |     detectionMap = featureObjectSwitchSpatial_vectorized*weights_LR;
9 |     curColumnMap = reshape(detectionMap,[size(featureObjectSwitchSpatial,1),size(featureObjectSwitchSpatial,2), size(weights_LR,2)]);
10 | else
11 |     columnSet = zeros(size(featureObjectSwitchSpatial,1),size(featureObjectSwitchSpatial,2),size(weights_LR,2),size(featureObjectSwitchSpatial,4));
12 |     for i=1:size(featureObjectSwitchSpatial,4)
13 |         curFeatureObjectSwitchSpatial = squeeze(featureObjectSwitchSpatial(:,:,:,i));
14 |         featureObjectSwitchSpatial_vectorized = reshape(curFeatureObjectSwitchSpatial,[size(curFeatureObjectSwitchSpatial,1)*size(curFeatureObjectSwitchSpatial,2) size(curFeatureObjectSwitchSpatial,3)]);
15 |         detectionMap = featureObjectSwitchSpatial_vectorized*weights_LR;
16 |         curColumnMap = reshape(detectionMap,[size(featureObjectSwitchSpatial,1),size(featureObjectSwitchSpatial,2), size(weights_LR,2)]);
17 |         columnSet(:,:,:,i) = curColumnMap;
18 |     end
19 |     curColumnMap = columnSet;
20 | end
21 | 
22 | 
23 | 
24 | end
25 | 
26 | 
--------------------------------------------------------------------------------
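The dt_box binary under bboxgenerator/ is what both bbox scripts shell out to. When compiling it is not an option, a rough pure-Python stand-in can approximate its output by thresholding the heatmap and taking bounding boxes of the connected components; this is only a sketch of the idea, not the dt_box algorithm itself, and the 20% threshold is a guess rather than a tuned value:

import cv2
import numpy as np

def approx_bboxes(heatmap_gray, thresh=0.2):
    # heatmap_gray: 2-D float heatmap; boxes come back as (x, y, w, h) like dt_box
    mask = (heatmap_gray >= thresh * heatmap_gray.max()).astype(np.uint8)
    res = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = res[-2]  # OpenCV 3 returns 3 values, OpenCV 4 returns 2
    boxes = [cv2.boundingRect(c) for c in contours]
    return sorted(boxes, key=lambda b: b[2] * b[3], reverse=True)  # largest first

boxes = approx_bboxes(cv2.imread('bboxgenerator/heatmap_6.jpg', 0) / 255.0)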