├── .gitignore
├── pascal
    ├── wsddnVOChash_lookup.m
    ├── wsddnVOCap.m
    ├── wsddnVOChash_init.m
    ├── nms.m
    ├── wsddnVOCevaldet.m
    ├── setup_voc07_ssw.m
    └── setup_voc07_eb.m
├── setup_WSDDN.m
├── matlab
    └── +dagnn
    │   ├── SumOverDim.m
    │   ├── Times.m
    │   ├── BiasSamples.m
    │   ├── SoftMax2.m
    │   ├── LossTopBoxSmoothProb.m
    │   └── LayerAP.m
├── README.md
└── core
    ├── wsddn_get_batch.m
    ├── wsddn_init.m
    ├── wsddn_demo.m
    ├── wsddn_train.m
    └── wsddn_test.m


/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | 


--------------------------------------------------------------------------------
/pascal/wsddnVOChash_lookup.m:
--------------------------------------------------------------------------------
1 | function ind = wsddnVOChash_lookup(hash,s)
2 | % From the PASCAL VOC 2011 devkit
3 | 
4 | hsize=numel(hash.key);
5 | h=mod(str2double(s([4 6:end])),hsize)+1;
6 | ind=hash.val{h}(strmatch(s,hash.key{h},'exact'));
7 | 


--------------------------------------------------------------------------------
/pascal/wsddnVOCap.m:
--------------------------------------------------------------------------------
 1 | function ap = wsddnVOCap(rec,prec)
 2 | %WSDDNVOCAP Average precision as the area under a precision/recall curve.
 3 | %   AP = WSDDNVOCAP(REC, PREC) takes column vectors REC (recall) and PREC
 4 | %   (precision) of equal length. The curve is padded with the points
 5 | %   (0,0) and (1,0), precision is replaced by its running maximum taken
 6 | %   from the right, and the area is summed over every step at which the
 7 | %   padded recall changes.
 8 | %
 9 | %   From the PASCAL VOC 2011 devkit.
10 | 
11 | recPad  = [0 ; rec ; 1];
12 | precEnv = [0 ; prec ; 0];
13 | 
14 | % Sweep right-to-left so each entry becomes the max of itself and
15 | % everything after it.
16 | k = numel(precEnv) - 1;
17 | while k >= 1
18 |     precEnv(k) = max(precEnv(k), precEnv(k+1));
19 |     k = k - 1;
20 | end
21 | 
22 | % Indices (into the padded vectors) where recall differs from its
23 | % predecessor.
24 | steps = find(recPad(1:end-1) ~= recPad(2:end)) + 1;
25 | widths = recPad(steps) - recPad(steps-1);
26 | ap = sum(widths .* precEnv(steps));
27 | 


--------------------------------------------------------------------------------
/setup_WSDDN.m:
--------------------------------------------------------------------------------
1 | function setup_WSDDN()
2 | %SETUP_WSDDN Sets up WSDDN, by adding its folders to the Matlab path
3 | 
4 | root = fileparts(mfilename('fullpath')) ;
5 | addpath(root, [root '/matlab'], [root '/pascal'], [root '/core']) ;
6 | addpath([vl_rootnn '/examples/']) ;
7 | addpath([vl_rootnn '/examples/imagenet/']) ;
8 | 
9 | 


--------------------------------------------------------------------------------
/pascal/wsddnVOChash_init.m:
--------------------------------------------------------------------------------
 1 | function hash = wsddnVOChash_init(strs)
 2 | %WSDDNVOCHASH_INIT Build a hash table over a list of image identifiers.
 3 | %   HASH = WSDDNVOCHASH_INIT(STRS) takes a cell array of strings and
 4 | %   returns a struct with fields KEY and VAL, each a 4999x1 cell array.
 5 | %   String number I goes to the bucket selected by the number formed from
 6 | %   its characters ([4 6:end]) modulo 4999; the bucket's KEY entry gets
 7 | %   the string appended and its VAL entry gets the index I.
 8 | %
 9 | %   From the PASCAL VOC 2011 devkit.
10 | 
11 | nBuckets = 4999;
12 | hash = struct('key', {cell(nBuckets,1)}, 'val', {cell(nBuckets,1)});
13 | 
14 | for k = 1:numel(strs)
15 |     id = strs{k};
16 |     bucket = mod(str2double(id([4 6:end])), nBuckets) + 1;
17 |     slot = numel(hash.key{bucket}) + 1;   % next free position in bucket
18 |     hash.key{bucket}{slot} = id;
19 |     hash.val{bucket}(slot) = k;
20 | end
21 | 
22 | 


--------------------------------------------------------------------------------
/matlab/+dagnn/SumOverDim.m:
--------------------------------------------------------------------------------
 1 | classdef SumOverDim < dagnn.ElementWise
 2 |   % SumOverDim  DagNN layer that sums inputs{1} along dimension DIM.
 3 |   %   forward  : outputs{1} = sum(inputs{1}, dim)
 4 |   %   backward : the output derivative is replicated size(inputs{1},dim)
 5 |   %              times along DIM.
 6 |   % @author: Hakan Bilen
 7 |   properties
 8 |     dim = 3;
 9 |   end
10 | 
11 |   methods
12 |     function obj = SumOverDim(varargin)
13 |       obj.load(varargin) ;
14 |     end
15 | 
16 |     function outputs = forward(obj, inputs, params)
17 |       outputs = {sum(inputs{1}, obj.dim)} ;
18 |     end
19 | 
20 |     function [derInputs, derParams] = backward(obj, inputs, params, derOutputs)
21 |       % Replication counts: 1 everywhere except along the summed dimension.
22 |       tiling = ones(1, ndims(inputs{1})) ;
23 |       tiling(obj.dim) = size(inputs{1}, obj.dim) ;
24 |       derInputs = {repmat(derOutputs{1}, tiling)} ;
25 |       derParams = {} ;
26 |     end
27 | 
28 |     function outputSizes = getOutputSizes(obj, inputSizes)
29 |       % Same size as the input, with the summed dimension collapsed to 1.
30 |       sz = inputSizes{1} ;
31 |       sz(obj.dim) = 1 ;
32 |       outputSizes = {sz} ;
33 |     end
34 |   end
35 | end
36 | 


--------------------------------------------------------------------------------
/matlab/+dagnn/Times.m:
--------------------------------------------------------------------------------
 1 | classdef Times < dagnn.ElementWise
 2 |   % Times  Element-wise product DagNN layer.
 3 |   %   Multiplies its two inputs element by element and stores the product
 4 |   %   as its only output.
 5 |   % @author: Hakan Bilen
 6 |   methods
 7 |     function obj = Times(varargin)
 8 |       obj.load(varargin) ;
 9 |     end
10 | 
11 |     function outputs = forward(obj, inputs, params)
12 |       if numel(inputs) ~= 2
13 |         error('Number of inputs is not 2');
14 |       end
15 |       [a, b] = inputs{:} ;
16 |       outputs = {a .* b} ;
17 |     end
18 | 
19 |     function [derInputs, derParams] = backward(obj, inputs, params, derOutputs)
20 |       % Each input receives the output derivative times the other input.
21 |       dzdy = derOutputs{1} ;
22 |       derInputs = {dzdy .* inputs{2}, dzdy .* inputs{1}} ;
23 |       derParams = {} ;
24 |     end
25 | 
26 |     function outputSizes = getOutputSizes(obj, inputSizes)
27 |       % The output size is reported as the size of the first input.
28 |       outputSizes = inputSizes(1) ;
29 |     end
30 | 
31 |     function rfs = getReceptiveFields(obj)
32 |       rfs = struct('size', [1 1], 'stride', [1 1], 'offset', [1 1]) ;
33 |     end
34 |   end
35 | 
36 | end


--------------------------------------------------------------------------------
/matlab/+dagnn/BiasSamples.m:
--------------------------------------------------------------------------------
 1 | classdef BiasSamples < dagnn.ElementWise
 2 |   % BiasSamples  Re-weight inputs{1} by coefficients built from inputs{2}.
 3 |   %   The coefficients are 1 + scale * inputs{2}; the output is inputs{1}
 4 |   %   multiplied by them with singleton expansion (bsxfun). Only inputs{1}
 5 |   %   receives a derivative; the derivative for inputs{2} is left empty.
 6 |   % @author: Hakan Bilen
 7 |   properties
 8 |     scale = single(1)
 9 |   end
10 |   properties (Transient)
11 |     boxCoefs = []
12 |   end
13 |   methods
14 |     function obj = BiasSamples(varargin)
15 |       obj.load(varargin) ;
16 |     end
17 | 
18 |     function outputs = forward(obj, inputs, params)
19 |       if numel(inputs) ~= 2
20 |         error('Number of inputs is not 2');
21 |       end
22 |       coefs = single(1) + obj.scale * inputs{2} ;
23 |       obj.boxCoefs = coefs ;
24 |       outputs = {bsxfun(@times, inputs{1}, coefs)} ;
25 |     end
26 | 
27 |     function [derInputs, derParams] = backward(obj, inputs, params, derOutputs)
28 |       % The coefficients are recomputed here rather than read from the
29 |       % value stored by forward.
30 |       coefs = single(1) + obj.scale * inputs{2} ;
31 |       obj.boxCoefs = coefs ;
32 |       derInputs = {bsxfun(@times, derOutputs{1}, coefs), []} ;
33 |       derParams = {} ;
34 |     end
35 | 
36 |     function reset(obj)
37 |       obj.boxCoefs = [] ;
38 |     end
39 | 
40 |     function outputSizes = getOutputSizes(obj, inputSizes)
41 |       outputSizes = inputSizes(1) ;
42 |     end
43 | 
44 |     function rfs = getReceptiveFields(obj)
45 |       rfs = struct('size', [1 1], 'stride', [1 1], 'offset', [1 1]) ;
46 |     end
47 | 
48 |   end
49 | 
50 | end
51 | 


--------------------------------------------------------------------------------
/matlab/+dagnn/SoftMax2.m:
--------------------------------------------------------------------------------
 1 | classdef SoftMax2 < dagnn.ElementWise
 2 |   % SoftMax2  Softmax along a chosen dimension with a temperature.
 3 |   %   The input is divided by TEMP, dimension DIM is swapped with the
 4 |   %   third dimension, vl_nnsoftmax is applied, and the swap is undone.
 5 |   %   SCALE is stored and cast to single but is not used by forward or
 6 |   %   backward.
 7 |   %
 8 |   %   NOTE(review): backward returns the derivative with respect to
 9 |   %   inputs{1}/temp; no 1/temp factor is applied. Confirm whether this is
10 |   %   intended before relying on exact gradient magnitudes.
11 |   % @author: Hakan Bilen
12 |   properties
13 |     dim = 3;
14 |     temp = 1;
15 |     scale = 1;
16 |   end
17 | 
18 |   methods
19 |     function obj = SoftMax2(varargin)
20 |       obj.load(varargin) ;
21 |       obj.scale = single(obj.scale);
22 |       obj.temp  = single(obj.temp);
23 |       obj.dim   = single(obj.dim);
24 |     end
25 | 
26 |     function outputs = forward(self, inputs, params)
27 |       x = inputs{1} / self.temp;
28 |       swapDims = (self.dim ~= 3);
29 |       if swapDims
30 |         % Exchanging two dimensions is its own inverse, so the same
31 |         % permutation is reused to restore the layout afterwards.
32 |         perm = 1:ndims(x);
33 |         perm([3 self.dim]) = [self.dim 3];
34 |         x = permute(x, perm);
35 |       end
36 |       y = vl_nnsoftmax(x) ;
37 |       if swapDims
38 |         y = permute(y, perm) ;
39 |       end
40 |       outputs = {y} ;
41 |     end
42 | 
43 |     function [derInputs, derParams] = backward(self, inputs, params, derOutputs)
44 |       x = inputs{1} / self.temp;
45 |       dzdy = derOutputs{1};
46 |       swapDims = (self.dim ~= 3);
47 |       if swapDims
48 |         perm = 1:ndims(x);
49 |         perm([3 self.dim]) = [self.dim 3];
50 |         x = permute(x, perm);
51 |         dzdy = permute(dzdy, perm);
52 |       end
53 | 
54 |       dzdx = vl_nnsoftmax(x, dzdy) ;
55 |       if swapDims
56 |         dzdx = permute(dzdx, perm) ;
57 |       end
58 |       derInputs = {dzdx} ;
59 |       derParams = {} ;
60 |     end
61 |   end
62 | end
63 | 
64 | 


--------------------------------------------------------------------------------
/pascal/nms.m:
--------------------------------------------------------------------------------
 1 | function pick = nms(boxes, overlap)
 2 | % pick = nms(boxes, overlap)
 3 | % Non-maximum suppression. (FAST VERSION)
 4 | % Greedily keeps the highest-scoring detection, drops every remaining
 5 | % detection whose intersection-over-union with it exceeds OVERLAP, and
 6 | % repeats on what is left.
 7 | %
 8 | % BOXES has one detection per row: [x1 y1 x2 y2] or [x1 y1 x2 y2 ... score]
 9 | % (the score is the last column; with exactly four columns every score is
10 | % taken as 1). PICK holds the indices of the kept rows, best first. It is
11 | % a row vector for four-column input and a column vector otherwise, and
12 | % [] for empty input.
13 | %
14 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m),
15 | % but an inner loop has been eliminated to significantly speed it
16 | % up in the case of a large number of boxes
17 | 
18 | % Copyright (C) 2011-12 by Tomasz Malisiewicz
19 | % All rights reserved.
20 | %
21 | % This file is part of the Exemplar-SVM library and is made
22 | % available under the terms of the MIT license (see COPYING file).
23 | % Project homepage: https://github.com/quantombone/exemplarsvm
24 | 
25 | 
26 | pick = [];
27 | if isempty(boxes)
28 |   return;
29 | end
30 | 
31 | left   = boxes(:,1);
32 | top    = boxes(:,2);
33 | right  = boxes(:,3);
34 | bottom = boxes(:,4);
35 | if size(boxes,2)==4
36 |   score = ones(1,size(boxes,1));
37 | else
38 |   score = boxes(:,end);
39 | end
40 | 
41 | boxArea = (right-left+1) .* (bottom-top+1);
42 | 
43 | % ascending sort: the best remaining candidate is always the last entry
44 | [~, order] = sort(score);
45 | 
46 | pick = 0 * score;
47 | nPicked = 0;
48 | while ~isempty(order)
49 |   best = order(end);
50 |   rest = order(1:end-1);
51 |   nPicked = nPicked + 1;
52 |   pick(nPicked) = best;
53 | 
54 |   % intersection of the best box with every other candidate
55 |   iw = max(0.0, min(right(best), right(rest)) - max(left(best), left(rest)) + 1);
56 |   ih = max(0.0, min(bottom(best), bottom(rest)) - max(top(best), top(rest)) + 1);
57 |   inter = iw .* ih;
58 | 
59 |   iou = inter ./ (boxArea(best) + boxArea(rest) - inter);
60 | 
61 |   % keep only candidates that do not overlap the best box too much
62 |   order = rest(iou <= overlap);
63 | end
64 | 
65 | pick = pick(1:nPicked);
66 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Weakly Supervised Deep Detection Networks (WSDDN)
 2 | 
 3 | 
 4 | ## Installation
 5 | 1. Download and install [MatConvNet](http://www.vlfeat.org/matconvnet/install/)
 6 | 2. Install this module with the package manager of MatConvNet [`vl_contrib`](http://www.vlfeat.org/matconvnet/mfiles/vl_contrib/#notes):
 7 | 
 8 | ```
 9 |     vl_contrib('install', 'WSDDN') ;
10 |     vl_contrib('setup', 'WSDDN') ;
11 | ```
12 | 
13 | 3. If you want to train a WSDDN model, `wsddn_train` will automatically download the items below:
14 | 
15 |     a.  [PASCAL VOC 2007 devkit and dataset](http://host.robots.ox.ac.uk/pascal/VOC/) under `data` folder
16 | 
17 |     b.  Pre-computed edge-boxes for [trainval](http://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/EdgeBoxesVOC2007trainval.mat) and [test](http://groups.inf.ed.ac.uk/hbilen-data/WSDDN/EdgeBoxesVOC2007test.mat) splits:
18 | 
19 |     c. Pre-trained network from [MatConvNet website](http://www.vlfeat.org/matconvnet/models)
20 | 
21 | 4. You can also download the pre-trained WSDDN model ([VGGF-EB-BoxSc-SpReg](http://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/wsddn.mat)). Note that its performance differs slightly from the one reported in the paper (34.4% mAP instead of 34.5% mAP).
22 | 
23 | 
24 | ## Demo
25 | 
26 | After completing the installation and downloading the required files, you are ready for the demo
27 | 
28 | ```matlab
29 |             cd scripts;
30 |             opts.modelPath = '....' ;
31 |             opts.imdbPath = '....' ;
32 |             opts.gpu = .... ;
33 |             wsddn_demo(opts) ;
34 |                         
35 | ```
36 | 
37 | ## Test
38 | 
39 | ```matlab
40 |             addpath scripts;
41 |             opts.modelPath = '....' ;
42 |             opts.imdbPath = '....' ;
43 |             opts.gpu = .... ;
44 |             opts.vis = true ; % visualize
45 |             wsddn_test(opts) ;
46 |                         
47 | ```
48 | 
49 | ## Train
50 | 
51 | Download an ImageNet pre-trained model from [http://www.vlfeat.org/matconvnet/pretrained/](http://www.vlfeat.org/matconvnet/pretrained/)
52 | 
53 | ```matlab
54 |             addpath scripts;
55 |             opts.modelPath = '....' ;
56 |             opts.imdbPath = '....' ;
57 |             opts.train.gpus = .... ;
58 |             [net,info] = wsddn_train(opts) ;
59 |                         
60 | ```
61 | 
62 | ## Citing WSDDN
63 | If you find the code useful, please cite:
64 | 
65 | ```latex
66 |     @inproceedings{Bilen16,
67 |       author     = "Bilen, H. and Vedaldi, A.",
68 |       title      = "Weakly Supervised Deep Detection Networks",
69 |       booktitle  = "Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition",
70 |       year       = "2016"
71 |     }
72 | ```
73 | 
74 | ## Acknowledgement
75 | Many thanks to Sam Albanie for his help with the contrib package manager, and to the other nameless heroes who diligently found my bugs.
76 | 
77 | ### License
78 | The analysis work performed with the program(s) must be non-proprietary work. Licensee and its contract users must be or be affiliated with an academic facility. Licensee may additionally permit individuals who are students at such academic facility to access and use the program(s). Such students will be considered contract users of licensee. The program(s) may not be used for commercial competitive analysis (such as benchmarking) or for any commercial activity, including consulting.
79 | 


--------------------------------------------------------------------------------
/pascal/wsddnVOCevaldet.m:
--------------------------------------------------------------------------------
  1 | function [rec,prec,ap] = wsddnVOCevaldet(VOCopts,cls,res,draw)
  2 | %WSDDNVOCEVALDET Precision/recall and AP of detections for one class.
  3 | %   [REC,PREC,AP] = WSDDNVOCEVALDET(VOCOPTS,CLS,RES,DRAW) scores the
  4 | %   detections in RES for class name CLS against the ground truth of the
  5 | %   image set VOCOPTS.testset.
  6 | %
  7 | %   VOCOPTS fields read here: localdir, testset, imgsetpath, annopath,
  8 | %   minoverlap. RES fields read here: ids (cell array of image ids),
  9 | %   confidence (one score per detection) and bbox (one row of four
 10 | %   coordinates per detection, compared against the annotation bbox).
 11 | %   DRAW is optional (default false); when true the precision/recall
 12 | %   curve is plotted.
 13 | %
 14 | %   Ground-truth records are cached in [localdir testset '_anno_cache.mat']
 15 | %   and are read with PASreadrecord when the cache file is missing.
 16 | %   REC and PREC are cumulative recall/precision over the detections
 17 | %   sorted by decreasing confidence; AP is wsddnVOCap(REC,PREC).
 18 | 
 19 | % DRAW used to be mandatory: omitting it failed at the final "if draw"
 20 | if nargin<4
 21 |   draw=false;
 22 | end
 23 | 
 24 | % load test set
 25 | tic;
 26 | VOCopts.annocachepath=[VOCopts.localdir '%s_anno_cache.mat'];
 27 | cp=sprintf(VOCopts.annocachepath,VOCopts.testset);
 28 | if exist(cp,'file')
 29 |   fprintf('%s: pr: loading ground truth\n',cls);
 30 |   load(cp,'gtids','recs');
 31 | else
 32 |   % the second column of the image-set file is parsed but not used
 33 |   [gtids,~]=textread(sprintf(VOCopts.imgsetpath,VOCopts.testset),'%s %d');
 34 |   for i=1:length(gtids)
 35 |     % display progress
 36 |     if toc>1
 37 |       fprintf('%s: pr: load: %d/%d\n',cls,i,length(gtids));
 38 |       drawnow;
 39 |       tic;
 40 |     end
 41 |     
 42 |     % read annotation
 43 |     recs(i)=PASreadrecord(sprintf(VOCopts.annopath,gtids{i}));
 44 |   end
 45 |   save(cp,'gtids','recs');
 46 | end
 47 | 
 48 | fprintf('%s: pr: evaluating detections\n',cls);
 49 | 
 50 | % hash image ids
 51 | hash=wsddnVOChash_init(gtids);
 52 | 
 53 | % extract ground truth objects
 54 | 
 55 | npos=0;
 56 | gt(length(gtids))=struct('BB',[],'diff',[],'det',[]);
 57 | for i=1:length(gtids)
 58 |   % extract objects of class (strcmp replaces the not-recommended
 59 |   % strmatch(...,'exact'); it yields a logical mask over the objects)
 60 |   isCls=strcmp(cls,{recs(i).objects(:).class});
 61 |   gt(i).BB=cat(1,recs(i).objects(isCls).bbox)';
 62 |   gt(i).diff=[recs(i).objects(isCls).difficult];
 63 |   gt(i).det=false(nnz(isCls),1);
 64 |   % objects flagged difficult do not count as positives
 65 |   npos=npos+sum(~gt(i).diff);
 66 | end
 67 | 
 68 | % load results
 69 | ids        = res.ids;
 70 | confidence = res.confidence;
 71 | BB         = res.bbox';
 72 | 
 73 | % sort detections by decreasing confidence
 74 | [~,si]=sort(-confidence);
 75 | ids=ids(si);
 76 | BB=BB(:,si);
 77 | 
 78 | % assign detections to ground truth objects
 79 | nd=length(confidence);
 80 | tp=zeros(nd,1);
 81 | fp=zeros(nd,1);
 82 | tic;
 83 | for d=1:nd
 84 |   % display progress
 85 |   if toc>1
 86 |     fprintf('%s: pr: compute: %d/%d\n',cls,d,nd);
 87 |     drawnow;
 88 |     tic;
 89 |   end
 90 |   
 91 |   % find ground truth image
 92 |   i=wsddnVOChash_lookup(hash,ids{d});
 93 |   if isempty(i)
 94 |     error('unrecognized image "%s"',ids{d});
 95 |   elseif length(i)>1
 96 |     error('multiple image "%s"',ids{d});
 97 |   end
 98 |   
 99 |   % assign detection to ground truth object if any
100 |   bb=BB(:,d);
101 |   ovmax=-inf;
102 |   jmax=0;   % reset so that no index survives from a previous detection
103 |   for j=1:size(gt(i).BB,2)
104 |     bbgt=gt(i).BB(:,j);
105 |     bi=[max(bb(1),bbgt(1)) ; max(bb(2),bbgt(2)) ; min(bb(3),bbgt(3)) ; min(bb(4),bbgt(4))];
106 |     iw=bi(3)-bi(1)+1;
107 |     ih=bi(4)-bi(2)+1;
108 |     if iw>0 && ih>0
109 |       % compute overlap as area of intersection / area of union
110 |       ua=(bb(3)-bb(1)+1)*(bb(4)-bb(2)+1)+...
111 |         (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1)-...
112 |         iw*ih;
113 |       ov=iw*ih/ua;
114 |       if ov>ovmax
115 |         ovmax=ov;
116 |         jmax=j;
117 |       end
118 |     end
119 |   end
120 |   % assign detection as true positive/don't care/false positive
121 |   if ovmax>=VOCopts.minoverlap
122 |     if ~gt(i).diff(jmax)
123 |       if ~gt(i).det(jmax)
124 |         tp(d)=1;            % true positive
125 |         gt(i).det(jmax)=true;
126 |       else
127 |         fp(d)=1;            % false positive (multiple detection)
128 |       end
129 |     end
130 |   else
131 |     fp(d)=1;                    % false positive
132 |   end
133 | end
134 | 
135 | % compute precision/recall
136 | fp=cumsum(fp);
137 | tp=cumsum(tp);
138 | rec=tp/npos;
139 | prec=tp./(fp+tp);
140 | 
141 | ap=wsddnVOCap(rec,prec);
142 | 
143 | if draw
144 |   % plot precision/recall
145 |   plot(rec,prec,'-');
146 |   grid;
147 |   xlabel 'recall'
148 |   ylabel 'precision'
149 |   title(sprintf('class: %s, subset: %s, AP = %.3f',cls,VOCopts.testset,ap));
150 | end
151 | 


--------------------------------------------------------------------------------
/matlab/+dagnn/LossTopBoxSmoothProb.m:
--------------------------------------------------------------------------------
  1 | classdef LossTopBoxSmoothProb < dagnn.Loss
  2 |   % LossTopBoxSmoothProb  Smoothness loss around the top-scoring box.
  3 |   %   For every class c with labels(c) > 0, the box with the highest score
  4 |   %   is taken as reference. Among the next nBoxes-1 highest scoring
  5 |   %   boxes, those whose intersection-over-union with the reference
  6 |   %   exceeds minOverlap are selected, and the loss adds
  7 |   %       0.5 * sum((score_k * (x_k - x_ref)).^2)
  8 |   %   over the selected boxes k, where x_k = inputs{1}(:,:,:,k).
  9 |   %
 10 |   %   Inputs: {features, boxes, scores, labels}. Boxes are read one per
 11 |   %   row of inputs{2}; columns are used as [y1 x1 y2 x2] (naming taken
 12 |   %   from the code below). Scores are indexed as scores(class, box).
 13 |   %
 14 |   %   NOTE(review): backward weights each difference by score_k, not by
 15 |   %   score_k^2, so it is not the exact derivative of the value above.
 16 |   %   Confirm which of the two is intended before changing either.
 17 |   %   NOTE(review): backward assigns (rather than accumulates) slices of
 18 |   %   derInputs{1}; a box selected for several classes keeps only the
 19 |   %   contribution of the last class. Confirm this is intended.
 20 |   
 21 |   properties (Transient)
 22 |     gtIdx = []
 23 |     boxIdx = []
 24 |     probs = []
 25 |     minOverlap = 0.5
 26 |     nBoxes = 10
 27 |   end
 28 |   
 29 |   methods
 30 |     function outputs = forward(obj, inputs, params)
 31 |       % the check is for four inputs; the message used to say 2
 32 |       if numel(inputs) ~= 4
 33 |         error('Number of inputs is not 4');
 34 |       end
 35 |       obj.gtIdx = [];
 36 |       obj.boxIdx = [];
 37 |       obj.probs = [];
 38 |       boxes  = double(gather(inputs{2})');
 39 |       scores = gather(squeeze(inputs{3}));
 40 |       labels = gather(squeeze(inputs{4}));
 41 |       
 42 |       % Define the output before any early exit: returning with outputs
 43 |       % unassigned is a run-time error.
 44 |       outputs{1} = zeros(1,'like',inputs{1});
 45 |       
 46 |       % fewer than five coordinates means at most one box: nothing to
 47 |       % compare, the loss stays zero and the running average is untouched
 48 |       if numel(boxes)<5
 49 |         return;
 50 |       end
 51 |       
 52 |       for c=1:numel(labels)
 53 |         if labels(c)<=0
 54 |           continue;
 55 |         end
 56 |         
 57 |         % reference = highest scoring box of this class
 58 |         [so, si] = sort(scores(c,:),'descend');
 59 |         obj.gtIdx{c} = si(1);
 60 |         gtBox = boxes(:,obj.gtIdx{c});
 61 |         gtArea = (gtBox(3)-gtBox(1)+1) .* (gtBox(4)-gtBox(2)+1);
 62 |         
 63 |         % candidates = boxes ranked 2..nBoxes
 64 |         bbs = boxes(:,si(2:min(obj.nBoxes,end)))';
 65 |         
 66 |         y1 = bbs(:,1);
 67 |         x1 = bbs(:,2);
 68 |         y2 = bbs(:,3);
 69 |         x2 = bbs(:,4);
 70 |         
 71 |         area = (x2-x1+1) .* (y2-y1+1);
 72 |         
 73 |         yy1 = max(gtBox(1), y1);
 74 |         xx1 = max(gtBox(2), x1);
 75 |         yy2 = min(gtBox(3), y2);
 76 |         xx2 = min(gtBox(4), x2);
 77 |         
 78 |         w = max(0.0, xx2-xx1+1);
 79 |         h = max(0.0, yy2-yy1+1);
 80 |         
 81 |         inter = w.*h;
 82 |         o = find((inter ./ (gtArea + area - inter))>obj.minOverlap);
 83 |         
 84 |         if isempty(o)
 85 |           continue;
 86 |         end
 87 |         
 88 |         % o indexes the candidate list, which starts at rank 2
 89 |         obj.boxIdx{c} = si(o+1);
 90 |         obj.probs{c} = so(o+1);
 91 |         d = bsxfun(@minus,inputs{1}(:,:,:,obj.boxIdx{c}),inputs{1}(:,:,:,obj.gtIdx{c}));
 92 |         % Put the scores along the 4th (box) dimension, as backward does.
 93 |         % Multiplying by the plain row vector expanded along dimension 2
 94 |         % and inflated the loss whenever more than one box was selected.
 95 |         d = bsxfun(@times,d,reshape(obj.probs{c},[1 1 1 numel(obj.probs{c})]));
 96 |         outputs{1} = outputs{1} + 0.5 * sum(d(:).^2);
 97 |       end
 98 |       
 99 |       % running average of the loss over forward calls
100 |       n = obj.numAveraged ;
101 |       m = n + 1 ;
102 |       obj.average = (n * obj.average + gather(outputs{1})) / m ;
103 |       obj.numAveraged = m ;
104 |     end
105 |     
106 |     function [derInputs, derParams] = backward(obj, inputs, params, derOutputs)
107 |       % Only inputs{1} receives a derivative; it relies on the indices and
108 |       % scores stored by the preceding forward call.
109 |       derInputs = cell(1,4) ;
110 |       derInputs{1} = zeros(size(inputs{1}),'like',inputs{1});
111 |       for c=1:numel(obj.boxIdx)
112 |         if isempty(obj.boxIdx{c}), continue; end
113 |         derInputs{1}(:,:,:,obj.boxIdx{c}) = ...
114 |           bsxfun(@minus,inputs{1}(:,:,:,obj.boxIdx{c}),inputs{1}(:,:,:,obj.gtIdx{c}));
115 |         derInputs{1}(:,:,:,obj.boxIdx{c}) = bsxfun(@times,...
116 |           reshape(obj.probs{c},[1 1 1 numel(obj.probs{c})]),derInputs{1}(:,:,:,obj.boxIdx{c}));
117 |         % the reference box gets the negated sum of the selected boxes
118 |         derInputs{1}(:,:,:,obj.gtIdx{c}) = -sum(derInputs{1}(:,:,:,obj.boxIdx{c}),4);
119 | 
120 |       end
121 |       derInputs{1} = derInputs{1} * derOutputs{1};
122 | %       fprintf('LossTopBox l2 %f ',sqrt(sum(derInputs{1}(:).^2)));
123 |       derParams = {} ;
124 |     end
125 |     
126 |     function obj = LossTopBoxSmoothProb(varargin)
127 |       obj.load(varargin) ;
128 |       obj.loss = 'LossTopBoxSmoothProb';
129 |     end
130 |     
131 |     function reset(obj)
132 |       obj.gtIdx = [];
133 |       obj.boxIdx = [];
134 |       obj.probs = [];
135 |       obj.average = 0 ;
136 |       obj.numAveraged = 0 ;
137 |     end
138 |     
139 |     
140 |   end
141 |   
142 | end
143 | 


--------------------------------------------------------------------------------
/core/wsddn_get_batch.m:
--------------------------------------------------------------------------------
  1 | function [imo,rois] = wsddn_get_batch(images, imdb, batch, opts)
  2 | %WSDDN_GET_BATCH  Load, preprocess, and pack images and boxes for the CNN
  3 | %   [IMO,ROIS] = WSDDN_GET_BATCH(IMAGES, IMDB, BATCH, OPTS)
  4 | %
  5 | %   IMAGES  cell array of file names, or of already loaded images.
  6 | %   IMDB    struct; IMDB.images.size(k,:) is [height width] of image k
  7 | %           and IMDB.images.boxes{k} holds its boxes, one per row, used
  8 | %           as [y1 x1 y2 x2] (columns 1,3 scale with the height and
  9 | %           columns 2,4 with the width).
 10 | %   BATCH   indices into IMDB.images matching the entries of IMAGES.
 11 | %   OPTS    fields read: prefetch, scale, numThreads, interpolation,
 12 | %           flip, averageImage, rgbVariance.
 13 | %
 14 | %   IMO is a single array of size maxH x maxW x depth x numel(BATCH);
 15 | %   each image sits in the top-left corner and the remainder is zero.
 16 | %   ROIS has one row per box: [index within the batch, box rescaled to
 17 | %   the resized image]. Both outputs are empty when IMAGES is empty or
 18 | %   when only a prefetch is issued.
 19 | %
 20 | %   The resize factor is computed from the first image of the batch only.
 21 | %   OPTS.averageImage may be empty, hold one value per channel (any
 22 | %   shape), or be an image with DEPTH channels, which is reduced to its
 23 | %   per-channel mean.
 24 | 
 25 | if isempty(images)
 26 |   imo = [] ;
 27 |   rois = [] ;
 28 |   return ;
 29 | end
 30 | 
 31 | % fetch is true if images is a list of filenames (instead of
 32 | % a cell array of images)
 33 | fetch = ischar(images{1}) ;
 34 | 
 35 | % prefetch is used to load images in a separate thread
 36 | prefetch = fetch && opts.prefetch ;
 37 | 
 38 | % pick size: the factor fits the first image of the batch into a square
 39 | % of side opts.scale(1)
 40 | imSize = imdb.images.size(batch(1),:);
 41 | factor = min(opts.scale(1)/imSize(1),opts.scale(1)/imSize(2));
 42 | height = floor(factor*imSize(1));
 43 | 
 44 | if prefetch
 45 |   vl_imreadjpeg(images, 'numThreads',opts.numThreads,'Resize',height,'prefetch') ;
 46 |   imo = [] ;
 47 |   rois = [] ;
 48 |   return ;
 49 | end
 50 | 
 51 | if fetch
 52 |   ims = vl_imreadjpeg(images,'numThreads',opts.numThreads,'Resize',height) ;
 53 | else
 54 |   ims = images ;
 55 | end
 56 | 
 57 | for i=1:numel(images)
 58 |   % entries left empty above are read and resized with imread/imresize
 59 |   if isempty(ims{i})
 60 |     imt = imread(images{i}) ;
 61 |     if size(imt,3) == 1
 62 |       imt = cat(3, imt, imt, imt) ;
 63 |     end
 64 |     
 65 |     ims{i} = imresize(imt,factor,'Method',opts.interpolation);
 66 |     ims{i} = single(ims{i}) ; % faster than im2single (and multiplies by 255)
 67 |   end
 68 | end
 69 | 
 70 | % collect the boxes of every image and count them
 71 | bboxes = cell(1,numel(batch));
 72 | nBoxes = 0;
 73 | for b=1:numel(batch)
 74 |   bboxes{b} = double(imdb.images.boxes{batch(b)});
 75 |   nBoxes = nBoxes + size(bboxes{b},1);
 76 | end
 77 | 
 78 | rois = zeros(nBoxes,5);
 79 | countr = 0;
 80 | 
 81 | maxW = 0;
 82 | maxH = 0;
 83 | 
 84 | for b=1:numel(batch)
 85 |   
 86 |   hw = imdb.images.size(batch(b),:);
 87 |   h = hw(1);
 88 |   w = hw(2);
 89 |   
 90 |   imsz = size(ims{b});
 91 |   
 92 |   if opts.flip(b)
 93 |     % mirror the image left-right and the x coordinates of its boxes
 94 |     im = ims{b};
 95 |     ims{b} = im(:,end:-1:1,:);
 96 |     
 97 |     bbox = bboxes{b};
 98 |     bbox(:,[2,4]) = w + 1 - bbox(:,[4,2]);
 99 |     bboxes{b} = bbox;
100 |   end
101 |   
102 |   maxH = max(imsz(1),maxH);
103 |   maxW = max(imsz(2),maxW);
104 |  
105 |   % adapt bounding boxes into new coord
106 |   bbox = bboxes{b};
107 |   if any(bbox(:)<=0)
108 |     error('bbox error');
109 |   end
110 |   nB = size(bbox,1);
111 |   tbbox = scale_box(bbox,[h,w],imsz);
112 |   if any(tbbox(:)<=0)
113 |     error('tbbox error');
114 |   end
115 | 
116 |   rois(countr+1:countr+nB,:) = [b*ones(nB,1),tbbox];
117 |   countr = countr + nB;
118 | end
119 | 
120 | % rois = single(rois);
121 | depth = size(ims{1},3);
122 | imo = zeros(maxH,maxW,depth,numel(batch),'single');
123 | 
124 | % Per-channel average as a 1 x 1 x depth array. Previously avgIm stayed
125 | % undefined (and crashed below) for any non-empty averageImage that did
126 | % not have exactly depth elements, and a depth-length vector was passed
127 | % to bsxfun without being moved to the third dimension.
128 | avgIm = [] ;
129 | if ~isempty(opts.averageImage)
130 |   if numel(opts.averageImage) == depth
131 |     avgIm = reshape(opts.averageImage,1,1,depth) ;
132 |   elseif size(opts.averageImage,3) == depth
133 |     % images in a batch differ in size, so a full average image is
134 |     % reduced to its per-channel mean
135 |     avgIm = mean(mean(opts.averageImage,1),2) ;
136 |   else
137 |     error('averageImage must provide %d channel values', depth) ;
138 |   end
139 | end
140 | 
141 | for b=1:numel(batch)
142 |   sz = size(ims{b});
143 | 
144 |   imo(1:sz(1),1:sz(2),:,b) = single(ims{b});
145 |   
146 |   if ~isempty(avgIm)
147 |     imo(1:sz(1),1:sz(2),:,b) = single(bsxfun(@minus,imo(1:sz(1),1:sz(2),:,b),avgIm));
148 |   end
149 |   if ~isempty(opts.rgbVariance)
150 |     % random colour offset, drawn once per image
151 |     imo(1:sz(1),1:sz(2),:,b) = bsxfun(@plus, imo(1:sz(1),1:sz(2),:,b), ...
152 |         reshape(opts.rgbVariance * randn(3,1), 1,1,3)) ;
153 |   end
154 | end
155 | 
156 | 
157 | function boxOut = scale_box(boxIn,szIn,szOut)
158 | % SCALE_BOX  Map boxes from an image of size szIn to one of size szOut.
159 | %   Box centres and extents are expressed relative to szIn = [h w],
160 | %   rescaled to szOut, rounded, and clipped to [1, szOut].
161 |   
162 |   h = szIn(1);
163 |   w = szIn(2);
164 | 
165 |   bxr = 0.5 * (boxIn(:,2)+boxIn(:,4)) / w;
166 |   byr = 0.5 * (boxIn(:,1)+boxIn(:,3)) / h;
167 |  
168 |   bwr = (boxIn(:,4)-boxIn(:,2)+1) / w;
169 |   bhr = (boxIn(:,3)-boxIn(:,1)+1) / h;
170 |   
171 |   % boxIn center in new coord
172 |   byhat = (szOut(1) * byr);
173 |   bxhat = (szOut(2) * bxr);
174 |   
175 |   % relative width, height
176 |   bhhat = szOut(1) * bhr;
177 |   bwhat = szOut(2) * bwr;
178 |   
179 |   % transformed boxIn
180 |   boxOut = [max(1,round(byhat - 0.5 * bhhat)),...
181 |     max(1,round(bxhat - 0.5 * bwhat)), ...
182 |     min(szOut(1),round(byhat + 0.5 * bhhat)),...
183 |     min(szOut(2),round(bxhat + 0.5 * bwhat))];
184 | 
185 | 


--------------------------------------------------------------------------------
/matlab/+dagnn/LayerAP.m:
--------------------------------------------------------------------------------
  1 | classdef LayerAP < dagnn.Loss
  2 |   % LayerAP  Accumulates per-class scores and labels and reports mean AP.
  3 |   %   Scores and ground-truth labels are stored with one row per entry of
  4 |   %   cls_index and one column per image. Every forward call adds the
  5 |   %   current batch, recomputes the per-class average precision (aps) and
  6 |   %   outputs 100 * mean(aps).
  7 |   %
  8 |   %   Inputs: {scores, labels} or {scores, labels, ids}. With ids, the
  9 |   %   scores of an image id that is already stored are added to its
 10 |   %   existing column instead of being appended as a new column.
 11 |   %   NOTE(review): scores and labels are assumed to be
 12 |   %   1 x 1 x classes x images - confirm against the network definition.
 13 |   %
 14 |   %   voc07 = true selects the 11-step interpolated AP; otherwise AP is
 15 |   %   the mean of the precision values at the positive samples.
 16 |   %   reset() only raises a flag; the stored data is cleared at the start
 17 |   %   of the next forward call.
 18 |   % @author: Hakan Bilen
 19 |   properties
 20 |     cls_index = 1
 21 |     resetLayer = false 
 22 |     gtLabels = []
 23 |     scores   = []
 24 |     ids      = []
 25 |     aps      = []
 26 |     voc07    = true % 11 step
 27 |     classNames = {} 
 28 |   end
 29 | 
 30 | 
 31 |   methods
 32 |     function outputs = forward(obj, inputs, params)
 33 |       if obj.resetLayer 
 34 |         obj.gtLabels = [] ;
 35 |         obj.scores   = [] ;
 36 |         obj.ids      = [] ;
 37 |         obj.aps      = [] ;
 38 |         obj.resetLayer = false ;
 39 |       end
 40 |       
 41 |       if numel(inputs) < 2
 42 |         error('wrong number of inputs');
 43 |       end
 44 |       
 45 |       % Always classes x images. squeeze returned an images x 1 column for
 46 |       % a single class with several images, which did not match the
 47 |       % row-per-class layout read by compute_average_precision.
 48 |       nCls = numel(obj.cls_index) ;
 49 |       scoresCur = reshape(gather(inputs{1}(:,:,obj.cls_index,:)), nCls, []) ;
 50 |       gtLabelsCur = reshape(gather(inputs{2}(:,:,obj.cls_index,:)), nCls, []) ;
 51 |       
 52 |       if numel(inputs) == 2
 53 |         obj.scores = [obj.scores scoresCur] ;
 54 |         obj.gtLabels = [obj.gtLabels gtLabelsCur] ;
 55 |       else
 56 |         idsCur = reshape(gather(inputs{3}), 1, []) ;
 57 |         [seen, loc] = ismember(idsCur, obj.ids) ;
 58 |         
 59 |         % Images met for the first time become new columns. The previous
 60 |         % code selected rows (classes) with the per-image mask.
 61 |         obj.scores = [obj.scores scoresCur(:,~seen)] ;
 62 |         obj.gtLabels = [obj.gtLabels gtLabelsCur(:,~seen)] ;
 63 |         obj.ids = [reshape(obj.ids, 1, []) idsCur(~seen)] ;
 64 |         
 65 |         % Images already stored: add the new scores to their column. loc
 66 |         % refers to positions before the append above, which are unchanged
 67 |         % because new columns go to the end.
 68 |         for k = find(seen)
 69 |           obj.scores(:,loc(k)) = obj.scores(:,loc(k)) + scoresCur(:,k) ;
 70 |         end
 71 |       end
 72 |       
 73 |       obj.aps = obj.compute_average_precision();
 74 |       obj.average = 100 * mean(obj.aps);
 75 |       outputs{1} =  100 * mean(obj.aps);
 76 |     end
 77 | 
 78 |     function [derInputs, derParams] = backward(obj, inputs, params, derOutputs)
 79 |       % The output derivative is passed through to the first input as is.
 80 |       derInputs = cell(1,numel(inputs));
 81 |       derInputs{1} = derOutputs{1} ;
 82 |       derParams = {} ;
 83 |     end
 84 | 
 85 |     function reset(obj)
 86 |       obj.resetLayer = true ;
 87 | %       obj.average = 0 ;
 88 | %       obj.aps = 0 ;
 89 | %       obj.gtLabels = [];
 90 | %       obj.scores   = [];
 91 | %       obj.ids      = [];
 92 |     end
 93 | 
 94 |     function printAP(obj)
 95 |       % Print one line per class with its AP in percent.
 96 |       if isempty(obj.classNames)
 97 |         for i=1:numel(obj.aps)
 98 |           fprintf('class-%d %.1f\n',i,100*obj.aps(i)) ;
 99 |         end
100 |       else
101 |         for i=1:numel(obj.aps)
102 |           fprintf('%-50s %.1f\n',obj.classNames{i},100*obj.aps(i)) ;
103 |         end
104 |       end
105 |     end
106 |     
107 |     function aps = compute_average_precision(obj)
108 |       % AP per class from the stored scores/labels; classes without any
109 |       % positive label keep an AP of 0.
110 |       assert(all(size(obj.scores)==size(obj.gtLabels)));
111 |       nCls = numel(obj.cls_index);
112 | 
113 |       aps = zeros(1,nCls);
114 | 
115 |       for c=1:nCls
116 |         gt = obj.gtLabels(c,:);
117 |         conf = obj.scores(c,:) ;
118 |         if sum(gt>0)==0, continue ; end
119 |         
120 |         % compute average precision
121 |         if obj.voc07
122 |           [~,~,ap]=obj.VOC07ap(conf,gt) ;
123 |         else
124 |           [~,~,ap]=obj.THUMOSeventclspr(conf,gt) ;
125 |         end
126 |         aps(c) = ap;
127 |       end
128 |     end
129 | 
130 |     function [rec,prec,ap]=VOC07ap(obj,conf,gt)
131 |       % 11-step AP: positives are gt>0, negatives gt<0, gt==0 is ignored.
132 |       [~,si]=sort(-conf);
133 |       tp=gt(si)>0;
134 |       fp=gt(si)<0;
135 |       
136 |       fp=cumsum(fp);
137 |       tp=cumsum(tp);
138 |       
139 |       rec=tp/sum(gt>0);
140 |       prec=tp./(fp+tp);
141 |       ap=0;
142 |       for t=0:0.1:1
143 |         % best precision among the points with recall of at least t
144 |         p=max(prec(rec>=t));
145 |         if isempty(p)
146 |           p=0;
147 |         end
148 |         ap=ap+p/11;
149 |       end
150 |     end
151 |     
152 |     function [rec,prec,ap]=THUMOSeventclspr(obj,conf,gt)
153 |       % AP as the mean precision at the positives: positives are gt==1,
154 |       % everything else counts as negative.
155 |       [~,sortind]=sort(-conf);
156 |       tp=gt(sortind)==1;
157 |       fp=gt(sortind)~=1;
158 |       npos=length(find(gt==1));
159 |       
160 |       % compute precision/recall
161 |       fp=cumsum(fp);
162 |       tp=cumsum(tp);
163 |       rec=tp/npos;
164 |       prec=tp./(fp+tp);
165 |       
166 |       % compute average precision
167 |       
168 |       ap=0;
169 |       tmp=gt(sortind)==1;
170 |       for i=1:length(conf)
171 |         if tmp(i)==1
172 |           ap=ap+prec(i);
173 |         end
174 |       end
175 |       ap=ap/npos;
176 |     end
177 |     
178 |     function obj = LayerAP(varargin)
179 |       obj.load(varargin) ;
180 |       obj.loss = 'average_precision' ;
181 |     end
182 |   end
183 | end
184 | 


--------------------------------------------------------------------------------
/core/wsddn_init.m:
--------------------------------------------------------------------------------
  1 | % --------------------------------------------------------------------
  2 | function net = wsddn_init(net,varargin)
  3 | % WSDDN_INIT  Convert a SimpleNN struct NET into the WSDDN DagNN model.
  4 | % @author: Hakan Bilen
  5 | % Name/value pairs in VARARGIN override the OPTS defaults listed below.
  6 | % NET needs layers named relu6, relu7, fc8, pool5, fc6 and relu5 (or relu5_3).
  7 | opts.addBiasSamples = 1 ; % insert the BiasSamples layer fed by 'boxScore'
  8 | opts.softmaxTempCls = 1 ; % 'temp' of the SoftMax2 layer after fc8C
  9 | opts.softmaxTempDet = 2 ; % 'temp' of the SoftMax2 layer after fc8R
 10 | opts.addLossSmooth  = 1 ; % append the LossTopBoxSmoothProb layer
 11 | opts.averageImage = [] ; % copied to net.meta.normalization.averageImage
 12 | opts.rgbVariance = [] ; % copied to net.meta.normalization.rgbVariance
 13 | opts.numClasses = 1 ; % number of output filters of fc8C and fc8R
 14 | opts.classNames = {''} ; % NOTE(review): parsed but never read below
 15 | 
 16 | opts = vl_argparse(opts, varargin) ;
 17 | 
 18 | % add drop-out layers (rate 0.5) right after relu6 and relu7
 19 | relu6p = find(cellfun(@(a) strcmp(a.name, 'relu6'), net.layers)==1);
 20 | relu7p = find(cellfun(@(a) strcmp(a.name, 'relu7'), net.layers)==1);
 21 | 
 22 | drop6 = struct('type', 'dropout', 'rate', 0.5, 'name','drop6');
 23 | drop7 = struct('type', 'dropout', 'rate', 0.5, 'name','drop7');
 24 | net.layers = [net.layers(1:relu6p) drop6 net.layers(relu6p+1:relu7p) drop7 net.layers(relu7p+1:end)];
 25 | 
 26 | 
 27 | % re-initialise fc8 with opts.numClasses output filters and rename it fc8C
 28 | fc8p = (cellfun(@(a) strcmp(a.name, 'fc8'), net.layers)==1);
 29 | net.layers{fc8p}.weights{1} = 0.01 * ...
 30 |   randn(1,1,size(net.layers{fc8p}.weights{1},3),opts.numClasses,'single');
 31 | 
 32 | net.layers{fc8p}.weights{2} = zeros(1, opts.numClasses, 'single');
 33 | net.layers{fc8p}.name = 'fc8C';
 34 | 
 35 | net.layers(end) = [] ; % drop the last layer of the incoming network
 36 | % add loss (this will be changed to binary log at the end)
 37 | % net.layers{end} = struct('name','loss', 'type','softmaxloss') ;
 38 | 
 39 | % add detection layer: a copy of fc8C with independently drawn weights
 40 | clsLayerPos  = (cellfun(@(a) strcmp(a.name, 'fc8C'), net.layers)==1);
 41 | detLayer = net.layers{clsLayerPos};
 42 | detLayer.weights{1} = 0.01 * randn(1,1,size(detLayer.weights{1},3),opts.numClasses,'single');
 43 | % detLayer.weights{1} = zeros(1,1,size(detLayer.weights{1},3),opts.numClasses,'single');
 44 | detLayer.weights{2} = zeros(1, opts.numClasses, 'single');
 45 | 
 46 | detLayer.name = 'fc8R';
 47 | 
 48 | % remove pool5 and append the detection layer as the last layer
 49 | pPool5 = find(cellfun(@(a) strcmp(a.name, 'pool5'), net.layers)==1);
 50 | net.layers = [net.layers([1:pPool5-1,pPool5+1:end]) detLayer];
 51 | 
 52 | % convert to dagnn
 53 | net = dagnn.DagNN.fromSimpleNN(net, 'canonicalNames', true) ;
 54 | 
 55 | % fix fc8R: give it the same input as fc8C so both read the same variable
 56 | pFc8R = (arrayfun(@(a) strcmp(a.name, 'fc8R'), net.layers)==1);
 57 | pFc8C = (arrayfun(@(a) strcmp(a.name, 'fc8C'), net.layers)==1);
 58 | 
 59 | net.layers(pFc8R).inputs = net.layers(pFc8C).inputs;
 60 | net.layers(pFc8R).inputIndexes = net.layers(pFc8C).inputIndexes;
 61 | 
 62 | % add spp
 63 | % locate the layer named 'relu5'; fall back to 'relu5_3' and flag vggdeep
 64 | pRelu5 = (arrayfun(@(a) strcmp(a.name, 'relu5'), net.layers)==1);
 65 | vggdeep = 0;
 66 | if all(pRelu5==0)
 67 |   pRelu5 = (arrayfun(@(a) strcmp(a.name, 'relu5_3'), net.layers)==1);
 68 |   assert(any(pRelu5==1));
 69 |   vggdeep = 1;
 70 | end
 71 | pFc6 = (arrayfun(@(a) strcmp(a.name, 'fc6'), net.layers)==1);
 72 | 
 73 | % add spp (offset1 = rf offset, offset2 = shrinking factor)
 74 | % offset1=18  offset2=9.5 levels=6 for vgg-f and vgg-m-1024
 75 | % offset1=8.5 offset2=9.5 levels=7 for vgg-very-deep-16
 76 | if vggdeep
 77 |   net.addLayer('SPP', dagnn.ROIPooling('subdivisions',[7 7],...
 78 |     'transform',1), ...
 79 |     {net.layers(pRelu5).outputs{1},'rois'}, ...
 80 |     'xSPP');
 81 | else
 82 |   net.addLayer('SPP', dagnn.ROIPooling('subdivisions',[6 6],...
 83 |     'transform',1), ...
 84 |     {net.layers(pRelu5).outputs{1},'rois'}, ...
 85 |     'xSPP');
 86 | end
 87 | 
 88 | % fc6 is rewired to read either the boosted variable or 'xSPP' directly
 89 | if opts.addBiasSamples
 90 |   % add boost: BiasSamples (scale 10) takes 'xSPP' and 'boxScore'
 91 |   net.addLayer('boostBox', ...
 92 |     dagnn.BiasSamples('scale',10), ...
 93 |     {'xSPP','boxScore'},'xBoostBox');
 94 |   net.layers(pFc6).inputs{1} = 'xBoostBox';
 95 | else
 96 |   net.layers(pFc6).inputs{1} = 'xSPP';
 97 | end
 98 | 
 99 | 
100 | % the fc8R/fc8C masks are rebuilt below, after the layers added above
101 | % add softmax layer for det
102 | pFc8R = (arrayfun(@(a) strcmp(a.name, 'fc8R'), net.layers)==1);
103 | net.addLayer('softmaxDet', ...
104 |   dagnn.SoftMax2('dim',4, 'temp',opts.softmaxTempDet), ...
105 |   net.layers(pFc8R).outputs{1},'xSoftmaxDet');
106 | 
107 | % add softmax layers for cls (the fc8C output variable is renamed first)
108 | pFc8C = (arrayfun(@(a) strcmp(a.name, 'fc8C'), net.layers)==1);
109 | net.layers(pFc8C).outputs{1} = 'xfc8C';
110 | 
111 | net.addLayer('softmaxCls', ...
112 |   dagnn.SoftMax2('dim',3, 'temp',opts.softmaxTempCls), ...
113 |   net.layers(pFc8C).outputs{1},'xSoftmaxCls');
114 | 
115 | % add times layer taking both softmax outputs
116 | net.addLayer('timesCR', ...
117 |   dagnn.Times(), ...
118 |   {'xSoftmaxCls','xSoftmaxDet'},'xTimes');
119 | 
120 | % add sum layer (over dimension 4), producing 'prediction'
121 | net.addLayer('sum', ...
122 |   dagnn.SumOverDim('dim',4), ...
123 |   'xTimes','prediction');
124 | 
125 | 
126 | 
127 | % add classification AP
128 | net.addLayer('mAP', dagnn.LayerAP('cls_index',1:opts.numClasses), ...
129 |   {'prediction','label', 'ids'}, 'mAP') ;
130 | 
131 | net.addLayer('loss', dagnn.Loss('loss','binarylog'), ...
132 |   {'prediction','label'}, 'objective') ;
133 | 
134 | 
135 | % no decay for bias (every 2nd parameter; assumes filter/bias pairs - TODO confirm)
136 | for i=2:2:numel(net.params)
137 |   net.params(i).weightDecay = 0;
138 | end
139 | 
140 | if opts.addLossSmooth
141 |   net.addLayer('LossTopBoxSmooth',dagnn.LossTopBoxSmoothProb('minOverlap',0.6),...
142 |     {net.layers(pFc8R).inputs{1},'boxes','xTimes','label'},...
143 |     'lossTopB');
144 | end
145 | meta = net.meta ; 
146 | net.meta = [] ; % start from an empty meta; only the fields below are set
147 | net.meta.normalization.interpolation = meta.normalization.interpolation ;
148 | net.meta.normalization.averageImage  = opts.averageImage ;
149 | net.meta.normalization.rgbVariance   = opts.rgbVariance ; % class list below is fixed; opts.classNames is not used
150 | net.meta.classes.name = {'aeroplane', 'bicycle', 'bird', ...
151 |     'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', ...
152 |     'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', ...
153 |     'sofa', 'train', 'tvmonitor', 'background' };


--------------------------------------------------------------------------------
/pascal/setup_voc07_ssw.m:
--------------------------------------------------------------------------------
  1 | function imdb = setup_voc07_ssw(varargin)
  2 | % SETUP_VOC07_SSW  Build the PASCAL VOC2007 imdb with selective search
  3 | % windows. Options (name/value): 'dataDir', 'proposalDir', 'loadTest'.
  4 | % Missing proposal files and VOC archives are downloaded on demand.
  5 | % Warning! boxes are in the format of ([y1 x1 y2 x2])
  6 | 
  7 | opts.dataDir = fullfile('data') ;
  8 | opts.proposalDir = fullfile(opts.dataDir,'SSW');
  9 | opts.loadTest = 1;
 10 | opts = vl_argparse(opts, varargin) ;
 11 | 
 12 | % -------------------------------------------------------------------------
 13 | %                                                 Load selective search win
 14 | % -------------------------------------------------------------------------
 15 | %% get selective search windows
 16 | files = {'SelectiveSearchVOC2007trainval.mat', ...
 17 |   'SelectiveSearchVOC2007test.mat'} ;
 18 | 
 19 | if ~isempty(opts.proposalDir)
 20 |   % an empty proposalDir disables proposals: nothing is created,
 21 |   % downloaded or loaded in that case
 22 |   if ~exist(opts.proposalDir, 'dir')
 23 |     mkdir(opts.proposalDir) ;
 24 |   end
 25 |   for i=1:numel(files)
 26 |     if ~exist(fullfile(opts.proposalDir, files{i}), 'file')
 27 |       url = sprintf('http://koen.me/research/downloads/%s',files{i}) ;
 28 |       fprintf('downloading %s\n', url) ;
 29 |       urlwrite(url,[opts.proposalDir filesep files{i}]);
 30 |     end
 31 |   end
 32 |   t1 = load([opts.proposalDir,filesep,files{1}]);
 33 |   if opts.loadTest
 34 |     t2 = load([opts.proposalDir,filesep,files{2}]);
 35 |     ssw.id = [str2double(t1.images);str2double(t2.images)]';
 36 |     ssw.boxes = cat(2,t1.boxes,t2.boxes);
 37 |   else
 38 |     ssw.id = str2double(t1.images)';
 39 |     ssw.boxes = t1.boxes;
 40 |   end
 41 | 
 42 |   [~,si] = sort(ssw.id);
 43 |   ssw.id = ssw.id(si);
 44 |   ssw.boxes = ssw.boxes(si);
 45 | end
 46 | 
 47 | % -------------------------------------------------------------------------
 48 | %                                                  Load categories metadata
 49 | % -------------------------------------------------------------------------
 50 | cats = {'aeroplane','bicycle','bird','boat','bottle','bus','car',...
 51 |   'cat','chair','cow','diningtable','dog','horse','motorbike','person',...
 52 |   'pottedplant','sheep','sofa','train','tvmonitor'};
 53 | 
 54 | if ~exist(opts.dataDir,'dir')
 55 |   error('wrong data folder!');
 56 | end
 57 | 
 58 | % When VOCdevkit is absent every archive is extracted, whether it was
 59 | % just downloaded or was already present in dataDir (e.g. copied by hand
 60 | % or left over from an interrupted run).
 61 | 
 62 | % Download VOC Devkit and data
 63 | if ~exist(fullfile(opts.dataDir,'VOCdevkit'),'dir')
 64 |   files = {'VOCtest_06-Nov-2007.tar',...
 65 |            'VOCtrainval_06-Nov-2007.tar',...
 66 |            'VOCdevkit_08-Jun-2007.tar'} ;
 67 |   for i=1:numel(files)
 68 |     outPath = fullfile(opts.dataDir,files{i}) ;
 69 |     if ~exist(outPath, 'file')
 70 |       url = sprintf('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/%s',files{i}) ;
 71 |       fprintf('Downloading %s to %s\n', url, outPath) ;
 72 |       urlwrite(url,outPath) ;
 73 |     end
 74 |     untar(outPath,opts.dataDir);
 75 |   end
 76 | end
 77 | addpath(fullfile(opts.dataDir, 'VOCdevkit', 'VOCcode'));
 78 | 
 79 | traindata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','train.txt'));
 80 | valdata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','val.txt'));
 81 | testdata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','test.txt'));
 82 | 
 83 | assert(numel(traindata)==2501);
 84 | assert(numel(valdata)==2510);
 85 | assert(numel(testdata)==4952);
 86 | 
 87 | imdb.classes.name = cats ;
 88 | imdb.classes.description = cats ;
 89 | imdb.imageDir = fullfile(opts.dataDir, fullfile('VOCdevkit','VOC2007','JPEGImages')) ;
 90 | 
 91 | % -------------------------------------------------------------------------
 92 | %                                                           Training images
 93 | % -------------------------------------------------------------------------% 
 94 | names = cell(1,numel(traindata));
 95 | labels = zeros(numel(traindata),numel(cats));
 96 | 
 97 | 
 98 | % load image names
 99 | for t=1:numel(traindata)
100 |   names{t} = sprintf('%06d.jpg',traindata(t));
101 | %   data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t}));
102 | end
103 | 
104 | % load binary labels (second column of each <class>_train.txt)
105 | for c=1:numel(cats)
106 |   t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_train.txt']));
107 |   labels(:,c) = t(:,2);
108 | end
109 | 
110 | imdb.images.id = traindata';
111 | imdb.images.name = names ;
112 | imdb.images.set = ones(1, numel(names)) ;
113 | imdb.images.label = labels' ;
114 | % imdb.images.data = data;
115 | 
116 | % -------------------------------------------------------------------------
117 | %                                                         Validation images
118 | % -------------------------------------------------------------------------
119 | 
120 | names = cell(1,numel(valdata));
121 | labels = zeros(numel(valdata),numel(cats));
122 | % data = cell(1,numel(valdata));
123 | 
124 | % load image names
125 | for t=1:numel(valdata)
126 |   names{t} = sprintf('%06d.jpg',valdata(t));
127 | %   data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t}));
128 | end
129 | 
130 | % load binary labels (second column of each <class>_val.txt)
131 | for c=1:numel(cats)
132 |   t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_val.txt']));
133 |   labels(:,c) = t(:,2);
134 | end
135 | 
136 | 
137 | imdb.images.id = horzcat(imdb.images.id, valdata') ;
138 | imdb.images.name = horzcat(imdb.images.name, names) ;
139 | imdb.images.set = horzcat(imdb.images.set, 2*ones(1,numel(names))) ;
140 | imdb.images.label = horzcat(imdb.images.label, labels') ;
141 | % imdb.images.data = horzcat(imdb.images.data, data) ;
142 | 
143 | % % -------------------------------------------------------------------------
144 | % %                                                               Test images
145 | % % -------------------------------------------------------------------------
146 | % 
147 | %
148 | if opts.loadTest
149 |   names = cell(1,numel(testdata));
150 |   labels = zeros(numel(testdata),numel(cats));
151 |   % data = cell(1,numel(testdata));
152 | 
153 |   % load image names
154 |   for t=1:numel(testdata)
155 |     names{t} = sprintf('%06d.jpg',testdata(t));
156 |   %   data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t}));
157 |   end
158 | 
159 |   % load binary labels (second column of each <class>_test.txt)
160 |   for c=1:numel(cats)
161 |     t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_test.txt']));
162 |     labels(:,c) = t(:,2);
163 |   end
164 | 
165 |   imdb.images.id = horzcat(imdb.images.id, testdata') ;
166 |   imdb.images.name = horzcat(imdb.images.name, names) ;
167 |   imdb.images.set = horzcat(imdb.images.set, 3 * ones(1,numel(names))) ;
168 |   imdb.images.label = horzcat(imdb.images.label, labels') ;
169 |   % imdb.images.data = horzcat(imdb.images.data, data) ;
170 | end
171 | % -------------------------------------------------------------------------
172 | %                                                            Postprocessing
173 | % -------------------------------------------------------------------------
174 | [~,sorti] = sort(imdb.images.id);
175 | 
176 | % order every per-image field by image id (sets: 1 train, 2 val, 3 test)
177 | imdb.images.id = imdb.images.id(sorti);
178 | imdb.images.name = imdb.images.name(sorti) ;
179 | imdb.images.set = imdb.images.set(sorti) ;
180 | imdb.images.label = single(imdb.images.label(:,sorti)) ;
181 | imdb.images.size = zeros(numel(imdb.images.name),2);
182 | 
183 | if ~isempty(opts.proposalDir)
184 |   imdb.images.boxes = ssw.boxes;
185 |   assert(all(ssw.id==imdb.images.id));
186 | end
187 | 
188 | % this is zero as scores of selective search windows are not much
189 | % informative (image sizes are only filled in when proposals are loaded)
190 | if ~isempty(opts.proposalDir)
191 |   imdb.images.boxScores = cell(size(imdb.images.boxes));
192 |   for i=1:numel(imdb.images.boxes)
193 |     imdb.images.boxes{i} = int16(imdb.images.boxes{i});
194 |     imdb.images.boxScores{i} = zeros(size(imdb.images.boxes{i},1),1,'single');
195 |     imf = imfinfo(fullfile(imdb.imageDir,imdb.images.name{i}));
196 |     imdb.images.size(i,:) = [imf.Height,imf.Width];
197 |   end
198 | end
199 | end
200 | 


--------------------------------------------------------------------------------
/core/wsddn_demo.m:
--------------------------------------------------------------------------------
  1 | function wsddn_demo(varargin)
  2 | % WSDDN_DEMO  Run a trained WSDDN model on two imdb images and show, for
  3 | % every class, the best box left after score thresholding and NMS.
  4 | % @author: Hakan Bilen
  5 | opts.dataDir = fullfile(vl_rootnn, 'data') ;
  6 | opts.expDir = fullfile(vl_rootnn, 'exp') ;
  7 | opts.imdbPath = fullfile(vl_rootnn, 'data', 'imdbs', 'imdb-eb.mat');
  8 | opts.modelPath = fullfile(vl_rootnn, 'exp', 'net.mat') ;
  9 | opts.proposalType = 'eb' ; % NOTE: not read below, setup_voc07_eb is always used
 10 | opts.proposalDir = fullfile(vl_rootnn, 'data','EdgeBoxes') ;
 11 | 
 12 | % if you have limited gpu memory (<6gb), you can change the next 2 params
 13 | opts.maxNumProposals = inf; % limit number
 14 | % opts.imageScales = [480,576,688,864,1200]; % scales
 15 | opts.imageScales = [480,576,688,864,1200]; % scales
 16 | 
 17 | opts.gpu = [] ;
 18 | opts.train.prefetch = true ;
 19 | 
 20 | opts.numFetchThreads = 1 ;
 21 | opts = vl_argparse(opts, varargin) ;
 22 | 
 23 | display(opts);
 24 | if ~exist(fullfile(opts.dataDir,'VOCdevkit','VOCcode','VOCinit.m'),'file')
 25 |   error('VOCdevkit is not installed');
 26 | end
 27 | addpath(fullfile(opts.dataDir,'VOCdevkit','VOCcode'));
 28 | opts.train.expDir = opts.expDir ;
 29 | % -------------------------------------------------------------------------
 30 | %                                                    Network initialization
 31 | % -------------------------------------------------------------------------
 32 | modelFolder = fileparts(opts.modelPath) ; % created on demand before downloading
 33 | if ~exist(opts.modelPath, 'file')
 34 |   if ~isempty(modelFolder) && ~exist(modelFolder, 'dir'), mkdir(modelFolder) ; end
 35 |   url = 'http://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/wsddn.mat' ;
 36 |   fprintf('Downloading %s to %s\n', url, opts.modelPath) ;
 37 |   urlwrite(url, opts.modelPath) ;
 38 | end
 39 | net = load(opts.modelPath);
 40 | net = dagnn.DagNN.loadobj(net) ;
 41 | 
 42 | net.mode = 'test' ;
 43 | if ~isempty(opts.gpu)
 44 |   gpuDevice(opts.gpu) ;
 45 |   net.move('gpu') ;
 46 | end
 47 | 
 48 | if isfield(net,'normalization')
 49 |   bopts = net.normalization;
 50 | else
 51 |   bopts = net.meta.normalization;
 52 | end
 53 | 
 54 | bopts.rgbVariance = [] ;
 55 | bopts.interpolation = net.meta.normalization.interpolation;
 56 | bopts.jitterBrightness = 0 ;
 57 | bopts.imageScales = opts.imageScales;
 58 | bopts.numThreads = opts.numFetchThreads;
 59 | bs = find(arrayfun(@(a) isa(a.block, 'dagnn.BiasSamples'), net.layers)==1);
 60 | bopts.addBiasSamples = ~isempty(bs) ;
 61 | bopts.vgg16 = any(arrayfun(@(a) strcmp(a.name, 'relu5_1'), net.layers)==1) ;
 62 | 
 63 | % -------------------------------------------------------------------------
 64 | %                                                   Database initialization
 65 | % -------------------------------------------------------------------------
 66 | fprintf('loading imdb...');
 67 | if exist(opts.imdbPath,'file')==2
 68 |   imdb = load(opts.imdbPath) ;
 69 | else
 70 |   imdb = setup_voc07_eb('dataDir',opts.dataDir, ...
 71 |     'proposalDir',opts.proposalDir,'loadTest',1);
 72 |   imdbFolder = fileparts(opts.imdbPath) ; % make sure the cache folder exists
 73 |   if ~isempty(imdbFolder) && ~exist(imdbFolder, 'dir'), mkdir(imdbFolder) ; end
 74 |   save(opts.imdbPath,'-struct', 'imdb', '-v7.3');
 75 | end
 76 | fprintf('done\n');
 77 | minSize = 20;
 78 | imdb = fixBBoxes(imdb, minSize, opts.maxNumProposals);
 79 | 
 80 | % --------------------------------------------------------------------
 81 | %                                                               Detect
 82 | % --------------------------------------------------------------------
 83 | % query images (indices into imdb.images)
 84 | testIdx = [12,15];
 85 | 
 86 | VOCinit;
 87 | cats = VOCopts.classes;
 88 | ovTh = 0.4; % nms threshold
 89 | scTh = 0.1; % det confidence threshold
 90 | 
 91 | bopts.useGpu = numel(opts.gpu) >  0 ;
 92 | 
 93 | detLayer = find(arrayfun(@(a) strcmp(a.name, 'xTimes'), net.vars)==1);
 94 | % keep the 'xTimes' value after evaluation so it can be read back below
 95 | net.vars(detLayer(1)).precious = 1;
 96 | % run detection
 97 | rcolors = randi(255,3,numel(cats));
 98 | for t=1:numel(testIdx)
 99 |   batch = testIdx(t);  
100 |   
101 |   scoret = [];
102 |   for s=1:numel(opts.imageScales)
103 |     for f=1:2 % add flips
104 |       inputs = getBatch(bopts, imdb, batch, opts.imageScales(s), f-1 );
105 |       net.eval(inputs) ;
106 |   
107 |       if isempty(scoret)
108 |         scoret = squeeze(gather(net.vars(detLayer).value));
109 |       else
110 |         scoret = scoret + squeeze(gather(net.vars(detLayer).value));
111 |       end
112 |     end
113 |   end
114 |   
115 |   % divide by number of scales and flips
116 |   scoret = scoret / (2 * numel(opts.imageScales));
117 |   im = imread(fullfile(imdb.imageDir,imdb.images.name{testIdx(t)}));
118 |   
119 |   for cls = 1:numel(cats)
120 |     scores = scoret;
121 |     boxes  = double(imdb.images.boxes{testIdx(t)});
122 |     boxesSc = [boxes,scores(cls,:)'];
123 |     boxesSc = boxesSc(boxesSc(:,5)>scTh,:);
124 |     if isempty(boxesSc), continue; end;
125 |     % after NMS only the first surviving box of the class is drawn
126 |     pick = nms(boxesSc, ovTh);
127 |     boxesSc = boxesSc(pick,:);
128 |     im = bbox_draw(im,boxesSc(1,1:4),rcolors(:,cls),2);
129 |     fprintf('%s %.2f\n',cats{cls},boxesSc(1,5));
130 |   end
131 |   imshow(im);
132 |   pause() ;
133 |   if exist('zs_dispFig', 'file'), zs_dispFig ; end
134 | end
135 | 
136 | 
137 | 
138 | % --------------------------------------------------------------------
139 | function inputs = getBatch(opts, imdb, batch, scale, flip)
140 | % --------------------------------------------------------------------
141 | % Loads image BATCH at the given SCALE / FLIP through wsddn_get_batch and
142 | % returns the name/value cell of network inputs: 'input', 'rois' and,
143 | % when enabled and available, 'boxScore'.
144 | 
145 | opts.scale = scale;
146 | opts.flip = flip;
147 | deepNet = opts.vgg16 ;
148 | opts = rmfield(opts,'vgg16') ; % removed before opts is handed to the loader
149 | opts.prefetch = (nargout == 0);
150 | 
151 | imagePaths = strcat([imdb.imageDir filesep], imdb.images.name(batch)) ;
152 | [im,rois] = wsddn_get_batch(imagePaths, imdb, batch, opts);
153 | 
154 | rois = single(rois');
155 | if opts.useGpu > 0
156 |   im = gpuArray(im) ;
157 |   rois = gpuArray(rois) ;
158 | end
159 | rois = rois([1 3 2 5 4],:) ; % swap rows 2<->3 and 4<->5
160 | 
161 | % map the box corners with a stride of 16 and two offsets; only the
162 | % first offset depends on the network type
163 | stride = 16 ;
164 | shrink = 9.5 ;
165 | if deepNet
166 |   rfOff = 8.5 ;
167 | else
168 |   rfOff = 18 ;
169 | end
170 | lowCorner  = floor((rois(2:3,:) - rfOff + shrink) / stride + 0.5) + 1 ;
171 | highCorner = ceil((rois(4:5,:) - rfOff - shrink) / stride - 0.5) + 1 ;
172 | rois = [ rois(1,:); lowCorner; highCorner ] ;
173 | 
174 | inputs = {'input', im, 'rois', rois} ;
175 | 
176 | % box scores are appended as a 1x1x1xN array
177 | wantScores = opts.addBiasSamples && isfield(imdb.images,'boxScores') ;
178 | if wantScores
179 |   nScores = numel(imdb.images.boxScores{batch}) ;
180 |   boxScore = reshape(imdb.images.boxScores{batch},[1 1 1 nScores]);
181 |   inputs = [inputs, {'boxScore', boxScore}] ;
182 | end
183 | 
184 | 
185 | 
186 | % -------------------------------------------------------------------------
187 | function imdb = fixBBoxes(imdb, minSize, maxNum)
188 | % -------------------------------------------------------------------------
189 | % Per image: drop boxes where column 3 (4) is less than column 1 (2) plus
190 | % MINSIZE, drop duplicate rows (first occurrence kept, order preserved) and
191 | % keep at most MAXNUM boxes unless the image is in set 3. If present,
192 | % imdb.images.boxScores gets the same selection so scores stay aligned.
193 | for i=1:numel(imdb.images.name)
194 |   bbox = imdb.images.boxes{i};
195 |   % remove small bbox
196 |   isGood = (bbox(:,3)>=bbox(:,1)+minSize) & (bbox(:,4)>=bbox(:,2)+minSize);
197 |   bbox = bbox(isGood,:);
198 |   % remove duplicate ones
199 |   [~, uniqueIdx] = unique(bbox, 'rows', 'first');
200 |   uniqueIdx = sort(uniqueIdx);
201 |   bbox = bbox(uniqueIdx,:);
202 |   % limit number for training
203 |   nB = size(bbox,1);
204 |   if imdb.images.set(i)~=3
205 |     nB = min(nB,maxNum);
206 |   end
207 |   if isfield(imdb.images,'boxScores')
208 |     % uniqueIdx indexes the size-filtered list, so filter the scores first
209 |     scores = imdb.images.boxScores{i}(isGood);
210 |     scores = scores(uniqueIdx);
211 |     imdb.images.boxScores{i} = scores(1:nB);
212 |   end
213 |   imdb.images.boxes{i} = bbox(1:nB,:);
214 | end
215 | 
216 | % -------------------------------------------------------------------------
217 | function im = bbox_draw(im,roi,color,t)
218 | % BBOX_DRAW  Paint the outline of ROI = [r1 c1 r2 c2] into image IM. COLOR
219 | % has one value per channel; every edge reaches T pixels to either side.
220 | [nRows,nCols,nChan] = size(im);
221 | assert(nChan == numel(color));
222 | if any(roi(:,1)>nRows) || any(roi(:,3)>nRows) || any(roi(:,2)>nCols) || any(roi(:,4)>nCols)
223 |   error('Wrong bounding box coord!\n');
224 | end
225 | topRows = max(roi(1)-t,1):min(roi(1)+t,nRows);
226 | botRows = max(roi(3)-t,1):min(roi(3)+t,nRows);
227 | lftCols = max(roi(2)-t,1):min(roi(2)+t,nCols);
228 | rgtCols = max(roi(4)-t,1):min(roi(4)+t,nCols);
229 | allRows = max(roi(1)-t,1):min(roi(3)+t,nRows);
230 | allCols = max(roi(2)-t,1):min(roi(4)+t,nCols);
231 | for ch=1:nChan
232 |   im(topRows,allCols,ch) = color(ch); im(botRows,allCols,ch) = color(ch);
233 |   im(allRows,lftCols,ch) = color(ch); im(allRows,rgtCols,ch) = color(ch);
234 | end
235 | 


--------------------------------------------------------------------------------
/pascal/setup_voc07_eb.m:
--------------------------------------------------------------------------------
  1 | function imdb = setup_voc07_eb(varargin)
  2 | % SETUP_VOC07_EB  Build the PASCAL VOC2007 imdb with edge box proposals.
  3 | % Options (name/value): 'dataDir', 'proposalDir', 'loadTest'.
  4 | % Missing proposal files and VOC archives are downloaded on demand.
  5 | % Warning! boxes are in the format of ([y1 x1 y2 x2])
  6 | 
  7 | opts.dataDir = fullfile('data') ;
  8 | opts.proposalDir = fullfile(opts.dataDir,'EB');
  9 | opts.loadTest = 1;
 10 | opts = vl_argparse(opts, varargin) ;
 11 | 
 12 | % -------------------------------------------------------------------------
 13 | %                                                   Load edge box proposals
 14 | % -------------------------------------------------------------------------
 15 | %% Get edge box proposals
 16 | files = {'EdgeBoxesVOC2007trainval.mat', ...
 17 |   'EdgeBoxesVOC2007test.mat'} ;
 18 | 
 19 | if ~isempty(opts.proposalDir)
 20 |   % an empty proposalDir disables proposals: nothing is created,
 21 |   % downloaded or loaded in that case
 22 |   if ~exist(opts.proposalDir, 'dir')
 23 |     mkdir(opts.proposalDir) ;
 24 |   end
 25 |   for i=1:numel(files)
 26 |     outPath = fullfile(opts.proposalDir, files{i}) ;
 27 |     if ~exist(outPath, 'file')
 28 |       url = sprintf('http://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/%s',files{i}) ;
 29 |       fprintf('Downloading %s to %s\n', url, outPath) ;
 30 |       urlwrite(url,outPath) ;
 31 |     end
 32 |   end
 33 | 
 34 |   t1 = load([opts.proposalDir,filesep,files{1}]);
 35 |   if opts.loadTest
 36 |     t2 = load([opts.proposalDir,filesep,files{2}]);
 37 |     ssw.id = [str2double(t1.images) str2double(t2.images)];
 38 |     ssw.boxes = cat(2,t1.boxes,t2.boxes);
 39 |     ssw.boxScores = cat(2,t1.boxScores,t2.boxScores);
 40 |   else
 41 |     ssw.id = str2double(t1.images);
 42 |     ssw.boxes = t1.boxes;
 43 |     ssw.boxScores = t1.boxScores;
 44 |   end
 45 |   
 46 |   [~,si] = sort(ssw.id);
 47 |   ssw.id = ssw.id(si);
 48 |   ssw.boxes = ssw.boxes(si);
 49 |   ssw.boxScores = ssw.boxScores(si);
 50 | end
 51 | 
 52 | % -------------------------------------------------------------------------
 53 | %                                                  Load categories metadata
 54 | % -------------------------------------------------------------------------
 55 | cats = {'aeroplane','bicycle','bird','boat','bottle','bus','car',...
 56 |   'cat','chair','cow','diningtable','dog','horse','motorbike','person',...
 57 |   'pottedplant','sheep','sofa','train','tvmonitor'};
 58 | 
 59 | if ~exist(opts.dataDir,'dir')
 60 |   error('wrong data folder!');
 61 | end
 62 | 
 63 | % Download VOC Devkit and data; archives already on disk are extracted too
 64 | if ~exist(fullfile(opts.dataDir,'VOCdevkit'),'dir')
 65 |   files = {'VOCtest_06-Nov-2007.tar',...
 66 |            'VOCtrainval_06-Nov-2007.tar',...
 67 |            'VOCdevkit_08-Jun-2007.tar'} ;
 68 |   for i=1:numel(files)
 69 |     outPath = fullfile(opts.dataDir,files{i}) ;
 70 |     if ~exist(outPath, 'file')
 71 |       url = sprintf('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/%s',files{i}) ;
 72 |       fprintf('Downloading %s to %s\n', url, outPath) ;
 73 |       urlwrite(url,outPath) ;
 74 |     end
 75 |     untar(outPath,opts.dataDir);
 76 |   end
 77 | end
 78 | addpath(fullfile(opts.dataDir, 'VOCdevkit', 'VOCcode'));
 79 | 
 80 | traindata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','train.txt'));
 81 | valdata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','val.txt'));
 82 | testdata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','test.txt'));
 83 | 
 84 | assert(numel(traindata)==2501);
 85 | assert(numel(valdata)==2510);
 86 | assert(numel(testdata)==4952);
 87 | 
 88 | imdb.classes.name = cats ;
 89 | imdb.classes.description = cats ;
 90 | imdb.imageDir = fullfile(opts.dataDir, fullfile('VOCdevkit','VOC2007','JPEGImages')) ;
 91 | 
 92 | % -------------------------------------------------------------------------
 93 | %                                                           Training images
 94 | % -------------------------------------------------------------------------%
 95 | names = cell(1,numel(traindata));
 96 | labels = zeros(numel(traindata),numel(cats));
 97 | 
 98 | 
 99 | % load image names
100 | for t=1:numel(traindata)
101 |   names{t} = sprintf('%06d.jpg',traindata(t));
102 |   %   data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t}));
103 | end
104 | 
105 | % load binary labels (second column of each <class>_train.txt)
106 | for c=1:numel(cats)
107 |   t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_train.txt']));
108 |   labels(:,c) = t(:,2);
109 | end
110 | 
111 | imdb.images.id = traindata';
112 | imdb.images.name = names ;
113 | imdb.images.set = ones(1, numel(names)) ;
114 | imdb.images.label = labels' ;
115 | % imdb.images.data = data;
116 | 
117 | % -------------------------------------------------------------------------
118 | %                                                         Validation images
119 | % -------------------------------------------------------------------------
120 | 
121 | names = cell(1,numel(valdata));
122 | labels = zeros(numel(valdata),numel(cats));
123 | % data = cell(1,numel(valdata));
124 | 
125 | % load image names
126 | for t=1:numel(valdata)
127 |   names{t} = sprintf('%06d.jpg',valdata(t));
128 |   %   data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t}));
129 | end
130 | 
131 | % load binary labels (second column of each <class>_val.txt)
132 | for c=1:numel(cats)
133 |   t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_val.txt']));
134 |   labels(:,c) = t(:,2);
135 | end
136 | 
137 | 
138 | imdb.images.id = horzcat(imdb.images.id, valdata') ;
139 | imdb.images.name = horzcat(imdb.images.name, names) ;
140 | imdb.images.set = horzcat(imdb.images.set, 2*ones(1,numel(names))) ;
141 | imdb.images.label = horzcat(imdb.images.label, labels') ;
142 | % imdb.images.data = horzcat(imdb.images.data, data) ;
143 | 
144 | % % -------------------------------------------------------------------------
145 | % %                                                               Test images
146 | % % -------------------------------------------------------------------------
147 | %
148 | %
149 | if opts.loadTest
150 |   names = cell(1,numel(testdata));
151 |   labels = zeros(numel(testdata),numel(cats));
152 |   % data = cell(1,numel(testdata));
153 |   
154 |   % load image names
155 |   for t=1:numel(testdata)
156 |     names{t} = sprintf('%06d.jpg',testdata(t));
157 |     %   data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t}));
158 |   end
159 |   
160 |   % load binary labels (second column of each <class>_test.txt)
161 |   for c=1:numel(cats)
162 |     t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_test.txt']));
163 |     labels(:,c) = t(:,2);
164 |   end
165 |   
166 |   imdb.images.id = horzcat(imdb.images.id, testdata') ;
167 |   imdb.images.name = horzcat(imdb.images.name, names) ;
168 |   imdb.images.set = horzcat(imdb.images.set, 3 * ones(1,numel(names))) ;
169 |   imdb.images.label = horzcat(imdb.images.label, labels') ;
170 |   % imdb.images.data = horzcat(imdb.images.data, data) ;
171 | end
172 | % -------------------------------------------------------------------------
173 | %                                                            Postprocessing
174 | % -------------------------------------------------------------------------
175 | [~,sorti] = sort(imdb.images.id);
176 | 
177 | % order every per-image field by image id (sets: 1 train, 2 val, 3 test)
178 | imdb.images.id = imdb.images.id(sorti);
179 | imdb.images.name = imdb.images.name(sorti) ;
180 | imdb.images.set = imdb.images.set(sorti) ;
181 | imdb.images.label = single(imdb.images.label(:,sorti)) ;
182 | imdb.images.size = zeros(numel(imdb.images.name),2);
183 | 
184 | if ~isempty(opts.proposalDir)
185 |   imdb.images.boxes = ssw.boxes;
186 |   imdb.images.boxScores = ssw.boxScores;
187 |   assert(all(ssw.id==imdb.images.id));
188 | end
189 | 
190 | % cast boxes and scores, read the image sizes and check that every box
191 | % lies inside its image (sizes are only filled when proposals are loaded)
192 | if ~isempty(opts.proposalDir)
193 |   % imdb.images.boxScores = cell(size(imdb.images.boxes));
194 |   for i=1:numel(imdb.images.boxes)
195 |     imdb.images.boxes{i} = int16(imdb.images.boxes{i});
196 |     imdb.images.boxScores{i} = single(imdb.images.boxScores{i});
197 |     
198 |     imf = imfinfo(fullfile(imdb.imageDir,imdb.images.name{i}));
199 |     imdb.images.size(i,:) = [imf.Height,imf.Width];
200 |     % column-wise maximum; the explicit dimension keeps a single box a 1x4 row
201 |     maxBoxes = max(imdb.images.boxes{i},[],1);
202 |     if imdb.images.size(i,1)< max(maxBoxes([1,3]))
203 |       error('Wrong box coordinates');
204 |     end
205 |     if imdb.images.size(i,2)< max(maxBoxes([2,4]))
206 |       error('Wrong box coordinates');
207 |     end
208 |     
209 |   end
210 | end
211 | end
212 | 


--------------------------------------------------------------------------------
/core/wsddn_train.m:
--------------------------------------------------------------------------------
  1 | function [net, info] = wsddn_train(varargin)
  2 | % @author: Hakan Bilen
  3 | % wsddn_train: training script for WSDDN
  4 | % Options are name/value pairs parsed by vl_argparse over the defaults below.
  5 | opts.dataDir = fullfile(vl_rootnn, 'data') ;
  6 | opts.expDir = fullfile(vl_rootnn, 'exp') ;
  7 | opts.imdbPath = fullfile(vl_rootnn, 'data', 'imdbs', 'imdb-eb.mat');
  8 | opts.modelPath = fullfile(vl_rootnn, 'models', 'imagenet-vgg-f.mat') ;
  9 | opts.proposalType = 'eb' ;
 10 | opts.proposalDir = fullfile(vl_rootnn, 'data', 'EdgeBoxes') ;
 11 | 
 12 | % model and batch options, forwarded below through nopts (wsddn_init) and/or bopts (getBatch)
 13 | opts.addBiasSamples = 1; % add Box Scores
 14 | opts.addLossSmooth  = 1; % add Spatial Regulariser
 15 | opts.softmaxTempCls = 1; % softmax temp for cls
 16 | opts.softmaxTempDet = 2; % softmax temp for det
 17 | opts.maxScale = 2000 ;
 18 | 
 19 | % if you have limited gpu memory (<6gb), you can change the next 2 params
 20 | opts.maxNumProposals = inf; % limit number (eg 1500)
 21 | opts.imageScales = [480,576,688,864,1200]; % scales
 22 | opts.minBoxSize = 20; % minimum bounding box size
 23 | opts.train.gpus = [] ;
 24 | opts.train.continue = true ;
 25 | opts.train.prefetch = true ;
 26 | opts.train.learningRate = 1e-5 * [ones(1,10) 0.1*ones(1,10)] ;
 27 | opts.train.weightDecay = 0.0005;
 28 | opts.train.numEpochs = 20;
 29 | opts.train.derOutputs = {'objective', 1} ;
 30 | 
 31 | opts.numFetchThreads = 1 ;
 32 | opts = vl_argparse(opts, varargin) ;
 33 | 
 34 | display(opts);
 35 | % fixed after parsing: batch size 1, numEpochs = length of the learning-rate schedule
 36 | opts.train.batchSize = 1 ;
 37 | opts.train.expDir = opts.expDir ;
 38 | opts.train.numEpochs = numel(opts.train.learningRate) ;
 39 | %% -------------------------------------------------------------------------
 40 | %                                                   Database initialization
 41 | % -------------------------------------------------------------------------
 42 | fprintf('loading imdb...');
 43 | if exist(opts.imdbPath,'file')==2
 44 |   imdb = load(opts.imdbPath) ;
 45 | else
 46 |   if strcmp(opts.proposalType,'ssw')
 47 |     imdb = setup_voc07_ssw('dataDir',opts.dataDir, ...
 48 |       'proposalDir',opts.proposalDir,'loadTest',1);
 49 |   elseif strcmp(opts.proposalType,'eb')
 50 |     imdb = setup_voc07_eb('dataDir',opts.dataDir, ...
 51 |       'proposalDir',opts.proposalDir,'loadTest',1);
 52 |   else
 53 |     error('undefined proposal type %s\n',opts.proposalType)
 54 |   end
 55 |   % store the freshly built imdb at opts.imdbPath so later runs can load it
 56 |   imdbFolder = fileparts(opts.imdbPath);
 57 |   
 58 |   if ~exist(imdbFolder,'dir')
 59 |     mkdir(imdbFolder);
 60 |   end
 61 |   save(opts.imdbPath,'-struct', 'imdb', '-v7.3');
 62 | end
 63 | 
 64 | fprintf('done\n');
 65 | % filter the proposals of every image (see fixBBoxes at the end of this file)
 66 | imdb = fixBBoxes(imdb, opts.minBoxSize, opts.maxNumProposals);
 67 | 
 68 | % use train + val for training
 69 | imdb.images.set(imdb.images.set == 2) = 1;
 70 | trainIdx = find(imdb.images.set == 1);
 71 | 
 72 | %% Compute image statistics (mean, RGB covariances, etc.)
 73 | imageStatsPath = fullfile(opts.dataDir, 'imageStats.mat') ;
 74 | if exist(imageStatsPath,'file')
 75 |   load(imageStatsPath, 'averageImage', 'rgbMean', 'rgbCovariance') ;
 76 | else
 77 |   % computed from the set-1 images only, then saved to imageStatsPath
 78 |   images = imdb.images.name(imdb.images.set == 1) ;
 79 |   images = strcat([imdb.imageDir filesep],images) ;
 80 |   
 81 |   [averageImage, rgbMean, rgbCovariance] = getImageStats(images, ...
 82 |     'imageSize', [256 256], ...
 83 |     'numThreads', opts.numFetchThreads, ...
 84 |     'gpus', opts.train.gpus) ;
 85 |   save(imageStatsPath, 'averageImage', 'rgbMean', 'rgbCovariance') ;
 86 | end
 87 | [v,d] = eig(rgbCovariance) ;
 88 | rgbDeviation = v*sqrt(d) ;
 89 | clear v d ;
 90 | % NOTE: rgbDeviation is only referenced by the commented-out rgbVariance line below
 91 | 
 92 | %% ------------------------------------------------------------------------
 93 | %                                                    Network initialization
 94 | % -------------------------------------------------------------------------
 95 | nopts.addBiasSamples = opts.addBiasSamples; % add Box Scores (only with Edge Boxes)
 96 | nopts.addLossSmooth  = opts.addLossSmooth; % add Spatial Regulariser
 97 | nopts.softmaxTempCls = opts.softmaxTempCls; % softmax temp for cls
 98 | nopts.softmaxTempDet = opts.softmaxTempDet; % softmax temp for det
 99 | 
100 | nopts.averageImage = reshape(rgbMean,[1 1 3]) ;
101 | % nopts.rgbVariance = 0.1 * rgbDeviation ;
102 | nopts.rgbVariance = [] ;
103 | nopts.numClasses = numel(imdb.classes.name) ;
104 | nopts.classNames = imdb.classes.name ;
105 | % download the pretrained model from www.vlfeat.org when it is missing locally
106 | if ~exist(opts.modelPath,'file')
107 |   [pname,fname,ext]  = fileparts(opts.modelPath) ;
108 |   if ~exist(pname,'dir')
109 |     mkdir(pname) ;
110 |   end
111 |   fprintf('Downloading %s to %s\n', [fname ext], pname) ;
112 |   urlwrite(sprintf('http://www.vlfeat.org/matconvnet/models/%s',[fname ext]),...
113 |     opts.modelPath) ;
114 | end
115 | 
116 | net = load(opts.modelPath);
117 | net = wsddn_init(net,nopts);
118 | % with the spatial regulariser a second output, 'lossTopB', is added with weight 1e-4
119 | if nopts.addLossSmooth
120 |   opts.train.derOutputs = {'objective', 1, 'lossTopB', 1e-4} ;
121 | end
122 | 
123 | 
124 | if ~exist(opts.expDir,'dir')
125 |   mkdir(opts.expDir) ;
126 | end
127 | 
128 | %% -------------------------------------------------------------------------
129 | %                                                   Batch loader options
130 | % -------------------------------------------------------------------------
131 | bopts = net.meta.normalization;
132 | net.meta.augmentation.jitterBrightness = 0 ;
133 | % bopts.interpolation = 'bilinear';
134 | bopts.jitterBrightness = net.meta.augmentation.jitterBrightness ;
135 | bopts.imageScales = opts.imageScales;
136 | bopts.numThreads = opts.numFetchThreads;
137 | bopts.addLossSmooth = opts.addLossSmooth;
138 | bopts.addBiasSamples = opts.addBiasSamples;
139 | bopts.maxScale = opts.maxScale ;
140 | bopts.vgg16 = any(arrayfun(@(a) strcmp(a.name, 'relu5_1'), net.layers)==1) ; % true when a layer is named 'relu5_1'
141 | %% -------------------------------------------------------------------
142 | %                                                                Train
143 | % --------------------------------------------------------------------
144 | % validation: every 5th image of set 3 (NOTE(review): likely the test split - confirm)
145 | valIdx = find(imdb.images.set == 3);
146 | valIdx = valIdx(1:5:end) ;
147 | % valIdx = [];
148 | 
149 | %% 
150 | bopts.useGpu = numel(opts.train.gpus) >  0 ;
151 | bopts.prefetch = opts.train.prefetch;
152 | % the trainer calls the handle with (imdb, batch); bopts is bound through the closure
153 | info = cnn_train_dag(net, imdb, @(i,b) ...
154 |   getBatch(bopts,i,b), ...
155 |   opts.train, 'train', trainIdx, ...
156 |   'val', valIdx) ;
157 | % the deploy step below is skipped when net.mat already exists in opts.expDir
158 | %% -------------------------------------------------------------------
159 | %                                                       Deploy network
160 | % --------------------------------------------------------------------
161 | if ~exist(fullfile(opts.expDir,'net.mat'),'file')
162 |   removeLoss = {'dagnn.Loss','dagnn.DropOut'};
163 |   for i=1:numel(removeLoss)
164 |     dagRemoveLayersOfType(net,removeLoss{i}) ;
165 |   end
166 |   
167 |   net.mode = 'test' ;
168 |   net_ = net ;
169 |   net = net_.saveobj() ;
170 |   save(fullfile(opts.expDir,'net.mat'), '-struct','net');
171 | end
172 | % --------------------------------------------------------------------
173 | function inputs = getBatch(opts, imdb, batch)
174 | % --------------------------------------------------------------------
175 | % Assemble the named network inputs for one training batch.
176 | % An empty BATCH returns empty placeholders. When the caller requests no
177 | % output, wsddn_get_batch is called with opts.prefetch = true and nothing
178 | % else is computed.
179 | if isempty(batch)
180 |   inputs = {'input', [], 'label', [], 'rois', [], 'ids', []};
181 |   return;
182 | end
183 | 
184 | % random scale for the batch, then a random 0/1 flip flag per element
185 | opts.scale = opts.imageScales(randi(numel(opts.imageScales)));
186 | opts.flip = randi(2,numel(batch),1)-1;
187 | useVgg16 = opts.vgg16 ;
188 | opts = rmfield(opts,'vgg16') ;
189 | opts.prefetch = (nargout == 0);
190 | 
191 | imagePaths = strcat([imdb.imageDir filesep], imdb.images.name(batch)) ;
192 | [im,rois] = wsddn_get_batch(imagePaths, imdb, batch, opts);
193 | if nargout == 0
194 |   return;
195 | end
196 | 
197 | rois = single(rois') ;
198 | labels = reshape(imdb.images.label(:,batch), ...
199 |   [1 1 size(imdb.images.label,1) numel(batch)]);
200 | 
201 | if opts.useGpu > 0
202 |   im = gpuArray(im) ;
203 |   rois = gpuArray(rois) ;
204 | end
205 | if ~isempty(rois)
206 |   rois = rois([1 3 2 5 4],:) ; % swap rows 2<->3 and 4<->5
207 | end
208 | 
209 | % rows 2-5 are rescaled with a stride of 16 and model-dependent offsets
210 | stride = [16 16] ;
211 | offB = 9.5 ;
212 | if useVgg16
213 |   offA = 8.5 ;
214 | else
215 |   offA = 18 ;
216 | end
217 | row1 = rois(1,:) ;
218 | row2 = floor((rois(2,:) - offA + offB) / stride(1) + 0.5) + 1 ;
219 | row3 = floor((rois(3,:) - offA + offB) / stride(2) + 0.5) + 1 ;
220 | row4 = ceil((rois(4,:) - offA - offB) / stride(1) - 0.5) + 1 ;
221 | row5 = ceil((rois(5,:) - offA - offB) / stride(2) - 0.5) + 1 ;
222 | rois = [row1; row2; row3; row4; row5] ;
223 | 
224 | inputs = {'input', im, 'label', labels, 'rois', rois, 'ids', batch} ;
225 | if opts.addLossSmooth
226 |   inputs{end+1} = 'boxes' ;
227 |   inputs{end+1} = imdb.images.boxes{batch} ;
228 | end
229 | if opts.addBiasSamples==1
230 |   % box scores are passed as a 1x1x1xN array
231 |   inputs{end+1} = 'boxScore';
232 |   inputs{end+1} = reshape(imdb.images.boxScores{batch}, 1, 1, 1, []) ;
233 | end
234 | 
235 | % -------------------------------------------------------------------------
236 | function imdb = fixBBoxes(imdb, minSize, maxNum)
237 | % -------------------------------------------------------------------------
238 | % FIXBBOXES  Filter the proposal boxes (and their scores) of every image.
239 | %   A box is kept when bbox(:,3) >= bbox(:,1)+MINSIZE and
240 | %   bbox(:,4) >= bbox(:,2)+MINSIZE. Duplicate rows are dropped (first
241 | %   occurrence wins, original order kept). For images whose set is not 3,
242 | %   at most MAXNUM boxes are kept. Images without boxes are skipped.
243 | hasScores = isfield(imdb.images,'boxScores') ;
244 | for i=1:numel(imdb.images.name)
245 |   bbox = imdb.images.boxes{i};
246 |   if isempty(bbox), continue; end
247 | 
248 |   % row indices of the boxes that are large enough
249 |   keep = find((bbox(:,3)>=bbox(:,1)+minSize) & (bbox(:,4)>=bbox(:,2)+minSize));
250 |   % 'stable' returns the first occurrences in their original order
251 |   [~, uniqueIdx] = unique(bbox(keep,:), 'rows', 'stable');
252 |   keep = keep(uniqueIdx);
253 |   % the cap on the number of boxes applies to every set except 3
254 |   if imdb.images.set(i)~=3
255 |     keep = keep(1:min(numel(keep),maxNum));
256 |   end
257 | 
258 |   imdb.images.boxes{i} = bbox(keep,:);
259 |   % an empty score entry is left as is instead of being indexed
260 |   if hasScores && ~isempty(imdb.images.boxScores{i})
261 |     imdb.images.boxScores{i} = imdb.images.boxScores{i}(keep);
262 |   end
263 | end
264 | 
265 | % -------------------------------------------------------------------------
266 | function layers = dagFindLayersOfType(net, type)
267 | % -------------------------------------------------------------------------
268 | % Return a 1xK cell array with the names of the layers of NET whose block
269 | % is of class TYPE (as tested by isa), in the order of NET.LAYERS. The
270 | % result is an empty cell array when no layer matches.
271 | isMatch = arrayfun(@(l) isa(l.block, type), net.layers) ;
272 | layers = {net.layers(logical(isMatch)).name} ;
273 | 
274 | % -------------------------------------------------------------------------
275 | function dagRemoveLayersOfType(net, type)
276 | % -------------------------------------------------------------------------
277 | % Delete every layer of NET whose block is of class TYPE. After each removal
278 | % the layer's first output variable is renamed to its first input variable.
279 | for target = dagFindLayersOfType(net, type)
280 |   removed = net.layers(net.getLayerIndex(target{1})) ;
281 |   net.removeLayer(target{1}) ;
282 |   net.renameVar(removed.outputs{1}, removed.inputs{1}, 'quiet', true) ;
283 | end


--------------------------------------------------------------------------------
/core/wsddn_test.m:
--------------------------------------------------------------------------------
  1 | function aps = wsddn_test(varargin)
  2 | % @author: Hakan Bilen
  3 | % wsddn_test : this script evaluates detection performance in PASCAL VOC
  4 | % dataset for a given WSDDN model and returns one AP per class in APS.
  5 | % Options are name/value pairs parsed by vl_argparse over the defaults below.
  6 | opts.dataDir = fullfile(vl_rootnn, 'data') ;
  7 | opts.expDir = fullfile(vl_rootnn, 'exp') ;
  8 | opts.imdbPath = fullfile(vl_rootnn, 'data', 'imdbs', 'imdb-eb.mat');
  9 | opts.modelPath = fullfile(vl_rootnn, 'exp', 'net.mat') ;
 10 | opts.proposalType = 'eb' ;
 11 | opts.proposalDir = fullfile(vl_rootnn, 'data','EdgeBoxes') ;
 12 | 
 13 | % if you have limited gpu memory (<6gb), you can change the next 2 params
 14 | opts.maxNumProposals = inf; % limit number
 15 | opts.imageScales = [480,576,688,864,1200]; % scales
 16 | 
 17 | opts.gpu = [] ;
 18 | opts.train.prefetch = true ;
 19 | opts.vis = 0 ;
 20 | opts.numFetchThreads = 1 ;
 21 | opts = vl_argparse(opts, varargin) ;
 22 | 
 23 | display(opts);
 24 | if ~exist(fullfile(opts.dataDir,'VOCdevkit','VOCcode','VOCinit.m'),'file')
 25 |   error('VOCdevkit is not installed');
 26 | end
 27 | addpath(fullfile(opts.dataDir,'VOCdevkit','VOCcode'));
 28 | opts.train.expDir = opts.expDir ;
 29 | % -------------------------------------------------------------------------
 30 | %                                                    Network initialization
 31 | % -------------------------------------------------------------------------
 32 | net = load(opts.modelPath);
 33 | % the file may hold the network fields directly or wrapped in a 'net' field
 34 | if isfield(net,'net')
 35 |   net = net.net;
 36 | end
 37 | net = dagnn.DagNN.loadobj(net) ;
 38 | 
 39 | net.mode = 'test' ;
 40 | if ~isempty(opts.gpu)
 41 |   gpuDevice(opts.gpu) ;
 42 |   net.move('gpu') ;
 43 | end
 44 | 
 45 | if isfield(net,'normalization')
 46 |   bopts = net.normalization;
 47 | else
 48 |   bopts = net.meta.normalization;
 49 | end
 50 | 
 51 | bopts.rgbVariance = [] ;
 52 | bopts.interpolation = net.meta.normalization.interpolation;
 53 | bopts.jitterBrightness = 0 ;
 54 | bopts.imageScales = opts.imageScales;
 55 | bopts.numThreads = opts.numFetchThreads;
 56 | bs = find(arrayfun(@(a) isa(a.block, 'dagnn.BiasSamples'), net.layers)==1);
 57 | bopts.addBiasSamples = ~isempty(bs) ;
 58 | bopts.vgg16 = any(arrayfun(@(a) strcmp(a.name, 'relu5_1'), net.layers)==1) ;
 59 | % -------------------------------------------------------------------------
 60 | %                                                   Database initialization
 61 | % -------------------------------------------------------------------------
 62 | fprintf('loading imdb...');
 63 | if exist(opts.imdbPath,'file')==2
 64 |   imdb = load(opts.imdbPath) ;
 65 | else
 66 |   % build the imdb with the setup function that matches the proposal type
 67 |   switch opts.proposalType
 68 |     case 'ssw', setupFn = @setup_voc07_ssw ;
 69 |     case 'eb',  setupFn = @setup_voc07_eb ;
 70 |     otherwise,  error('undefined proposal type %s\n',opts.proposalType) ;
 71 |   end
 72 |   imdb = setupFn('dataDir',opts.dataDir, ...
 73 |     'proposalDir',opts.proposalDir,'loadTest',1);
 74 |   imdbFolder = fileparts(opts.imdbPath);
 75 |   if ~isempty(imdbFolder) && ~exist(imdbFolder,'dir')
 76 |     mkdir(imdbFolder);
 77 |   end
 78 |   save(opts.imdbPath,'-struct', 'imdb', '-v7.3');
 79 | end
 80 | fprintf('done\n');
 81 | minSize = 20;
 82 | imdb = fixBBoxes(imdb, minSize, opts.maxNumProposals);
 83 | 
 84 | VOCinit;
 85 | VOCopts.testset = 'test';
 86 | VOCopts.annopath = fullfile(opts.dataDir,'VOCdevkit','VOC2007','Annotations','%s.xml');
 87 | VOCopts.imgsetpath = fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','%s.txt');
 88 | VOCopts.localdir = fullfile(opts.dataDir,'VOCdevkit','local','VOC2007');
 89 | cats = VOCopts.classes;
 90 | ovTh = 0.4;
 91 | scTh = 1e-3;
 92 | % --------------------------------------------------------------------
 93 | %                                                               Detect
 94 | % --------------------------------------------------------------------
 95 | if strcmp(VOCopts.testset,'test')
 96 |   testIdx = find(imdb.images.set == 3);
 97 | elseif strcmp(VOCopts.testset,'trainval')
 98 |   testIdx = find(imdb.images.set < 3);
 99 | end
100 | bopts.useGpu = numel(opts.gpu) >  0 ;
101 | 
102 | scores = cell(1,numel(testIdx));
103 | boxes = imdb.images.boxes(testIdx);
104 | names = imdb.images.name(testIdx);
105 | 
106 | % detection scores are read from the first variable named 'xTimes'
107 | detLayer = find(arrayfun(@(a) strcmp(a.name, 'xTimes'), net.vars)==1, 1);
108 | if isempty(detLayer)
109 |   error('variable xTimes not found in the network');
110 | end
111 | net.vars(detLayer).precious = 1;
112 | % run detection
113 | start = tic ;
114 | for t=1:numel(testIdx)
115 |   batch = testIdx(t);
116 |   scoret = [];
117 |   for s=1:numel(opts.imageScales)
118 |     for f=1:2 % add flips
119 |       inputs = getBatch(bopts, imdb, batch, opts.imageScales(s), f-1 );
120 |       net.eval(inputs) ;
121 |       if isempty(scoret)
122 |         scoret = squeeze(gather(net.vars(detLayer).value));
123 |       else
124 |         scoret = scoret + squeeze(gather(net.vars(detLayer).value));
125 |       end
126 |     end
127 |   end
128 |   scores{t} = scoret;
129 |   % show speed
130 |   time = toc(start) ;
131 |   n = t * 2 * numel(opts.imageScales) ; % number of images processed overall
132 |   speed = n/time ;
133 |   if mod(t,10)==0
134 |     fprintf('test %d / %d speed %.1f Hz\n',t,numel(testIdx),speed);
135 |   end
136 |   if opts.vis
137 |     for cls = 1:numel(cats)
138 |       idx = (scores{t}(cls,:)>0.05);
139 |       if sum(idx)==0, continue;end
140 |       im = imread(fullfile(imdb.imageDir,imdb.images.name{testIdx(t)}));
141 |       boxest  = double(imdb.images.boxes{testIdx(t)}(idx,:));
142 |       % divide by number of scales and flips
143 |       scorest = scores{t}(cls,idx)' / (2 * numel(opts.imageScales));
144 |       boxesSc = [boxest,scorest];
145 |       pick = nms(boxesSc, ovTh);
146 |       boxesSc = boxesSc(pick,:);
147 |       figure(1) ;
148 |       im = bbox_draw(im,boxesSc(1,[2 1 4 3 5]));
149 |       fprintf('%s %.2f',cats{cls},boxesSc(1,5));
150 |       fprintf('\n') ;
151 |       title(cats{cls});
152 |       pause;
153 |     end
154 |   end
155 | end
156 | dets.names  = names;
157 | dets.scores = scores;
158 | dets.boxes  = boxes;
159 | % --------------------------------------------------------------------
160 | %                                                PASCAL VOC evaluation
161 | % --------------------------------------------------------------------
162 | aps = zeros(numel(cats),1);
163 | for cls = 1:numel(cats)
164 |   vocDets.confidence = [];
165 |   vocDets.bbox       = [];
166 |   vocDets.ids        = [];
167 |   for i=1:numel(dets.names)
168 |     scores = double(dets.scores{i});
169 |     boxes  = double(dets.boxes{i});
170 |     % keep the boxes scoring above scTh, then run nms with threshold ovTh
171 |     boxesSc = [boxes,scores(cls,:)'];
172 |     boxesSc = boxesSc(boxesSc(:,5)>scTh,:);
173 |     pick = nms(boxesSc, ovTh);
174 |     boxesSc = boxesSc(pick,:);
175 |     % boxes are stored with columns reordered as [2 1 4 3]; ids are name(1:6)
176 |     vocDets.confidence = [vocDets.confidence;boxesSc(:,5)];
177 |     vocDets.bbox = [vocDets.bbox;boxesSc(:,[2 1 4 3])];
178 |     vocDets.ids = [vocDets.ids; repmat({dets.names{i}(1:6)},size(boxesSc,1),1)];
179 |     
180 |   end
181 |   [rec,prec,ap] = wsddnVOCevaldet(VOCopts,cats{cls},vocDets,0);
182 |   
183 |   fprintf('%s %.1f\n',cats{cls},100*ap);
184 |   aps(cls) = ap;
185 | end
186 | 
187 | % --------------------------------------------------------------------
188 | function inputs = getBatch(opts, imdb, batch, scale, flip)
189 | % --------------------------------------------------------------------
190 | % Build the network inputs for image BATCH at a fixed SCALE and FLIP flag.
191 | % The result is {'input', im, 'rois', rois}; a 'boxScore' entry is appended
192 | % when opts.addBiasSamples is set and imdb.images has a boxScores field.
193 | useVgg16 = opts.vgg16 ;
194 | opts = rmfield(opts,'vgg16') ;
195 | opts.scale = scale;
196 | opts.flip = flip;
197 | opts.prefetch = (nargout == 0);
198 | 
199 | imagePaths = strcat([imdb.imageDir filesep], imdb.images.name(batch)) ;
200 | [im,rois] = wsddn_get_batch(imagePaths, imdb, batch, opts);
201 | 
202 | rois = single(rois');
203 | if opts.useGpu > 0
204 |   im = gpuArray(im) ;
205 |   rois = gpuArray(rois) ;
206 | end
207 | rois = rois([1 3 2 5 4],:) ; % swap rows 2<->3 and 4<->5
208 | 
209 | % rows 2-5 are rescaled with a stride of 16 and model-dependent offsets
210 | % NOTE(review): presumably this maps image coordinates onto the
211 | % convolutional feature grid - confirm against wsddn_init
212 | stride = [16 16] ;
213 | offB = 9.5 ;
214 | if useVgg16
215 |   offA = 8.5 ;
216 | else
217 |   offA = 18 ;
218 | end
219 | row1 = rois(1,:) ;
220 | row2 = floor((rois(2,:) - offA + offB) / stride(1) + 0.5) + 1 ;
221 | row3 = floor((rois(3,:) - offA + offB) / stride(2) + 0.5) + 1 ;
222 | row4 = ceil((rois(4,:) - offA - offB) / stride(1) - 0.5) + 1 ;
223 | row5 = ceil((rois(5,:) - offA - offB) / stride(2) - 0.5) + 1 ;
224 | rois = [row1; row2; row3; row4; row5] ;
225 | 
226 | inputs = {'input', im, 'rois', rois} ;
227 | 
228 | % box scores are passed as a 1x1x1xN array
229 | if opts.addBiasSamples && isfield(imdb.images,'boxScores')
230 |   inputs{end+1} = 'boxScore';
231 |   inputs{end+1} = reshape(imdb.images.boxScores{batch}, 1, 1, 1, []) ;
232 | end
233 | 
234 | 
235 | % -------------------------------------------------------------------------
236 | function imdb = fixBBoxes(imdb, minSize, maxNum)
237 | % -------------------------------------------------------------------------
238 | % FIXBBOXES  Filter the proposal boxes (and their scores) of every image.
239 | %   A box is kept when bbox(:,3) >= bbox(:,1)+MINSIZE and
240 | %   bbox(:,4) >= bbox(:,2)+MINSIZE. Duplicate rows are dropped (first
241 | %   occurrence wins, original order kept). For images whose set is not 3,
242 | %   at most MAXNUM boxes are kept. Images without boxes are skipped.
243 | hasScores = isfield(imdb.images,'boxScores') ;
244 | for i=1:numel(imdb.images.name)
245 |   bbox = imdb.images.boxes{i};
246 |   if isempty(bbox), continue; end
247 | 
248 |   % row indices of the boxes that are large enough
249 |   keep = find((bbox(:,3)>=bbox(:,1)+minSize) & (bbox(:,4)>=bbox(:,2)+minSize));
250 |   % 'stable' returns the first occurrences in their original order
251 |   [~, uniqueIdx] = unique(bbox(keep,:), 'rows', 'stable');
252 |   keep = keep(uniqueIdx);
253 |   % the cap on the number of boxes applies to every set except 3
254 |   if imdb.images.set(i)~=3
255 |     keep = keep(1:min(numel(keep),maxNum));
256 |   end
257 | 
258 |   imdb.images.boxes{i} = bbox(keep,:);
259 |   % an empty score entry is left as is instead of being indexed
260 |   if hasScores && ~isempty(imdb.images.boxScores{i})
261 |     imdb.images.boxScores{i} = imdb.images.boxScores{i}(keep);
262 |   end
263 | end
264 | 
265 | %-------------------------------------------------------------------------%
266 | 
267 | function im = bbox_draw(im,boxes,c,t)
268 | % BBOX_DRAW  Display IM and draw the rows of BOXES as rectangles on top.
269 | %   Columns 1-4 of BOXES are used as [x1 y1 x2 y2]; the last column is
270 | %   printed next to each box. C is the line color (default 'r') and T the
271 | %   line width (default 2). IM is returned unchanged.
272 | %
273 | % copied from Ross Girshick
274 | % Fast R-CNN
275 | % Copyright (c) 2015 Microsoft
276 | % Licensed under The MIT License [see LICENSE for details]
277 | % Written by Ross Girshick
278 | % --------------------------------------------------------
279 | % source: https://github.com/rbgirshick/fast-rcnn/blob/master/matlab/showboxes.m
280 | %
281 | %
282 | % Fast R-CNN
283 | % 
284 | % Copyright (c) Microsoft Corporation
285 | % 
286 | % All rights reserved.
287 | % 
288 | % MIT License
289 | % 
290 | % Permission is hereby granted, free of charge, to any person obtaining a
291 | % copy of this software and associated documentation files (the "Software"),
292 | % to deal in the Software without restriction, including without limitation
293 | % the rights to use, copy, modify, merge, publish, distribute, sublicense,
294 | % and/or sell copies of the Software, and to permit persons to whom the
295 | % Software is furnished to do so, subject to the following conditions:
296 | % 
297 | % The above copyright notice and this permission notice shall be included
298 | % in all copies or substantial portions of the Software.
299 | % 
300 | % THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
301 | % IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
302 | % FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
303 | % THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
304 | % OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
305 | % ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
306 | % OTHER DEALINGS IN THE SOFTWARE.
307 | 
308 | image(im);
309 | axis image;
310 | axis off;
311 | set(gcf, 'Color', 'white');
312 | % each default is resolved on its own so that C can be given without T
313 | if nargin<3 || isempty(c), c = 'r'; end
314 | if nargin<4 || isempty(t), t = 2; end
315 | if ~isempty(boxes)
316 |     x1 = boxes(:, 1);
317 |     y1 = boxes(:, 2);
318 |     x2 = boxes(:, 3);
319 |     y2 = boxes(:, 4);
320 |     line([x1 x1 x2 x2 x1]', [y1 y2 y2 y1 y1]', ...
321 |         'color', c, 'linewidth', t, 'linestyle', '-');
322 |     for i = 1:size(boxes, 1)
323 |         text(double(x1(i)), double(y1(i)) - 2, ...
324 |             sprintf('%.4f', boxes(i, end)), ...
325 |             'backgroundcolor', 'b', 'color', 'w', 'FontSize', 8);
326 |     end
327 | end
328 | 


--------------------------------------------------------------------------------