├── .gitignore ├── pascal ├── wsddnVOChash_lookup.m ├── wsddnVOCap.m ├── wsddnVOChash_init.m ├── nms.m ├── wsddnVOCevaldet.m ├── setup_voc07_ssw.m └── setup_voc07_eb.m ├── setup_WSDDN.m ├── matlab └── +dagnn │ ├── SumOverDim.m │ ├── Times.m │ ├── BiasSamples.m │ ├── SoftMax2.m │ ├── LossTopBoxSmoothProb.m │ └── LayerAP.m ├── README.md └── core ├── wsddn_get_batch.m ├── wsddn_init.m ├── wsddn_demo.m ├── wsddn_train.m └── wsddn_test.m /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | -------------------------------------------------------------------------------- /pascal/wsddnVOChash_lookup.m: -------------------------------------------------------------------------------- 1 | function ind = wsddnVOChash_lookup(hash,s) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | hsize=numel(hash.key); 5 | h=mod(str2double(s([4 6:end])),hsize)+1; 6 | ind=hash.val{h}(strmatch(s,hash.key{h},'exact')); 7 | -------------------------------------------------------------------------------- /pascal/wsddnVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = wsddnVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /setup_WSDDN.m: -------------------------------------------------------------------------------- 1 | function setup_WSDDN() 2 | %SETUP_WSDDN Sets up WSDDN, by adding its folders to the Matlab path 3 | 4 | root = fileparts(mfilename('fullpath')) ; 5 | addpath(root, [root '/matlab'], [root '/pascal'], [root '/core']) ; 6 | addpath([vl_rootnn '/examples/']) ; 7 | addpath([vl_rootnn '/examples/imagenet/']) ; 8 | 9 | -------------------------------------------------------------------------------- /pascal/wsddnVOChash_init.m: -------------------------------------------------------------------------------- 1 | function hash = wsddnVOChash_init(strs) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | hsize=4999; 5 | hash.key=cell(hsize,1); 6 | hash.val=cell(hsize,1); 7 | 8 | for i=1:numel(strs) 9 | s=strs{i}; 10 | h=mod(str2double(s([4 6:end])),hsize)+1; 11 | j=numel(hash.key{h})+1; 12 | hash.key{h}{j}=strs{i}; 13 | hash.val{h}(j)=i; 14 | end 15 | 16 | -------------------------------------------------------------------------------- /matlab/+dagnn/SumOverDim.m: -------------------------------------------------------------------------------- 1 | classdef SumOverDim < dagnn.ElementWise 2 | % @author: Hakan Bilen 3 | % SumOverDim is the sum of the elements of inputs{1} over dimension dim 4 | properties 5 | dim = 3; 6 | end 7 | 8 | methods 9 | function outputs = forward(obj, inputs, params) 10 | outputs{1} = sum(inputs{1},obj.dim) ; 11 | end 12 | 13 | function [derInputs, derParams] = backward(obj, inputs, params, derOutputs) 14 | 15 | ndims = ones(1,numel(size(inputs{1}))); 16 | ndims(obj.dim) = size(inputs{1},obj.dim); 17 | derInputs{1} = repmat(derOutputs{1},ndims); 18 | 19 | derParams = {} ; 20 | end 21 | 22 | function outputSizes = getOutputSizes(obj, inputSizes) 23 | outputSizes{1} = inputSizes{1} ; 24 | outputSizes{1}(obj.dim) = 1; 25 | end 26 | 27 | function obj = SumOverDim(varargin) 28 | obj.load(varargin) ; 29 | obj.dim = obj.dim; 30 | end 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /matlab/+dagnn/Times.m: -------------------------------------------------------------------------------- 1 | classdef Times < dagnn.ElementWise 2 | % @author: Hakan Bilen 3 | % Times (multiply) DagNN layer 4 | % The Times layer takes the multiplication of two inputs and store the result 5 | % as its only output. 6 | methods 7 | function outputs = forward(obj, inputs, params) 8 | if numel(inputs) ~= 2 9 | error('Number of inputs is not 2'); 10 | end 11 | outputs{1} = inputs{1} .* inputs{2} ; 12 | end 13 | 14 | function [derInputs, derParams] = backward(obj, inputs, params, derOutputs) 15 | derInputs = cell(1,2) ; 16 | derInputs{1} = derOutputs{1} .* inputs{2} ; 17 | derInputs{2} = derOutputs{1} .* inputs{1} ; 18 | derParams = {} ; 19 | end 20 | 21 | function obj = Times(varargin) 22 | obj.load(varargin) ; 23 | end 24 | 25 | function rfs = getReceptiveFields(obj) 26 | rfs.size = [1 1] ; 27 | rfs.stride = [1 1] ; 28 | rfs.offset = [1 1] ; 29 | end 30 | 31 | function outputSizes = getOutputSizes(obj, inputSizes) 32 | outputSizes = inputSizes(1) ; 33 | end 34 | end 35 | 36 | end -------------------------------------------------------------------------------- /matlab/+dagnn/BiasSamples.m: -------------------------------------------------------------------------------- 1 | classdef BiasSamples < dagnn.ElementWise 2 | % @author: Hakan Bilen 3 | properties 4 | scale = single(1) 5 | end 6 | properties (Transient) 7 | boxCoefs = [] 8 | end 9 | methods 10 | function outputs = forward(obj, inputs, params) 11 | if numel(inputs) ~= 2 12 | error('Number of inputs is not 2'); 13 | end 14 | obj.boxCoefs = single(1)+obj.scale*inputs{2}; 15 | outputs{1} = bsxfun(@times,inputs{1},obj.boxCoefs); 16 | end 17 | 18 | function [derInputs, derParams] = backward(obj, inputs, params, derOutputs) 19 | derInputs = cell(1,2) ; 20 | obj.boxCoefs = single(1)+obj.scale*inputs{2}; 21 | derInputs{1} = bsxfun(@times,derOutputs{1},obj.boxCoefs) ; 22 | derParams = {} ; 23 | end 24 | 25 | function obj = BiasSamples(varargin) 26 | obj.load(varargin) ; 27 | end 28 | 29 | function reset(obj) 30 | obj.boxCoefs = [] ; 31 | end 32 | 33 | function rfs = getReceptiveFields(obj) 34 | rfs.size = [1 1] ; 35 | rfs.stride = [1 1] ; 36 | rfs.offset = [1 1] ; 37 | end 38 | 39 | function outputSizes = getOutputSizes(obj, inputSizes) 40 | outputSizes = inputSizes(1) ; 41 | end 42 | 43 | end 44 | 45 | end 46 | -------------------------------------------------------------------------------- /matlab/+dagnn/SoftMax2.m: -------------------------------------------------------------------------------- 1 | classdef SoftMax2 < dagnn.ElementWise 2 | % @author: Hakan Bilen 3 | % Softmax2 : it is a more generic softmax layer with a dimension and temperature parameter 4 | properties 5 | dim = 3; 6 | temp = 1; 7 | scale = 1; 8 | end 9 | 10 | methods 11 | function outputs = forward(self, inputs, params) 12 | inputs{1} = inputs{1} / self.temp; 13 | order = 1:numel(size(inputs{1})); 14 | if self.dim~=3 15 | order([3 self.dim]) = [self.dim 3]; 16 | inputs{1} = permute(inputs{1},order); 17 | end 18 | outputs{1} = vl_nnsoftmax(inputs{1}) ; 19 | if self.dim~=3 20 | outputs{1} = permute(outputs{1},order) ; 21 | end 22 | end 23 | 24 | function [derInputs, derParams] = backward(self, inputs, params, derOutputs) 25 | 26 | inputs{1} = inputs{1} / self.temp; 27 | order = 1:numel(size(inputs{1})); 28 | if self.dim~=3 29 | order(3) = self.dim; 30 | order(self.dim) = 3; 31 | inputs{1} = permute(inputs{1},order); 32 | derOutputs{1} = permute(derOutputs{1},order); 33 | end 34 | 35 | derInputs{1} = vl_nnsoftmax(inputs{1}, derOutputs{1}) ; 36 | if self.dim~=3 37 | derInputs{1} = permute(derInputs{1},order) ; 38 | end 39 | derParams = {} ; 40 | end 41 | 42 | function obj = SoftMax2(varargin) 43 | obj.load(varargin) ; 44 | obj.dim = single(obj.dim); 45 | obj.temp = single(obj.temp); 46 | obj.scale = single(obj.scale); 47 | end 48 | end 49 | end 50 | 51 | -------------------------------------------------------------------------------- /pascal/nms.m: -------------------------------------------------------------------------------- 1 | function pick = nms(boxes, overlap) 2 | % top = nms(boxes, overlap) 3 | % Non-maximum suppression. (FAST VERSION) 4 | % Greedily select high-scoring detections and skip detections 5 | % that are significantly covered by a previously selected 6 | % detection. 7 | % 8 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m), 9 | % but an inner loop has been eliminated to significantly speed it 10 | % up in the case of a large number of boxes 11 | 12 | % Copyright (C) 2011-12 by Tomasz Malisiewicz 13 | % All rights reserved. 14 | % 15 | % This file is part of the Exemplar-SVM library and is made 16 | % available under the terms of the MIT license (see COPYING file). 17 | % Project homepage: https://github.com/quantombone/exemplarsvm 18 | 19 | 20 | if isempty(boxes) 21 | pick = []; 22 | return; 23 | end 24 | 25 | x1 = boxes(:,1); 26 | y1 = boxes(:,2); 27 | x2 = boxes(:,3); 28 | y2 = boxes(:,4); 29 | if size(boxes,2)==4 30 | s = ones(1,size(boxes,1)); 31 | else 32 | s = boxes(:,end); 33 | end 34 | 35 | area = (x2-x1+1) .* (y2-y1+1); 36 | [~, I] = sort(s); 37 | 38 | pick = s*0; 39 | counter = 1; 40 | while ~isempty(I) 41 | last = length(I); 42 | i = I(last); 43 | pick(counter) = i; 44 | counter = counter + 1; 45 | 46 | xx1 = max(x1(i), x1(I(1:last-1))); 47 | yy1 = max(y1(i), y1(I(1:last-1))); 48 | xx2 = min(x2(i), x2(I(1:last-1))); 49 | yy2 = min(y2(i), y2(I(1:last-1))); 50 | 51 | w = max(0.0, xx2-xx1+1); 52 | h = max(0.0, yy2-yy1+1); 53 | 54 | inter = w.*h; 55 | o = inter ./ (area(i) + area(I(1:last-1)) - inter); 56 | 57 | % I = I(find(o<=overlap)); 58 | I = I((o<=overlap)); 59 | end 60 | 61 | pick = pick(1:(counter-1)); 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Weakly Supervised Deep Detection Networks (WSDDN) 2 | 3 | 4 | ## Installation 5 | 1. Download and install [MatConvNet](http://www.vlfeat.org/matconvnet/install/) 6 | 2. Install this module with the package manager of MatConvNet [`vl_contrib`](http://www.vlfeat.org/matconvnet/mfiles/vl_contrib/#notes): 7 | 8 | ``` 9 | vl_contrib('install', 'WSDDN') ; 10 | vl_contrib('setup', 'WSDDN') ; 11 | ``` 12 | 13 | 3. If you want to train a WSDDN model, `wsddn_train` will automatically download the items below: 14 | 15 | a. [PASCAL VOC 2007 devkit and dataset](http://host.robots.ox.ac.uk/pascal/VOC/) under `data` folder 16 | 17 | b. Pre-computed edge-boxes for [trainval](http://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/EdgeBoxesVOC2007trainval.mat) and [test](http://groups.inf.ed.ac.uk/hbilen-data/WSDDN/EdgeBoxesVOC2007test.mat) splits: 18 | 19 | c. Pre-trained network from [MatConvNet website](http://www.vlfeat.org/matconvnet/models) 20 | 21 | 4. You can also download the pre-trained WSDDN model ([VGGF-EB-BoxSc-SpReg](http://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/wsddn.mat)). Note that it gives slightly different performance reported than in the paper (34.4% mAP instead of 34.5% mAP) 22 | 23 | 24 | ## Demo 25 | 26 | After completing the installation and downloading the required files, you are ready for the demo 27 | 28 | ```matlab 29 | cd scripts; 30 | opts.modelPath = '....' ; 31 | opts.imdbPath = '....' ; 32 | opts.gpu = .... ; 33 | wsddn_demo(opts) ; 34 | 35 | ``` 36 | 37 | ## Test 38 | 39 | ```matlab 40 | addpath scripts; 41 | opts.modelPath = '....' ; 42 | opts.imdbPath = '....' ; 43 | opts.gpu = .... ; 44 | opts.vis = true ; % visualize 45 | wsddn_test(opts) ; 46 | 47 | ``` 48 | 49 | ## Train 50 | 51 | Download an ImageNet pre-trained model from [http://www.vlfeat.org/matconvnet/pretrained/](http://www.vlfeat.org/matconvnet/pretrained/) 52 | 53 | ```matlab 54 | addpath scripts; 55 | opts.modelPath = '....' ; 56 | opts.imdbPath = '....' ; 57 | opts.train.gpus = .... ; 58 | [net,info] = wsddn_train(opts) ; 59 | 60 | ``` 61 | 62 | ## Citing WSDDN 63 | If you find the code useful, please cite: 64 | 65 | ```latex 66 | @inproceedings{Bilen16, 67 | author = "Bilen, H. and Vedaldi, A.", 68 | title = "Weakly Supervised Deep Detection Networks", 69 | booktitle = "Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition", 70 | year = "2016" 71 | } 72 | ``` 73 | 74 | ## Acknowledgement 75 | Many thanks to Sam Albanie for his help with contrib package manager and other nameless heros who diligently found my bugs. 76 | 77 | ### License 78 | The analysis work performed with the program(s) must be non-proprietary work. Licensee and its contract users must be or be affiliated with an academic facility. Licensee may additionally permit individuals who are students at such academic facility to access and use the program(s). Such students will be considered contract users of licensee. The program(s) may not be used for commercial competitive analysis (such as benchmarking) or for any commercial activity, including consulting. 79 | -------------------------------------------------------------------------------- /pascal/wsddnVOCevaldet.m: -------------------------------------------------------------------------------- 1 | function [rec,prec,ap] = wsddnVOCevaldet(VOCopts,cls,res,draw) 2 | 3 | % load test set 4 | tic; 5 | VOCopts.annocachepath=[VOCopts.localdir '%s_anno_cache.mat']; 6 | cp=sprintf(VOCopts.annocachepath,VOCopts.testset); 7 | if exist(cp,'file') 8 | fprintf('%s: pr: loading ground truth\n',cls); 9 | load(cp,'gtids','recs'); 10 | else 11 | [gtids,t]=textread(sprintf(VOCopts.imgsetpath,VOCopts.testset),'%s %d'); 12 | for i=1:length(gtids) 13 | % display progress 14 | if toc>1 15 | fprintf('%s: pr: load: %d/%d\n',cls,i,length(gtids)); 16 | drawnow; 17 | tic; 18 | end 19 | 20 | % read annotation 21 | recs(i)=PASreadrecord(sprintf(VOCopts.annopath,gtids{i})); 22 | end 23 | save(cp,'gtids','recs'); 24 | end 25 | 26 | fprintf('%s: pr: evaluating detections\n',cls); 27 | 28 | % hash image ids 29 | hash=wsddnVOChash_init(gtids); 30 | 31 | % extract ground truth objects 32 | 33 | npos=0; 34 | gt(length(gtids))=struct('BB',[],'diff',[],'det',[]); 35 | for i=1:length(gtids) 36 | % extract objects of class 37 | clsinds=strmatch(cls,{recs(i).objects(:).class},'exact'); 38 | gt(i).BB=cat(1,recs(i).objects(clsinds).bbox)'; 39 | gt(i).diff=[recs(i).objects(clsinds).difficult]; 40 | gt(i).det=false(length(clsinds),1); 41 | npos=npos+sum(~gt(i).diff); 42 | end 43 | 44 | % load results 45 | ids = res.ids; 46 | confidence = res.confidence; 47 | BB = res.bbox'; 48 | 49 | % sort detections by decreasing confidence 50 | [sc,si]=sort(-confidence); 51 | ids=ids(si); 52 | BB=BB(:,si); 53 | 54 | % assign detections to ground truth objects 55 | nd=length(confidence); 56 | tp=zeros(nd,1); 57 | fp=zeros(nd,1); 58 | tic; 59 | for d=1:nd 60 | % display progress 61 | if toc>1 62 | fprintf('%s: pr: compute: %d/%d\n',cls,d,nd); 63 | drawnow; 64 | tic; 65 | end 66 | 67 | % find ground truth image 68 | i=wsddnVOChash_lookup(hash,ids{d}); 69 | if isempty(i) 70 | error('unrecognized image "%s"',ids{d}); 71 | elseif length(i)>1 72 | error('multiple image "%s"',ids{d}); 73 | end 74 | 75 | % assign detection to ground truth object if any 76 | bb=BB(:,d); 77 | ovmax=-inf; 78 | for j=1:size(gt(i).BB,2) 79 | bbgt=gt(i).BB(:,j); 80 | bi=[max(bb(1),bbgt(1)) ; max(bb(2),bbgt(2)) ; min(bb(3),bbgt(3)) ; min(bb(4),bbgt(4))]; 81 | iw=bi(3)-bi(1)+1; 82 | ih=bi(4)-bi(2)+1; 83 | if iw>0 & ih>0 84 | % compute overlap as area of intersection / area of union 85 | ua=(bb(3)-bb(1)+1)*(bb(4)-bb(2)+1)+... 86 | (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1)-... 87 | iw*ih; 88 | ov=iw*ih/ua; 89 | if ov>ovmax 90 | ovmax=ov; 91 | jmax=j; 92 | end 93 | end 94 | end 95 | % assign detection as true positive/don't care/false positive 96 | if ovmax>=VOCopts.minoverlap 97 | if ~gt(i).diff(jmax) 98 | if ~gt(i).det(jmax) 99 | tp(d)=1; % true positive 100 | gt(i).det(jmax)=true; 101 | else 102 | fp(d)=1; % false positive (multiple detection) 103 | end 104 | end 105 | else 106 | fp(d)=1; % false positive 107 | end 108 | end 109 | 110 | % compute precision/recall 111 | fp=cumsum(fp); 112 | tp=cumsum(tp); 113 | rec=tp/npos; 114 | prec=tp./(fp+tp); 115 | 116 | ap=wsddnVOCap(rec,prec); 117 | 118 | if draw 119 | % plot precision/recall 120 | plot(rec,prec,'-'); 121 | grid; 122 | xlabel 'recall' 123 | ylabel 'precision' 124 | title(sprintf('class: %s, subset: %s, AP = %.3f',cls,VOCopts.testset,ap)); 125 | end 126 | -------------------------------------------------------------------------------- /matlab/+dagnn/LossTopBoxSmoothProb.m: -------------------------------------------------------------------------------- 1 | classdef LossTopBoxSmoothProb < dagnn.Loss 2 | % given top scoring box, it finds other boxes with at least overlap of 3 | % minOverlap and calculates the euclidean dist between top and other 4 | % boxes 5 | 6 | properties (Transient) 7 | gtIdx = [] 8 | boxIdx = [] 9 | probs = [] 10 | minOverlap = 0.5 11 | nBoxes = 10 12 | end 13 | 14 | methods 15 | function outputs = forward(obj, inputs, params) 16 | if numel(inputs) ~= 4 17 | error('Number of inputs is not 2'); 18 | end 19 | obj.gtIdx = []; 20 | obj.boxIdx = []; 21 | obj.probs = []; 22 | boxes = double(gather(inputs{2})'); 23 | scores = gather(squeeze(inputs{3})); 24 | labels = gather(squeeze(inputs{4})); 25 | 26 | if numel(boxes)<5 27 | return; 28 | end 29 | 30 | outputs{1} = zeros(1,'like',inputs{1}); 31 | for c=1:numel(labels) 32 | if labels(c)<=0 33 | continue; 34 | end 35 | 36 | [so, si] = sort(scores(c,:),'descend'); 37 | obj.gtIdx{c} = si(1); 38 | gtBox = boxes(:,obj.gtIdx{c}); 39 | gtArea = (gtBox(3)-gtBox(1)+1) .* (gtBox(4)-gtBox(2)+1); 40 | 41 | bbs = boxes(:,si(2:min(obj.nBoxes,end)))'; 42 | 43 | y1 = bbs(:,1); 44 | x1 = bbs(:,2); 45 | y2 = bbs(:,3); 46 | x2 = bbs(:,4); 47 | 48 | area = (x2-x1+1) .* (y2-y1+1); 49 | 50 | yy1 = max(gtBox(1), y1); 51 | xx1 = max(gtBox(2), x1); 52 | yy2 = min(gtBox(3), y2); 53 | xx2 = min(gtBox(4), x2); 54 | 55 | w = max(0.0, xx2-xx1+1); 56 | h = max(0.0, yy2-yy1+1); 57 | 58 | inter = w.*h; 59 | o = find((inter ./ (gtArea + area - inter))>obj.minOverlap); 60 | 61 | if isempty(o) 62 | continue; 63 | end 64 | 65 | obj.boxIdx{c} = si(o+1); 66 | obj.probs{c} = so(o+1); 67 | d = bsxfun(@minus,inputs{1}(:,:,:,obj.boxIdx{c}),inputs{1}(:,:,:,obj.gtIdx{c})); 68 | d = bsxfun(@times,d,obj.probs{c}); 69 | outputs{1} = outputs{1} + 0.5 * sum(d(:).^2); 70 | end 71 | 72 | n = obj.numAveraged ; 73 | m = n + 1 ; 74 | obj.average = (n * obj.average + gather(outputs{1})) / m ; 75 | obj.numAveraged = m ; 76 | end 77 | 78 | function [derInputs, derParams] = backward(obj, inputs, params, derOutputs) 79 | derInputs = cell(1,4) ; 80 | derInputs{1} = zeros(size(inputs{1}),'like',inputs{1}); 81 | for c=1:numel(obj.boxIdx) 82 | if isempty(obj.boxIdx{c}), continue; end 83 | derInputs{1}(:,:,:,obj.boxIdx{c}) = ... 84 | bsxfun(@minus,inputs{1}(:,:,:,obj.boxIdx{c}),inputs{1}(:,:,:,obj.gtIdx{c})); 85 | derInputs{1}(:,:,:,obj.boxIdx{c}) = bsxfun(@times,... 86 | reshape(obj.probs{c},[1 1 1 numel(obj.probs{c})]),derInputs{1}(:,:,:,obj.boxIdx{c})); 87 | derInputs{1}(:,:,:,obj.gtIdx{c}) = -sum(derInputs{1}(:,:,:,obj.boxIdx{c}),4); 88 | 89 | end 90 | derInputs{1} = derInputs{1} * derOutputs{1}; 91 | % fprintf('LossTopBox l2 %f ',sqrt(sum(derInputs{1}(:).^2))); 92 | derParams = {} ; 93 | end 94 | 95 | function obj = LossTopBoxSmoothProb(varargin) 96 | obj.load(varargin) ; 97 | obj.loss = 'LossTopBoxSmoothProb'; 98 | end 99 | 100 | function reset(obj) 101 | obj.gtIdx = []; 102 | obj.boxIdx = []; 103 | obj.probs = []; 104 | obj.average = 0 ; 105 | obj.numAveraged = 0 ; 106 | end 107 | 108 | 109 | end 110 | 111 | end 112 | -------------------------------------------------------------------------------- /core/wsddn_get_batch.m: -------------------------------------------------------------------------------- 1 | function [imo,rois] = wsddn_get_batch(images, imdb, batch, opts) 2 | % cnn_wsddn_get_batch Load, preprocess, and pack images for CNN evaluation 3 | 4 | if isempty(images) 5 | imo = [] ; 6 | rois = [] ; 7 | return ; 8 | end 9 | 10 | % fetch is true if images is a list of filenames (instead of 11 | % a cell array of images) 12 | fetch = ischar(images{1}) ; 13 | 14 | % prefetch is used to load images in a separate thread 15 | prefetch = fetch & opts.prefetch ; 16 | 17 | % pick size 18 | imSize = imdb.images.size(batch(1),:); 19 | factor = min(opts.scale(1)/imSize(1),opts.scale(1)/imSize(2)); 20 | height = floor(factor*imSize(1)); 21 | 22 | if prefetch 23 | vl_imreadjpeg(images, 'numThreads',opts.numThreads,'Resize',height,'prefetch') ; 24 | imo = [] ; 25 | rois = [] ; 26 | return ; 27 | end 28 | 29 | if fetch 30 | ims = vl_imreadjpeg(images,'numThreads',opts.numThreads,'Resize',height) ; 31 | else 32 | ims = images ; 33 | end 34 | 35 | for i=1:numel(images) 36 | % acquire image 37 | if isempty(ims{i}) 38 | imt = imread(images{i}) ; 39 | if size(imt,3) == 1 40 | imt = cat(3, imt, imt, imt) ; 41 | end 42 | 43 | ims{i} = imresize(imt,factor,'Method',opts.interpolation); 44 | ims{i} = single(ims{i}) ; % faster than im2single (and multiplies by 255) 45 | end 46 | end 47 | 48 | 49 | 50 | bboxes = cell(1,numel(batch)); 51 | nBoxes = 0; 52 | for b=1:numel(batch) 53 | bboxes{b} = double(imdb.images.boxes{batch(b)}); 54 | nBoxes = nBoxes + size(bboxes{b},1); 55 | end 56 | 57 | 58 | rois = zeros(nBoxes,5); 59 | countr = 0; 60 | 61 | maxW = 0; 62 | maxH = 0; 63 | 64 | 65 | 66 | for b=1:numel(batch) 67 | 68 | hw = imdb.images.size(batch(b),:); 69 | h = hw(1); 70 | w = hw(2); 71 | 72 | imsz = size(ims{b}); 73 | 74 | if opts.flip(b) 75 | im = ims{b}; 76 | ims{b} = im(:,end:-1:1,:); 77 | 78 | bbox = bboxes{b}; 79 | bbox(:,[2,4]) = w + 1 - bbox(:,[4,2]); 80 | bboxes{b} = bbox; 81 | end 82 | 83 | 84 | maxH = max(imsz(1),maxH); 85 | maxW = max(imsz(2),maxW); 86 | 87 | % adapt bounding boxes into new coord 88 | bbox = bboxes{b}; 89 | if any(bbox(:)<=0) 90 | error('bbox error'); 91 | end 92 | nB = size(bbox,1); 93 | tbbox = scale_box(bbox,[h,w],imsz); 94 | if any(tbbox(:)<=0) 95 | error('tbbox error'); 96 | end 97 | 98 | rois(countr+1:countr+nB,:) = [b*ones(nB,1),tbbox]; 99 | countr = countr + nB; 100 | end 101 | 102 | % rois = single(rois); 103 | depth = size(ims{1},3); 104 | imo = zeros(maxH,maxW,depth,numel(batch),'single'); 105 | 106 | if isempty(opts.averageImage) 107 | avgIm = []; 108 | elseif numel(opts.averageImage)==depth 109 | avgIm = opts.averageImage; 110 | end 111 | 112 | 113 | for b=1:numel(batch) 114 | sz = size(ims{b}); 115 | 116 | imo(1:sz(1),1:sz(2),:,b) = single(ims{b}); 117 | 118 | if ~isempty(avgIm) 119 | imo(1:sz(1),1:sz(2),:,b) = single(bsxfun(@minus,imo(1:sz(1),1:sz(2),:,b),opts.averageImage)); 120 | end 121 | if ~isempty(opts.rgbVariance) 122 | imo(1:sz(1),1:sz(2),:,b) = bsxfun(@plus, imo(1:sz(1),1:sz(2),:,b), ... 123 | reshape(opts.rgbVariance * randn(3,1), 1,1,3)) ; 124 | end 125 | end 126 | 127 | 128 | function boxOut = scale_box(boxIn,szIn,szOut) 129 | 130 | h = szIn(1); 131 | w = szIn(2); 132 | 133 | bxr = 0.5 * (boxIn(:,2)+boxIn(:,4)) / w; 134 | byr = 0.5 * (boxIn(:,1)+boxIn(:,3)) / h; 135 | 136 | bwr = (boxIn(:,4)-boxIn(:,2)+1) / w; 137 | bhr = (boxIn(:,3)-boxIn(:,1)+1) / h; 138 | 139 | % boxIn center in new coord 140 | byhat = (szOut(1) * byr); 141 | bxhat = (szOut(2) * bxr); 142 | 143 | % relative width, height 144 | bhhat = szOut(1) * bhr; 145 | bwhat = szOut(2) * bwr; 146 | 147 | % transformed boxIn 148 | boxOut = [max(1,round(byhat - 0.5 * bhhat)),... 149 | max(1,round(bxhat - 0.5 * bwhat)), ... 150 | min(szOut(1),round(byhat + 0.5 * bhhat)),... 151 | min(szOut(2),round(bxhat + 0.5 * bwhat))]; 152 | 153 | -------------------------------------------------------------------------------- /matlab/+dagnn/LayerAP.m: -------------------------------------------------------------------------------- 1 | classdef LayerAP < dagnn.Loss 2 | % @author: Hakan Bilen 3 | % 11 step average precision 4 | properties 5 | cls_index = 1 6 | resetLayer = false 7 | gtLabels = [] 8 | scores = [] 9 | ids = [] 10 | aps = [] 11 | voc07 = true % 11 step 12 | classNames = {} 13 | end 14 | 15 | 16 | methods 17 | function outputs = forward(obj, inputs, params) 18 | if obj.resetLayer 19 | obj.gtLabels = [] ; 20 | obj.scores = [] ; 21 | obj.ids = [] ; 22 | obj.aps = [] ; 23 | obj.resetLayer = false ; 24 | end 25 | 26 | if numel(inputs)==2 27 | obj.scores = [obj.scores gather(squeeze(inputs{1}(:,:,obj.cls_index,:)))]; 28 | obj.gtLabels = [obj.gtLabels gather(squeeze(inputs{2}(:,:,obj.cls_index,:)))]; 29 | elseif numel(inputs)>2 30 | scoresCur = gather(squeeze(inputs{1}(:,:,obj.cls_index,:))); 31 | gtLabelsCur = gather(squeeze(inputs{2}(:,:,obj.cls_index,:))); 32 | 33 | idsCur = gather(squeeze(inputs{3})); 34 | 35 | [lia,locb] = ismember(idsCur,obj.ids); 36 | 37 | if any(lia) 38 | obj.scores = [obj.scores scoresCur(~lia,:)]; 39 | obj.gtLabels = [obj.gtLabels gtLabelsCur(~lia,:)]; 40 | obj.ids = [obj.ids(:) ; idsCur(~lia,:)]; 41 | 42 | nz = find(lia); 43 | for i=1:numel(nz) 44 | obj.scores(locb(nz(i)),:) = obj.scores(locb(nz(i)),:) + ... 45 | scoresCur(nz(i),:); 46 | end 47 | else 48 | obj.scores = [obj.scores scoresCur]; 49 | obj.gtLabels = [obj.gtLabels gtLabelsCur]; 50 | obj.ids = [obj.ids(:) ; idsCur]'; 51 | end 52 | else 53 | error('wrong number of inputs'); 54 | end 55 | 56 | obj.aps = obj.compute_average_precision(); 57 | obj.average = 100 * mean(obj.aps); 58 | outputs{1} = 100 * mean(obj.aps); 59 | end 60 | 61 | function [derInputs, derParams] = backward(obj, inputs, params, derOutputs) 62 | derInputs = cell(1,numel(inputs)); 63 | derInputs{1} = derOutputs{1} ; 64 | derParams = {} ; 65 | end 66 | 67 | function reset(obj) 68 | obj.resetLayer = true ; 69 | % obj.average = 0 ; 70 | % obj.aps = 0 ; 71 | % obj.gtLabels = []; 72 | % obj.scores = []; 73 | % obj.ids = []; 74 | end 75 | 76 | function printAP(obj) 77 | if isempty(obj.classNames) 78 | for i=1:numel(obj.aps) 79 | fprintf('class-%d %.1f\n',i,100*obj.aps(i)) ; 80 | end 81 | else 82 | for i=1:numel(obj.aps) 83 | fprintf('%-50s %.1f\n',obj.classNames{i},100*obj.aps(i)) ; 84 | end 85 | end 86 | end 87 | 88 | function aps = compute_average_precision(obj) 89 | assert(all(size(obj.scores)==size(obj.gtLabels))); 90 | % nImg = size(obj.scores,1); 91 | nCls = numel(obj.cls_index); 92 | 93 | aps = zeros(1,nCls); 94 | 95 | for c=1:nCls 96 | gt = obj.gtLabels(c,:); 97 | conf = obj.scores(c,:) ; 98 | if sum(gt>0)==0, continue ; end 99 | 100 | % compute average precision 101 | if obj.voc07 102 | [rec,prec,ap]=obj.VOC07ap(conf,gt) ; 103 | else 104 | [rec,prec,ap]=obj.THUMOSeventclspr(conf,gt) ; 105 | end 106 | aps(c) = ap; 107 | end 108 | end 109 | 110 | function [rec,prec,ap]=VOC07ap(obj,conf,gt) 111 | [~,si]=sort(-conf); 112 | tp=gt(si)>0; 113 | fp=gt(si)<0; 114 | 115 | fp=cumsum(fp); 116 | tp=cumsum(tp); 117 | 118 | rec=tp/sum(gt>0); 119 | prec=tp./(fp+tp); 120 | ap=0; 121 | for t=0:0.1:1 122 | p=max(prec(rec>=t)); 123 | if isempty(p) 124 | p=0; 125 | end 126 | ap=ap+p/11; 127 | end 128 | end 129 | 130 | function [rec,prec,ap]=THUMOSeventclspr(obj,conf,gt) 131 | [so,sortind]=sort(-conf); 132 | tp=gt(sortind)==1; 133 | fp=gt(sortind)~=1; 134 | npos=length(find(gt==1)); 135 | 136 | % compute precision/recall 137 | fp=cumsum(fp); 138 | tp=cumsum(tp); 139 | rec=tp/npos; 140 | prec=tp./(fp+tp); 141 | 142 | % compute average precision 143 | 144 | ap=0; 145 | tmp=gt(sortind)==1; 146 | for i=1:length(conf) 147 | if tmp(i)==1 148 | ap=ap+prec(i); 149 | end 150 | end 151 | ap=ap/npos; 152 | end 153 | 154 | function obj = LayerAP(varargin) 155 | obj.load(varargin) ; 156 | obj.loss = 'average_precision' ; 157 | end 158 | end 159 | end 160 | -------------------------------------------------------------------------------- /core/wsddn_init.m: -------------------------------------------------------------------------------- 1 | % -------------------------------------------------------------------- 2 | function net = wsddn_init(net,varargin) 3 | % -------------------------------------------------------------------- 4 | % @author: Hakan Bilen 5 | % wsddn_init : this script initalise WSDDN model 6 | 7 | opts.addBiasSamples = 1 ; 8 | opts.softmaxTempCls = 1 ; 9 | opts.softmaxTempDet = 2 ; 10 | opts.addLossSmooth = 1 ; 11 | opts.averageImage = [] ; 12 | opts.rgbVariance = [] ; 13 | opts.numClasses = 1 ; 14 | opts.classNames = {''} ; 15 | 16 | opts = vl_argparse(opts, varargin) ; 17 | 18 | % add drop-out layers 19 | relu6p = find(cellfun(@(a) strcmp(a.name, 'relu6'), net.layers)==1); 20 | relu7p = find(cellfun(@(a) strcmp(a.name, 'relu7'), net.layers)==1); 21 | 22 | drop6 = struct('type', 'dropout', 'rate', 0.5, 'name','drop6'); 23 | drop7 = struct('type', 'dropout', 'rate', 0.5, 'name','drop7'); 24 | net.layers = [net.layers(1:relu6p) drop6 net.layers(relu6p+1:relu7p) drop7 net.layers(relu7p+1:end)]; 25 | 26 | 27 | % change loss fc layer 28 | fc8p = (cellfun(@(a) strcmp(a.name, 'fc8'), net.layers)==1); 29 | net.layers{fc8p}.weights{1} = 0.01 * ... 30 | randn(1,1,size(net.layers{fc8p}.weights{1},3),opts.numClasses,'single'); 31 | 32 | net.layers{fc8p}.weights{2} = zeros(1, opts.numClasses, 'single'); 33 | net.layers{fc8p}.name = 'fc8C'; 34 | 35 | net.layers(end) = [] ; 36 | % add loss (this will be changed to binary log at the end) 37 | % net.layers{end} = struct('name','loss', 'type','softmaxloss') ; 38 | 39 | % add detection layer 40 | clsLayerPos = (cellfun(@(a) strcmp(a.name, 'fc8C'), net.layers)==1); 41 | detLayer = net.layers{clsLayerPos}; 42 | detLayer.weights{1} = 0.01 * randn(1,1,size(detLayer.weights{1},3),opts.numClasses,'single'); 43 | % detLayer.weights{1} = zeros(1,1,size(detLayer.weights{1},3),opts.numClasses,'single'); 44 | detLayer.weights{2} = zeros(1, opts.numClasses, 'single'); 45 | 46 | detLayer.name = 'fc8R'; 47 | 48 | % remove pool5 49 | pPool5 = find(cellfun(@(a) strcmp(a.name, 'pool5'), net.layers)==1); 50 | net.layers = [net.layers([1:pPool5-1,pPool5+1:end]) detLayer]; 51 | 52 | % convert to dagnn 53 | net = dagnn.DagNN.fromSimpleNN(net, 'canonicalNames', true) ; 54 | 55 | % fix fc8R 56 | pFc8R = (arrayfun(@(a) strcmp(a.name, 'fc8R'), net.layers)==1); 57 | pFc8C = (arrayfun(@(a) strcmp(a.name, 'fc8C'), net.layers)==1); 58 | 59 | net.layers(pFc8R).inputs = net.layers(pFc8C).inputs; 60 | net.layers(pFc8R).inputIndexes = net.layers(pFc8C).inputIndexes; 61 | 62 | % add spp 63 | 64 | pRelu5 = (arrayfun(@(a) strcmp(a.name, 'relu5'), net.layers)==1); 65 | vggdeep = 0; 66 | if all(pRelu5==0) 67 | pRelu5 = (arrayfun(@(a) strcmp(a.name, 'relu5_3'), net.layers)==1); 68 | assert(any(pRelu5==1)); 69 | vggdeep = 1; 70 | end 71 | pFc6 = (arrayfun(@(a) strcmp(a.name, 'fc6'), net.layers)==1); 72 | 73 | % add spp (offset1 = rf offset, offset2 = shrinking factor) 74 | % offset1=18 offset2=9.5 levels=6 for vgg-f and vgg-m-1024 75 | % offset1=8.5 offset2=9.5 levels=7 for vgg-very-deep-16 76 | if vggdeep 77 | net.addLayer('SPP', dagnn.ROIPooling('subdivisions',[7 7],... 78 | 'transform',1), ... 79 | {net.layers(pRelu5).outputs{1},'rois'}, ... 80 | 'xSPP'); 81 | else 82 | net.addLayer('SPP', dagnn.ROIPooling('subdivisions',[6 6],... 83 | 'transform',1), ... 84 | {net.layers(pRelu5).outputs{1},'rois'}, ... 85 | 'xSPP'); 86 | end 87 | 88 | 89 | if opts.addBiasSamples 90 | % add boost 91 | net.addLayer('boostBox', ... 92 | dagnn.BiasSamples('scale',10), ... 93 | {'xSPP','boxScore'},'xBoostBox'); 94 | net.layers(pFc6).inputs{1} = 'xBoostBox'; 95 | else 96 | net.layers(pFc6).inputs{1} = 'xSPP'; 97 | end 98 | 99 | 100 | 101 | % add softmax layer for det 102 | pFc8R = (arrayfun(@(a) strcmp(a.name, 'fc8R'), net.layers)==1); 103 | net.addLayer('softmaxDet', ... 104 | dagnn.SoftMax2('dim',4, 'temp',opts.softmaxTempDet), ... 105 | net.layers(pFc8R).outputs{1},'xSoftmaxDet'); 106 | 107 | % add softmax layers for cls 108 | pFc8C = (arrayfun(@(a) strcmp(a.name, 'fc8C'), net.layers)==1); 109 | net.layers(pFc8C).outputs{1} = 'xfc8C'; 110 | 111 | net.addLayer('softmaxCls', ... 112 | dagnn.SoftMax2('dim',3, 'temp',opts.softmaxTempCls), ... 113 | net.layers(pFc8C).outputs{1},'xSoftmaxCls'); 114 | 115 | % add times layer 116 | net.addLayer('timesCR', ... 117 | dagnn.Times(), ... 118 | {'xSoftmaxCls','xSoftmaxDet'},'xTimes'); 119 | 120 | % add sum layer 121 | net.addLayer('sum', ... 122 | dagnn.SumOverDim('dim',4), ... 123 | 'xTimes','prediction'); 124 | 125 | 126 | 127 | % add classification AP 128 | net.addLayer('mAP', dagnn.LayerAP('cls_index',1:opts.numClasses), ... 129 | {'prediction','label', 'ids'}, 'mAP') ; 130 | 131 | net.addLayer('loss', dagnn.Loss('loss','binarylog'), ... 132 | {'prediction','label'}, 'objective') ; 133 | 134 | 135 | % no decay for bias 136 | for i=2:2:numel(net.params) 137 | net.params(i).weightDecay = 0; 138 | end 139 | 140 | if opts.addLossSmooth 141 | net.addLayer('LossTopBoxSmooth',dagnn.LossTopBoxSmoothProb('minOverlap',0.6),... 142 | {net.layers(pFc8R).inputs{1},'boxes','xTimes','label'},... 143 | 'lossTopB'); 144 | end 145 | meta = net.meta ; 146 | net.meta = [] ; 147 | net.meta.normalization.interpolation = meta.normalization.interpolation ; 148 | net.meta.normalization.averageImage = opts.averageImage ; 149 | net.meta.normalization.rgbVariance = opts.rgbVariance ; 150 | net.meta.classes.name = {'aeroplane', 'bicycle', 'bird', ... 151 | 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', ... 152 | 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', ... 153 | 'sofa', 'train', 'tvmonitor', 'background' }; -------------------------------------------------------------------------------- /pascal/setup_voc07_ssw.m: -------------------------------------------------------------------------------- 1 | function imdb = setup_voc07_ssw(varargin) 2 | % setup_voc07_ssw Initialize PASCAL VOC2007 data with selective 3 | % search windows 4 | 5 | % Warning! boxes are in the format of ([y1 x1 y2 x2]) 6 | 7 | opts.dataDir = fullfile('data') ; 8 | opts.proposalDir = fullfile(opts.dataDir,'SSW'); 9 | opts.loadTest = 1; 10 | opts = vl_argparse(opts, varargin) ; 11 | 12 | % ------------------------------------------------------------------------- 13 | % Load selective search win 14 | % ------------------------------------------------------------------------- 15 | %% get selective search windows 16 | files = {'SelectiveSearchVOC2007trainval.mat', ... 17 | 'SelectiveSearchVOC2007test.mat'} ; 18 | 19 | if ~exist(opts.proposalDir, 'dir') 20 | mkdir(opts.proposalDir) ; 21 | end 22 | 23 | for i=1:numel(files) 24 | if ~exist(fullfile(opts.proposalDir, files{i}), 'file') 25 | url = sprintf('http://koen.me/research/downloads/%s',files{i}) ; 26 | fprintf('downloading %s\n', url) ; 27 | urlwrite(url,[opts.proposalDir filesep files{i}]); 28 | end 29 | end 30 | 31 | if ~isempty(opts.proposalDir) 32 | t1 = load([opts.proposalDir,filesep,files{1}]); 33 | if opts.loadTest 34 | t2 = load([opts.proposalDir,filesep,files{2}]); 35 | ssw.id = [str2double(t1.images);str2double(t2.images)]'; 36 | ssw.boxes = cat(2,t1.boxes,t2.boxes); 37 | else 38 | ssw.id = str2double(t1.images)'; 39 | ssw.boxes = t1.boxes; 40 | end 41 | 42 | [~,si] = sort(ssw.id); 43 | ssw.id = ssw.id(si); 44 | ssw.boxes = ssw.boxes(si); 45 | end 46 | 47 | % ------------------------------------------------------------------------- 48 | % Load categories metadata 49 | % ------------------------------------------------------------------------- 50 | cats = {'aeroplane','bicycle','bird','boat','bottle','bus','car',... 51 | 'cat','chair','cow','diningtable','dog','horse','motorbike','person',... 52 | 'pottedplant','sheep','sofa','train','tvmonitor'}; 53 | 54 | if ~exist(opts.dataDir,'dir') 55 | error('wrong data folder!'); 56 | end 57 | 58 | if ~exist(opts.dataDir,'dir') 59 | error('wrong data folder!'); 60 | end 61 | 62 | % Download VOC Devkit and data 63 | if ~exist(fullfile(opts.dataDir,'VOCdevkit'),'dir') 64 | files = {'VOCtest_06-Nov-2007.tar',... 65 | 'VOCtrainval_06-Nov-2007.tar',... 66 | 'VOCdevkit_08-Jun-2007.tar'} ; 67 | for i=1:numel(files) 68 | if ~exist(fullfile(opts.dataDir, files{i}), 'file') 69 | outPath = fullfile(opts.dataDir,files{i}) ; 70 | url = sprintf('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/%s',files{i}) ; 71 | fprintf('Downloading %s to %s\n', url, outPath) ; 72 | urlwrite(url,outPath) ; 73 | untar(outPath,opts.dataDir); 74 | end 75 | end 76 | end 77 | addpath(fullfile(opts.dataDir, 'VOCdevkit', 'VOCcode')); 78 | 79 | traindata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','train.txt')); 80 | valdata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','val.txt')); 81 | testdata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','test.txt')); 82 | 83 | assert(numel(traindata)==2501); 84 | assert(numel(valdata)==2510); 85 | assert(numel(testdata)==4952); 86 | 87 | imdb.classes.name = cats ; 88 | imdb.classes.description = cats ; 89 | imdb.imageDir = fullfile(opts.dataDir, fullfile('VOCdevkit','VOC2007','JPEGImages')) ; 90 | 91 | % ------------------------------------------------------------------------- 92 | % Training images 93 | % -------------------------------------------------------------------------% 94 | names = cell(1,numel(traindata)); 95 | labels = zeros(numel(traindata),numel(cats)); 96 | 97 | 98 | % load image names 99 | for t=1:numel(traindata) 100 | names{t} = sprintf('%06d.jpg',traindata(t)); 101 | % data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t})); 102 | end 103 | 104 | % load binary labels 105 | for c=1:numel(cats) 106 | t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_train.txt'])); 107 | labels(:,c) = t(:,2); 108 | end 109 | 110 | imdb.images.id = traindata'; 111 | imdb.images.name = names ; 112 | imdb.images.set = ones(1, numel(names)) ; 113 | imdb.images.label = labels' ; 114 | % imdb.images.data = data; 115 | 116 | % ------------------------------------------------------------------------- 117 | % Validation images 118 | % ------------------------------------------------------------------------- 119 | 120 | names = cell(1,numel(valdata)); 121 | labels = zeros(numel(valdata),numel(cats)); 122 | % data = cell(1,numel(valdata)); 123 | 124 | % load image names 125 | for t=1:numel(valdata) 126 | names{t} = sprintf('%06d.jpg',valdata(t)); 127 | % data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t})); 128 | end 129 | 130 | % load binary labels 131 | for c=1:numel(cats) 132 | t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_val.txt'])); 133 | labels(:,c) = t(:,2); 134 | end 135 | 136 | 137 | imdb.images.id = horzcat(imdb.images.id, valdata') ; 138 | imdb.images.name = horzcat(imdb.images.name, names) ; 139 | imdb.images.set = horzcat(imdb.images.set, 2*ones(1,numel(names))) ; 140 | imdb.images.label = horzcat(imdb.images.label, labels') ; 141 | % imdb.images.data = horzcat(imdb.images.data, data) ; 142 | 143 | % % ------------------------------------------------------------------------- 144 | % % Test images 145 | % % ------------------------------------------------------------------------- 146 | % 147 | % 148 | if opts.loadTest 149 | names = cell(1,numel(testdata)); 150 | labels = zeros(numel(testdata),numel(cats)); 151 | % data = cell(1,numel(testdata)); 152 | 153 | % load image names 154 | for t=1:numel(testdata) 155 | names{t} = sprintf('%06d.jpg',testdata(t)); 156 | % data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t})); 157 | end 158 | 159 | % load binary labels 160 | for c=1:numel(cats) 161 | t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_test.txt'])); 162 | labels(:,c) = t(:,2); 163 | end 164 | 165 | imdb.images.id = horzcat(imdb.images.id, testdata') ; 166 | imdb.images.name = horzcat(imdb.images.name, names) ; 167 | imdb.images.set = horzcat(imdb.images.set, 3 * ones(1,numel(names))) ; 168 | imdb.images.label = horzcat(imdb.images.label, labels') ; 169 | % imdb.images.data = horzcat(imdb.images.data, data) ; 170 | end 171 | % ------------------------------------------------------------------------- 172 | % Postprocessing 173 | % ------------------------------------------------------------------------- 174 | [~,sorti] = sort(imdb.images.id); 175 | 176 | 177 | imdb.images.id = imdb.images.id(sorti); 178 | imdb.images.name = imdb.images.name(sorti) ; 179 | imdb.images.set = imdb.images.set(sorti) ; 180 | imdb.images.label = single(imdb.images.label(:,sorti)) ; 181 | imdb.images.size = zeros(numel(imdb.images.name),2); 182 | 183 | if ~isempty(opts.proposalDir) 184 | imdb.images.boxes = ssw.boxes; 185 | assert(all(ssw.id==imdb.images.id)); 186 | end 187 | 188 | % this is zero as scores of selective search windows are not much 189 | % informative 190 | if ~isempty(opts.proposalDir) 191 | imdb.images.boxScores = cell(size(imdb.images.boxes)); 192 | for i=1:numel(imdb.images.boxes) 193 | imdb.images.boxes{i} = int16(imdb.images.boxes{i}); 194 | imdb.images.boxScores{i} = zeros(size(imdb.images.boxes{i},1),1,'single'); 195 | imf = imfinfo(fullfile(imdb.imageDir,imdb.images.name{i})); 196 | imdb.images.size(i,:) = [imf.Height,imf.Width]; 197 | end 198 | end 199 | end 200 | -------------------------------------------------------------------------------- /core/wsddn_demo.m: -------------------------------------------------------------------------------- 1 | function wsddn_demo(varargin) 2 | % @author: Hakan Bilen 3 | % wsddn_demo : this script shows a detection demo 4 | 5 | opts.dataDir = fullfile(vl_rootnn, 'data') ; 6 | opts.expDir = fullfile(vl_rootnn, 'exp') ; 7 | opts.imdbPath = fullfile(vl_rootnn, 'data', 'imdbs', 'imdb-eb.mat'); 8 | opts.modelPath = fullfile(vl_rootnn, 'exp', 'net.mat') ; 9 | opts.proposalType = 'eb' ; 10 | opts.proposalDir = fullfile(vl_rootnn, 'data','EdgeBoxes') ; 11 | 12 | % if you have limited gpu memory (<6gb), you can change the next 2 params 13 | opts.maxNumProposals = inf; % limit number 14 | % opts.imageScales = [480,576,688,864,1200]; % scales 15 | opts.imageScales = [480,576,688,864,1200]; % scales 16 | 17 | opts.gpu = [] ; 18 | opts.train.prefetch = true ; 19 | 20 | opts.numFetchThreads = 1 ; 21 | opts = vl_argparse(opts, varargin) ; 22 | 23 | display(opts); 24 | if ~exist(fullfile(opts.dataDir,'VOCdevkit','VOCcode','VOCinit.m'),'file') 25 | error('VOCdevkit is not installed'); 26 | end 27 | addpath(fullfile(opts.dataDir,'VOCdevkit','VOCcode')); 28 | opts.train.expDir = opts.expDir ; 29 | % ------------------------------------------------------------------------- 30 | % Network initialization 31 | % ------------------------------------------------------------------------- 32 | 33 | if ~exist(opts.modelPath, 'file') 34 | url = 'http://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/wsddn.mat' ; 35 | fprintf('Downloading %s to %s\n', url, opts.modelPath) ; 36 | urlwrite(url, opts.modelPath) ; 37 | end 38 | 39 | net = load(opts.modelPath); 40 | net = dagnn.DagNN.loadobj(net) ; 41 | 42 | net.mode = 'test' ; 43 | if ~isempty(opts.gpu) 44 | gpuDevice(opts.gpu) ; 45 | net.move('gpu') ; 46 | end 47 | 48 | if isfield(net,'normalization') 49 | bopts = net.normalization; 50 | else 51 | bopts = net.meta.normalization; 52 | end 53 | 54 | bopts.rgbVariance = [] ; 55 | bopts.interpolation = net.meta.normalization.interpolation; 56 | bopts.jitterBrightness = 0 ; 57 | bopts.imageScales = opts.imageScales; 58 | bopts.numThreads = opts.numFetchThreads; 59 | bs = find(arrayfun(@(a) isa(a.block, 'dagnn.BiasSamples'), net.layers)==1); 60 | bopts.addBiasSamples = ~isempty(bs) ; 61 | bopts.vgg16 = any(arrayfun(@(a) strcmp(a.name, 'relu5_1'), net.layers)==1) ; 62 | 63 | % ------------------------------------------------------------------------- 64 | % Database initialization 65 | % ------------------------------------------------------------------------- 66 | fprintf('loading imdb...'); 67 | if exist(opts.imdbPath,'file')==2 68 | imdb = load(opts.imdbPath) ; 69 | else 70 | imdb = setup_voc07_eb('dataDir',opts.dataDir, ... 71 | 'proposalDir',opts.proposalDir,'loadTest',1); 72 | 73 | save(opts.imdbPath,'-struct', 'imdb', '-v7.3'); 74 | end 75 | 76 | fprintf('done\n'); 77 | minSize = 20; 78 | imdb = fixBBoxes(imdb, minSize, opts.maxNumProposals); 79 | 80 | % -------------------------------------------------------------------- 81 | % Detect 82 | % -------------------------------------------------------------------- 83 | % query images 84 | testIdx = [12,15]; 85 | 86 | VOCinit; 87 | cats = VOCopts.classes; 88 | ovTh = 0.4; % nms threshold 89 | scTh = 0.1; % det confidence threshold 90 | 91 | bopts.useGpu = numel(opts.gpu) > 0 ; 92 | 93 | detLayer = find(arrayfun(@(a) strcmp(a.name, 'xTimes'), net.vars)==1); 94 | 95 | net.vars(detLayer(1)).precious = 1; 96 | % run detection 97 | rcolors = randi(255,3,numel(cats)); 98 | for t=1:numel(testIdx) 99 | batch = testIdx(t); 100 | 101 | scoret = []; 102 | for s=1:numel(opts.imageScales) 103 | for f=1:2 % add flips 104 | inputs = getBatch(bopts, imdb, batch, opts.imageScales(s), f-1 ); 105 | net.eval(inputs) ; 106 | 107 | if isempty(scoret) 108 | scoret = squeeze(gather(net.vars(detLayer).value)); 109 | else 110 | scoret = scoret + squeeze(gather(net.vars(detLayer).value)); 111 | end 112 | end 113 | end 114 | 115 | % divide by number of scales and flips 116 | scoret = scoret / (2 * numel(opts.imageScales)); 117 | im = imread(fullfile(imdb.imageDir,imdb.images.name{testIdx(t)})); 118 | 119 | for cls = 1:numel(cats) 120 | scores = scoret; 121 | boxes = double(imdb.images.boxes{testIdx(t)}); 122 | boxesSc = [boxes,scores(cls,:)']; 123 | boxesSc = boxesSc(boxesSc(:,5)>scTh,:); 124 | if isempty(boxesSc), continue; end; 125 | 126 | pick = nms(boxesSc, ovTh); 127 | boxesSc = boxesSc(pick,:); 128 | im = bbox_draw(im,boxesSc(1,1:4),rcolors(:,cls),2); 129 | fprintf('%s %.2f\n',cats{cls},boxesSc(1,5)); 130 | end 131 | imshow(im); 132 | pause() ; 133 | if exist('zs_dispFig', 'file'), zs_dispFig ; end 134 | end 135 | 136 | 137 | 138 | % -------------------------------------------------------------------- 139 | function inputs = getBatch(opts, imdb, batch, scale, flip) 140 | % -------------------------------------------------------------------- 141 | 142 | opts.scale = scale; 143 | opts.flip = flip; 144 | is_vgg16 = opts.vgg16 ; 145 | opts = rmfield(opts,'vgg16') ; 146 | 147 | images = strcat([imdb.imageDir filesep], imdb.images.name(batch)) ; 148 | opts.prefetch = (nargout == 0); 149 | 150 | [im,rois] = wsddn_get_batch(images, imdb, batch, opts); 151 | 152 | 153 | rois = single(rois'); 154 | if opts.useGpu > 0 155 | im = gpuArray(im) ; 156 | rois = gpuArray(rois) ; 157 | end 158 | rois = rois([1 3 2 5 4],:) ; 159 | 160 | 161 | ss = [16 16] ; 162 | if is_vgg16 163 | o0 = 8.5 ; 164 | o1 = 9.5 ; 165 | else 166 | o0 = 18 ; 167 | o1 = 9.5 ; 168 | end 169 | rois = [ rois(1,:); 170 | floor((rois(2,:) - o0 + o1) / ss(1) + 0.5) + 1; 171 | floor((rois(3,:) - o0 + o1) / ss(2) + 0.5) + 1; 172 | ceil((rois(4,:) - o0 - o1) / ss(1) - 0.5) + 1; 173 | ceil((rois(5,:) - o0 - o1) / ss(2) - 0.5) + 1]; 174 | 175 | 176 | inputs = {'input', im, 'rois', rois} ; 177 | 178 | 179 | if opts.addBiasSamples && isfield(imdb.images,'boxScores') 180 | boxScore = reshape(imdb.images.boxScores{batch},[1 1 1 numel(imdb.images.boxScores{batch})]); 181 | inputs{end+1} = 'boxScore'; 182 | inputs{end+1} = boxScore ; 183 | end 184 | 185 | 186 | % ------------------------------------------------------------------------- 187 | function imdb = fixBBoxes(imdb, minSize, maxNum) 188 | % ------------------------------------------------------------------------- 189 | 190 | for i=1:numel(imdb.images.name) 191 | bbox = imdb.images.boxes{i}; 192 | % remove small bbox 193 | isGood = (bbox(:,3)>=bbox(:,1)+minSize) & (bbox(:,4)>=bbox(:,2)+minSize); 194 | bbox = bbox(isGood,:); 195 | % remove duplicate ones 196 | [dummy, uniqueIdx] = unique(bbox, 'rows', 'first'); 197 | uniqueIdx = sort(uniqueIdx); 198 | bbox = bbox(uniqueIdx,:); 199 | % limit number for training 200 | if imdb.images.set(i)~=3 201 | nB = min(size(bbox,1),maxNum); 202 | else 203 | nB = size(bbox,1); 204 | end 205 | 206 | if isfield(imdb.images,'boxScores') 207 | imdb.images.boxScores{i} = imdb.images.boxScores{i}(uniqueIdx); 208 | imdb.images.boxScores{i} = imdb.images.boxScores{i}(1:nB); 209 | end 210 | imdb.images.boxes{i} = bbox(1:nB,:); 211 | % [h,w,~] = size(imdb.images.data{i}); 212 | % imdb.images.boxes{i} = [1 1 h w]; 213 | 214 | end 215 | 216 | % ------------------------------------------------------------------------- 217 | function im = bbox_draw(im,roi,color,t) 218 | % DRAWRECT 219 | % IM : input image 220 | % ROI : rectangle 221 | % COLOR : 222 | % T : thickness 223 | 224 | [h,w,d] = size(im); 225 | assert(d == numel(color)); 226 | if any(roi(:,1)>h) || any(roi(:,3)>h) || any(roi(:,2)>w) || any(roi(:,4)>w) 227 | error('Wrong bounding box coord!\n'); 228 | end 229 | for c=1:d 230 | im(max(roi(1)-t,1):min(roi(1)+t,h),max(roi(2)-t,1):min(roi(4)+t,w),c) = color(c); 231 | im(max(roi(3)-t,1):min(roi(3)+t,h),max(roi(2)-t,1):min(roi(4)+t,w),c) = color(c); 232 | im(max(roi(1)-t,1):min(roi(3)+t,h),max(roi(2)-t,1):min(roi(2)+t,w),c) = color(c); 233 | im(max(roi(1)-t,1):min(roi(3)+t,h),max(roi(4)-t,1):min(roi(4)+t,w),c) = color(c); 234 | end 235 | -------------------------------------------------------------------------------- /pascal/setup_voc07_eb.m: -------------------------------------------------------------------------------- 1 | function imdb = setup_voc07_eb(varargin) 2 | % cnn_voc07_eb_setup_data Initialize PASCAL VOC2007 data with edge 3 | % boxes 4 | 5 | % Warning! boxes are in the format of ([y1 x1 y2 x2]) 6 | 7 | opts.dataDir = fullfile('data') ; 8 | opts.proposalDir = fullfile(opts.dataDir,'EB'); 9 | opts.loadTest = 1; 10 | opts = vl_argparse(opts, varargin) ; 11 | 12 | % ------------------------------------------------------------------------- 13 | % Load selective search win 14 | % ------------------------------------------------------------------------- 15 | %% Get selective search windows 16 | files = {'EdgeBoxesVOC2007trainval.mat', ... 17 | 'EdgeBoxesVOC2007test.mat'} ; 18 | 19 | if ~exist(opts.proposalDir, 'dir') 20 | mkdir(opts.proposalDir) ; 21 | end 22 | 23 | for i=1:numel(files) 24 | outPath = fullfile(opts.proposalDir, files{i}) ; 25 | if ~exist(outPath, 'file') 26 | url = sprintf('http://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/%s',files{i}) ; 27 | fprintf('Downloading %s to %s\n', url, outPath) ; 28 | urlwrite(url,outPath) ; 29 | end 30 | end 31 | 32 | 33 | if ~isempty(opts.proposalDir) 34 | t1 = load([opts.proposalDir,filesep,files{1}]); 35 | if opts.loadTest 36 | t2 = load([opts.proposalDir,filesep,files{2}]); 37 | ssw.id = [str2double(t1.images) str2double(t2.images)]; 38 | ssw.boxes = cat(2,t1.boxes,t2.boxes); 39 | ssw.boxScores = cat(2,t1.boxScores,t2.boxScores); 40 | else 41 | ssw.id = str2double(t1.images); 42 | ssw.boxes = t1.boxes; 43 | ssw.boxScores = t1.boxScores; 44 | end 45 | 46 | [~,si] = sort(ssw.id); 47 | ssw.id = ssw.id(si); 48 | ssw.boxes = ssw.boxes(si); 49 | ssw.boxScores = ssw.boxScores(si); 50 | end 51 | 52 | % ------------------------------------------------------------------------- 53 | % Load categories metadata 54 | % ------------------------------------------------------------------------- 55 | cats = {'aeroplane','bicycle','bird','boat','bottle','bus','car',... 56 | 'cat','chair','cow','diningtable','dog','horse','motorbike','person',... 57 | 'pottedplant','sheep','sofa','train','tvmonitor'}; 58 | 59 | if ~exist(opts.dataDir,'dir') 60 | error('wrong data folder!'); 61 | end 62 | 63 | % Download VOC Devkit and data 64 | if ~exist(fullfile(opts.dataDir,'VOCdevkit'),'dir') 65 | files = {'VOCtest_06-Nov-2007.tar',... 66 | 'VOCtrainval_06-Nov-2007.tar',... 67 | 'VOCdevkit_08-Jun-2007.tar'} ; 68 | for i=1:numel(files) 69 | if ~exist(fullfile(opts.dataDir, files{i}), 'file') 70 | outPath = fullfile(opts.dataDir,files{i}) ; 71 | url = sprintf('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/%s',files{i}) ; 72 | fprintf('Downloading %s to %s\n', url, outPath) ; 73 | urlwrite(url,outPath) ; 74 | untar(outPath,opts.dataDir); 75 | end 76 | end 77 | end 78 | addpath(fullfile(opts.dataDir, 'VOCdevkit', 'VOCcode')); 79 | 80 | traindata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','train.txt')); 81 | valdata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','val.txt')); 82 | testdata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','test.txt')); 83 | 84 | assert(numel(traindata)==2501); 85 | assert(numel(valdata)==2510); 86 | assert(numel(testdata)==4952); 87 | 88 | imdb.classes.name = cats ; 89 | imdb.classes.description = cats ; 90 | imdb.imageDir = fullfile(opts.dataDir, fullfile('VOCdevkit','VOC2007','JPEGImages')) ; 91 | 92 | % ------------------------------------------------------------------------- 93 | % Training images 94 | % -------------------------------------------------------------------------% 95 | names = cell(1,numel(traindata)); 96 | labels = zeros(numel(traindata),numel(cats)); 97 | 98 | 99 | % load image names 100 | for t=1:numel(traindata) 101 | names{t} = sprintf('%06d.jpg',traindata(t)); 102 | % data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t})); 103 | end 104 | 105 | % load binary labels 106 | for c=1:numel(cats) 107 | t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_train.txt'])); 108 | labels(:,c) = t(:,2); 109 | end 110 | 111 | imdb.images.id = traindata'; 112 | imdb.images.name = names ; 113 | imdb.images.set = ones(1, numel(names)) ; 114 | imdb.images.label = labels' ; 115 | % imdb.images.data = data; 116 | 117 | % ------------------------------------------------------------------------- 118 | % Validation images 119 | % ------------------------------------------------------------------------- 120 | 121 | names = cell(1,numel(valdata)); 122 | labels = zeros(numel(valdata),numel(cats)); 123 | % data = cell(1,numel(valdata)); 124 | 125 | % load image names 126 | for t=1:numel(valdata) 127 | names{t} = sprintf('%06d.jpg',valdata(t)); 128 | % data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t})); 129 | end 130 | 131 | % load binary labels 132 | for c=1:numel(cats) 133 | t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_val.txt'])); 134 | labels(:,c) = t(:,2); 135 | end 136 | 137 | 138 | imdb.images.id = horzcat(imdb.images.id, valdata') ; 139 | imdb.images.name = horzcat(imdb.images.name, names) ; 140 | imdb.images.set = horzcat(imdb.images.set, 2*ones(1,numel(names))) ; 141 | imdb.images.label = horzcat(imdb.images.label, labels') ; 142 | % imdb.images.data = horzcat(imdb.images.data, data) ; 143 | 144 | % % ------------------------------------------------------------------------- 145 | % % Test images 146 | % % ------------------------------------------------------------------------- 147 | % 148 | % 149 | if opts.loadTest 150 | names = cell(1,numel(testdata)); 151 | labels = zeros(numel(testdata),numel(cats)); 152 | % data = cell(1,numel(testdata)); 153 | 154 | % load image names 155 | for t=1:numel(testdata) 156 | names{t} = sprintf('%06d.jpg',testdata(t)); 157 | % data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t})); 158 | end 159 | 160 | % load binary labels 161 | for c=1:numel(cats) 162 | t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_test.txt'])); 163 | labels(:,c) = t(:,2); 164 | end 165 | 166 | imdb.images.id = horzcat(imdb.images.id, testdata') ; 167 | imdb.images.name = horzcat(imdb.images.name, names) ; 168 | imdb.images.set = horzcat(imdb.images.set, 3 * ones(1,numel(names))) ; 169 | imdb.images.label = horzcat(imdb.images.label, labels') ; 170 | % imdb.images.data = horzcat(imdb.images.data, data) ; 171 | end 172 | % ------------------------------------------------------------------------- 173 | % Postprocessing 174 | % ------------------------------------------------------------------------- 175 | [~,sorti] = sort(imdb.images.id); 176 | 177 | 178 | imdb.images.id = imdb.images.id(sorti); 179 | imdb.images.name = imdb.images.name(sorti) ; 180 | imdb.images.set = imdb.images.set(sorti) ; 181 | imdb.images.label = single(imdb.images.label(:,sorti)) ; 182 | imdb.images.size = zeros(numel(imdb.images.name),2); 183 | 184 | if ~isempty(opts.proposalDir) 185 | imdb.images.boxes = ssw.boxes; 186 | imdb.images.boxScores = ssw.boxScores; 187 | assert(all(ssw.id==imdb.images.id)); 188 | end 189 | 190 | % this is zero as scores of selective search windows are not much 191 | % informative 192 | if ~isempty(opts.proposalDir) 193 | % imdb.images.boxScores = cell(size(imdb.images.boxes)); 194 | for i=1:numel(imdb.images.boxes) 195 | imdb.images.boxes{i} = int16(imdb.images.boxes{i}); 196 | imdb.images.boxScores{i} = single(imdb.images.boxScores{i}); 197 | 198 | imf = imfinfo(fullfile(imdb.imageDir,imdb.images.name{i})); 199 | imdb.images.size(i,:) = [imf.Height,imf.Width]; 200 | 201 | maxBoxes = max(imdb.images.boxes{i}); 202 | if imdb.images.size(i,1)< max(maxBoxes([1,3])) 203 | error('Wrong box coordinates'); 204 | end 205 | if imdb.images.size(i,2)< max(maxBoxes([2,4])) 206 | error('Wrong box coordinates'); 207 | end 208 | 209 | end 210 | end 211 | end 212 | -------------------------------------------------------------------------------- /core/wsddn_train.m: -------------------------------------------------------------------------------- 1 | function [net, info] = wsddn_train(varargin) 2 | % @author: Hakan Bilen 3 | % wsddn_train: training script for WSDDN 4 | 5 | opts.dataDir = fullfile(vl_rootnn, 'data') ; 6 | opts.expDir = fullfile(vl_rootnn, 'exp') ; 7 | opts.imdbPath = fullfile(vl_rootnn, 'data', 'imdbs', 'imdb-eb.mat'); 8 | opts.modelPath = fullfile(vl_rootnn, 'models', 'imagenet-vgg-f.mat') ; 9 | opts.proposalType = 'eb' ; 10 | opts.proposalDir = fullfile(vl_rootnn, 'data', 'EdgeBoxes') ; 11 | 12 | 13 | opts.addBiasSamples = 1; % add Box Scores 14 | opts.addLossSmooth = 1; % add Spatial Regulariser 15 | opts.softmaxTempCls = 1; % softmax temp for cls 16 | opts.softmaxTempDet = 2; % softmax temp for det 17 | opts.maxScale = 2000 ; 18 | 19 | % if you have limited gpu memory (<6gb), you can change the next 2 params 20 | opts.maxNumProposals = inf; % limit number (eg 1500) 21 | opts.imageScales = [480,576,688,864,1200]; % scales 22 | opts.minBoxSize = 20; % minimum bounding box size 23 | opts.train.gpus = [] ; 24 | opts.train.continue = true ; 25 | opts.train.prefetch = true ; 26 | opts.train.learningRate = 1e-5 * [ones(1,10) 0.1*ones(1,10)] ; 27 | opts.train.weightDecay = 0.0005; 28 | opts.train.numEpochs = 20; 29 | opts.train.derOutputs = {'objective', 1} ; 30 | 31 | opts.numFetchThreads = 1 ; 32 | opts = vl_argparse(opts, varargin) ; 33 | 34 | display(opts); 35 | 36 | opts.train.batchSize = 1 ; 37 | opts.train.expDir = opts.expDir ; 38 | opts.train.numEpochs = numel(opts.train.learningRate) ; 39 | %% ------------------------------------------------------------------------- 40 | % Database initialization 41 | % ------------------------------------------------------------------------- 42 | fprintf('loading imdb...'); 43 | if exist(opts.imdbPath,'file')==2 44 | imdb = load(opts.imdbPath) ; 45 | else 46 | if strcmp(opts.proposalType,'ssw') 47 | imdb = setup_voc07_ssw('dataDir',opts.dataDir, ... 48 | 'proposalDir',opts.proposalDir,'loadTest',1); 49 | elseif strcmp(opts.proposalType,'eb') 50 | imdb = setup_voc07_eb('dataDir',opts.dataDir, ... 51 | 'proposalDir',opts.proposalDir,'loadTest',1); 52 | else 53 | error('undefined proposal type %s\n',opts.proposalType) 54 | end 55 | 56 | imdbFolder = fileparts(opts.imdbPath); 57 | 58 | if ~exist(imdbFolder,'dir') 59 | mkdir(imdbFolder); 60 | end 61 | save(opts.imdbPath,'-struct', 'imdb', '-v7.3'); 62 | end 63 | 64 | fprintf('done\n'); 65 | 66 | imdb = fixBBoxes(imdb, opts.minBoxSize, opts.maxNumProposals); 67 | 68 | % use train + val for training 69 | imdb.images.set(imdb.images.set == 2) = 1; 70 | trainIdx = find(imdb.images.set == 1); 71 | 72 | %% Compute image statistics (mean, RGB covariances, etc.) 73 | imageStatsPath = fullfile(opts.dataDir, 'imageStats.mat') ; 74 | if exist(imageStatsPath,'file') 75 | load(imageStatsPath, 'averageImage', 'rgbMean', 'rgbCovariance') ; 76 | else 77 | 78 | images = imdb.images.name(imdb.images.set == 1) ; 79 | images = strcat([imdb.imageDir filesep],images) ; 80 | 81 | [averageImage, rgbMean, rgbCovariance] = getImageStats(images, ... 82 | 'imageSize', [256 256], ... 83 | 'numThreads', opts.numFetchThreads, ... 84 | 'gpus', opts.train.gpus) ; 85 | save(imageStatsPath, 'averageImage', 'rgbMean', 'rgbCovariance') ; 86 | end 87 | [v,d] = eig(rgbCovariance) ; 88 | rgbDeviation = v*sqrt(d) ; 89 | clear v d ; 90 | 91 | 92 | %% ------------------------------------------------------------------------ 93 | % Network initialization 94 | % ------------------------------------------------------------------------- 95 | nopts.addBiasSamples = opts.addBiasSamples; % add Box Scores (only with Edge Boxes) 96 | nopts.addLossSmooth = opts.addLossSmooth; % add Spatial Regulariser 97 | nopts.softmaxTempCls = opts.softmaxTempCls; % softmax temp for cls 98 | nopts.softmaxTempDet = opts.softmaxTempDet; % softmax temp for det 99 | 100 | nopts.averageImage = reshape(rgbMean,[1 1 3]) ; 101 | % nopts.rgbVariance = 0.1 * rgbDeviation ; 102 | nopts.rgbVariance = [] ; 103 | nopts.numClasses = numel(imdb.classes.name) ; 104 | nopts.classNames = imdb.classes.name ; 105 | 106 | if ~exist(opts.modelPath,'file') 107 | [pname,fname,ext] = fileparts(opts.modelPath) ; 108 | if ~exist(pname,'dir') 109 | mkdir(pname) ; 110 | end 111 | fprintf('Downloading %s to %s\n', [fname ext], pname) ; 112 | urlwrite(sprintf('http://www.vlfeat.org/matconvnet/models/%s',[fname ext]),... 113 | opts.modelPath) ; 114 | end 115 | 116 | net = load(opts.modelPath); 117 | net = wsddn_init(net,nopts); 118 | 119 | if nopts.addLossSmooth 120 | opts.train.derOutputs = {'objective', 1, 'lossTopB', 1e-4} ; 121 | end 122 | 123 | 124 | if ~exist(opts.expDir,'dir') 125 | mkdir(opts.expDir) ; 126 | end 127 | 128 | %% ------------------------------------------------------------------------- 129 | % Database stats 130 | % ------------------------------------------------------------------------- 131 | bopts = net.meta.normalization; 132 | net.meta.augmentation.jitterBrightness = 0 ; 133 | % bopts.interpolation = 'bilinear'; 134 | bopts.jitterBrightness = net.meta.augmentation.jitterBrightness ; 135 | bopts.imageScales = opts.imageScales; 136 | bopts.numThreads = opts.numFetchThreads; 137 | bopts.addLossSmooth = opts.addLossSmooth; 138 | bopts.addBiasSamples = opts.addBiasSamples; 139 | bopts.maxScale = opts.maxScale ; 140 | bopts.vgg16 = any(arrayfun(@(a) strcmp(a.name, 'relu5_1'), net.layers)==1) ; 141 | %% ------------------------------------------------------------------- 142 | % Train 143 | % -------------------------------------------------------------------- 144 | % avoid test data 145 | valIdx = find(imdb.images.set == 3); 146 | valIdx = valIdx(1:5:end) ; 147 | % valIdx = []; 148 | 149 | %% 150 | bopts.useGpu = numel(opts.train.gpus) > 0 ; 151 | bopts.prefetch = opts.train.prefetch; 152 | 153 | info = cnn_train_dag(net, imdb, @(i,b) ... 154 | getBatch(bopts,i,b), ... 155 | opts.train, 'train', trainIdx, ... 156 | 'val', valIdx) ; 157 | 158 | %% ------------------------------------------------------------------- 159 | % Deploy network 160 | % -------------------------------------------------------------------- 161 | if ~exist(fullfile(opts.expDir,'net.mat'),'file') 162 | removeLoss = {'dagnn.Loss','dagnn.DropOut'}; 163 | for i=1:numel(removeLoss) 164 | dagRemoveLayersOfType(net,removeLoss{i}) ; 165 | end 166 | 167 | net.mode = 'test' ; 168 | net_ = net ; 169 | net = net_.saveobj() ; 170 | save(fullfile(opts.expDir,'net.mat'), '-struct','net'); 171 | end 172 | % -------------------------------------------------------------------- 173 | function inputs = getBatch(opts, imdb, batch) 174 | % -------------------------------------------------------------------- 175 | if isempty(batch) 176 | inputs = {'input', [], 'label', [], 'rois', [], 'ids', []}; 177 | return; 178 | end 179 | 180 | opts.scale = opts.imageScales(randi(numel(opts.imageScales))); 181 | opts.flip = randi(2,numel(batch),1)-1; % random flip 182 | is_vgg16 = opts.vgg16 ; 183 | opts = rmfield(opts,'vgg16') ; 184 | 185 | images = strcat([imdb.imageDir filesep], imdb.images.name(batch)) ; 186 | opts.prefetch = (nargout == 0); 187 | 188 | [im,rois] = wsddn_get_batch(images, imdb, batch, opts); 189 | 190 | if nargout>0 191 | rois = single(rois') ; 192 | labels = imdb.images.label(:,batch) ; 193 | labels = reshape(labels,[1 1 size(labels,1) numel(batch)]); 194 | 195 | if opts.useGpu > 0 196 | im = gpuArray(im) ; 197 | rois = gpuArray(rois) ; 198 | end 199 | 200 | if ~isempty(rois) 201 | rois = rois([1 3 2 5 4],:) ; 202 | end 203 | 204 | ss = [16 16] ; 205 | 206 | if is_vgg16 207 | o0 = 8.5 ; 208 | o1 = 9.5 ; 209 | else 210 | o0 = 18 ; 211 | o1 = 9.5 ; 212 | end 213 | 214 | rois = [ rois(1,:); ... 215 | floor((rois(2,:) - o0 + o1) / ss(1) + 0.5) + 1; 216 | floor((rois(3,:) - o0 + o1) / ss(2) + 0.5) + 1; 217 | ceil((rois(4,:) - o0 - o1) / ss(1) - 0.5) + 1; 218 | ceil((rois(5,:) - o0 - o1) / ss(2) - 0.5) + 1]; 219 | 220 | 221 | inputs = {'input', im, 'label', labels, 'rois', rois, 'ids', batch} ; 222 | 223 | if opts.addLossSmooth 224 | inputs{end+1} = 'boxes' ; 225 | inputs{end+1} = imdb.images.boxes{batch} ; 226 | end 227 | 228 | if opts.addBiasSamples==1 229 | boxScore = reshape(imdb.images.boxScores{batch},[1 1 1 numel(imdb.images.boxScores{batch})]); 230 | inputs{end+1} = 'boxScore'; 231 | inputs{end+1} = boxScore ; 232 | end 233 | end 234 | 235 | % ------------------------------------------------------------------------- 236 | function imdb = fixBBoxes(imdb, minSize, maxNum) 237 | % ------------------------------------------------------------------------- 238 | for i=1:numel(imdb.images.name) 239 | bbox = imdb.images.boxes{i}; 240 | % remove small bbox 241 | isGood = (bbox(:,3)>=bbox(:,1)+minSize) & (bbox(:,4)>=bbox(:,2)+minSize); 242 | bbox = bbox(isGood,:); 243 | % remove duplicate ones 244 | [dummy, uniqueIdx] = unique(bbox, 'rows', 'first'); 245 | uniqueIdx = sort(uniqueIdx); 246 | bbox = bbox(uniqueIdx,:); 247 | % limit number for training 248 | if imdb.images.set(i)~=3 249 | nB = min(size(bbox,1),maxNum); 250 | else 251 | nB = size(bbox,1); 252 | end 253 | 254 | if isfield(imdb.images,'boxScores') 255 | imdb.images.boxScores{i} = imdb.images.boxScores{i}(isGood); 256 | imdb.images.boxScores{i} = imdb.images.boxScores{i}(uniqueIdx); 257 | imdb.images.boxScores{i} = imdb.images.boxScores{i}(1:nB); 258 | end 259 | imdb.images.boxes{i} = bbox(1:nB,:); 260 | % [h,w,~] = size(imdb.images.data{i}); 261 | % imdb.images.boxes{i} = [1 1 h w]; 262 | 263 | end 264 | 265 | % ------------------------------------------------------------------------- 266 | function layers = dagFindLayersOfType(net, type) 267 | % ------------------------------------------------------------------------- 268 | layers = [] ; 269 | for l = 1:numel(net.layers) 270 | if isa(net.layers(l).block, type) 271 | layers{1,end+1} = net.layers(l).name ; 272 | end 273 | end 274 | % ------------------------------------------------------------------------- 275 | function dagRemoveLayersOfType(net, type) 276 | % ------------------------------------------------------------------------- 277 | names = dagFindLayersOfType(net, type) ; 278 | for i = 1:numel(names) 279 | layer = net.layers(net.getLayerIndex(names{i})) ; 280 | net.removeLayer(names{i}) ; 281 | net.renameVar(layer.outputs{1}, layer.inputs{1}, 'quiet', true) ; 282 | end 283 | -------------------------------------------------------------------------------- /core/wsddn_test.m: -------------------------------------------------------------------------------- 1 | function aps = wsddn_test(varargin) 2 | % @author: Hakan Bilen 3 | % wsddn_test : this script evaluates detection performance in PASCAL VOC 4 | % dataset for given a WSDDN model 5 | 6 | opts.dataDir = fullfile(vl_rootnn, 'data') ; 7 | opts.expDir = fullfile(vl_rootnn, 'exp') ; 8 | opts.imdbPath = fullfile(vl_rootnn, 'data', 'imdbs', 'imdb-eb.mat'); 9 | opts.modelPath = fullfile(vl_rootnn, 'exp', 'net.mat') ; 10 | opts.proposalType = 'eb' ; 11 | opts.proposalDir = fullfile(vl_rootnn, 'data','EdgeBoxes') ; 12 | 13 | % if you have limited gpu memory (<6gb), you can change the next 2 params 14 | opts.maxNumProposals = inf; % limit number 15 | opts.imageScales = [480,576,688,864,1200]; % scales 16 | 17 | opts.gpu = [] ; 18 | opts.train.prefetch = true ; 19 | opts.vis = 0 ; 20 | opts.numFetchThreads = 1 ; 21 | opts = vl_argparse(opts, varargin) ; 22 | 23 | display(opts); 24 | if ~exist(fullfile(opts.dataDir,'VOCdevkit','VOCcode','VOCinit.m'),'file') 25 | error('VOCdevkit is not installed'); 26 | end 27 | addpath(fullfile(opts.dataDir,'VOCdevkit','VOCcode')); 28 | opts.train.expDir = opts.expDir ; 29 | % ------------------------------------------------------------------------- 30 | % Network initialization 31 | % ------------------------------------------------------------------------- 32 | net = load(opts.modelPath); 33 | % figure(2) ; 34 | if isfield(net,'net') 35 | net = net.net; 36 | end 37 | net = dagnn.DagNN.loadobj(net) ; 38 | 39 | net.mode = 'test' ; 40 | if ~isempty(opts.gpu) 41 | gpuDevice(opts.gpu) ; 42 | net.move('gpu') ; 43 | end 44 | 45 | if isfield(net,'normalization') 46 | bopts = net.normalization; 47 | else 48 | bopts = net.meta.normalization; 49 | end 50 | 51 | bopts.rgbVariance = [] ; 52 | bopts.interpolation = net.meta.normalization.interpolation; 53 | bopts.jitterBrightness = 0 ; 54 | bopts.imageScales = opts.imageScales; 55 | bopts.numThreads = opts.numFetchThreads; 56 | bs = find(arrayfun(@(a) isa(a.block, 'dagnn.BiasSamples'), net.layers)==1); 57 | bopts.addBiasSamples = ~isempty(bs) ; 58 | bopts.vgg16 = any(arrayfun(@(a) strcmp(a.name, 'relu5_1'), net.layers)==1) ; 59 | % ------------------------------------------------------------------------- 60 | % Database initialization 61 | % ------------------------------------------------------------------------- 62 | fprintf('loading imdb...'); 63 | if exist(opts.imdbPath,'file')==2 64 | imdb = load(opts.imdbPath) ; 65 | else 66 | imdb = cnn_voc07_eb_setup_data('dataDir',opts.dataDir, ... 67 | 'proposalDir',opts.proposalDir,'loadTest',1); 68 | save(opts.imdbPath,'-struct', 'imdb', '-v7.3'); 69 | end 70 | 71 | fprintf('done\n'); 72 | minSize = 20; 73 | imdb = fixBBoxes(imdb, minSize, opts.maxNumProposals); 74 | 75 | VOCinit; 76 | VOCopts.testset = 'test'; 77 | VOCopts.annopath = fullfile(opts.dataDir,'VOCdevkit','VOC2007','Annotations','%s.xml'); 78 | VOCopts.imgsetpath = fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','%s.txt'); 79 | VOCopts.localdir = fullfile(opts.dataDir,'VOCdevkit','local','VOC2007'); 80 | cats = VOCopts.classes; 81 | ovTh = 0.4; 82 | scTh = 1e-3; 83 | % -------------------------------------------------------------------- 84 | % Detect 85 | % -------------------------------------------------------------------- 86 | if strcmp(VOCopts.testset,'test') 87 | testIdx = find(imdb.images.set == 3); 88 | elseif strcmp(VOCopts.testset,'trainval') 89 | testIdx = find(imdb.images.set < 3); 90 | end 91 | bopts.useGpu = numel(opts.gpu) > 0 ; 92 | 93 | scores = cell(1,numel(testIdx)); 94 | boxes = imdb.images.boxes(testIdx); 95 | names = imdb.images.name(testIdx); 96 | 97 | detLayer = find(arrayfun(@(a) strcmp(a.name, 'xTimes'), net.vars)==1); 98 | net.vars(detLayer(1)).precious = 1; 99 | % run detection 100 | start = tic ; 101 | for t=1:numel(testIdx) 102 | batch = testIdx(t); 103 | 104 | scoret = []; 105 | for s=1:numel(opts.imageScales) 106 | for f=1:2 % add flips 107 | inputs = getBatch(bopts, imdb, batch, opts.imageScales(s), f-1 ); 108 | net.eval(inputs) ; 109 | 110 | if isempty(scoret) 111 | scoret = squeeze(gather(net.vars(detLayer).value)); 112 | else 113 | scoret = scoret + squeeze(gather(net.vars(detLayer).value)); 114 | end 115 | end 116 | end 117 | scores{t} = scoret; 118 | % show speed 119 | time = toc(start) ; 120 | n = t * 2 * numel(opts.imageScales) ; % number of images processed overall 121 | speed = n/time ; 122 | if mod(t,10)==0 123 | fprintf('test %d / %d speed %.1f Hz\n',t,numel(testIdx),speed); 124 | end 125 | 126 | 127 | if opts.vis 128 | for cls = 1:numel(cats) 129 | idx = (scores{t}(cls,:)>0.05); 130 | if sum(idx)==0, continue;end 131 | % divide by number of scales and flips 132 | 133 | im = imread(fullfile(imdb.imageDir,imdb.images.name{testIdx(t)})); 134 | boxest = double(imdb.images.boxes{testIdx(t)}(idx,:)); 135 | scorest = scores{t}(cls,idx)' / (2 * numel(opts.imageScales)); 136 | boxesSc = [boxest,scorest]; 137 | pick = nms(boxesSc, ovTh); 138 | boxesSc = boxesSc(pick,:); 139 | figure(1) ; 140 | im = bbox_draw(im,boxesSc(1,[2 1 4 3 5])); 141 | fprintf('%s %.2f',cats{cls},boxesSc(1,5)); 142 | 143 | fprintf('\n') ; 144 | title(cats{cls}); 145 | pause; 146 | 147 | end 148 | end 149 | end 150 | 151 | dets.names = names; 152 | dets.scores = scores; 153 | dets.boxes = boxes; 154 | 155 | % -------------------------------------------------------------------- 156 | % PASCAL VOC evaluation 157 | % -------------------------------------------------------------------- 158 | 159 | aps = zeros(numel(cats),1); 160 | for cls = 1:numel(cats) 161 | 162 | vocDets.confidence = []; 163 | vocDets.bbox = []; 164 | vocDets.ids = []; 165 | 166 | for i=1:numel(dets.names) 167 | 168 | scores = double(dets.scores{i}); 169 | boxes = double(dets.boxes{i}); 170 | 171 | boxesSc = [boxes,scores(cls,:)']; 172 | boxesSc = boxesSc(boxesSc(:,5)>scTh,:); 173 | pick = nms(boxesSc, ovTh); 174 | boxesSc = boxesSc(pick,:); 175 | 176 | vocDets.confidence = [vocDets.confidence;boxesSc(:,5)]; 177 | vocDets.bbox = [vocDets.bbox;boxesSc(:,[2 1 4 3])]; 178 | vocDets.ids = [vocDets.ids; repmat({dets.names{i}(1:6)},size(boxesSc,1),1)]; 179 | 180 | end 181 | [rec,prec,ap] = wsddnVOCevaldet(VOCopts,cats{cls},vocDets,0); 182 | 183 | fprintf('%s %.1f\n',cats{cls},100*ap); 184 | aps(cls) = ap; 185 | end 186 | 187 | % -------------------------------------------------------------------- 188 | function inputs = getBatch(opts, imdb, batch, scale, flip) 189 | % -------------------------------------------------------------------- 190 | 191 | opts.scale = scale; 192 | opts.flip = flip; 193 | is_vgg16 = opts.vgg16 ; 194 | opts = rmfield(opts,'vgg16') ; 195 | 196 | images = strcat([imdb.imageDir filesep], imdb.images.name(batch)) ; 197 | opts.prefetch = (nargout == 0); 198 | 199 | [im,rois] = wsddn_get_batch(images, imdb, batch, opts); 200 | 201 | 202 | rois = single(rois'); 203 | if opts.useGpu > 0 204 | im = gpuArray(im) ; 205 | rois = gpuArray(rois) ; 206 | end 207 | rois = rois([1 3 2 5 4],:) ; 208 | 209 | 210 | ss = [16 16] ; 211 | if is_vgg16 212 | o0 = 8.5 ; 213 | o1 = 9.5 ; 214 | else 215 | o0 = 18 ; 216 | o1 = 9.5 ; 217 | end 218 | rois = [ rois(1,:); 219 | floor((rois(2,:) - o0 + o1) / ss(1) + 0.5) + 1; 220 | floor((rois(3,:) - o0 + o1) / ss(2) + 0.5) + 1; 221 | ceil((rois(4,:) - o0 - o1) / ss(1) - 0.5) + 1; 222 | ceil((rois(5,:) - o0 - o1) / ss(2) - 0.5) + 1]; 223 | 224 | 225 | inputs = {'input', im, 'rois', rois} ; 226 | 227 | 228 | if opts.addBiasSamples && isfield(imdb.images,'boxScores') 229 | boxScore = reshape(imdb.images.boxScores{batch},[1 1 1 numel(imdb.images.boxScores{batch})]); 230 | inputs{end+1} = 'boxScore'; 231 | inputs{end+1} = boxScore ; 232 | end 233 | 234 | 235 | % ------------------------------------------------------------------------- 236 | function imdb = fixBBoxes(imdb, minSize, maxNum) 237 | 238 | for i=1:numel(imdb.images.name) 239 | bbox = imdb.images.boxes{i}; 240 | % remove small bbox 241 | isGood = (bbox(:,3)>=bbox(:,1)+minSize) & (bbox(:,4)>=bbox(:,2)+minSize); 242 | bbox = bbox(isGood,:); 243 | % remove duplicate ones 244 | [dummy, uniqueIdx] = unique(bbox, 'rows', 'first'); 245 | uniqueIdx = sort(uniqueIdx); 246 | bbox = bbox(uniqueIdx,:); 247 | % limit number for training 248 | if imdb.images.set(i)~=3 249 | nB = min(size(bbox,1),maxNum); 250 | else 251 | nB = size(bbox,1); 252 | end 253 | 254 | if isfield(imdb.images,'boxScores') 255 | imdb.images.boxScores{i} = imdb.images.boxScores{i}(isGood); 256 | imdb.images.boxScores{i} = imdb.images.boxScores{i}(uniqueIdx); 257 | imdb.images.boxScores{i} = imdb.images.boxScores{i}(1:nB); 258 | end 259 | imdb.images.boxes{i} = bbox(1:nB,:); 260 | % [h,w,~] = size(imdb.images.data{i}); 261 | % imdb.images.boxes{i} = [1 1 h w]; 262 | 263 | end 264 | 265 | %-------------------------------------------------------------------------% 266 | 267 | function im = bbox_draw(im,boxes,c,t) 268 | 269 | % copied from Ross Girshick 270 | % Fast R-CNN 271 | % Copyright (c) 2015 Microsoft 272 | % Licensed under The MIT License [see LICENSE for details] 273 | % Written by Ross Girshick 274 | % -------------------------------------------------------- 275 | % source: https://github.com/rbgirshick/fast-rcnn/blob/master/matlab/showboxes.m 276 | % 277 | % 278 | % Fast R-CNN 279 | % 280 | % Copyright (c) Microsoft Corporation 281 | % 282 | % All rights reserved. 283 | % 284 | % MIT License 285 | % 286 | % Permission is hereby granted, free of charge, to any person obtaining a 287 | % copy of this software and associated documentation files (the "Software"), 288 | % to deal in the Software without restriction, including without limitation 289 | % the rights to use, copy, modify, merge, publish, distribute, sublicense, 290 | % and/or sell copies of the Software, and to permit persons to whom the 291 | % Software is furnished to do so, subject to the following conditions: 292 | % 293 | % The above copyright notice and this permission notice shall be included 294 | % in all copies or substantial portions of the Software. 295 | % 296 | % THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 297 | % IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 298 | % FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 299 | % THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 300 | % OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 301 | % ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 302 | % OTHER DEALINGS IN THE SOFTWARE. 303 | 304 | image(im); 305 | axis image; 306 | axis off; 307 | set(gcf, 'Color', 'white'); 308 | 309 | if nargin<3 310 | c = 'r'; 311 | t = 2; 312 | end 313 | 314 | s = '-'; 315 | if ~isempty(boxes) 316 | x1 = boxes(:, 1); 317 | y1 = boxes(:, 2); 318 | x2 = boxes(:, 3); 319 | y2 = boxes(:, 4); 320 | line([x1 x1 x2 x2 x1]', [y1 y2 y2 y1 y1]', ... 321 | 'color', c, 'linewidth', t, 'linestyle', s); 322 | for i = 1:size(boxes, 1) 323 | text(double(x1(i)), double(y1(i)) - 2, ... 324 | sprintf('%.4f', boxes(i, end)), ... 325 | 'backgroundcolor', 'b', 'color', 'w', 'FontSize', 8); 326 | end 327 | end 328 | --------------------------------------------------------------------------------