├── utils
    ├── cpu_cores.m
    ├── mkdir_if_missing.m
    ├── xVOChash_lookup.m
    ├── RectLTWH2LTRB.m
    ├── RectLTRB2LTWH.m
    ├── new_parpool.m
    ├── xVOCap.m
    ├── xVOChash_init.m
    ├── vis_label.m
    ├── prep_im_for_blob_size.m
    ├── im_list_to_blob.m
    ├── symbolic_link.m
    ├── parse_rst.m
    ├── procid.m
    ├── tic_toc_print.m
    ├── auto_select_gpu.m
    ├── seed_rand.m
    ├── active_caffe_mex.m
    ├── prep_im_for_blob.m
    ├── boxoverlap.m
    ├── subsample_images.m
    ├── subsample_images_per_class.m
    └── showboxes.m
├── data
    └── demo
    │   ├── 000166.jpg
    │   ├── 001852.jpg
    │   ├── 002597.jpg
    │   ├── 004030.jpg
    │   ├── 005225.jpg
    │   ├── 000166_boxes.mat
    │   ├── 001852_boxes.mat
    │   ├── 002597_boxes.mat
    │   ├── 004030_boxes.mat
    │   └── 005225_boxes.mat
├── experiments
    ├── +Dataset
    │   ├── private
    │   │   ├── voc2007_devkit.m
    │   │   └── voc2012_devkit.m
    │   ├── voc2007_test_ss.m
    │   ├── voc0712_trainval_ss.m
    │   ├── voc2007_test_sp.m
    │   └── voc0712_trainval_sp.m
    ├── +Model
    │   ├── ResNet50_for_RFCN_VOC0712.m
    │   ├── ResNet101_for_RFCN_VOC0712.m
    │   ├── ResNet50_for_RFCN_VOC0712_OHEM.m
    │   └── ResNet101_for_RFCN_VOC0712_OHEM.m
    ├── script_rfcn_VOC0712_ResNet50_OHEM_ss.m
    ├── script_rfcn_VOC0712_ResNet50_rpn.m
    ├── script_rfcn_VOC0712_ResNet101_OHEM_ss.m
    ├── script_rfcn_VOC0712_ResNet101_rpn.m
    ├── script_rfcn_VOC0712_ResNet50_OHEM_rpn.m
    ├── script_rfcn_VOC0712_ResNet101_OHEM_rpn.m
    └── script_rfcn_demo.m
├── .gitmodules
├── imdb
    ├── get_voc_opts.m
    ├── imdb_eval_voc.m
    ├── imdb_from_voc.m
    └── roidb_from_voc.m
├── models
    └── rfcn_prototxts
    │   ├── ResNet-101L_res3a
    │       └── solver_80k110k_lr1_3.prototxt
    │   ├── ResNet-50L_res3a
    │       └── solver_80k110k_lr1_3.prototxt
    │   ├── ResNet-101L_OHEM_res3a
    │       └── solver_80k110k_lr1_3.prototxt
    │   └── ResNet-50L_OHEM_res3a
    │       └── solver_80k110k_lr1_3.prototxt
├── .gitattributes
├── fetch_data
    ├── fetch_region_proposals.m
    ├── fetch_caffe_mex_windows_vs2013_cuda75.m
    ├── fetch_model_ResNet50.m
    ├── fetch_model_ResNet101.m
    └── fetch_demo_model_ResNet101.m
├── functions
    ├── rfcn
    │   ├── rfcn_map_im_rois_to_feat_rois.m
    │   ├── rfcn_bbox_transform.m
    │   ├── rfcn_bbox_transform_inv.m
    │   ├── rfcn_config_ohem.m
    │   ├── rfcn_config_simple.m
    │   ├── rfcn_im_detect.m
    │   ├── rfcn_prepare_image_roidb.m
    │   ├── rfcn_get_minibatch.m
    │   ├── rfcn_test.m
    │   └── rfcn_train.m
    └── nms
    │   ├── nms_multiclass.m
    │   ├── nvmex.m
    │   ├── nms.m
    │   ├── nms_mex.cpp
    │   ├── nms_multiclass_mex.cpp
    │   └── nms_gpu_mex.cu
├── rfcn_build.m
├── .gitignore
├── startup.m
├── LICENSE
└── README.md


/utils/cpu_cores.m:
--------------------------------------------------------------------------------
1 | function num = cpu_cores()
2 | % num = cpu_cores()
3 | % Returns the value reported by MATLAB's feature('numcores') query.
4 |     num = feature('numcores');
5 | end


--------------------------------------------------------------------------------
/data/demo/000166.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/000166.jpg


--------------------------------------------------------------------------------
/data/demo/001852.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/001852.jpg


--------------------------------------------------------------------------------
/data/demo/002597.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/002597.jpg


--------------------------------------------------------------------------------
/data/demo/004030.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/004030.jpg


--------------------------------------------------------------------------------
/data/demo/005225.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/005225.jpg


--------------------------------------------------------------------------------
/data/demo/000166_boxes.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/000166_boxes.mat


--------------------------------------------------------------------------------
/data/demo/001852_boxes.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/001852_boxes.mat


--------------------------------------------------------------------------------
/data/demo/002597_boxes.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/002597_boxes.mat


--------------------------------------------------------------------------------
/data/demo/004030_boxes.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/004030_boxes.mat


--------------------------------------------------------------------------------
/data/demo/005225_boxes.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/005225_boxes.mat


--------------------------------------------------------------------------------
/experiments/+Dataset/private/voc2007_devkit.m:
--------------------------------------------------------------------------------
1 | function path = voc2007_devkit()
2 | % path = voc2007_devkit()
3 | % Returns the hard-coded, relative location of the VOC 2007 devkit folder.
4 | % Edit the string below if your devkit is installed somewhere else.
5 |     path = './datasets/VOCdevkit2007';
6 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/private/voc2012_devkit.m:
--------------------------------------------------------------------------------
1 | function path = voc2012_devkit()
2 | % path = voc2012_devkit()
3 | % Returns the hard-coded, relative location of the VOC 2012 devkit folder.
4 | % Edit the string below if your devkit is installed somewhere else.
5 |     path = './datasets/VOCdevkit2012';
6 | end


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "external/caffe"]
2 | 	path = external/caffe
3 | 	url = https://github.com/daijifeng001/caffe-rfcn.git
4 | 	branch = faster-R-CNN
5 | 


--------------------------------------------------------------------------------
/utils/mkdir_if_missing.m:
--------------------------------------------------------------------------------
1 | function made = mkdir_if_missing(path)
2 | % made = mkdir_if_missing(path)
3 | % Creates the directory PATH when it does not exist yet.
4 | % Returns true if this call created the directory, false otherwise.
5 | made = ~exist(path, 'dir');
6 | if made
7 |   mkdir(path);
8 | end
9 | 


--------------------------------------------------------------------------------
/utils/xVOChash_lookup.m:
--------------------------------------------------------------------------------
1 | function ind = xVOChash_lookup(hash,s)
2 | % From the PASCAL VOC 2011 devkit
3 | % Returns the value(s) stored for the string s in a table built by
4 | % xVOChash_init; the result is empty when s is not present.
5 | 
6 | hsize=numel(hash.key);
7 | h=mod(str2double(s([4 6:end])),hsize)+1;  % same bucket rule as xVOChash_init
8 | ind=hash.val{h}(strcmp(s,hash.key{h}));  % strcmp mask instead of the deprecated strmatch
9 | 


--------------------------------------------------------------------------------
/utils/RectLTWH2LTRB.m:
--------------------------------------------------------------------------------
1 | function [ rectsLTRB ] = RectLTWH2LTRB(rectsLTWH)
2 | %rects (l, t, w, h) to (l, t, r, b)
3 | % Columns 1-4 of rectsLTWH are read as left, top, width, height; right and
4 | % bottom are computed inclusively as l + w - 1 and t + h - 1.
5 | 
6 | rectsLTRB = [rectsLTWH(:, 1), rectsLTWH(:, 2), rectsLTWH(:, 1)+rectsLTWH(:,3)-1, rectsLTWH(:,2)+rectsLTWH(:,4)-1];
7 | end
8 | 
9 | 


--------------------------------------------------------------------------------
/utils/RectLTRB2LTWH.m:
--------------------------------------------------------------------------------
1 | function [ rectsLTWH ] = RectLTRB2LTWH( rectsLTRB )
2 | %rects (l, t, r, b) to (l, t, w, h)
3 | % Columns 1-4 of rectsLTRB are read as left, top, right, bottom; width and
4 | % height are computed inclusively as r - l + 1 and b - t + 1.
5 | 
6 | rectsLTWH = [rectsLTRB(:, 1), rectsLTRB(:, 2), rectsLTRB(:, 3)-rectsLTRB(:,1)+1, rectsLTRB(:,4)-rectsLTRB(:,2)+1];
7 | end
8 | 
9 | 


--------------------------------------------------------------------------------
/utils/new_parpool.m:
--------------------------------------------------------------------------------
 1 | function p = new_parpool(number)
 2 | % p = new_parpool(number)
 3 | % Starts a fresh parallel pool, shutting down any pool that is already open.
 4 | %   number - pool size passed to parpool; defaults to cpu_cores() when omitted
 5 | %   p      - the pool object returned by parpool
 6 | 
 7 |     if nargin < 1
 8 |         number = cpu_cores();
 9 |     end
10 | 
11 |     % gcp('nocreate') only reports an open pool, it never starts one.
12 |     running = gcp('nocreate');
13 |     if ~isempty(running)
14 |         delete(running);
15 |     end
16 | 
17 |     p = parpool(number);
18 | end


--------------------------------------------------------------------------------
/utils/xVOCap.m:
--------------------------------------------------------------------------------
 1 | function ap = xVOCap(rec,prec)
 2 | % ap = xVOCap(rec, prec)
 3 | % From the PASCAL VOC 2011 devkit
 4 | %
 5 | % Computes average precision as the area under the precision/recall curve
 6 | % after precision has been made monotonically non-increasing.
 7 | % rec and prec are concatenated vertically with scalars below, so they are
 8 | % expected to be column vectors.
 9 | 
10 | % Pad both curves with sentinel values at each end.
11 | mrec=[0 ; rec ; 1];
12 | mpre=[0 ; prec ; 0];
13 | % Backward sweep: each entry becomes the maximum precision at or after it.
14 | for i=numel(mpre)-1:-1:1
15 |     mpre(i)=max(mpre(i),mpre(i+1));
16 | end
17 | % Positions where recall changes; sum (recall step) * (precision there).
18 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
19 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
20 | 


--------------------------------------------------------------------------------
/imdb/get_voc_opts.m:
--------------------------------------------------------------------------------
 1 | function VOCopts = get_voc_opts(path)
 2 | % VOCopts = get_voc_opts(path)
 3 | % Changes into PATH, adds its VOCcode folder to the MATLAB path and runs
 4 | % VOCinit. VOCopts is never assigned here directly, so VOCinit is expected
 5 | % to define it. The working directory and MATLAB path are restored on both
 6 | % the success and the failure route.
 7 | % NOTE(review): any error raised while running VOCinit is reported as
 8 | % "VOCcode directory not found", because the catch block is unconditional.
 9 | 
10 | tmp = pwd;
11 | cd(path);
12 | try
13 |   addpath('VOCcode');
14 |   VOCinit;
15 | catch
16 |   % Undo the path/directory changes before reporting the failure.
17 |   rmpath('VOCcode');
18 |   cd(tmp);
19 |   error(sprintf('VOCcode directory not found under %s', path));
20 | end
21 | rmpath('VOCcode');
22 | cd(tmp);
23 | 


--------------------------------------------------------------------------------
/utils/xVOChash_init.m:
--------------------------------------------------------------------------------
 1 | function hash = xVOChash_init(strs)
 2 | % hash = xVOChash_init(strs)
 3 | % From the PASCAL VOC 2011 devkit
 4 | %
 5 | % Builds a bucketed lookup table that maps each string of the cell array
 6 | % STRS to its index in STRS. hash.key{h} holds the strings of bucket h and
 7 | % hash.val{h} the matching indices.
 8 | 
 9 | hsize=4999;
10 | hash.key=cell(hsize,1);
11 | hash.val=cell(hsize,1);
12 | 
13 | for i=1:numel(strs)
14 |     s=strs{i};
15 |     % Bucket = number formed by characters [4 6:end] of the string, modulo hsize
16 |     % (assumes those characters are digits, e.g. VOC image ids -- TODO confirm).
17 |     h=mod(str2double(s([4 6:end])),hsize)+1;
18 |     % Append the string and its index at the end of the bucket.
19 |     j=numel(hash.key{h})+1;
20 |     hash.key{h}{j}=strs{i};
21 |     hash.val{h}(j)=i;
22 | end
23 | 
24 | 


--------------------------------------------------------------------------------
/utils/vis_label.m:
--------------------------------------------------------------------------------
 1 | function vis_label(imdb, roidb)
 2 | % vis_label(imdb, roidb)
 3 | % Shows, image by image, the boxes stored in roidb.rois grouped by class
 4 | % index, and waits (pause) between images.
 5 | 
 6 |     rois = roidb.rois;
 7 |     for iIM = 1:length(rois)
 8 |         im = imread(imdb.image_at(iIM));
 9 |         % One cell per class index, holding the box rows labelled with that class.
10 |         boxes = arrayfun(@(x) rois(iIM).boxes(rois(iIM).class == x, :), 1:length(imdb.classes), 'UniformOutput', false);
11 |         legends = imdb.classes;
12 |         showboxes(im, boxes, legends);
13 |         pause;
14 |     end
15 | end
16 |   


--------------------------------------------------------------------------------
/models/rfcn_prototxts/ResNet-101L_res3a/solver_80k110k_lr1_3.prototxt:
--------------------------------------------------------------------------------
 1 | net: "./models/rfcn_prototxts/ResNet-101L_res3a/train_val.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 80000
 6 | display: 20
 7 | max_iter: 110000
 8 | momentum: 0.9
 9 | weight_decay: 0.0005
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | snapshot: 0
12 | #debug_info: true
13 | 
14 | 


--------------------------------------------------------------------------------
/models/rfcn_prototxts/ResNet-50L_res3a/solver_80k110k_lr1_3.prototxt:
--------------------------------------------------------------------------------
 1 | net: "./models/rfcn_prototxts/ResNet-50L_res3a/train_val.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 80000
 6 | display: 20
 7 | max_iter: 110000
 8 | momentum: 0.9
 9 | weight_decay: 0.0005
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | snapshot: 0
12 | #debug_info: true
13 | 
14 | 


--------------------------------------------------------------------------------
/models/rfcn_prototxts/ResNet-101L_OHEM_res3a/solver_80k110k_lr1_3.prototxt:
--------------------------------------------------------------------------------
 1 | net: "./models/rfcn_prototxts/ResNet-101L_OHEM_res3a/train_val.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 80000
 6 | display: 20
 7 | max_iter: 110000
 8 | momentum: 0.9
 9 | weight_decay: 0.0005
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | snapshot: 0
12 | #debug_info: true
13 | 
14 | 


--------------------------------------------------------------------------------
/models/rfcn_prototxts/ResNet-50L_OHEM_res3a/solver_80k110k_lr1_3.prototxt:
--------------------------------------------------------------------------------
 1 | net: "./models/rfcn_prototxts/ResNet-50L_OHEM_res3a/train_val.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 80000
 6 | display: 20
 7 | max_iter: 110000
 8 | momentum: 0.9
 9 | weight_decay: 0.0005
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | snapshot: 0
12 | #debug_info: true
13 | 
14 | 


--------------------------------------------------------------------------------
/utils/prep_im_for_blob_size.m:
--------------------------------------------------------------------------------
 1 | function im_scale = prep_im_for_blob_size(im_size, target_size, max_size)
 2 | % im_scale = prep_im_for_blob_size(im_size, target_size, max_size)
 3 | % Scale factor that brings the shorter image side to target_size, reduced
 4 | % when necessary so that the longer side does not exceed max_size.
 5 | %   im_size - size vector; only its first two entries are used
 6 | 
 7 |     sides = im_size(1:2);
 8 |     short_side = min(sides);
 9 |     long_side = max(sides);
10 | 
11 |     % Start from the scale that fits the shorter side ...
12 |     im_scale = double(target_size) / short_side;
13 | 
14 |     % ... and switch to the longer-side limit when that scale overshoots it.
15 |     if round(im_scale * long_side) > max_size
16 |         im_scale = double(max_size) / double(long_side);
17 |     end
18 | end


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | 
 7 | # Standard to msysgit
 8 | *.doc	 diff=astextplain
 9 | *.DOC	 diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot  diff=astextplain
13 | *.DOT  diff=astextplain
14 | *.pdf  diff=astextplain
15 | *.PDF	 diff=astextplain
16 | *.rtf	 diff=astextplain
17 | *.RTF	 diff=astextplain
18 | 


--------------------------------------------------------------------------------
/utils/im_list_to_blob.m:
--------------------------------------------------------------------------------
 1 | function blob = im_list_to_blob(ims)
 2 | % blob = im_list_to_blob(ims)
 3 | % Packs a cell array of 3-channel images into one single-precision array of
 4 | % size [max rows, max cols, 3, number of images]. Each image is written into
 5 | % the top-left corner of its slice; the rest of the slice stays zero.
 6 |     % Largest extent along every dimension over all images.
 7 |     max_shape = max(cell2mat(cellfun(@size, ims(:), 'UniformOutput', false)), [], 1);
 8 |     % Every image must have exactly three channels.
 9 |     assert(all(cellfun(@(x) size(x, 3), ims, 'UniformOutput', true) == 3));
10 |     num_images = length(ims);
11 |     blob = zeros(max_shape(1), max_shape(2), 3, num_images, 'single');
12 |     
13 |     for i = 1:length(ims)
14 |         im = ims{i};
15 |         blob(1:size(im, 1), 1:size(im, 2), :, i) = im; 
16 |     end
17 | end


--------------------------------------------------------------------------------
/utils/symbolic_link.m:
--------------------------------------------------------------------------------
 1 | function symbolic_link(link, target)
 2 | % symbolic_link(link, target)
 3 | % Creates LINK as a link that points at the existing path TARGET
 4 | % (a directory junction via mklink /J on Windows, ln -s elsewhere).
 5 | % --------------------------------------------------------
 6 | % Faster R-CNN
 7 | % Copyright (c) 2015, Shaoqing Ren
 8 | % Licensed under The MIT License [see LICENSE for details]
 9 | % --------------------------------------------------------
10 | 
11 |     if ispc()
12 |         % mklink expects the link name first, then the target.
13 |         system(sprintf('mklink /J %s %s', link, target)); 
14 |     else 
15 |         % ln -s expects the target first, then the link name.
16 |         system(sprintf('ln -s %s %s', target, link)); 
17 |     end
18 | 
19 | end
20 | 


--------------------------------------------------------------------------------
/fetch_data/fetch_region_proposals.m:
--------------------------------------------------------------------------------
 1 | 
 2 | % Downloads the region proposals archive into this script's folder, unpacks
 3 | % it into the parent folder and removes the archive afterwards.
 4 | cur_dir = pwd;
 5 | cd(fileparts(mfilename('fullpath')));
 6 | 
 7 | try
 8 |     fprintf('Downloading region proposals...\n');
 9 |     urlwrite('https://onedrive.live.com/download?resid=F371D9563727B96F!91965&authkey=!AErVqYD6NhjxAfw', ...
10 |         'proposals.zip');
11 | 
12 |     fprintf('Unzipping...\n');
13 |     unzip('proposals.zip', '..');
14 | 
15 |     fprintf('Done.\n');
16 |     delete('proposals.zip');
17 | catch fetch_err
18 |     % Finish the hint with a newline and report what actually failed
19 |     % (the download, the unzip or the cleanup) instead of hiding it.
20 |     fprintf('Error in downloading, please try links in README.md https://github.com/daijifeng001/R-FCN\n');
21 |     fprintf('Cause: %s\n', fetch_err.message);
22 | end
23 | 
24 | % Return to the directory the script was started from.
25 | cd(cur_dir);
26 | 


--------------------------------------------------------------------------------
/fetch_data/fetch_caffe_mex_windows_vs2013_cuda75.m:
--------------------------------------------------------------------------------
 1 | 
 2 | % Downloads the caffe_mex archive into this script's folder, unpacks it
 3 | % into the parent folder and removes the archive afterwards.
 4 | cur_dir = pwd;
 5 | cd(fileparts(mfilename('fullpath')));
 6 | 
 7 | try
 8 |     fprintf('Downloading caffe_mex...\n');
 9 |     urlwrite('https://onedrive.live.com/download?resid=F371D9563727B96F!91961&authkey=!AOkZbLTBfuMB69Y', ...
10 |         'caffe_mex.zip');
11 | 
12 |     fprintf('Unzipping...\n');
13 |     unzip('caffe_mex.zip', '..');
14 | 
15 |     fprintf('Done.\n');
16 |     delete('caffe_mex.zip');
17 | catch fetch_err
18 |     % Finish the hint with a newline and report what actually failed
19 |     % (the download, the unzip or the cleanup) instead of hiding it.
20 |     fprintf('Error in downloading, please try links in README.md https://github.com/daijifeng001/R-FCN\n');
21 |     fprintf('Cause: %s\n', fetch_err.message);
22 | end
23 | 
24 | % Return to the directory the script was started from.
25 | cd(cur_dir);
26 | 


--------------------------------------------------------------------------------
/fetch_data/fetch_model_ResNet50.m:
--------------------------------------------------------------------------------
 1 | 
 2 | % Downloads the ResNet-50L model archive into this script's folder, unpacks
 3 | % it into the parent folder and removes the archive afterwards.
 4 | cur_dir = pwd;
 5 | cd(fileparts(mfilename('fullpath')));
 6 | 
 7 | try
 8 |     fprintf('Downloading model_ResNet-50L...\n');
 9 |     urlwrite('https://onedrive.live.com/download?resid=F371D9563727B96F!91962&authkey=!AET2I7W3WzcDyf8', ...
10 |         'models_ResNet-50L.zip');
11 | 
12 |     fprintf('Unzipping...\n');
13 |     unzip('models_ResNet-50L.zip', '..');
14 | 
15 |     fprintf('Done.\n');
16 |     delete('models_ResNet-50L.zip');
17 | catch fetch_err
18 |     % Finish the hint with a newline and report what actually failed
19 |     % (the download, the unzip or the cleanup) instead of hiding it.
20 |     fprintf('Error in downloading, please try links in README.md https://github.com/daijifeng001/R-FCN\n');
21 |     fprintf('Cause: %s\n', fetch_err.message);
22 | end
23 | 
24 | % Return to the directory the script was started from.
25 | cd(cur_dir);
26 | 


--------------------------------------------------------------------------------
/fetch_data/fetch_model_ResNet101.m:
--------------------------------------------------------------------------------
 1 | 
 2 | % Downloads the ResNet-101L model archive into this script's folder, unpacks
 3 | % it into the parent folder and removes the archive afterwards.
 4 | cur_dir = pwd;
 5 | cd(fileparts(mfilename('fullpath')));
 6 | 
 7 | try
 8 |     fprintf('Downloading model_ResNet-101L...\n');
 9 |     urlwrite('https://onedrive.live.com/download?resid=F371D9563727B96F!91963&authkey=!AM-EuzuUJelv9Po', ...
10 |         'models_ResNet-101L.zip');
11 | 
12 |     fprintf('Unzipping...\n');
13 |     unzip('models_ResNet-101L.zip', '..');
14 | 
15 |     fprintf('Done.\n');
16 |     delete('models_ResNet-101L.zip');
17 | catch fetch_err
18 |     % Finish the hint with a newline and report what actually failed
19 |     % (the download, the unzip or the cleanup) instead of hiding it.
20 |     fprintf('Error in downloading, please try links in README.md https://github.com/daijifeng001/R-FCN\n');
21 |     fprintf('Cause: %s\n', fetch_err.message);
22 | end
23 | 
24 | % Return to the directory the script was started from.
25 | cd(cur_dir);
26 | 


--------------------------------------------------------------------------------
/experiments/+Model/ResNet50_for_RFCN_VOC0712.m:
--------------------------------------------------------------------------------
 1 | function model = ResNet50_for_RFCN_VOC0712(model)
 2 | % model = ResNet50_for_RFCN_VOC0712(model)
 3 | % ResNet 50layers (finetuned from res3a)
 4 | % Fills in the solver/test prototxt paths and the pre-trained weight and
 5 | % mean-image paths, all resolved against the current working directory.
 6 | 
 7 | prototxt_dir   = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-50L_res3a');
 8 | pretrained_dir = fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-50L');
 9 | 
10 | model.solver_def_file        = fullfile(prototxt_dir, 'solver_80k110k_lr1_3.prototxt');
11 | model.test_net_def_file      = fullfile(prototxt_dir, 'test.prototxt');
12 | 
13 | model.net_file               = fullfile(pretrained_dir, 'ResNet-50-model.caffemodel');
14 | model.mean_image             = fullfile(pretrained_dir, 'mean_image');
15 | 
16 | end


--------------------------------------------------------------------------------
/experiments/+Model/ResNet101_for_RFCN_VOC0712.m:
--------------------------------------------------------------------------------
 1 | function model = ResNet101_for_RFCN_VOC0712(model)
 2 | % model = ResNet101_for_RFCN_VOC0712(model)
 3 | % ResNet 101layers (finetuned from res3a)
 4 | % Fills in the solver/test prototxt paths and the pre-trained weight and
 5 | % mean-image paths, all resolved against the current working directory.
 6 | 
 7 | prototxt_dir   = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-101L_res3a');
 8 | pretrained_dir = fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-101L');
 9 | 
10 | model.solver_def_file        = fullfile(prototxt_dir, 'solver_80k110k_lr1_3.prototxt');
11 | model.test_net_def_file      = fullfile(prototxt_dir, 'test.prototxt');
12 | 
13 | model.net_file               = fullfile(pretrained_dir, 'ResNet-101-model.caffemodel');
14 | model.mean_image             = fullfile(pretrained_dir, 'mean_image');
15 | 
16 | end


--------------------------------------------------------------------------------
/utils/parse_rst.m:
--------------------------------------------------------------------------------
 1 | function results = parse_rst(results, rst)
 2 | % results = parse_rst(results, rst)
 3 | % Appends the data of every entry of RST (flattened to a column) to the
 4 | % field of RESULTS named by that entry's blob_name. When RESULTS is empty
 5 | % the fields are created first, each with empty data.
 6 | % --------------------------------------------------------
 7 | % Faster R-CNN
 8 | % Copyright (c) 2015, Shaoqing Ren
 9 | % Licensed under The MIT License [see LICENSE for details]
10 | % --------------------------------------------------------
11 | 
12 |     % First call: create one field per blob name.
13 |     if isempty(results)
14 |         for i = 1:length(rst)
15 |             results.(rst(i).blob_name).data = [];
16 |         end
17 |     end
18 |         
19 |     % Accumulate the new values below the ones collected so far.
20 |     for i = 1:length(rst)
21 |         results.(rst(i).blob_name).data = [results.(rst(i).blob_name).data; rst(i).data(:)];
22 |     end
23 | end


--------------------------------------------------------------------------------
/fetch_data/fetch_demo_model_ResNet101.m:
--------------------------------------------------------------------------------
 1 | 
 2 | % Downloads the ResNet-101L demo model archive into this script's folder,
 3 | % unpacks it into the parent folder and removes the archive afterwards.
 4 | cur_dir = pwd;
 5 | cd(fileparts(mfilename('fullpath')));
 6 | 
 7 | try
 8 |     fprintf('Downloading demo_models_ResNet-101L...\n');
 9 |     urlwrite('https://onedrive.live.com/download?resid=F371D9563727B96F!91964&authkey=!AOk8r5H95KFO0e8', ...
10 |         'demo_models_ResNet-101L.zip');
11 | 
12 |     fprintf('Unzipping...\n');
13 |     unzip('demo_models_ResNet-101L.zip', '..');
14 | 
15 |     fprintf('Done.\n');
16 |     delete('demo_models_ResNet-101L.zip');
17 | catch fetch_err
18 |     % Finish the hint with a newline and report what actually failed
19 |     % (the download, the unzip or the cleanup) instead of hiding it.
20 |     fprintf('Error in downloading, please try links in README.md https://github.com/daijifeng001/R-FCN\n');
21 |     fprintf('Cause: %s\n', fetch_err.message);
22 | end
23 | 
24 | % Return to the directory the script was started from.
25 | cd(cur_dir);
26 | 


--------------------------------------------------------------------------------
/utils/procid.m:
--------------------------------------------------------------------------------
 1 | function s = procid()
 2 | % Returns a string identifying the process.
 3 | % The string is the name of the current working directory, i.e. the part
 4 | % of pwd after the last file separator; no process information is queried.
 5 | 
 6 | % AUTORIGHTS
 7 | % -------------------------------------------------------
 8 | % Copyright (C) 2009-2012 Ross Girshick
 9 | % 
10 | % This file is part of the voc-releaseX code
11 | % (http://people.cs.uchicago.edu/~rbg/latent/)
12 | % and is available under the terms of an MIT-like license
13 | % provided in COPYING. Please retain this notice and
14 | % COPYING if you use this file (or a portion of it) in
15 | % your project.
16 | % -------------------------------------------------------
17 | 
18 | d = pwd();
19 | % Keep only what follows the last separator.
20 | i = strfind(d, filesep);
21 | d = d(i(end)+1:end);
22 | s = d;
23 | 


--------------------------------------------------------------------------------
/experiments/+Model/ResNet50_for_RFCN_VOC0712_OHEM.m:
--------------------------------------------------------------------------------
 1 | function model = ResNet50_for_RFCN_VOC0712_OHEM(model)
 2 | % model = ResNet50_for_RFCN_VOC0712_OHEM(model)
 3 | % ResNet 50layers with OHEM training (finetuned from res3a)
 4 | % Fills in the solver/test prototxt paths and the pre-trained weight and
 5 | % mean-image paths, all resolved against the current working directory.
 6 | 
 7 | prototxt_dir   = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-50L_OHEM_res3a');
 8 | pretrained_dir = fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-50L');
 9 | 
10 | model.solver_def_file        = fullfile(prototxt_dir, 'solver_80k110k_lr1_3.prototxt');
11 | model.test_net_def_file      = fullfile(prototxt_dir, 'test.prototxt');
12 | 
13 | model.net_file               = fullfile(pretrained_dir, 'ResNet-50-model.caffemodel');
14 | model.mean_image             = fullfile(pretrained_dir, 'mean_image');
15 | 
16 | end


--------------------------------------------------------------------------------
/experiments/+Model/ResNet101_for_RFCN_VOC0712_OHEM.m:
--------------------------------------------------------------------------------
 1 | function model = ResNet101_for_RFCN_VOC0712_OHEM(model)
 2 | % model = ResNet101_for_RFCN_VOC0712_OHEM(model)
 3 | % ResNet 101layers with OHEM training (finetuned from res3a)
 4 | % Fills in the solver/test prototxt paths and the pre-trained weight and
 5 | % mean-image paths, all resolved against the current working directory.
 6 | 
 7 | prototxt_dir   = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-101L_OHEM_res3a');
 8 | pretrained_dir = fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-101L');
 9 | 
10 | model.solver_def_file        = fullfile(prototxt_dir, 'solver_80k110k_lr1_3.prototxt');
11 | model.test_net_def_file      = fullfile(prototxt_dir, 'test.prototxt');
12 | 
13 | model.net_file               = fullfile(pretrained_dir, 'ResNet-101-model.caffemodel');
14 | model.mean_image             = fullfile(pretrained_dir, 'mean_image');
15 | 
16 | end


--------------------------------------------------------------------------------
/functions/rfcn/rfcn_map_im_rois_to_feat_rois.m:
--------------------------------------------------------------------------------
 1 | function [feat_rois] = rfcn_map_im_rois_to_feat_rois(conf, im_rois, im_scale_factor)
 2 | % [feat_rois] = rfcn_map_im_rois_to_feat_rois(conf, im_rois, im_scale_factor)
 3 | % Scales 1-based ROI coordinates by im_scale_factor and rounds the result.
 4 | % conf is not used by the active code; it only appears in the commented-out
 5 | % variant below that additionally divides by conf.feat_stride.
 6 | % --------------------------------------------------------
 7 | % R-FCN implementation
 8 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 9 | % Copyright (c) 2016, Jifeng Dai
10 | % Licensed under The MIT License [see LICENSE for details]
11 | % --------------------------------------------------------
12 | 
13 | %% Map a ROI in image-pixel coordinates to a ROI in feature coordinates.
14 | % in matlab's index (start from 1)
15 | 
16 |     % Shift to 0-based, scale, round, then shift back to 1-based.
17 |     feat_rois = round((im_rois-1) * im_scale_factor) + 1;
18 |     
19 |     %feat_rois = round((im_rois-1) * im_scale_factor / single(conf.feat_stride)) + 1;
20 | 
21 | end


--------------------------------------------------------------------------------
/utils/tic_toc_print.m:
--------------------------------------------------------------------------------
 1 | function tic_toc_print(fmt, varargin)
 2 | % Print only after 1 second has passed since the last print. 
 3 | % Arguments are the same as for fprintf.
 4 | % The timer lives in a persistent variable, so it is shared by every caller
 5 | % of this function; the first call starts the timer and, in practice,
 6 | % prints nothing because less than a second has elapsed.
 7 | 
 8 | % AUTORIGHTS
 9 | % -------------------------------------------------------
10 | % Copyright (C) 2009-2012 Ross Girshick
11 | % 
12 | % This file is part of the voc-releaseX code
13 | % (http://people.cs.uchicago.edu/~rbg/latent/)
14 | % and is available under the terms of an MIT-like license
15 | % provided in COPYING. Please retain this notice and
16 | % COPYING if you use this file (or a portion of it) in
17 | % your project.
18 | % -------------------------------------------------------
19 | 
20 | persistent th;
21 | 
22 | % Start the timer on first use.
23 | if isempty(th)
24 |   th = tic();
25 | end
26 | 
27 | % Print, flush pending graphics/callbacks, and restart the timer.
28 | if toc(th) > 1
29 |   fprintf(fmt, varargin{:});
30 |   drawnow;
31 |   th = tic();
32 | end
33 | 


--------------------------------------------------------------------------------
/rfcn_build.m:
--------------------------------------------------------------------------------
 1 | function rfcn_build()
 2 | % rfcn_build()
 3 | % Compiles the CPU and GPU NMS mex files into ./bin when they are not
 4 | % already available. All paths are relative, so run it from the repository
 5 | % root.
 6 | % --------------------------------------------------------
 7 | % R-FCN implementation
 8 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 9 | % Copyright (c) 2016, Jifeng Dai
10 | % Licensed under The MIT License [see LICENSE for details]
11 | % --------------------------------------------------------
12 | 
13 | % Both compile steps write into ./bin, which is git-ignored and may not
14 | % exist in a fresh checkout; create it up front.
15 | if ~exist('bin', 'dir')
16 |   mkdir('bin');
17 | end
18 | 
19 | % Compile nms_mex
20 | if ~exist('nms_mex', 'file')
21 |   fprintf('Compiling nms_mex\n');
22 | 
23 |   mex -O -outdir bin ...
24 |       CXXFLAGS="\$CXXFLAGS -std=c++11"  ...
25 |       -largeArrayDims ...
26 |       functions/nms/nms_mex.cpp ...
27 |       -output nms_mex;
28 | end
29 | 
30 | % Compile nms_gpu_mex with the nvmex helper from functions/nms
31 | if ~exist('nms_gpu_mex', 'file')
32 |    fprintf('Compiling nms_gpu_mex\n');
33 |    addpath(fullfile(pwd, 'functions', 'nms'));
34 |    nvmex('functions/nms/nms_gpu_mex.cu', 'bin');
35 |    delete('nms_gpu_mex.o');
36 | end
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/utils/auto_select_gpu.m:
--------------------------------------------------------------------------------
 1 | function gpu_id = auto_select_gpu()
 2 | % gpu_id = auto_select_gpu()
 3 | % Select the gpu which has the maximum free memory 
 4 | % Returns the 1-based index of that device (the first one on ties) and
 5 | % raises an error when no GPU device is reported at all.
 6 | % All devices are deselected again before returning.
 7 | % --------------------------------------------------------
 8 | % Faster R-CNN
 9 | % Copyright (c) 2015, Shaoqing Ren
10 | % Licensed under The MIT License [see LICENSE for details]
11 | % --------------------------------------------------------
12 | 
13 |     % deselects all GPU devices
14 |     gpuDevice([]);
15 | 
16 |     num_devices = gpuDeviceCount;
17 |     if num_devices < 1
18 |         error('auto_select_gpu:noDevice', 'No GPU device found.');
19 |     end
20 | 
21 |     % Start below any real value so that a device is always chosen, even
22 |     % when every device reports zero free memory.
23 |     maxFreeMemory = -1;
24 |     for i = 1:num_devices
25 |         g = gpuDevice(i);
26 |         freeMemory = g.FreeMemory();
27 |         fprintf('GPU %d: free memory %d\n', i, freeMemory);
28 |         if freeMemory > maxFreeMemory
29 |             maxFreeMemory = freeMemory;
30 |             gpu_id = i;
31 |         end
32 |     end
33 |     fprintf('Use GPU %d\n', gpu_id);
34 |     
35 |     % deselects all GPU devices
36 |     gpuDevice([]);
37 | end
38 | 


--------------------------------------------------------------------------------
/experiments/+Dataset/voc2007_test_ss.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc2007_test_ss(dataset, usage, use_flip)
 2 | % Pascal voc 2007 test set with selective search
 3 | % usage = 'train': sets dataset.imdb_train and dataset.roidb_train
 4 | % usage = 'test' : sets dataset.imdb_test and dataset.roidb_test
 5 | % any other usage raises an error
 6 | 
 7 | % change to point to your devkit install
 8 | devkit                      = voc2007_devkit();
 9 | 
10 | switch usage
11 |     case {'train'}
12 |         % Training entries are cell arrays with one element per image set.
13 |         dataset.imdb_train    = {  imdb_from_voc(devkit, 'test', '2007', use_flip) };
14 |         dataset.roidb_train   = cellfun(@(x) x.roidb_func(x, 'with_selective_search', true), dataset.imdb_train, 'UniformOutput', false);
15 |     case {'test'}
16 |         % Test entries are stored directly, not wrapped in a cell array.
17 |         dataset.imdb_test     = imdb_from_voc(devkit, 'test', '2007', use_flip) ;
18 |         dataset.roidb_test    = dataset.imdb_test.roidb_func(dataset.imdb_test, 'with_selective_search', true);
19 |     otherwise
20 |         error('usage = ''train'' or ''test''');
21 | end
22 | 
23 | end


--------------------------------------------------------------------------------
/utils/seed_rand.m:
--------------------------------------------------------------------------------
 1 | function prev_rng = seed_rand(seed)
 2 | % seed_rand - Set random number generator to a fixed seed.
 3 | %   prev_rng = seed_rand(seed)
 4 | %
 5 | %   Strategic use ensures that results are reproducible.
 6 | %   seed defaults to 3 when omitted; the generator type is always 'twister'.
 7 | %
 8 | %   To restore the previous rng after calling this do:
 9 | %   rng(prev_rng);
10 | 
11 | % AUTORIGHTS
12 | % ---------------------------------------------------------
13 | % Copyright (c) 2014, Ross Girshick
14 | % 
15 | % This file is part of the R-CNN code and is available 
16 | % under the terms of the Simplified BSD License provided in 
17 | % LICENSE. Please retain this notice and LICENSE if you use 
18 | % this file (or any portion of it) in your project.
19 | % ---------------------------------------------------------
20 | 
21 | if nargin < 1
22 |     % This value works best for me.
23 |     seed = 3;
24 |     % Just kidding, of course ;-).
25 | end
26 | 
27 | % Capture the current generator settings before reseeding so the caller
28 | % can restore them later.
29 | prev_rng = rng;
30 | rng(seed, 'twister')
31 | 


--------------------------------------------------------------------------------
/experiments/+Dataset/voc0712_trainval_ss.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc0712_trainval_ss(dataset, usage, use_flip)
 2 | % Pascal voc 0712 trainval set with selective search
 3 | % usage = 'train': sets dataset.imdb_train and dataset.roidb_train
 4 | % usage = 'test' : not supported, raises an error
 5 | % any other usage raises an error
 6 | 
 7 | % change to point to your devkit install
 8 | devkit2007                      = voc2007_devkit();
 9 | devkit2012                      = voc2012_devkit();
10 | 
11 | switch usage
12 |     case {'train'}
13 |         % One imdb per source (2007 trainval, 2012 trainval), one roidb for each.
14 |         dataset.imdb_train    = {  imdb_from_voc(devkit2007, 'trainval', '2007', use_flip), ...
15 |                                     imdb_from_voc(devkit2012, 'trainval', '2012', use_flip)};
16 |         dataset.roidb_train   = cellfun(@(x) x.roidb_func(x, 'with_selective_search', true), dataset.imdb_train, 'UniformOutput', false);
17 |     case {'test'}
18 |         error('only supports one source test currently');  
19 |     otherwise
20 |         error('usage = ''train'' or ''test''');
21 | end
22 | 
23 | end


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Windows image file caches
 2 | Thumbs.db
 3 | ehthumbs.db
 4 | 
 5 | # Folder config file
 6 | Desktop.ini
 7 | 
 8 | # Recycle Bin used on file shares
 9 | $RECYCLE.BIN/
10 | 
11 | # User Ignore
12 | models/fast_rcnn_prototxts/
13 | models/pre_trained_model/
14 | models/rpn_prototxts/
15 | data/
16 | datasets/
17 | output/
18 | cachedir/
19 | imdb/cache
20 | bin/
21 | external/caffe/matlab
22 | fetch_data/*.zip
23 | *.caffemodel
24 | *.mat
25 | 
26 | # Windows Installer files
27 | *.cab
28 | *.msi
29 | *.msm
30 | *.msp
31 | 
32 | # Windows shortcuts
33 | *.lnk
34 | 
35 | # =========================
36 | # Operating System Files
37 | # =========================
38 | 
39 | # OSX
40 | # =========================
41 | 
42 | .DS_Store
43 | .AppleDouble
44 | .LSOverride
45 | 
46 | # Thumbnails
47 | ._*
48 | 
49 | # Files that might appear on external disk
50 | .Spotlight-V100
51 | .Trashes
52 | 
53 | # Directories potentially created on remote AFP share
54 | .AppleDB
55 | .AppleDesktop
56 | Network Trash Folder
57 | Temporary Items
58 | .apdisk
59 | 


--------------------------------------------------------------------------------
/experiments/+Dataset/voc2007_test_sp.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc2007_test_sp(dataset, usage, use_flip, extension)
 2 | % Pascal VOC 2007 test set paired with *pre-computed* RPN proposals.
 3 | %   extension  Forwarded to roidb_func as the 'extension' option; "resnet50"
 4 | %              or "resnet101" picks which pre-computed proposals are used.
 5 | %   usage      'train' -> sets dataset.imdb_train / dataset.roidb_train
 6 | %                         (both 1x1 cells)
 7 | %              'test'  -> sets dataset.imdb_test / dataset.roidb_test
 8 | %   Both usages read the VOC 2007 'test' image set.
 9 | 
10 | % change to point to your devkit install
11 | devkit                      = voc2007_devkit();
12 | 
13 | switch usage
14 |     case 'train'
15 |         imdb = imdb_from_voc(devkit, 'test', '2007', use_flip);
16 |         dataset.imdb_train  = { imdb };
17 |         dataset.roidb_train = { imdb.roidb_func(imdb, 'with_self_proposal', true, 'extension', extension) };
18 |     case 'test'
19 |         imdb = imdb_from_voc(devkit, 'test', '2007', use_flip);
20 |         dataset.imdb_test   = imdb;
21 |         dataset.roidb_test  = imdb.roidb_func(imdb, 'with_self_proposal', true, 'extension', extension);
22 |     otherwise
23 |         error('usage = ''train'' or ''test''');
24 | end
25 | 
26 | end


--------------------------------------------------------------------------------
/utils/active_caffe_mex.m:
--------------------------------------------------------------------------------
 1 | function active_caffe_mex(gpu_id, caffe_version)
 2 | % active_caffe_mex(gpu_id, caffe_version)
 3 | %   Selects the GPU in MATLAB, puts the requested matcaffe build on the
 4 | %   path and points caffe at the same device.
 5 | %
 6 | %   gpu_id         1-based MATLAB GPU index (caffe receives gpu_id-1).
 7 | %   caffe_version  Sub-folder of <pwd>/external/caffe/matlab to use
 8 | %                  (optional, default 'caffe'). If that folder is missing,
 9 | %                  a warning is issued and external/caffe/matlab is used.
10 | %
11 | %   Paths are resolved against the current folder, so call this from the
12 | %   repository root.
13 | % --------------------------------------------------------
14 | % Faster R-CNN
15 | % Copyright (c) 2015, Shaoqing Ren
16 | % Licensed under The MIT License [see LICENSE for details]
17 | % --------------------------------------------------------
18 | 
19 |     % set gpu in matlab
20 |     gpuDevice(gpu_id);
21 | 
22 |     if ~exist('caffe_version', 'var') || isempty(caffe_version)
23 |         caffe_version = 'caffe';
24 |     end
25 |     cur_dir = pwd;
26 |     default_dir = fullfile(cur_dir, 'external', 'caffe', 'matlab');
27 |     caffe_dir = fullfile(default_dir, caffe_version);
28 |     
29 |     if ~exist(caffe_dir, 'dir')
30 |         warning('Specified caffe folder (%s) is not exist, change to default one (%s)', ...
31 |             caffe_dir, default_dir);
32 |         caffe_dir = default_dir;
33 |     end
34 |     
35 |     addpath(genpath(caffe_dir));
36 | 
37 |     % Restore the caller's folder even if caffe.set_device throws; the
38 |     % cleanup object fires when this function exits for any reason.
39 |     restore_dir = onCleanup(@() cd(cur_dir)); %#ok<NASGU>
40 |     cd(caffe_dir);
41 |     caffe.set_device(gpu_id-1);
42 | end
43 | 


--------------------------------------------------------------------------------
/experiments/+Dataset/voc0712_trainval_sp.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc0712_trainval_sp(dataset, usage, use_flip, extension)
 2 | % Pascal VOC 2007+2012 trainval sets with *pre-computed* RPN proposals.
 3 | %   extension  Forwarded to roidb_func as the 'extension' option; "resnet50"
 4 | %              or "resnet101" picks which pre-computed proposals are used.
 5 | %   usage      'train' -> sets dataset.imdb_train / dataset.roidb_train,
 6 | %                         each a 1x2 cell ordered {VOC2007, VOC2012}
 7 | %              'test'  -> always errors (only one test source is supported)
 8 | 
 9 | % change to point to your devkit install
10 | devkit2007                      = voc2007_devkit();
11 | devkit2012                      = voc2012_devkit();
12 | 
13 | switch usage
14 |     case 'train'
15 |         imdbs = { imdb_from_voc(devkit2007, 'trainval', '2007', use_flip), ...
16 |                   imdb_from_voc(devkit2012, 'trainval', '2012', use_flip) };
17 |         dataset.imdb_train = imdbs;
18 | 
19 |         % Each imdb carries the function that builds its own roidb.
20 |         roidbs = cell(size(imdbs));
21 |         for k = 1:numel(imdbs)
22 |             roidbs{k} = imdbs{k}.roidb_func(imdbs{k}, 'with_self_proposal', true, 'extension', extension);
23 |         end
24 |         dataset.roidb_train = roidbs;
25 |     case 'test'
26 |         error('only supports one source test currently');
27 |     otherwise
28 |         error('usage = ''train'' or ''test''');
29 | end
30 | 
31 | end


--------------------------------------------------------------------------------
/functions/rfcn/rfcn_bbox_transform.m:
--------------------------------------------------------------------------------
 1 | function [regression_label] = rfcn_bbox_transform(ex_boxes, gt_boxes)
 2 | % [regression_label] = rfcn_bbox_transform(ex_boxes, gt_boxes)
 3 | %   Encodes gt_boxes relative to ex_boxes. Both inputs hold one box per row
 4 | %   as [x1 y1 x2 y2]. Each output row is [dx dy dw dh]: the centre offset
 5 | %   divided by the ex box size, and the log of the size ratio.
 6 | % --------------------------------------------------------
 7 | % R-FCN implementation
 8 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 9 | % Copyright (c) 2016, Jifeng Dai
10 | % Licensed under The MIT License [see LICENSE for details]
11 | % --------------------------------------------------------
12 | 
13 |     [ex_w, ex_h, ex_cx, ex_cy] = box_geometry(ex_boxes);
14 |     [gt_w, gt_h, gt_cx, gt_cy] = box_geometry(gt_boxes);
15 | 
16 |     % eps is added to the divisor of the centre offsets only.
17 |     dx = (gt_cx - ex_cx) ./ (ex_w + eps);
18 |     dy = (gt_cy - ex_cy) ./ (ex_h + eps);
19 |     dw = log(gt_w ./ ex_w);
20 |     dh = log(gt_h ./ ex_h);
21 | 
22 |     regression_label = [dx, dy, dw, dh];
23 | end
24 | 
25 | function [w, h, cx, cy] = box_geometry(boxes)
26 | % Width, height and centre of [x1 y1 x2 y2] rows (inclusive pixel coords).
27 |     w  = boxes(:, 3) - boxes(:, 1) + 1;
28 |     h  = boxes(:, 4) - boxes(:, 2) + 1;
29 |     cx = boxes(:, 1) + 0.5 * (w - 1);
30 |     cy = boxes(:, 2) + 0.5 * (h - 1);
31 | end


--------------------------------------------------------------------------------
/startup.m:
--------------------------------------------------------------------------------
 1 | function startup()
 2 | % startup()
 3 | %   Adds the repository's code folders and matcaffe to the MATLAB path and
 4 | %   creates the working folders the code expects. Errors when
 5 | %   external/caffe/matlab does not exist.
 6 | % --------------------------------------------------------
 7 | % R-FCN implementation
 8 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 9 | % Copyright (c) 2016, Jifeng Dai
10 | % Licensed under The MIT License [see LICENSE for details]
11 | % --------------------------------------------------------
12 | 
13 |     root = fileparts(mfilename('fullpath'));
14 | 
15 |     % Code folders, added recursively in this order.
16 |     code_dirs = {'utils', 'functions', 'bin', 'experiments', 'imdb'};
17 |     for k = 1:numel(code_dirs)
18 |         addpath(genpath(fullfile(root, code_dirs{k})));
19 |     end
20 | 
21 |     mkdir_if_missing(fullfile(root, 'datasets'));
22 |     mkdir_if_missing(fullfile(root, 'external'));
23 | 
24 |     % matcaffe must already be in place before anything else is set up.
25 |     matcaffe_dir = fullfile(root, 'external', 'caffe', 'matlab');
26 |     if ~exist(matcaffe_dir, 'dir')
27 |         error('matcaffe is missing from external/caffe/matlab; See README.md');
28 |     end
29 |     addpath(genpath(matcaffe_dir));
30 | 
31 |     mkdir_if_missing(fullfile(root, 'imdb', 'cache'));
32 |     mkdir_if_missing(fullfile(root, 'output'));
33 |     mkdir_if_missing(fullfile(root, 'models'));
34 | 
35 |     fprintf('rfcn startup done\n');
36 | end
37 | 


--------------------------------------------------------------------------------
/utils/prep_im_for_blob.m:
--------------------------------------------------------------------------------
 1 | function [im, im_scale] = prep_im_for_blob(im, im_means, target_size, max_size)
 2 | % [im, im_scale] = prep_im_for_blob(im, im_means, target_size, max_size)
 3 | %   Converts im to single, subtracts im_means and rescales the result by
 4 | %   the factor returned from prep_im_for_blob_size.
 5 | %
 6 | %   im_means is subtracted with bsxfun. When that fails (the sizes are not
 7 | %   compatible) the mean is first fitted to the image:
 8 | %     - non-GPU input: im_means is resized to the image height/width;
 9 | %     - gpuArray input: im_means is scaled up to cover the image and then
10 | %       centre-cropped to the image height/width.
11 | %
12 | %   Returns the processed image and the scale factor that was applied.
13 |     im = single(im);
14 |     
15 |     if ~isa(im, 'gpuArray')
16 |         try
17 |             im = bsxfun(@minus, im, im_means);
18 |         catch
19 |             im_means = imresize(im_means, [size(im, 1), size(im, 2)], 'bilinear', 'antialiasing', false);    
20 |             im = bsxfun(@minus, im, im_means);
21 |         end
22 |         im_scale = prep_im_for_blob_size(size(im), target_size, max_size);
23 | 
24 |         target_size = round([size(im, 1), size(im, 2)] * im_scale);
25 |         im = imresize(im, target_size, 'bilinear', 'antialiasing', false);
26 |     else
27 |         % for im as gpuArray
28 |         try
29 |             im = bsxfun(@minus, im, im_means);
30 |         catch
31 |             im_means_scale = max(double(size(im, 1)) / size(im_means, 1), double(size(im, 2)) / size(im_means, 2));
32 |             im_means = imresize(im_means, im_means_scale);    
33 |             y_start = floor((size(im_means, 1) - size(im, 1)) / 2) + 1;
34 |             x_start = floor((size(im_means, 2) - size(im, 2)) / 2) + 1;
35 |             % Keep every channel when cropping: with only two subscripts a
36 |             % multi-channel mean collapses to its first channel, which
37 |             % would then be subtracted from all image channels.
38 |             im_means = im_means(y_start:(y_start+size(im, 1)-1), x_start:(x_start+size(im, 2)-1), :);
39 |             im = bsxfun(@minus, im, im_means);
40 |         end
41 |         
42 |         im_scale = prep_im_for_blob_size(size(im), target_size, max_size);
43 |         im = imresize(im, im_scale);
44 |     end
45 | end


--------------------------------------------------------------------------------
/utils/boxoverlap.m:
--------------------------------------------------------------------------------
 1 | function o = boxoverlap(a, b)
 2 | % Compute the symmetric intersection over union overlap between every
 3 | % bounding box in a and every bounding box in b.
 4 | %
 5 | % a  a matrix where each row specifies a bounding box [x1 y1 x2 y2]
 6 | % b  a matrix where each row specifies a bounding box [x1 y1 x2 y2]
 7 | %
 8 | % o  o(r, c) is the overlap of a(r, :) with b(c, :); pairs that do not
 9 | %    intersect get 0
10 | 
11 | % AUTORIGHTS
12 | % -------------------------------------------------------
13 | % Copyright (C) 2011-2012 Ross Girshick
14 | % Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick
15 | % 
16 | % This file is part of the voc-releaseX code
17 | % (http://people.cs.uchicago.edu/~rbg/latent/)
18 | % and is available under the terms of an MIT-like license
19 | % provided in COPYING. Please retain this notice and
20 | % COPYING if you use this file (or a portion of it) in
21 | % your project.
22 | % -------------------------------------------------------
23 | 
24 | num_b = size(b, 1);
25 | columns = cell(1, num_b);
26 | 
27 | for k = 1:num_b
28 |     % extent of the intersection rectangle (inclusive pixel coordinates)
29 |     iw = min(a(:,3), b(k,3)) - max(a(:,1), b(k,1)) + 1;
30 |     ih = min(a(:,4), b(k,4)) - max(a(:,2), b(k,2)) + 1;
31 |     inter = iw .* ih;
32 | 
33 |     area_a = (a(:,3)-a(:,1)+1) .* (a(:,4)-a(:,2)+1);
34 |     area_b = (b(k,3)-b(k,1)+1) * (b(k,4)-b(k,2)+1);
35 | 
36 |     iou = inter ./ (area_a + area_b - inter);
37 |     % a non-positive extent means the boxes do not intersect
38 |     iou(iw <= 0 | ih <= 0) = 0;
39 | 
40 |     columns{k} = iou;
41 | end
42 | 
43 | o = cell2mat(columns);
44 | 


--------------------------------------------------------------------------------
/functions/rfcn/rfcn_bbox_transform_inv.m:
--------------------------------------------------------------------------------
 1 | function [pred_boxes] = rfcn_bbox_transform_inv(boxes, box_deltas)
 2 | % [pred_boxes] = rfcn_bbox_transform_inv(boxes, box_deltas)
 3 | %   Applies regression deltas to source boxes.
 4 | %
 5 | %   boxes       one [x1 y1 x2 y2] box per row
 6 | %   box_deltas  one row per box; columns come in groups of four,
 7 | %               [dx dy dw dh], and every group is applied to the same box
 8 | %   pred_boxes  single array of size(box_deltas) with [x1 y1 x2 y2] per group
 9 | % --------------------------------------------------------
10 | % R-FCN implementation
11 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
12 | % Copyright (c) 2016, Jifeng Dai
13 | % Licensed under The MIT License [see LICENSE for details]
14 | % --------------------------------------------------------
15 |     
16 |     % geometry of the source boxes
17 |     box_w  = double(boxes(:, 3) - boxes(:, 1) + 1);
18 |     box_h  = double(boxes(:, 4) - boxes(:, 2) + 1);
19 |     box_cx = double(boxes(:, 1) + 0.5*(box_w-1));
20 |     box_cy = double(boxes(:, 2) + 0.5*(box_h-1));
21 | 
22 |     % de-interleave the deltas: one column per group of four
23 |     dx = double(box_deltas(:, 1:4:end));
24 |     dy = double(box_deltas(:, 2:4:end));
25 |     dw = double(box_deltas(:, 3:4:end));
26 |     dh = double(box_deltas(:, 4:4:end));
27 | 
28 |     % shift the centres, scale the sizes
29 |     new_cx = bsxfun(@plus, bsxfun(@times, dx, box_w), box_cx);
30 |     new_cy = bsxfun(@plus, bsxfun(@times, dy, box_h), box_cy);
31 |     half_w = 0.5*(bsxfun(@times, exp(dw), box_w)-1);
32 |     half_h = 0.5*(bsxfun(@times, exp(dh), box_h)-1);
33 | 
34 |     % back to corner form, interleaved like box_deltas
35 |     pred_boxes = zeros(size(box_deltas), 'single');
36 |     pred_boxes(:, 1:4:end) = new_cx - half_w;
37 |     pred_boxes(:, 2:4:end) = new_cy - half_h;
38 |     pred_boxes(:, 3:4:end) = new_cx + half_w;
39 |     pred_boxes(:, 4:4:end) = new_cy + half_h;
40 | end


--------------------------------------------------------------------------------
/utils/subsample_images.m:
--------------------------------------------------------------------------------
 1 | function [imdbs, roidbs] = subsample_images(imdbs, roidbs, max_num_neg_images, seed)
 2 | % [imdbs, roidbs] = subsample_images(imdbs, roidbs, max_num_neg_images, seed)
 3 | %   Keeps a random subset of at most max_num_neg_images images, drawn
 4 | %   uniformly over all images of all roidbs taken together.
 5 | %
 6 | %   imdbs, roidbs       cell arrays; roidbs{i}.rois is indexed in parallel
 7 | %                       with imdbs{i}.image_ids and imdbs{i}.sizes
 8 | %   max_num_neg_images  maximum number of images kept in total; when it
 9 | %                       exceeds the number available, all images are kept
10 | %   seed                seed for the draw (optional, default 6)
11 | %
12 | %   image_ids, sizes, image_at and rois are rewritten to cover only the
13 | %   kept images. The global rng state is restored before returning.
14 | 
15 | if ~exist('seed', 'var')
16 |   seed = 6;
17 | end
18 | 
19 | % Only the total count is needed, so the rois are not concatenated.
20 | total_num = sum(cellfun(@(x) numel(x.rois), roidbs));
21 | 
22 | % randperm(n, k) errors when k > n, so never ask for more than exist.
23 | num_keep = min(max_num_neg_images, total_num);
24 | 
25 | % Fix the random seed for repeatability; restore the generator straight
26 | % after the draw so an error further down cannot leave it reseeded.
27 | prev_rng = seed_rand(seed);
28 | inds = sort(randperm(total_num, num_keep));
29 | rng(prev_rng);
30 | 
31 | img_idx_start = 1;
32 | for i = 1:length(imdbs)
33 |     imdb_img_num = length(imdbs{i}.image_ids);
34 |     img_idx_end = img_idx_start + imdb_img_num - 1;
35 | 
36 |     % global indices that fall inside this imdb, made local to it
37 |     in_range = inds >= img_idx_start & inds <= img_idx_end;
38 |     inds_sub = inds(in_range) - img_idx_start + 1;
39 |     
40 |     imdbs{i}.image_ids = imdbs{i}.image_ids(inds_sub);
41 |     imdbs{i}.sizes = imdbs{i}.sizes(inds_sub, :);
42 | 
43 |     % Capture only what image_at needs; referring to imdbs{i} inside the
44 |     % anonymous function would store a copy of the whole imdbs cell in it.
45 |     if isfield(imdbs{i}, 'image_dir')
46 |         img_dir = imdbs{i}.image_dir;
47 |     else
48 |         img_dir = imdbs{i}.imagedir;
49 |     end
50 |     img_ids = imdbs{i}.image_ids;
51 |     img_ext = imdbs{i}.extension;
52 |     imdbs{i}.image_at = @(x) sprintf('%s/%s.%s', img_dir, img_ids{x}, img_ext);
53 | 
54 |     roidbs{i}.rois = roidbs{i}.rois(inds_sub);
55 |     
56 |     img_idx_start = img_idx_start + imdb_img_num;
57 | end
58 | 
59 | end


--------------------------------------------------------------------------------
/functions/nms/nms_multiclass.m:
--------------------------------------------------------------------------------
 1 | function picks = nms_multiclass(boxes, overlap)
 2 | % picks = nms_multiclass(boxes, overlap)
 3 | % Per-class greedy non-maximum suppression.
 4 | %
 5 | % boxes    rows of [x1 y1 x2 y2 s_1 ... s_C]: one box followed by one score
 6 | %          column per class
 7 | % overlap  a box is dropped once its intersection-over-union with an
 8 | %          already selected box exceeds this value
 9 | % picks    C-by-1 cell; picks{c} lists the kept row indices for class c in
10 | %          decreasing score order ({} for empty input)
11 | %
12 | % Inputs with fewer than 10000 rows are handed to nms_multiclass_mex.
13 | %
14 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m),
15 | % but an inner loop has been eliminated to significantly speed it
16 | % up in the case of a large number of boxes
17 | 
18 | % Copyright (C) 2011-12 by Tomasz Malisiewicz
19 | % All rights reserved.
20 | % 
21 | % This file is part of the Exemplar-SVM library and is made
22 | % available under the terms of the MIT license (see COPYING file).
23 | % Project homepage: https://github.com/quantombone/exemplarsvm
24 | 
25 | 
26 | if isempty(boxes)
27 |   picks = {};
28 |   return;
29 | end
30 | 
31 | if size(boxes, 1) < 10000
32 |     picks = nms_multiclass_mex(double(boxes), double(overlap));
33 |     return;
34 | end
35 | 
36 | left   = boxes(:,1);
37 | top    = boxes(:,2);
38 | right  = boxes(:,3);
39 | bottom = boxes(:,4);
40 | 
41 | box_area = (right-left+1) .* (bottom-top+1);
42 | 
43 | num_classes = size(boxes, 2) - 4;
44 | picks = cell(num_classes, 1);
45 | for c = 1:num_classes
46 |     scores = boxes(:, c+4);
47 |     [~, order] = sort(scores);      % ascending: best candidate is last
48 | 
49 |     kept = scores*0;
50 |     num_kept = 0;
51 |     while ~isempty(order)
52 |       best = order(end);
53 |       rest = order(1:end-1);
54 |       num_kept = num_kept + 1;
55 |       kept(num_kept) = best;
56 | 
57 |       % intersection of the best box with every remaining candidate
58 |       iw = max(0.0, min(right(best), right(rest)) - max(left(best), left(rest)) + 1);
59 |       ih = max(0.0, min(bottom(best), bottom(rest)) - max(top(best), top(rest)) + 1);
60 |       inter = iw.*ih;
61 |       iou = inter ./ (box_area(best) + box_area(rest) - inter);
62 | 
63 |       % keep only candidates that do not overlap the best box too much
64 |       order = rest(iou<=overlap);
65 |     end
66 | 
67 |     picks{c} = kept(1:num_kept);
68 | end
69 | 


--------------------------------------------------------------------------------
/functions/nms/nvmex.m:
--------------------------------------------------------------------------------
 1 | function nvmex(cuFileName, outDir)
 2 | %NVMEX Compiles and links a CUDA file for MATLAB usage
 3 | % NVMEX(FILENAME, OUTDIR) compiles FILENAME with the CUDA compiler, nvcc,
 4 | % into an object file named after FILENAME, then links that object with
 5 | % the MEX function in MATLAB (against cudart), writing the MEX-file to
 6 | % OUTDIR.
 7 | %
 8 | % Both command lines are displayed before they run. An error is raised
 9 | % when nvcc exits with a non-zero status.
10 | 
11 | if ispc % Windows
12 |     Host_Compiler_Location = '-ccbin "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\x86_amd64"';
13 |     CUDA_INC_Location = ['"' getenv('CUDA_PATH')  '\include"'];
14 |     CUDA_SAMPLES_Location =['"' getenv('NVCUDASAMPLES6_5_ROOT')  '\common\inc"'];
15 |     PIC_Option = '';
16 |     if ( strcmp(computer('arch'),'win32') ==1)
17 |         machine_str = ' --machine 32 ';
18 |         CUDA_LIB_Location = ['"' getenv('CUDA_PATH')  '\lib\Win32"'];
19 |     elseif  ( strcmp(computer('arch'),'win64') ==1)
20 |         machine_str = ' --machine 64 ';
21 |         CUDA_LIB_Location = ['"' getenv('CUDA_PATH')  '\lib\x64"'];
22 |     end
23 |     NVCC = 'nvcc';
24 | else % Mac and Linux (assuming gcc is on the path)
25 |     CUDA_INC_Location = '/usr/local/cuda/include';
26 |     CUDA_SAMPLES_Location = '/usr/local/cuda/samples/common/inc';
27 |     Host_Compiler_Location = ' ';
28 |     PIC_Option = ' --compiler-options -fPIC ';
29 |     machine_str = [];
30 |     CUDA_LIB_Location = '/usr/local/cuda/lib64';
31 |     NVCC = '/usr/local/cuda/bin/nvcc';
32 | end
33 | % !!! End of things to modify !!!
34 | [~, filename] = fileparts(cuFileName);
35 | nvccCommandLine = [ ...
36 | NVCC ' --compile ' Host_Compiler_Location ' ' ...
37 | '-o '  filename '.o ' ...
38 | machine_str PIC_Option ...
39 | ' -I' '"' matlabroot '/extern/include "' ...
40 | ' -I' CUDA_INC_Location ' -I' CUDA_SAMPLES_Location ...
41 | ' "' cuFileName '" ' 
42 |  ];
43 | mexCommandLine = ['mex ' '-outdir ' outDir ' ' filename '.o'  ' -L' CUDA_LIB_Location  ' -lcudart'];
44 | disp(nvccCommandLine);
45 | 
46 | % Silence warnings only for the nvcc call, then put back whatever warning
47 | % configuration the caller had (a bare "warning on" would enable them all).
48 | warn_state = warning('off', 'all');
49 | status = system(nvccCommandLine);
50 | warning(warn_state);
51 | 
52 | % A failing command reports a non-zero (normally positive) exit status, so
53 | % testing for "< 0" would let compile errors slip through to the link step.
54 | if status ~= 0
55 |     error('Error invoking nvcc');
56 | end
57 | disp(mexCommandLine);
58 | eval(mexCommandLine);
59 | end
60 | 


--------------------------------------------------------------------------------
/functions/nms/nms.m:
--------------------------------------------------------------------------------
 1 | function pick = nms(boxes, overlap, use_gpu)
 2 | % pick = nms(boxes, overlap, use_gpu)
 3 | % Greedy non-maximum suppression.
 4 | %
 5 | % boxes    rows of [x1 y1 x2 y2 ... score]; the score is the last column
 6 | % overlap  a box is dropped once its intersection-over-union with an
 7 | %          already selected box exceeds this value
 8 | % use_gpu  optional (default false); when true the work is done by
 9 | %          nms_gpu_mex on boxes sorted by decreasing score
10 | % pick     row indices of the kept boxes ([] for empty input)
11 | %
12 | % Without use_gpu, inputs with fewer than 1000000 rows go to nms_mex.
13 | %
14 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m),
15 | % but an inner loop has been eliminated to significantly speed it
16 | % up in the case of a large number of boxes
17 | 
18 | % Copyright (C) 2011-12 by Tomasz Malisiewicz
19 | % All rights reserved.
20 | % 
21 | % This file is part of the Exemplar-SVM library and is made
22 | % available under the terms of the MIT license (see COPYING file).
23 | % Project homepage: https://github.com/quantombone/exemplarsvm
24 | 
25 | 
26 | if isempty(boxes)
27 |   pick = [];
28 |   return;
29 | end
30 | 
31 | if ~exist('use_gpu', 'var')
32 |     use_gpu = false;
33 | end
34 | 
35 | if use_gpu
36 |     scores = boxes(:, end);
37 |     presorted = issorted(scores(end:-1:1));
38 |     if ~presorted
39 |         [~, order] = sort(scores, 'descend');
40 |         boxes = boxes(order, :);
41 |     end
42 |     pick = nms_gpu_mex(single(boxes)', double(overlap));
43 |     if ~presorted
44 |         % translate positions in the sorted copy back to original rows
45 |         pick = order(pick);
46 |     end
47 |     return;
48 | end
49 |     
50 | if size(boxes, 1) < 1000000
51 |     pick = nms_mex(double(boxes), double(overlap));
52 |     return;
53 | end
54 | 
55 | left   = boxes(:,1);
56 | top    = boxes(:,2);
57 | right  = boxes(:,3);
58 | bottom = boxes(:,4);
59 | scores = boxes(:,end);
60 | 
61 | box_area = (right-left+1) .* (bottom-top+1);
62 | [~, order] = sort(scores);          % ascending: best candidate is last
63 | 
64 | pick = scores*0;
65 | num_kept = 0;
66 | while ~isempty(order)
67 |   best = order(end);
68 |   rest = order(1:end-1);
69 |   num_kept = num_kept + 1;
70 |   pick(num_kept) = best;
71 |   
72 |   % intersection of the best box with every remaining candidate
73 |   iw = max(0.0, min(right(best), right(rest)) - max(left(best), left(rest)) + 1);
74 |   ih = max(0.0, min(bottom(best), bottom(rest)) - max(top(best), top(rest)) + 1);
75 |   inter = iw.*ih;
76 |   iou = inter ./ (box_area(best) + box_area(rest) - inter);
77 |   
78 |   % keep only candidates that do not overlap the best box too much
79 |   order = rest(iou<=overlap);
80 | end
81 | 
82 | pick = pick(1:num_kept);
83 | 


--------------------------------------------------------------------------------
/utils/subsample_images_per_class.m:
--------------------------------------------------------------------------------
 1 | function [imdbs, roidbs] = subsample_images_per_class(imdbs, roidbs, max_per_class_image_num, seed)
 2 | % [imdbs, roidbs] = subsample_images_per_class(imdbs, roidbs, max_per_class_image_num, seed)
 3 | %   For every class, randomly picks at most max_per_class_image_num of the
 4 | %   images whose rois contain that class, and keeps the union of all picks.
 5 | %
 6 | %   imdbs, roidbs  cell arrays; roidbs{i}.rois is indexed in parallel with
 7 | %                  imdbs{i}.image_ids and imdbs{i}.sizes, and every rois
 8 | %                  element has a .class field. All imdbs must have the same
 9 | %                  number of class_ids.
10 | %   seed           seed for the draw (optional, default 6)
11 | %
12 | %   image_ids, sizes, image_at and rois are rewritten to cover only the
13 | %   kept images. The global rng state is restored after the draw.
14 | 
15 | if ~exist('seed', 'var')
16 |   seed = 6;
17 | end
18 | 
19 | class_num = cellfun(@(x) length(x.class_ids), imdbs, 'UniformOutput', true);
20 | assert(length(unique(class_num)) == 1);
21 | class_num = unique(class_num);
22 | 
23 | % Force every rois array to a column before stacking, so that row-shaped
24 | % struct arrays of different lengths can still be concatenated.
25 | rois = cellfun(@(x) x.rois(:), roidbs, 'UniformOutput', false);
26 | rois_combine = cell2mat(rois(:));
27 | rois_combine_class = arrayfun(@(x) x.class, rois_combine, 'UniformOutput', false);
28 | 
29 | %% select images with max_image_num
30 | 
31 | % fix the random seed for repeatability
32 | prev_rng = seed_rand(seed);
33 | inds = cell(class_num, 1);
34 | rois_combine_length = length(rois_combine);
35 | valid_idxs = cell(class_num, 1);
36 | % valid_idxs{i} flags the images that contain at least one roi of class i
37 | parfor i = 1:class_num
38 |     valid_idxs{i} = cellfun(@(x) any(x == i), rois_combine_class, 'UniformOutput', false);
39 |     valid_idxs{i} = cell2mat(valid_idxs{i});
40 | end
41 | 
42 | for i = 1:class_num
43 |     valid_num = sum(valid_idxs{i});
44 | 
45 |     % never ask randperm for more images than the class has
46 |     num = min(valid_num, max_per_class_image_num);
47 |     inds{i} = 1:rois_combine_length;
48 |     inds{i} = inds{i}(valid_idxs{i});
49 |     inds{i} = inds{i}(randperm(length(inds{i}), num));
50 | end
51 | 
52 | % union over classes, sorted, as a column of global image indices
53 | inds = cell2mat(inds')';
54 | inds = unique(inds);
55 | 
56 | % restore previous rng
57 | rng(prev_rng);
58 | 
59 | img_idx_start = 1;
60 | for i = 1:length(imdbs)
61 |     imdb_img_num = length(imdbs{i}.image_ids);
62 |     img_idx_end = img_idx_start + imdb_img_num - 1;
63 | 
64 |     % global indices that fall inside this imdb, made local to it
65 |     in_range = inds >= img_idx_start & inds <= img_idx_end;
66 |     inds_sub = inds(in_range) - img_idx_start + 1;
67 | 
68 |     imdbs{i}.image_ids = imdbs{i}.image_ids(inds_sub);
69 |     imdbs{i}.sizes = imdbs{i}.sizes(inds_sub, :);
70 | 
71 |     % Capture only what image_at needs; referring to imdbs{i} inside the
72 |     % anonymous function would store a copy of the whole imdbs cell in it.
73 |     if isfield(imdbs{i}, 'image_dir')
74 |         img_dir = imdbs{i}.image_dir;
75 |     else
76 |         img_dir = imdbs{i}.imagedir;
77 |     end
78 |     img_ids = imdbs{i}.image_ids;
79 |     img_ext = imdbs{i}.extension;
80 |     imdbs{i}.image_at = @(x) sprintf('%s/%s.%s', img_dir, img_ids{x}, img_ext);
81 | 
82 |     roidbs{i}.rois = roidbs{i}.rois(inds_sub);
83 | 
84 |     img_idx_start = img_idx_start + imdb_img_num;
85 | end
86 | 
87 | 
88 | 


--------------------------------------------------------------------------------
/experiments/script_rfcn_VOC0712_ResNet50_OHEM_ss.m:
--------------------------------------------------------------------------------
 1 | function script_rfcn_VOC0712_ResNet50_OHEM_ss()
 2 | % script_rfcn_VOC0712_ResNet50_OHEM_ss()
 3 | % Trains R-FCN with the OHEM configuration and the ResNet50 model on
 4 | % VOC07+12 trainval using selective search proposals, validating and then
 5 | % testing on VOC2007 test.
 6 | % --------------------------------------------------------
 7 | % R-FCN implementation
 8 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 9 | % Copyright (c) 2016, Jifeng Dai
10 | % Licensed under The MIT License [see LICENSE for details]
11 | % --------------------------------------------------------
12 | 
13 | clc;
14 | clear mex;
15 | clear is_valid_handle; % to clear init_key
16 | 
17 | % startup lives one folder above this script
18 | repo_root = fileparts(fileparts(mfilename('fullpath')));
19 | run(fullfile(repo_root, 'startup'));
20 | 
21 | %% -------------------- CONFIG --------------------
22 | opts.caffe_version = 'caffe_rfcn';
23 | opts.gpu_id        = auto_select_gpu;
24 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
25 | 
26 | model           = Model.ResNet50_for_RFCN_VOC0712_OHEM();
27 | opts.cache_name = 'rfcn_VOC0712_ResNet50_OHEM_ss';
28 | conf            = rfcn_config_ohem('image_means', model.mean_image);
29 | 
30 | % train/test data; flipping follows conf for training, never for test
31 | fprintf('Loading dataset...')
32 | dataset = Dataset.voc0712_trainval_ss([], 'train', conf.use_flipped);
33 | dataset = Dataset.voc2007_test_ss(dataset, 'test', false);
34 | fprintf('Done.\n');
35 | 
36 | % do validation, or not
37 | opts.do_val = true; 
38 | 
39 | %% -------------------- TRAINING --------------------
40 | train_args = { ...
41 |     'do_val',           opts.do_val, ...
42 |     'imdb_val',         dataset.imdb_test, ...
43 |     'roidb_val',        dataset.roidb_test, ...
44 |     'solver_def_file',  model.solver_def_file, ...
45 |     'net_file',         model.net_file, ...
46 |     'cache_name',       opts.cache_name, ...
47 |     'caffe_version',    opts.caffe_version};
48 | opts.rfcn_model = rfcn_train(conf, dataset.imdb_train, dataset.roidb_train, train_args{:});
49 | assert(exist(opts.rfcn_model, 'file') ~= 0, 'not found trained model');
50 | 
51 | %% -------------------- TESTING --------------------
52 | test_args = { ...
53 |     'net_def_file',     model.test_net_def_file, ...
54 |     'net_file',         opts.rfcn_model, ...
55 |     'cache_name',       opts.cache_name, ...
56 |     'ignore_cache',     true};
57 | rfcn_test(conf, dataset.imdb_test, dataset.roidb_test, test_args{:});
58 | 
59 | end
60 | 


--------------------------------------------------------------------------------
/experiments/script_rfcn_VOC0712_ResNet50_rpn.m:
--------------------------------------------------------------------------------
 1 | function script_rfcn_VOC0712_ResNet50_rpn()
 2 | % script_rfcn_VOC0712_ResNet50_rpn()
 3 | % Trains R-FCN with the simple (rfcn_config_simple) configuration and the
 4 | % ResNet50 model on VOC07+12 trainval using pre-computed 'resnet50' RPN
 5 | % proposals, validating and then testing on VOC2007 test.
 6 | % --------------------------------------------------------
 7 | % R-FCN implementation
 8 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 9 | % Copyright (c) 2016, Jifeng Dai
10 | % Licensed under The MIT License [see LICENSE for details]
11 | % --------------------------------------------------------
12 | 
13 | 
14 | clc;
15 | clear mex;
16 | clear is_valid_handle; % to clear init_key
17 | 
18 | % startup lives one folder above this script
19 | repo_root = fileparts(fileparts(mfilename('fullpath')));
20 | run(fullfile(repo_root, 'startup'));
21 | 
22 | %% -------------------- CONFIG --------------------
23 | opts.caffe_version = 'caffe_rfcn';
24 | opts.gpu_id        = auto_select_gpu;
25 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
26 | 
27 | model           = Model.ResNet50_for_RFCN_VOC0712();
28 | opts.cache_name = 'rfcn_VOC0712_ResNet50_rpn_resnet50';
29 | conf            = rfcn_config_simple('image_means', model.mean_image);
30 | 
31 | % train/test data; flipping follows conf for training, never for test
32 | fprintf('Loading dataset...')
33 | dataset = Dataset.voc0712_trainval_sp([], 'train', conf.use_flipped, 'resnet50');
34 | dataset = Dataset.voc2007_test_sp(dataset, 'test', false, 'resnet50');
35 | fprintf('Done.\n');
36 | 
37 | % do validation, or not
38 | opts.do_val = true; 
39 | 
40 | %% -------------------- TRAINING --------------------
41 | train_args = { ...
42 |     'do_val',           opts.do_val, ...
43 |     'imdb_val',         dataset.imdb_test, ...
44 |     'roidb_val',        dataset.roidb_test, ...
45 |     'solver_def_file',  model.solver_def_file, ...
46 |     'net_file',         model.net_file, ...
47 |     'cache_name',       opts.cache_name, ...
48 |     'caffe_version',    opts.caffe_version};
49 | opts.rfcn_model = rfcn_train(conf, dataset.imdb_train, dataset.roidb_train, train_args{:});
50 | assert(exist(opts.rfcn_model, 'file') ~= 0, 'not found trained model');
51 | 
52 | %% -------------------- TESTING --------------------
53 | test_args = { ...
54 |     'net_def_file',     model.test_net_def_file, ...
55 |     'net_file',         opts.rfcn_model, ...
56 |     'cache_name',       opts.cache_name, ...
57 |     'ignore_cache',     true};
58 | rfcn_test(conf, dataset.imdb_test, dataset.roidb_test, test_args{:});
59 | 
60 | end
61 | 


--------------------------------------------------------------------------------
/experiments/script_rfcn_VOC0712_ResNet101_OHEM_ss.m:
--------------------------------------------------------------------------------
 1 | function script_rfcn_VOC0712_ResNet101_OHEM_ss()
 2 | % script_rfcn_VOC0712_ResNet101_OHEM_ss()
 3 | % RFCN training and testing with OHEM using ResNet101 model and selective
 4 | % search proposals
 5 | % --------------------------------------------------------
 6 | % R-FCN implementation
 7 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 8 | % Copyright (c) 2016, Jifeng Dai
 9 | % Licensed under The MIT License [see LICENSE for details]
10 | % --------------------------------------------------------
11 | 
12 | clc;
13 | clear mex;
14 | clear is_valid_handle; % to clear init_key
15 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));
16 | %% -------------------- CONFIG --------------------
17 | opts.caffe_version          = 'caffe_rfcn';
18 | opts.gpu_id                 = auto_select_gpu;
19 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
20 | 
21 | % model
22 | model                       = Model.ResNet101_for_RFCN_VOC0712_OHEM();
23 | % cache name
24 | opts.cache_name             = 'rfcn_VOC0712_ResNet101_OHEM_ss';
25 | % config
26 | conf                        = rfcn_config_ohem('image_means', model.mean_image);
27 | % train/test data
28 | fprintf('Loading dataset...')
29 | dataset                     = [];
30 | dataset                     = Dataset.voc0712_trainval_ss(dataset, 'train', conf.use_flipped);
31 | dataset                     = Dataset.voc2007_test_ss(dataset, 'test', false);
32 | fprintf('Done.\n');
33 | 
34 | % do validation, or not
35 | opts.do_val                 = true; 
36 | 
37 | %% -------------------- TRAINING --------------------
38 | 
39 | opts.rfcn_model        = rfcn_train(conf, dataset.imdb_train, dataset.roidb_train, ...
40 |                                 'do_val',           opts.do_val, ...
41 |                                 'imdb_val',         dataset.imdb_test, ...
42 |                                 'roidb_val',        dataset.roidb_test, ...
43 |                                 'solver_def_file',  model.solver_def_file, ...
44 |                                 'net_file',         model.net_file, ...
45 |                                 'cache_name',       opts.cache_name, ...
46 |                                 'caffe_version',    opts.caffe_version);
47 | assert(exist(opts.rfcn_model, 'file') ~= 0, 'not found trained model');
48 | 
49 | %% -------------------- TESTING --------------------
50 |                           rfcn_test(conf, dataset.imdb_test, dataset.roidb_test, ...
51 |                                 'net_def_file',     model.test_net_def_file, ...
52 |                                 'net_file',         opts.rfcn_model, ...
53 |                                 'cache_name',       opts.cache_name,...
54 |                                 'ignore_cache',     true);
55 | 
56 | end
57 | 


--------------------------------------------------------------------------------
/experiments/script_rfcn_VOC0712_ResNet101_rpn.m:
--------------------------------------------------------------------------------
 1 | function script_rfcn_VOC0712_ResNet101_rpn()
 2 | % script_rfcn_VOC0712_ResNet101_rpn()
 3 | % RFCN training and testing with OHEM using ResNet101 model and RPN
 4 | % proposals
 5 | % --------------------------------------------------------
 6 | % R-FCN implementation
 7 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 8 | % Copyright (c) 2016, Jifeng Dai
 9 | % Licensed under The MIT License [see LICENSE for details]
10 | % --------------------------------------------------------
11 | 
12 | 
13 | clc;
14 | clear mex;
15 | clear is_valid_handle; % to clear init_key
16 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));
17 | %% -------------------- CONFIG --------------------
18 | opts.caffe_version          = 'caffe_rfcn';
19 | opts.gpu_id                 = auto_select_gpu;
20 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
21 | 
22 | % model
23 | model                       = Model.ResNet101_for_RFCN_VOC0712();
24 | % cache name
25 | opts.cache_name             = 'rfcn_VOC0712_ResNet101_rpn_resnet101';
26 | % config
27 | conf                        = rfcn_config_simple('image_means', model.mean_image);
28 | % train/test data
29 | fprintf('Loading dataset...')
30 | dataset                     = [];
31 | dataset                     = Dataset.voc0712_trainval_sp(dataset, 'train', conf.use_flipped, 'resnet101');
32 | dataset                     = Dataset.voc2007_test_sp(dataset, 'test', false, 'resnet101');
33 | fprintf('Done.\n');
34 | 
35 | % do validation, or not
36 | opts.do_val                 = true; 
37 | 
38 | %% -------------------- TRAINING --------------------
39 | 
40 | opts.rfcn_model        = rfcn_train(conf, dataset.imdb_train, dataset.roidb_train, ...
41 |                                 'do_val',           opts.do_val, ...
42 |                                 'imdb_val',         dataset.imdb_test, ...
43 |                                 'roidb_val',        dataset.roidb_test, ...
44 |                                 'solver_def_file',  model.solver_def_file, ...
45 |                                 'net_file',         model.net_file, ...
46 |                                 'cache_name',       opts.cache_name, ...
47 |                                 'caffe_version',    opts.caffe_version);
48 | assert(exist(opts.rfcn_model, 'file') ~= 0, 'not found trained model');
49 | 
50 | %% -------------------- TESTING --------------------
51 |                           rfcn_test(conf, dataset.imdb_test, dataset.roidb_test, ...
52 |                                 'net_def_file',     model.test_net_def_file, ...
53 |                                 'net_file',         opts.rfcn_model, ...
54 |                                 'cache_name',       opts.cache_name,...
55 |                                 'ignore_cache',     true);
56 | 
57 | end
58 | 


--------------------------------------------------------------------------------
/experiments/script_rfcn_VOC0712_ResNet50_OHEM_rpn.m:
--------------------------------------------------------------------------------
 1 | function script_rfcn_VOC0712_ResNet50_OHEM_rpn()
 2 | % script_rfcn_VOC0712_ResNet50_OHEM_rpn()
 3 | % RFCN training and testing with OHEM using ResNet50 model and RPN proposals
 4 | % --------------------------------------------------------
 5 | % R-FCN implementation
 6 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 7 | % Copyright (c) 2016, Jifeng Dai
 8 | % Licensed under The MIT License [see LICENSE for details]
 9 | % --------------------------------------------------------
10 | 
11 | 
12 | clc;
13 | clear mex;
14 | clear is_valid_handle; % to clear init_key
15 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));
16 | %% -------------------- CONFIG --------------------
17 | opts.caffe_version          = 'caffe_rfcn';
18 | opts.gpu_id                 = auto_select_gpu;
19 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
20 | 
21 | % model
22 | model                       = Model.ResNet50_for_RFCN_VOC0712_OHEM();
23 | % cache name
24 | opts.cache_name             = 'rfcn_VOC0712_ResNet50_OHEM_rpn_resnet50';
25 | % config
26 | conf                        = rfcn_config_ohem('image_means', model.mean_image);
27 | % train/test data
28 | fprintf('Loading dataset...')
29 | dataset                     = [];
30 | dataset                     = Dataset.voc0712_trainval_sp(dataset, 'train', conf.use_flipped, 'resnet50');
31 | dataset                     = Dataset.voc2007_test_sp(dataset, 'test', false, 'resnet50');
32 | fprintf('Done.\n');
33 | 
34 | % do validation, or not
35 | opts.do_val                 = true; 
36 | 
37 | %% -------------------- TRAINING --------------------
38 | 
39 | opts.rfcn_model        = rfcn_train(conf, dataset.imdb_train, dataset.roidb_train, ...
40 |                                 'do_val',           opts.do_val, ...
41 |                                 'imdb_val',         dataset.imdb_test, ...
42 |                                 'roidb_val',        dataset.roidb_test, ...
43 |                                 'solver_def_file',  model.solver_def_file, ...
44 |                                 'net_file',         model.net_file, ...
45 |                                 'cache_name',       opts.cache_name, ...
46 |                                 'caffe_version',    opts.caffe_version);
47 | assert(exist(opts.rfcn_model, 'file') ~= 0, 'not found trained model');
48 | 
49 | %% -------------------- TESTING --------------------
50 |                           rfcn_test(conf, dataset.imdb_test, dataset.roidb_test, ...
51 |                                 'net_def_file',     model.test_net_def_file, ...
52 |                                 'net_file',         opts.rfcn_model, ...
53 |                                 'cache_name',       opts.cache_name,...
54 |                                 'ignore_cache',     true);
55 | 
56 | end
57 | 


--------------------------------------------------------------------------------
/experiments/script_rfcn_VOC0712_ResNet101_OHEM_rpn.m:
--------------------------------------------------------------------------------
 1 | function script_rfcn_VOC0712_ResNet101_OHEM_rpn()
 2 | % script_rfcn_VOC0712_ResNet101_OHEM_rpn()
 3 | % RFCN training and testing with OHEM using ResNet101 model and RPN
 4 | % proposals
 5 | % --------------------------------------------------------
 6 | % R-FCN implementation
 7 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 8 | % Copyright (c) 2016, Jifeng Dai
 9 | % Licensed under The MIT License [see LICENSE for details]
10 | % --------------------------------------------------------
11 | 
12 | 
13 | clc;
14 | clear mex;
15 | clear is_valid_handle; % to clear init_key
16 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));
17 | %% -------------------- CONFIG --------------------
18 | opts.caffe_version          = 'caffe_rfcn';
19 | opts.gpu_id                 = auto_select_gpu;
20 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
21 | 
22 | % model
23 | model                       = Model.ResNet101_for_RFCN_VOC0712_OHEM();
24 | % cache name
25 | opts.cache_name             = 'rfcn_VOC0712_ResNet101_OHEM_rpn_resnet101';
26 | % config
27 | conf                        = rfcn_config_ohem('image_means', model.mean_image);
28 | % train/test data
29 | fprintf('Loading dataset...')
30 | dataset                     = [];
31 | dataset                     = Dataset.voc0712_trainval_sp(dataset, 'train', conf.use_flipped, 'resnet101');
32 | dataset                     = Dataset.voc2007_test_sp(dataset, 'test', false, 'resnet101');
33 | fprintf('Done.\n');
34 | 
35 | % do validation, or not
36 | opts.do_val                 = true; 
37 | 
38 | %% -------------------- TRAINING --------------------
39 | 
40 | opts.rfcn_model        = rfcn_train(conf, dataset.imdb_train, dataset.roidb_train, ...
41 |                                 'do_val',           opts.do_val, ...
42 |                                 'imdb_val',         dataset.imdb_test, ...
43 |                                 'roidb_val',        dataset.roidb_test, ...
44 |                                 'solver_def_file',  model.solver_def_file, ...
45 |                                 'net_file',         model.net_file, ...
46 |                                 'cache_name',       opts.cache_name, ...
47 |                                 'caffe_version',    opts.caffe_version);
48 | assert(exist(opts.rfcn_model, 'file') ~= 0, 'not found trained model');
49 | 
50 | %% -------------------- TESTING --------------------
51 |                           rfcn_test(conf, dataset.imdb_test, dataset.roidb_test, ...
52 |                                 'net_def_file',     model.test_net_def_file, ...
53 |                                 'net_file',         opts.rfcn_model, ...
54 |                                 'cache_name',       opts.cache_name,...
55 |                                 'ignore_cache',     true);
56 | 
57 | end
58 | 


--------------------------------------------------------------------------------
/utils/showboxes.m:
--------------------------------------------------------------------------------
 1 | function showboxes(im, boxes, legends, color_conf)
 2 | % Draw bounding boxes on top of an image.
 3 | %   showboxes(im, boxes)
 4 | %
 5 | % -------------------------------------------------------
 6 | 
 7 | fix_width = 800;
 8 | if isa(im, 'gpuArray')
 9 |     im = gather(im);
10 | end
11 | imsz = size(im);
12 | scale = fix_width / imsz(2);
13 | im = imresize(im, scale);
14 | 
15 | if size(boxes{1}, 2) >= 5
16 |     boxes = cellfun(@(x) [x(:, 1:4) * scale, x(:, 5)], boxes, 'UniformOutput', false);
17 | else
18 |     boxes = cellfun(@(x) x(:, 1:4) * scale, boxes, 'UniformOutput', false);
19 | end
20 | 
21 | if ~exist('color_conf', 'var')
22 |     color_conf = 'default';
23 | end
24 | 
25 | image(im); 
26 | axis image;
27 | axis off;
28 | set(gcf, 'Color', 'white');
29 | 
30 | valid_boxes = cellfun(@(x) ~isempty(x), boxes, 'UniformOutput', true);
31 | valid_boxes_num = sum(valid_boxes);
32 | 
33 | if valid_boxes_num > 0
34 |     switch color_conf
35 |         case 'default'
36 |             colors_candidate = colormap('hsv');
37 |             colors_candidate = colors_candidate(1:(floor(size(colors_candidate, 1)/valid_boxes_num)):end, :);
38 |             colors_candidate = mat2cell(colors_candidate, ones(size(colors_candidate, 1), 1))';
39 |             colors = cell(size(valid_boxes));
40 |             colors(valid_boxes) = colors_candidate(1:sum(valid_boxes));
41 |         case 'voc'
42 |             colors_candidate = colormap('hsv');
43 |             colors_candidate = colors_candidate(1:(floor(size(colors_candidate, 1)/20)):end, :);
44 |             colors_candidate = mat2cell(colors_candidate, ones(size(colors_candidate, 1), 1))';
45 |             colors = colors_candidate;
46 |     end
47 |             
48 | 
49 |     for i = 1:length(boxes)
50 |         if isempty(boxes{i})
51 |             continue;
52 |         end
53 | 
54 |         for j = 1:size(boxes{i})
55 |             box = boxes{i}(j, 1:4);
56 |             if size(boxes{i}, 2) >= 5
57 |                 score = boxes{i}(j, end);
58 |                 linewidth = 2 + min(max(score, 0), 1) * 2;
59 |                 rectangle('Position', RectLTRB2LTWH(box), 'LineWidth', linewidth, 'EdgeColor', colors{i});
60 |                 label = sprintf('%s : %.3f', legends{i}, score);
61 |                 text(double(box(1))+2, double(box(2)), label, 'BackgroundColor', 'w');
62 |             else
63 |                 linewidth = 2;
64 |                 rectangle('Position', RectLTRB2LTWH(box), 'LineWidth', linewidth, 'EdgeColor', colors{i});
65 |                 label = sprintf('%s(%d)', legends{i}, i);
66 |                 text(double(box(1))+2, double(box(2)), label, 'BackgroundColor', 'w');
67 |             end
68 |         end
69 | 
70 |     end
71 | end
72 | end
73 | 
function [ rectsLTWH ] = RectLTRB2LTWH( rectsLTRB )
%rects (l, t, r, b) to (l, t, w, h)
%   rectsLTRB  N-by-4 matrix, one [left top right bottom] rectangle per row.
%   rectsLTWH  N-by-4 matrix of [left top width height], where
%              width = right - left + 1 and height = bottom - top + 1.

left = rectsLTRB(:, 1);
% the whole "top" column must be used: a linear index such as rectsLTRB(2)
% picks a single element, which is only correct for one-row input
top = rectsLTRB(:, 2);

rectsLTWH = [left, top, rectsLTRB(:, 3) - left + 1, rectsLTRB(:, 4) - top + 1];
end
79 | 
80 | 


--------------------------------------------------------------------------------
/functions/rfcn/rfcn_config_ohem.m:
--------------------------------------------------------------------------------
 1 | function conf = rfcn_config_ohem(varargin)
 2 | % conf = rfcn_config(varargin)
 3 | % --------------------------------------------------------
 4 | % R-FCN implementation
 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 6 | % Copyright (c) 2016, Jifeng Dai
 7 | % Licensed under The MIT License [see LICENSE for details]
 8 | % --------------------------------------------------------
 9 | %
10 |      ip = inputParser;
11 |     
12 |     %% training
13 |     % whether use gpu
14 |     ip.addParamValue('use_gpu',         gpuDeviceCount > 0, ...            
15 |                                                         @islogical);
16 |     % Image scales -- the short edge of input image                                                
17 |     ip.addParamValue('scales',          600,            @ismatrix);
18 |     % Max pixel size of a scaled input image
19 |     ip.addParamValue('max_size',        1000,           @isscalar);
20 |     % Images per batch
21 |     ip.addParamValue('ims_per_batch',   2,              @isscalar);
22 |     % Minibatch size, set as -1 if using all the rois
23 |     ip.addParamValue('batch_size',      -1,            @isscalar);
24 |     % Fraction of minibatch that is foreground labeled (class > 0),
25 |     % which is disabled when batch_size = -1
26 |     ip.addParamValue('fg_fraction',     -1,           @isscalar);
27 |     % Overlap threshold for a ROI to be considered foreground (if >= fg_thresh)
28 |     ip.addParamValue('fg_thresh',       0.5,            @isscalar);
29 |     % Overlap threshold for a ROI to be considered background (class = 0 if
30 |     % overlap in [bg_thresh_lo, bg_thresh_hi))
31 |     ip.addParamValue('bg_thresh_hi',    0.5,            @isscalar);
32 |     ip.addParamValue('bg_thresh_lo',    0.0,            @isscalar);
33 |     % mean image, in RGB order
34 |     ip.addParamValue('image_means',     128,            @ismatrix);
35 |     % Use horizontally-flipped images during training?
36 |     ip.addParamValue('use_flipped',     true,           @islogical);
37 |     % Vaild training sample (IoU > bbox_thresh) for bounding box regresion
38 |     ip.addParamValue('bbox_thresh',     0.5,            @isscalar);
39 |     % Whether to perform class agnostic bbox regression
40 |     ip.addParamValue('bbox_class_agnostic', true,  @islogical);
41 | 
42 |     % random seed
43 |     ip.addParamValue('rng_seed',        6,              @isscalar);
44 | 
45 |     
46 |     %% testing
47 |     ip.addParamValue('test_scales',     600,            @isscalar);
48 |     ip.addParamValue('test_max_size',   1000,           @isscalar);
49 |     ip.addParamValue('test_nms',        0.3,            @isscalar);
50 |     ip.addParamValue('test_binary',     false,          @islogical);
51 |     
52 |     ip.parse(varargin{:});
53 |     conf = ip.Results;
54 |     
55 |     % if image_means is a file, load it
56 |     if ischar(conf.image_means)
57 |         s = load(conf.image_means);
58 |         s_fieldnames = fieldnames(s);
59 |         assert(length(s_fieldnames) == 1);
60 |         conf.image_means = s.(s_fieldnames{1});
61 |     end
62 |     
63 | end


--------------------------------------------------------------------------------
/functions/rfcn/rfcn_config_simple.m:
--------------------------------------------------------------------------------
 1 | function conf = rfcn_config_simple(varargin)
 2 | % conf = rfcn_config(varargin)
 3 | % --------------------------------------------------------
 4 | % R-FCN implementation
 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 6 | % Copyright (c) 2016, Jifeng Dai
 7 | % Licensed under The MIT License [see LICENSE for details]
 8 | % --------------------------------------------------------
 9 | %
10 |      ip = inputParser;
11 |     
12 |     %% training
13 |     % whether use gpu
14 |     ip.addParamValue('use_gpu',         gpuDeviceCount > 0, ...            
15 |                                                         @islogical);
16 |     % Image scales -- the short edge of input image                                                
17 |     ip.addParamValue('scales',          600,            @ismatrix);
18 |     % Max pixel size of a scaled input image
19 |     ip.addParamValue('max_size',        1000,           @isscalar);
20 |     % Images per batch
21 |     ip.addParamValue('ims_per_batch',   2,              @isscalar);
22 |     % Minibatch size, set as -1 if using all the rois
23 |     ip.addParamValue('batch_size',      256,            @isscalar);
24 |     % Fraction of minibatch that is foreground labeled (class > 0),
25 |     % which is disabled when batch_size = -1
26 |     ip.addParamValue('fg_fraction',     0.25,           @isscalar);
27 |     % Overlap threshold for a ROI to be considered foreground (if >= fg_thresh)
28 |     ip.addParamValue('fg_thresh',       0.5,            @isscalar);
29 |     % Overlap threshold for a ROI to be considered background (class = 0 if
30 |     % overlap in [bg_thresh_lo, bg_thresh_hi))
31 |     ip.addParamValue('bg_thresh_hi',    0.5,            @isscalar);
32 |     ip.addParamValue('bg_thresh_lo',    0.1,            @isscalar);
33 |     % mean image, in RGB order
34 |     ip.addParamValue('image_means',     128,            @ismatrix);
35 |     % Use horizontally-flipped images during training?
36 |     ip.addParamValue('use_flipped',     true,           @islogical);
37 |     % Vaild training sample (IoU > bbox_thresh) for bounding box regresion
38 |     ip.addParamValue('bbox_thresh',     0.5,            @isscalar);
39 |     % Whether to perform class agnostic bbox regression
40 |     ip.addParamValue('bbox_class_agnostic', true,  @islogical);
41 | 
42 |     % random seed
43 |     ip.addParamValue('rng_seed',        6,              @isscalar);
44 | 
45 |     
46 |     %% testing
47 |     ip.addParamValue('test_scales',     600,            @isscalar);
48 |     ip.addParamValue('test_max_size',   1000,           @isscalar);
49 |     ip.addParamValue('test_nms',        0.3,            @isscalar);
50 |     ip.addParamValue('test_binary',     false,          @islogical);
51 |     
52 |     ip.parse(varargin{:});
53 |     conf = ip.Results;
54 |     
55 |     % if image_means is a file, load it
56 |     if ischar(conf.image_means)
57 |         s = load(conf.image_means);
58 |         s_fieldnames = fieldnames(s);
59 |         assert(length(s_fieldnames) == 1);
60 |         conf.image_means = s.(s_fieldnames{1});
61 |     end
62 |     
63 | end


--------------------------------------------------------------------------------
/imdb/imdb_eval_voc.m:
--------------------------------------------------------------------------------
  1 | function res = imdb_eval_voc(cls, boxes, imdb, cache_name, suffix)
  2 | % res = imdb_eval_voc(cls, boxes, imdb, suffix)
  3 | %   Use the VOCdevkit to evaluate detections specified in boxes
  4 | %   for class cls against the ground-truth boxes in the image
  5 | %   database imdb. Results files are saved with an optional
  6 | %   suffix.
  7 | 
  8 | % AUTORIGHTS
  9 | % ---------------------------------------------------------
 10 | % Copyright (c) 2014, Ross Girshick
 11 | % 
 12 | % This file is part of the R-CNN code and is available 
 13 | % under the terms of the Simplified BSD License provided in 
 14 | % LICENSE. Please retain this notice and LICENSE if you use 
 15 | % this file (or any portion of it) in your project.
 16 | % ---------------------------------------------------------
 17 | 
 18 | % Add a random string ("salt") to the end of the results file name
 19 | % to prevent concurrent evaluations from clobbering each other
 20 | use_res_salt = true;
 21 | % Delete results files after computing APs
 22 | rm_res = true;
 23 | % comp4 because we use outside data (ILSVRC2012)
 24 | comp_id = 'comp4';
 25 | % draw each class curve
 26 | draw_curve = true;
 27 | 
 28 | % save results
 29 | if ~exist('suffix', 'var') || isempty(suffix) || strcmp(suffix, '')
 30 |   suffix = '';
 31 | else
 32 |   if suffix(1) ~= '_'
 33 |     suffix = ['_' suffix];
 34 |   end
 35 | end
 36 | 
 37 | conf.cache_dir = fullfile('output', 'rfcn_cachedir', cache_name, imdb.name);
 38 | VOCopts  = imdb.details.VOCopts;
 39 | image_ids = imdb.image_ids;
 40 | test_set = VOCopts.testset;
 41 | year = VOCopts.dataset(4:end);
 42 | 
 43 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 
 44 | 
 45 | if use_res_salt
 46 |   prev_rng = rng;
 47 |   rng shuffle;
 48 |   salt = sprintf('%d', randi(100000));
 49 |   res_id = [comp_id '-' salt];
 50 |   rng(prev_rng);
 51 | else
 52 |   res_id = comp_id;
 53 | end
 54 | res_fn = sprintf(VOCopts.detrespath, res_id, cls);
 55 | 
 56 | % write out detections in PASCAL format and score
 57 | fid = fopen(res_fn, 'w');
 58 | for i = 1:length(image_ids);
 59 |   bbox = boxes{i};
 60 |   keep = nms(bbox, 0.3);
 61 |   bbox = bbox(keep,:);
 62 |   for j = 1:size(bbox,1)
 63 |     fprintf(fid, '%s %f %.3f %.3f %.3f %.3f\n', image_ids{i}, bbox(j,end), bbox(j,1:4));
 64 |   end
 65 | end
 66 | fclose(fid);
 67 | 
 68 | recall = [];
 69 | prec = [];
 70 | ap = 0;
 71 | ap_auc = 0;
 72 | 
 73 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
 74 | if do_eval
 75 |   % Bug in VOCevaldet requires that tic has been called first
 76 |   tic;
 77 |   [recall, prec, ap] = VOCevaldet(VOCopts, res_id, cls, draw_curve);
 78 |   ap_auc = xVOCap(recall, prec);
 79 | 
 80 |   % force plot limits
 81 |   ylim([0 1]);
 82 |   xlim([0 1]);
 83 | 
 84 |   print(gcf, '-djpeg', '-r0', ...
 85 |         fullfile(conf.cache_dir, [cls '_pr_' imdb.name suffix '.jpg']));
 86 | end
 87 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);
 88 | 
 89 | save(fullfile(conf.cache_dir,  [cls '_pr_' imdb.name suffix]), ...
 90 |     'recall', 'prec', 'ap', 'ap_auc');
 91 | 
 92 | res.recall = recall;
 93 | res.prec = prec;
 94 | res.ap = ap;
 95 | res.ap_auc = ap_auc;
 96 | if rm_res
 97 |   delete(res_fn);
 98 | end
 99 | 
100 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 
101 | 


--------------------------------------------------------------------------------
/functions/nms/nms_mex.cpp:
--------------------------------------------------------------------------------
  1 | #include "mex.h"
  2 | #ifdef _MSC_VER
  3 | #include <windows.h>
  4 | #include <tchar.h>
  5 | #endif
  6 | #include <vector>
  7 | #include <map>
  8 | using namespace std;
  9 | 
 10 | struct score {
 11 | 	double s;
 12 | 	int idx;
 13 | 	bool operator() (score i, score j) { return (i.idx < j.idx);}
 14 | } score;
 15 | 
 16 | template <typename T>
 17 | void nms(const mxArray *input_boxes, double overlap, vector<int> &vPick, int &nPick)
 18 | {
 19 | 	int nSample = (int)mxGetM(input_boxes);
 20 | 	int nDim_boxes = (int)mxGetN(input_boxes);
 21 | 
 22 |     T *pBoxes = (T*)mxGetData(input_boxes);
 23 | 
 24 | 	vector<double> vArea(nSample);
 25 | 	for (int i = 0; i < nSample; ++i)
 26 | 	{
 27 | 		vArea[i] = double(pBoxes[2*nSample + i] - pBoxes[0*nSample + i] + 1) 
 28 | 		* (pBoxes[3*nSample + i] - pBoxes[1*nSample + i] + 1);
 29 | 		if (vArea[i] < 0)
 30 | 			mexErrMsgTxt("Boxes area must >= 0");
 31 | 	}
 32 | 
 33 | 	std::multimap<T, int> scores;
 34 | 	for (int i = 0; i < nSample; ++i)
 35 | 		scores.insert(std::pair<T,int>(pBoxes[4*nSample + i], i));
 36 | 
 37 | 	nPick = 0;
 38 | 
 39 | 	do 
 40 | 	{
 41 | 		int last = scores.rbegin()->second;
 42 | 		vPick[nPick] = last;
 43 | 		nPick += 1;
 44 | 
 45 | 		for (typename std::multimap<T, int>::iterator it = scores.begin(); it != scores.end();)
 46 | 		{
 47 | 			int it_idx = it->second;
 48 | 			T xx1 = max(pBoxes[0*nSample + last], pBoxes[0*nSample + it_idx]);
 49 | 			T yy1 = max(pBoxes[1*nSample + last], pBoxes[1*nSample + it_idx]);
 50 | 			T xx2 = min(pBoxes[2*nSample + last], pBoxes[2*nSample + it_idx]);
 51 | 			T yy2 = min(pBoxes[3*nSample + last], pBoxes[3*nSample + it_idx]);
 52 | 
 53 | 			double w = max(T(0.0), xx2-xx1+1), h = max(T(0.0), yy2-yy1+1);
 54 | 
 55 | 			double ov = w*h / (vArea[last] + vArea[it_idx] - w*h);
 56 | 
 57 | 			if (ov > overlap)
 58 | 			{
 59 | 				it = scores.erase(it);
 60 | 			}
 61 | 			else
 62 | 			{
 63 | 				it++;
 64 | 			}
 65 | 		}
 66 | 
 67 | 	} while (scores.size() != 0);
 68 | }
 69 | 
 70 | 
 71 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
 72 | {
 73 | 	if (nrhs != 2)
 74 | 		mexErrMsgTxt("Wrong number of inputs"); 
 75 | 	if (nlhs != 1)
 76 | 		mexErrMsgTxt("One output");
 77 | 
 78 | 	const mxArray *input_boxes = prhs[0];
 79 | 	if (mxGetClassID(input_boxes) != mxDOUBLE_CLASS && mxGetClassID(input_boxes) != mxSINGLE_CLASS)
 80 | 		mexErrMsgTxt("Input boxes must be Double or Single");
 81 | 
 82 | 	const mxArray *input_overlap = prhs[1];
 83 | 	if (mxGetClassID(input_overlap) != mxDOUBLE_CLASS )
 84 | 		mexErrMsgTxt("Input overlap must be Double");
 85 | 
 86 | 	double overlap = mxGetScalar(input_overlap);
 87 | 
 88 | 	int nSample = (int)mxGetM(input_boxes);
 89 | 	int nDim_boxes = (int)mxGetN(input_boxes);
 90 | 
 91 | 	if (nSample * nDim_boxes == 0)
 92 | 	{
 93 | 		plhs[0] = mxCreateNumericMatrix(0, 0, mxDOUBLE_CLASS, mxREAL);
 94 | 		return;
 95 | 	}
 96 | 
 97 | 	if (nDim_boxes != 5)
 98 | 		mexErrMsgTxt("nms_mex boxes must has 5 columns");
 99 | 
100 | 	
101 | 	int nPick = 0;
102 | 	vector<int> vPick(nSample);
103 | 	if(mxGetClassID(input_boxes) == mxDOUBLE_CLASS)
104 | 		nms<double>(input_boxes, overlap, vPick, nPick);
105 | 	else
106 | 		nms<float>(input_boxes, overlap, vPick, nPick);
107 | 
108 | 	plhs[0] = mxCreateNumericMatrix(nPick, 1, mxDOUBLE_CLASS, mxREAL);
109 | 	double *pRst = mxGetPr(plhs[0]);
110 | 	for (int i = 0; i < nPick; ++i)
111 | 		pRst[i] = vPick[i] + 1;
112 | }
113 | 


--------------------------------------------------------------------------------
/imdb/imdb_from_voc.m:
--------------------------------------------------------------------------------
 1 | function imdb = imdb_from_voc(root_dir, image_set, year, flip)
 2 | % imdb = imdb_from_voc(root_dir, image_set, year)
 3 | %   Builds an image database for the PASCAL VOC devkit located
 4 | %   at root_dir using the image_set and year.
 5 | %
 6 | %   Inspired by Andrea Vedaldi's MKL imdb and roidb code.
 7 | 
 8 | % AUTORIGHTS
 9 | % ---------------------------------------------------------
10 | % Copyright (c) 2014, Ross Girshick
11 | % 
12 | % This file is part of the R-CNN code and is available 
13 | % under the terms of the Simplified BSD License provided in 
14 | % LICENSE. Please retain this notice and LICENSE if you use 
15 | % this file (or any portion of it) in your project.
16 | % ---------------------------------------------------------
17 | 
18 | %imdb.name = 'voc_train_2007'
19 | %imdb.image_dir = '/work4/rbg/VOC2007/VOCdevkit/VOC2007/JPEGImages/'
20 | %imdb.extension = '.jpg'
21 | %imdb.image_ids = {'000001', ... }
22 | %imdb.sizes = [numimages x 2]
23 | %imdb.classes = {'aeroplane', ... }
24 | %imdb.num_classes
25 | %imdb.class_to_id
26 | %imdb.class_ids
27 | %imdb.eval_func = pointer to the function that evaluates detections
28 | %imdb.roidb_func = pointer to the function that returns regions of interest
29 | 
30 | if nargin < 4
31 |     flip = false;
32 | end
33 | 
34 | cache_file = ['./imdb/cache/imdb_voc_' year '_' image_set];
35 | if flip
36 |     cache_file = [cache_file, '_flip'];
37 | end
38 | try
39 |   load(cache_file);
40 | catch
41 |   VOCopts = get_voc_opts(root_dir);
42 |   VOCopts.testset = image_set;
43 | 
44 |   imdb.name = ['voc_' year '_' image_set];
45 |   imdb.image_dir = fileparts(VOCopts.imgpath);
46 |   imdb.image_ids = textread(sprintf(VOCopts.imgsetpath, image_set), '%s');
47 |   imdb.extension = 'jpg';
48 |   imdb.flip = flip;
49 |   if flip
50 |       image_at = @(i) sprintf('%s/%s.%s', imdb.image_dir, imdb.image_ids{i}, imdb.extension);
51 |       flip_image_at = @(i) sprintf('%s/%s_flip.%s', imdb.image_dir, imdb.image_ids{i}, imdb.extension);
52 |       for i = 1:length(imdb.image_ids)
53 |           if ~exist(flip_image_at(i), 'file')
54 |              im = imread(image_at(i));
55 |              imwrite(fliplr(im), flip_image_at(i));
56 |           end
57 |       end
58 |       img_num = length(imdb.image_ids)*2;
59 |       image_ids = imdb.image_ids;
60 |       imdb.image_ids(1:2:img_num) = image_ids;
61 |       imdb.image_ids(2:2:img_num) = cellfun(@(x) [x, '_flip'], image_ids, 'UniformOutput', false);
62 |       imdb.flip_from = zeros(img_num, 1);
63 |       imdb.flip_from(2:2:img_num) = 1:2:img_num;
64 |   end
65 |   imdb.classes = VOCopts.classes;
66 |   imdb.num_classes = length(imdb.classes);
67 |   imdb.class_to_id = ...
68 |     containers.Map(imdb.classes, 1:imdb.num_classes);
69 |   imdb.class_ids = 1:imdb.num_classes;
70 | 
71 |   % private VOC details
72 |   imdb.details.VOCopts = VOCopts;
73 | 
74 |   % VOC specific functions for evaluation and region of interest DB
75 |   imdb.eval_func = @imdb_eval_voc;
76 |   imdb.roidb_func = @roidb_from_voc;
77 |   imdb.image_at = @(i) ...
78 |       sprintf('%s/%s.%s', imdb.image_dir, imdb.image_ids{i}, imdb.extension);
79 | 
80 |   for i = 1:length(imdb.image_ids)
81 |     tic_toc_print('imdb (%s): %d/%d\n', imdb.name, i, length(imdb.image_ids));
82 |     info = imfinfo(sprintf(VOCopts.imgpath, imdb.image_ids{i}));
83 |     imdb.sizes(i, :) = [info.Height info.Width];
84 |   end
85 | 
86 |   fprintf('Saving imdb to cache...');
87 |   save(cache_file, 'imdb', '-v7.3');
88 |   fprintf('done\n');
89 | end
90 | 


--------------------------------------------------------------------------------
/experiments/script_rfcn_demo.m:
--------------------------------------------------------------------------------
 1 | function script_rfcn_demo()
 2 | % script_rfcn_demo()
 3 | % A demo of R-FCN for object detection using ResNet101 model and RPN
 4 | % proposals: runs the detector on the sample images under data/demo.
 5 | % --------------------------------------------------------
 6 | % R-FCN implementation
 7 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
 8 | % Copyright (c) 2016, Jifeng Dai
 9 | % Licensed under The MIT License [see LICENSE for details]
10 | % --------------------------------------------------------
11 | 
12 | clc;
13 | clear mex;
14 | clear is_valid_handle; % to clear init_key
15 | repo_root = fileparts(fileparts(mfilename('fullpath')));
16 | run(fullfile(repo_root, 'startup'));
17 | %% -------------------- CONFIG --------------------
18 | opts = struct('caffe_version', 'caffe_rfcn', 'gpu_id', auto_select_gpu, ...
19 |               'use_gpu', true, 'max_rois_num_in_gpu', 5000);
20 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
21 | % class names, indexed like the score columns returned by rfcn_im_detect
22 | classes = {'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', ...
23 |            'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', ...
24 |            'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'};
25 | 
26 | % data and model locations are resolved against the current folder (pwd)
27 | demo_dir   = fullfile(pwd, 'data', 'demo');
28 | models_dir = fullfile(pwd, 'models');
29 | conf = rfcn_config_ohem('image_means', ...
30 |     fullfile(models_dir, 'pre_trained_models', 'ResNet-101L', 'mean_image'));
31 | 
32 | %% -------------------- INIT MODEL -----------------
33 | rfcn_net_def = fullfile(models_dir, 'rfcn_prototxts', 'ResNet-101L_OHEM_res3a', 'test.prototxt');
34 | rfcn_net     = fullfile(pwd, 'output', 'rfcn_demo', ...
35 |                         'rfcn_VOC0712_ResNet101_OHEM_rpn_resnet101', 'final');
36 | caffe_net = caffe.Net(rfcn_net_def, 'test');
37 | caffe_net.copy_from(rfcn_net);
38 | 
39 | % set gpu/cpu
40 | if opts.use_gpu
41 |     caffe.set_mode_gpu();
42 | else
43 |     caffe.set_mode_cpu();
44 | end
45 | %% -------------------- WARM UP --------------------
46 | % the first run will be slower, so run two throw-away detections on a
47 | % uniform gray image with randomly jittered proposals
48 | for warm_iter = 1:2
49 |     im = uint8(ones(375, 500, 3)*128);
50 |     proposals = repmat([1,1,400,275], [2000, 1]);
51 |     proposals = proposals+100*rand(size(proposals));
52 |     [boxes, scores] = rfcn_im_detect(conf, caffe_net, im, proposals, opts.max_rois_num_in_gpu);
53 | end
54 | 
55 | %% -------------------- TESTING --------------------
56 | im_names = {'000166', '001852', '002597', '004030', '005225'};
57 | num_images = length(im_names);
58 | num_classes = length(classes);
59 | thres = 0.6;   % minimum score for a detection to be displayed
60 | running_time = zeros(num_images, 1);
61 | for k = 1:num_images
62 |     im = imread(fullfile(demo_dir, [im_names{k}, '.jpg']));
63 |     loaded = load(fullfile(demo_dir, [im_names{k}, '_boxes.mat']));
64 |     proposals = single(loaded.boxes);
65 |     tic
66 |     [boxes, scores] = rfcn_im_detect(conf, caffe_net, im, proposals, opts.max_rois_num_in_gpu);
67 |     running_time(k) = toc;
68 |     fprintf('%s, (%dx%d): time %.3fs\n', im_names{k}, size(im, 1), size(im, 2), running_time(k));
69 | 
70 |     % per class: gather [box, score] rows, run NMS, then drop low scores
71 |     dets = cell(num_classes, 1);
72 |     for c = 1:num_classes
73 |         cand = [boxes(:, (4*c-3):(4*c)), scores(:, c)];
74 |         cand = cand(nms(cand, 0.3), :);
75 |         dets{c} = cand(cand(:, 5) >= thres, :);
76 |     end
77 |     figure(k);
78 |     showboxes(im, dets, classes, 'voc');
79 |     pause(0.1);
80 | end
81 | fprintf('mean time: %.3fs\n', mean(running_time));
82 | end
83 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Faster R-CNN
 2 | 
 3 | The MIT License (MIT)
 4 | 
 5 | Copyright (c) 2015 Microsoft Corporation
 6 | 
 7 | Permission is hereby granted, free of charge, to any person obtaining a copy
 8 | of this software and associated documentation files (the "Software"), to deal
 9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be included in
15 | all copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | THE SOFTWARE.
24 | 
25 | ************************************************************************
26 | 
27 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION
28 | 
29 | This project, Faster R-CNN, incorporates material from the project(s) listed below (collectively, "Third Party Code").  Microsoft is not the original author of the Third Party Code.  The original copyright notice and license under which Microsoft received such Third Party Code are set out below. This Third Party Code is licensed to you under their original license terms set forth below.  Microsoft reserves all other rights not expressly granted, whether by implication, estoppel or otherwise.
30 |  
31 | 1.	Caffe, version 0.9, (https://github.com/BVLC/caffe/)
32 | 
33 | COPYRIGHT
34 | 
35 | All contributions by the University of California:
36 | Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
37 | All rights reserved.
38 | 
39 | All other contributions:
40 | Copyright (c) 2014, 2015, the respective contributors
41 | All rights reserved.
42 | 
43 | Caffe uses a shared copyright model: each contributor holds copyright over their contributions to Caffe. The project versioning records all such contribution and copyright details. If a contributor wants to further mark their specific copyright on a particular contribution, they should indicate their copyright solely in the commit message of the change when it is committed.
44 | 
45 | The BSD 2-Clause License
46 | 
47 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
48 | 
49 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
50 | 
51 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
52 | 
53 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 | 
55 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/functions/nms/nms_multiclass_mex.cpp:
--------------------------------------------------------------------------------
  1 | #include "mex.h"
  2 | #ifdef WIN32
  3 | #include <windows.h>
  4 | #include <tchar.h>
  5 | #else
  6 | #include <algorithm>
  7 | #endif
  8 | #include <vector>
  9 | #include <map>
 10 | #include <omp.h>
 11 | using namespace std;
 12 | 
 13 | struct score {
 14 | 	double s;
 15 | 	int idx;
 16 | 	bool operator() (score lhs, score rhs) { return lhs.idx < rhs.idx; }  // compares idx only; s is not consulted
 17 | } score;  // note: this also defines a global object named `score`
 18 | 
 19 | // Greedy non-maximum suppression for one score column of a column-major box matrix.
 20 | //   input_boxes : real matrix whose element type is T; one box per row, columns 0-3 are
 21 | //                 used as x1, y1, x2, y2 and column iScoreIdx holds the score.
 22 | //   overlap     : a box is dropped when its IoU with an already picked box exceeds this.
 23 | //   vArea       : area of every row's box, precomputed by the caller.
 24 | //   vPick       : receives the 0-based picked row indices, best score first.
 25 | //   nPick       : receives the number of valid entries in vPick.
 26 | template <typename T>
 27 | void nms(const mxArray *input_boxes, int iScoreIdx, double overlap, const vector<double> &vArea, vector<int> &vPick, int &nPick)
 28 | {
 29 | 	const int nSample = (int)mxGetM(input_boxes);
 30 | 	const T *pBoxes = (const T*)mxGetData(input_boxes);
 31 | 	const T *pX1 = pBoxes + 0*nSample, *pY1 = pBoxes + 1*nSample;
 32 | 	const T *pX2 = pBoxes + 2*nSample, *pY2 = pBoxes + 3*nSample;
 33 | 
 34 | 	// In the worst case every row is picked; make sure the output buffer can hold that.
 35 | 	if ((int)vPick.size() < nSample)
 36 | 		vPick.resize(nSample);
 37 | 
 38 | 	// Keyed by score in ascending order, so the best remaining box is always the last entry.
 39 | 	typedef std::multimap<T, int> ScoreMap;
 40 | 	ScoreMap scores;
 41 | 	for (int i = 0; i < nSample; ++i)
 42 | 		scores.insert(std::pair<T,int>(pBoxes[iScoreIdx*nSample + i], i));
 43 | 
 44 | 	nPick = 0;
 45 | 	while (!scores.empty())
 46 | 	{
 47 | 		// Take the best box out of the map before testing overlaps, so that every pass
 48 | 		// shrinks the map even when the box would not suppress itself (overlap >= 1,
 49 | 		// zero or NaN area). This bounds both the loop and nPick by nSample.
 50 | 		typename ScoreMap::iterator best = scores.end();
 51 | 		--best;
 52 | 		const int last = best->second;
 53 | 		scores.erase(best);
 54 | 		vPick[nPick++] = last;
 55 | 
 56 | 		for (typename ScoreMap::iterator it = scores.begin(); it != scores.end();)
 57 | 		{
 58 | 			const int cur = it->second;
 59 | 			const T xx1 = std::max(pX1[last], pX1[cur]);
 60 | 			const T yy1 = std::max(pY1[last], pY1[cur]);
 61 | 			const T xx2 = std::min(pX2[last], pX2[cur]);
 62 | 			const T yy2 = std::min(pY2[last], pY2[cur]);
 63 | 
 64 | 			// Widen to double before clamping: std::max needs both arguments to have the
 65 | 			// same type, so max(0.0, <float expression>) does not compile when T is float.
 66 | 			const double w = std::max(0.0, (double)(xx2 - xx1 + 1));
 67 | 			const double h = std::max(0.0, (double)(yy2 - yy1 + 1));
 68 | 			const double ov = w*h / (vArea[last] + vArea[cur] - w*h);
 69 | 
 70 | 			if (ov > overlap)
 71 | 				scores.erase(it++);   // advance first; only the erased node's iterator is invalidated
 72 | 			else
 73 | 				++it;
 74 | 		}
 75 | 	}
 76 | }
 78 | 
 79 | 
 80 | // Fills vArea[i] with the area of row i of a column-major box matrix of element type T,
 81 | // using columns 0-3 as x1, y1, x2, y2. Raises a MATLAB error on a negative area.
 82 | template <typename T>
 83 | static void computeAreas(const mxArray *input_boxes, vector<double> &vArea)
 84 | {
 85 | 	const int nSample = (int)mxGetM(input_boxes);
 86 | 	const T *pBoxes = (const T*)mxGetData(input_boxes);
 87 | 	for (int i = 0; i < nSample; ++i)
 88 | 	{
 89 | 		vArea[i] = double(pBoxes[2*nSample + i] - pBoxes[0*nSample + i] + 1)
 90 | 			* (pBoxes[3*nSample + i] - pBoxes[1*nSample + i] + 1);
 91 | 		if (vArea[i] < 0)
 92 | 			mexErrMsgTxt("Boxes area must >= 0");
 93 | 	}
 94 | }
 95 | 
 96 | // MEX gateway, called with two inputs and one output.
 97 | //   prhs[0] : double or single matrix of boxes, one per row, with at least 5 columns;
 98 | //             columns 1-4 are the box corners and every further column is a score column.
 99 | //   prhs[1] : double scalar overlap threshold.
100 | //   plhs[0] : (columns-4)-by-1 cell array; cell k holds the 1-based row indices kept by
101 | //             NMS on score column k+4, as a double column vector. An empty input matrix
102 | //             yields a 0-by-0 double matrix instead of a cell array.
103 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
104 | {
105 | 	if (nrhs != 2)
106 | 		mexErrMsgTxt("Wrong number of inputs");
107 | 	if (nlhs != 1)
108 | 		mexErrMsgTxt("One output");
109 | 
110 | 	const mxArray *input_boxes = prhs[0];
111 | 	const bool isDouble = (mxGetClassID(input_boxes) == mxDOUBLE_CLASS);
112 | 	if (!isDouble && mxGetClassID(input_boxes) != mxSINGLE_CLASS)
113 | 		mexErrMsgTxt("Input boxes must be Double or Single");
114 | 
115 | 	const mxArray *input_overlap = prhs[1];
116 | 	if (mxGetClassID(input_overlap) != mxDOUBLE_CLASS )
117 | 		mexErrMsgTxt("Input overlap must be Double");
118 | 	const double overlap = mxGetScalar(input_overlap);
119 | 
120 | 	const int nSample = (int)mxGetM(input_boxes);
121 | 	const int nDim_boxes = (int)mxGetN(input_boxes);
122 | 	if (nSample * nDim_boxes == 0)
123 | 	{
124 | 		plhs[0] = mxCreateNumericMatrix(0, 0, mxDOUBLE_CLASS, mxREAL);
125 | 		return;
126 | 	}
127 | 	if (nDim_boxes < 5)
128 | 		mexErrMsgTxt("nms_mex boxes must has least 5 columns");
129 | 
130 | 	// Areas are shared by all score columns, so compute them once. The element type must
131 | 	// match the matrix class: single input is read as float, double input as double.
132 | 	vector<double> vArea(nSample);
133 | 	if (isDouble)
134 | 		computeAreas<double>(input_boxes, vArea);
135 | 	else
136 | 		computeAreas<float>(input_boxes, vArea);
137 | 
138 | 	const int nClass = nDim_boxes - 4;
139 | 	vector<int> nPick(nClass, 0);
140 | 	vector<vector<int> > vPicks(nClass);
141 | 
142 | 	// Only the suppression itself runs in parallel; iteration i writes nothing but
143 | 	// vPicks[i] and nPick[i]. No MATLAB array is created or modified in this region.
144 | #pragma omp parallel for schedule(dynamic)
145 | 	for (int i = 0; i < nClass; ++i)
146 | 	{
147 | 		vPicks[i].resize(nSample);
148 | 		if (isDouble)
149 | 			nms<double>(input_boxes, i+4, overlap, vArea, vPicks[i], nPick[i]);
150 | 		else
151 | 			nms<float>(input_boxes, i+4, overlap, vArea, vPicks[i], nPick[i]);
152 | 	}
153 | 
154 | 	// Build the output on the calling thread: the MEX/mx allocation API is not meant to be
155 | 	// used from worker threads.
156 | 	plhs[0] = mxCreateCellMatrix(nClass, 1);
157 | 	for (int i = 0; i < nClass; ++i)
158 | 	{
159 | 		mxArray *mxPick = mxCreateNumericMatrix(nPick[i], 1, mxDOUBLE_CLASS, mxREAL);
160 | 		double *pRst = mxGetPr(mxPick);
161 | 		for (int j = 0; j < nPick[i]; ++j)
162 | 			pRst[j] = vPicks[i][j] + 1;   // to MATLAB's 1-based index
163 | 		mxSetCell(plhs[0], i, mxPick);
164 | 	}
165 | }


--------------------------------------------------------------------------------
/functions/rfcn/rfcn_im_detect.m:
--------------------------------------------------------------------------------
  1 | function [pred_boxes, scores] = rfcn_im_detect(conf, caffe_net, im, boxes, max_rois_num_in_gpu)
  2 | % [pred_boxes, scores] = rfcn_im_detect(conf, caffe_net, im, boxes, max_rois_num_in_gpu)
  3 | % Runs the network on one image and its proposal boxes, forwarding at most
  4 | % max_rois_num_in_gpu ROIs per pass. Returns per-class boxes and scores with
  5 | % the background columns removed, one row per input box.
  6 | % --------------------------------------------------------
  7 | % R-FCN implementation
  8 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
  9 | % Copyright (c) 2016, Jifeng Dai
 10 | % Licensed under The MIT License [see LICENSE for details]
 11 | 
 12 |     [im_blob, rois_blob, ~] = get_blobs(conf, im, boxes);
 13 | 
 14 |     % Several image ROIs may map to the same feature ROI. Forward only the
 15 |     % unique rows; inv_index expands the results back to the input order.
 16 |     [rois_blob, index, inv_index] = unique(rois_blob, 'rows');
 17 |     boxes = boxes(index, :);
 18 | 
 19 |     % caffe layout: reverse the channel order (rgb -> bgr) and swap the first two dims
 20 |     im_blob = single(permute(im_blob(:, :, [3, 2, 1], :), [2, 1, 3, 4]));
 21 |     % zero-based ROI values (c indexing), one ROI per slice along the 4th dim
 22 |     rois_blob = single(permute(rois_blob - 1, [3, 4, 2, 1]));
 23 | 
 24 |     total_rois = size(rois_blob, 4);
 25 |     num_passes = ceil(total_rois / max_rois_num_in_gpu);
 26 |     total_scores = cell(num_passes, 1);
 27 |     total_box_deltas = cell(num_passes, 1);
 28 |     for pass = 1:num_passes
 29 |         first_roi = 1 + (pass-1) * max_rois_num_in_gpu;
 30 |         last_roi = min(total_rois, pass * max_rois_num_in_gpu);
 31 |         net_inputs = {im_blob, rois_blob(:, :, :, first_roi:last_roi)};
 32 | 
 33 |         % resize the net's input blobs for this pass, then run it
 34 |         caffe_net.reshape_as_input(net_inputs);
 35 |         caffe_net.forward(net_inputs);
 36 | 
 37 |         if conf.test_binary
 38 |             % simulate binary logistic regression: report scores as fg - bg
 39 |             pass_scores = squeeze(caffe_net.blobs('cls_score').get_data())';
 40 |             pass_scores = bsxfun(@minus, pass_scores, pass_scores(:, 1));
 41 |         else
 42 |             % use softmax estimated probabilities
 43 |             pass_scores = squeeze(caffe_net.blobs('cls_prob').get_data())';
 44 |         end
 45 |         total_scores{pass} = pass_scores;
 46 | 
 47 |         % bounding-box regression deltas for the same ROIs
 48 |         total_box_deltas{pass} = squeeze(caffe_net.blobs('bbox_pred').get_data())';
 49 |     end
 50 | 
 51 |     % stitch the passes together, one row per unique ROI
 52 |     scores = cell2mat(total_scores);
 53 |     box_deltas = cell2mat(total_box_deltas);
 54 | 
 55 |     % apply the deltas to the unique boxes and keep the results inside the image
 56 |     pred_boxes = rfcn_bbox_transform_inv(boxes, box_deltas);
 57 |     pred_boxes = clip_boxes(pred_boxes, size(im, 2), size(im, 1));
 58 | 
 59 |     % back to the original set of boxes, without the background entries
 60 |     % (first score column, first four box columns)
 61 |     scores = scores(inv_index, 2:end);
 62 |     pred_boxes = pred_boxes(inv_index, 5:end);
 63 | 
 64 |     % class-agnostic regression yields one box per ROI: repeat it for every class
 65 |     if conf.bbox_class_agnostic
 66 |         pred_boxes = repmat(pred_boxes, [1, size(scores,2)]);
 67 |     end
 68 | end
 79 | 
 80 | function [im_blob, roi_blob, scales] = get_blobs(conf, im, rois)
 81 |     [im_blob, scales] = get_image_blob(conf, im);    % image blob plus the scale factor(s) it was built with
 82 |     roi_blob = get_rois_blob(conf, rois, scales);    % ROIs are mapped with those same factors
 83 | end
 84 | 
 85 | function [blob, im_scales] = get_image_blob(conf, im)
 86 | % Preprocess im once per entry of conf.test_scales and stack the results into one blob.
 87 |     prep_one = @(target) prep_im_for_blob(im, conf.image_means, target, conf.test_max_size);
 88 |     [scaled_ims, scale_cells] = arrayfun(prep_one, conf.test_scales, 'UniformOutput', false);
 89 |     im_scales = cell2mat(scale_cells);
 90 |     blob = im_list_to_blob(scaled_ims);
 91 | end
 90 | 
 91 | function [rois_blob] = get_rois_blob(conf, im_rois, im_scale_factors)
 92 | % Build single-precision ROI rows of the form [level, scaled box].
 93 |     [scaled_rois, scale_idx] = map_im_rois_to_feat_rois(conf, im_rois, im_scale_factors);
 94 |     rois_blob = single(horzcat(scale_idx, scaled_rois));
 95 | end
 95 | 
 96 | function [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, scales)
 97 | % Scale image-space ROIs into the coordinates of the rescaled image each is assigned to.
 98 | %   im_rois   : N-by-4 boxes, used as [x1 y1 x2 y2] with 1-based coordinates
 99 | %   scales    : scale factors, row or column vector (or a scalar)
100 | %   feat_rois : N-by-4 single, round((im_rois - 1) * scale) + 1
101 | %   levels    : N-by-1 index into scales; with several scales, the one that brings
102 | %               the ROI area closest to 224^2
103 | %   conf is not used here; it is kept so the call signature stays unchanged.
104 |     im_rois = single(im_rois);
105 | 
106 |     % Force a column. Indexing a row vector with the column `levels` returns a row,
107 |     % and the bsxfun product below needs one factor per ROI row.
108 |     scales = scales(:);
109 | 
110 |     if length(scales) > 1
111 |         widths = im_rois(:, 3) - im_rois(:, 1) + 1;
112 |         heights = im_rois(:, 4) - im_rois(:, 2) + 1;
113 | 
114 |         areas = widths .* heights;
115 |         scaled_areas = bsxfun(@times, areas(:), (scales .^ 2)');
116 |         [~, levels] = min(abs(scaled_areas - 224.^2), [], 2);
117 |     else
118 |         levels = ones(size(im_rois, 1), 1);
119 |     end
120 | 
121 |     feat_rois = round(bsxfun(@times, im_rois-1, scales(levels))) + 1;
122 | end
112 | 
113 | function boxes = clip_boxes(boxes, im_width, im_height)
114 | % Clamp box coordinates into the image. Columns repeat as [x1 y1 x2 y2]:
115 | % x columns are limited to [1, im_width], y columns to [1, im_height].
116 |     upper = {im_width, im_height};
117 |     for offset = 1:4
118 |         cols = offset:4:size(boxes, 2);
119 |         % odd offsets are x coordinates, even offsets are y coordinates
120 |         boxes(:, cols) = max(min(boxes(:, cols), upper{2 - mod(offset, 2)}), 1);
121 |     end
122 | end


--------------------------------------------------------------------------------
/functions/nms/nms_gpu_mex.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Example of how to use the mxGPUArray API in a MEX file.  This example shows
  3 | * how to write a MEX function that takes a gpuArray input and returns a
  4 | * gpuArray output, e.g. B=mexFunction(A).
  5 | *
  6 | * Copyright 2012 The MathWorks, Inc.
  7 | */
  8 | 
  9 | #include "mex.h"
 10 | #include <vector>
 11 | #include <iostream>
 12 | 
 13 | #define DIVUP(m,n)		((m)/(n)+((m)%(n)>0))
 14 | int const threadsPerBlock = (sizeof(unsigned long long) * 8);
 15 | 
 16 | /*
 17 | * Device code
 18 | */
 19 | // Intersection-over-union of two boxes read as [x1, y1, x2, y2]; extents count both end coordinates (+1).
 20 | __device__ inline float devIoU(float const * const a, float const * const b)
 21 | {
 22 | 	const float interW = max(min(a[2], b[2]) - max(a[0], b[0]) + 1, 0.f);
 23 | 	const float interH = max(min(a[3], b[3]) - max(a[1], b[1]) + 1, 0.f);
 24 | 	const float interS = interW * interH;
 25 | 	const float areaA = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 26 | 	const float areaB = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 27 | 	return interS / (areaA + areaB - interS);
 28 | }
 29 | 
 30 | // One thread block compares up to threadsPerBlock "row" boxes (blockIdx.y) against up to
 31 | // threadsPerBlock "column" boxes (blockIdx.x). Each row thread writes one 64-bit word to
 32 | // dev_mask in which bit i is set when the row box overlaps column box i of this tile by
 33 | // more than nms_overlap_thres. dev_boxes holds 5 floats per box.
 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thres, const float *dev_boxes, unsigned long long *dev_mask)
 35 | {
 36 | 	const int row_start = blockIdx.y;
 37 | 	const int col_start = blockIdx.x;
 38 | 	const int tid = threadIdx.x;
 39 | 	// the last tile in each direction may be only partially filled
 40 | 	const int row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 41 | 	const int col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 42 | 
 43 | 	// stage this tile's column boxes in shared memory, one box per thread
 44 | 	__shared__ float block_boxes[threadsPerBlock * 5];
 45 | 	if (tid < col_size)
 46 | 	{
 47 | 		const float *src = dev_boxes + (threadsPerBlock * col_start + tid) * 5;
 48 | 		for (int c = 0; c < 5; ++c)
 49 | 			block_boxes[tid * 5 + c] = src[c];
 50 | 	}
 51 | 	__syncthreads();
 52 | 
 53 | 	// threads beyond the last row box have nothing to compare (no barrier follows)
 54 | 	if (tid >= row_size)
 55 | 		return;
 56 | 
 57 | 	const int cur_box_idx = threadsPerBlock * row_start + tid;
 58 | 	const float *cur_box = dev_boxes + cur_box_idx * 5;
 59 | 
 60 | 	// on a diagonal tile only later boxes are tested, which skips the box itself
 61 | 	const int start = (row_start == col_start) ? tid + 1 : 0;
 62 | 	unsigned long long t = 0;
 63 | 	for (int i = start; i < col_size; i++)
 64 | 	{
 65 | 		if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thres)
 66 | 			t |= 1ULL << i;
 67 | 	}
 68 | 
 69 | 	const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 70 | 	dev_mask[cur_box_idx * col_blocks + col_start] = t;
 71 | }
 67 | 
 68 | /*
 69 | * Host code
 70 | */
 71 | // MEX gateway, called with two inputs.
 72 | //   prhs[0] : 5-by-N single matrix, one box per column; rows 1-4 are used as
 73 | //             x1, y1, x2, y2 by the overlap test.
 74 | //   prhs[1] : double scalar overlap threshold.
 75 | //   plhs[0] : int32 column vector with the 1-based indices of the kept boxes.
 76 | // Boxes are visited in input order and a box is dropped when an earlier kept box overlaps
 77 | // it by more than the threshold (callers presumably pass boxes sorted by descending
 78 | // score -- TODO confirm against the MATLAB wrapper).
 79 | void mexFunction(int nlhs, mxArray *plhs[],
 80 | 	int nrhs, const mxArray *prhs[])
 81 | {
 82 | 	float *boxes_dev = NULL;
 83 | 	unsigned long long *mask_dev = NULL;
 84 | 
 85 | 	if (nrhs != 2) {
 86 | 		mexErrMsgTxt("nms_gpu_mex::need 2 inputs");
 87 | 	}
 88 | 
 89 | 	const mxArray *boxes = prhs[0];
 90 | 	if (mxGetClassID(boxes) != mxSINGLE_CLASS) {
 91 | 		mexErrMsgTxt("nms_gpu_mex::input boxes must be single");
 92 | 	}
 93 | 
 94 | 	const mxArray *ov_thres = prhs[1];
 95 | 	if (mxGetClassID(ov_thres) != mxDOUBLE_CLASS) {
 96 | 		mexErrMsgTxt("nms_gpu_mex::input overlap threshold must be double");
 97 | 	}
 98 | 	const float nms_overlap_thres = (float)mxGetScalar(ov_thres);
 99 | 
100 | 	const int boxes_dim = (int)mxGetM(boxes);
101 | 	const int boxes_num = (int)mxGetN(boxes);
102 | 	if (boxes_dim != 5)
103 | 	{
104 | 		mexErrMsgTxt("nms_gpu_mex::input boxes's row must be 5");
105 | 	}
106 | 
107 | 	std::vector<int> keep;
108 | 
109 | 	// With no boxes there is nothing to launch: a grid with zero blocks is an invalid
110 | 	// configuration and the host buffers below would be empty.
111 | 	if (boxes_num > 0)
112 | 	{
113 | 		const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
114 | 		const size_t boxes_bytes = mxGetNumberOfElements(boxes) * sizeof(float);
115 | 		const size_t mask_count = (size_t)boxes_num * col_blocks;
116 | 		const size_t mask_bytes = mask_count * sizeof(unsigned long long);
117 | 		// mxGetData, not mxGetPr: the array holds single values, not doubles
118 | 		const float *boxes_host = (const float *)mxGetData(boxes);
119 | 		std::vector<unsigned long long> mask_host(mask_count);
120 | 
121 | 		cudaError_t err = cudaMalloc(&boxes_dev, boxes_bytes);
122 | 		if (err == cudaSuccess)
123 | 			err = cudaMalloc(&mask_dev, mask_bytes);
124 | 		if (err == cudaSuccess)
125 | 			err = cudaMemcpy(boxes_dev, boxes_host, boxes_bytes, cudaMemcpyHostToDevice);
126 | 		if (err == cudaSuccess)
127 | 		{
128 | 			// 2-d grid: one thread block per (row tile, column tile) pair of boxes
129 | 			dim3 blocks(col_blocks, col_blocks);
130 | 			dim3 threads(threadsPerBlock);
131 | 			nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thres, boxes_dev, mask_dev);
132 | 			err = cudaGetLastError();
133 | 		}
134 | 		if (err == cudaSuccess)
135 | 			err = cudaMemcpy(&mask_host[0], mask_dev, mask_bytes, cudaMemcpyDeviceToHost);
136 | 
137 | 		// Release device memory before reporting: mexErrMsgTxt does not return.
138 | 		cudaFree(boxes_dev);
139 | 		cudaFree(mask_dev);
140 | 		if (err != cudaSuccess)
141 | 			mexErrMsgTxt(cudaGetErrorString(err));
142 | 
143 | 		// remv holds one bit per box; a set bit means an earlier kept box suppressed it
144 | 		std::vector<unsigned long long> remv(col_blocks, 0ULL);
145 | 		keep.reserve(boxes_num);
146 | 		for (int i = 0; i < boxes_num; i++)
147 | 		{
148 | 			const int nblock = i / threadsPerBlock;
149 | 			const int inblock = i % threadsPerBlock;
150 | 			if (remv[nblock] & (1ULL << inblock))
151 | 				continue;
152 | 
153 | 			keep.push_back(i + 1);  // to matlab's index
154 | 
155 | 			// merge box i's overlap bits; words before nblock only describe earlier boxes
156 | 			const unsigned long long *p = &mask_host[0] + (size_t)i * col_blocks;
157 | 			for (int j = nblock; j < col_blocks; j++)
158 | 			{
159 | 				remv[j] |= p[j];
160 | 			}
161 | 		}
162 | 	}
163 | 
164 | 	/* Wrap the result up as a MATLAB int32 array for return. */
165 | 	mwSize dims[4] = { (mwSize)keep.size(), 1, 1, 1 };
166 | 	plhs[0] = mxCreateNumericArray(4, dims, mxINT32_CLASS, mxREAL);
167 | 
168 | 	int *output = (int *)mxGetData(plhs[0]);
169 | 	for (size_t k = 0; k < keep.size(); ++k)
170 | 		output[k] = keep[k];
171 | }
161 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # *R-FCN*: Object Detection via Region-based Fully Convolutional Networks
  2 | 
  3 | By Jifeng Dai, Yi Li, Kaiming He, Jian Sun
  4 | 
  5 | **It is highly recommended to use the [deformable R-FCN](https://github.com/msracver/Deformable-ConvNets) implemented in MXNet, which significantly increases the accuracy at very low extra computational overhead.**
  6 | 
  7 | *A [python version of R-FCN](https://github.com/Orpine/py-R-FCN) is available, which supports end-to-end training/inference of R-FCN for object detection.*
  8 | 
  9 | 
 10 | ### Introduction
 11 | 
 12 | **R-FCN** is a region-based object detection framework leveraging deep fully-convolutional networks, which is accurate and efficient. In contrast to previous region-based detectors such as Fast/Faster R-CNN that apply a costly per-region sub-network hundreds of times, our region-based detector is fully convolutional with almost all computation shared on the entire image. R-FCN can naturally adopt powerful fully convolutional image classifier backbones, such as [ResNets](https://github.com/KaimingHe/deep-residual-networks), for object detection.
 13 | 
 14 | R-FCN was initially described in a [NIPS 2016 paper](https://arxiv.org/abs/1605.06409).
 15 | 
 16 | This code has been tested on Windows 7/8 64 bit, Windows Server 2012 R2, and Ubuntu 14.04, with Matlab 2014a.
 17 | 
 18 | ### License
 19 | 
 20 | R-FCN is released under the MIT License (refer to the LICENSE file for details).
 21 | 
 22 | ### Citing R-FCN
 23 | 
 24 | If you find R-FCN useful in your research, please consider citing:
 25 | 
 26 |     @article{dai16rfcn,
 27 |         Author = {Jifeng Dai and Yi Li and Kaiming He and Jian Sun},
 28 |         Title = {{R-FCN}: Object Detection via Region-based Fully Convolutional Networks},
 29 |         Journal = {arXiv preprint arXiv:1605.06409},
 30 |         Year = {2016}
 31 |     }
 32 | 
 33 | ### Main Results
 34 | 
 35 | |                   | training data       | test data             | mAP   | time/img (K40) | time/img (Titan X) |
 36 | |-------------------|:-------------------:|:---------------------:|:-----:|:--------------:|:------------------:|
 37 | |R-FCN, ResNet-50  | VOC 07+12 trainval  | VOC 07 test           | 77.4% | 0.12sec        | 0.09sec            |
 38 | |R-FCN, ResNet-101 | VOC 07+12 trainval  | VOC 07 test           | 79.5% | 0.17sec        | 0.12sec            |
 39 | 
 40 | 
 41 | ### Requirements: software
 42 | 
 43 | 0. `Caffe` build for R-FCN (included in this repository, see `external/caffe`)
 44 |     - If you are using Windows, you may download a compiled mex file by running `fetch_data/fetch_caffe_mex_windows_vs2013_cuda75.m`
 45 |     - If you are using Linux or you want to compile for Windows, please recompile [our Caffe branch](https://github.com/daijifeng001/caffe-rfcn).
 46 | 0.	MATLAB 2014a or later
 47 |  
 48 |     
 49 | ### Requirements: hardware
 50 | 
 51 | GPU: Titan, Titan X, K40, K80.
 52 | 
 53 | ### Demo
 54 | 0.	Run `fetch_data/fetch_caffe_mex_windows_vs2013_cuda75.m` to download a compiled Caffe mex (for Windows only).
 55 | 0.	Run `fetch_data/fetch_demo_model_ResNet101.m` to download a R-FCN model using ResNet-101 net (trained on VOC 07+12 trainval).
 56 | 0.	Run `rfcn_build.m`.
 57 | 0.	Run `startup.m`.
 58 | 0.	Run `experiments/script_rfcn_demo.m` to apply the R-FCN model on demo images.
 59 | 
 60 | ### Preparation for Training & Testing
 61 | 0.	Run `fetch_data/fetch_caffe_mex_windows_vs2013_cuda75.m` to download a compiled Caffe mex (for Windows only).
 62 | 0.	Run `fetch_data/fetch_model_ResNet50.m` to download an ImageNet-pre-trained ResNet-50 net.
 63 | 0.	Run `fetch_data/fetch_model_ResNet101.m` to download an ImageNet-pre-trained ResNet-101 net.
 64 | 0.	Run `fetch_data/fetch_region_proposals.m` to download the pre-computed region proposals.
 65 | 0.	Download VOC 2007 and 2012 data to ./datasets.
 66 | 0.	Run `rfcn_build.m`.
 67 | 0.	Run `startup.m`.
 68 | 
 69 | 
 70 | ### Training & Testing
 71 | 0. Run `experiments/script_rfcn_VOC0712_ResNet50_OHEM_ss.m` to train a model using ResNet-50 net with online hard example mining (OHEM), leveraging selective search proposals. The accuracy should be ~75.4% in mAP.
 72 |     - **Note**: the training time is ~13 hours on Titan X.
 73 | 0. Run `experiments/script_rfcn_VOC0712_ResNet50_OHEM_rpn.m` to train a model using ResNet-50 net with OHEM, leveraging RPN proposals (using ResNet-50 net). The accuracy should be ~77.4% in mAP.
 74 |     - **Note**: the training time is ~13 hours on Titan X.
 75 | 0. Run `experiments/script_rfcn_VOC0712_ResNet101_OHEM_rpn.m` to train a model using ResNet-101 net with OHEM, leveraging RPN proposals (using ResNet-101 net). The accuracy should be ~79.5% in mAP.
 76 |     - **Note**: the training time is ~19 hours on Titan X.
 77 | 0. Check other scripts in `./experiments` for more settings.
 78 | 
 79 | **Note:** 
 80 | - In all the experiments, training is performed on VOC 07+12 trainval, and testing is performed on VOC 07 test.
 81 | - Results are subject to some random variation. We ran `experiments/script_rfcn_VOC0712_ResNet50_OHEM_rpn.m` 5 times; the results were 77.1%, 77.3%, 77.7%, 77.9%, and 77.0%. The mean is 77.4%, and the std is 0.39%.
 82 | - The reported running time is not taken from the test log (which is slower); it was measured with an optimized implementation.
 83 | 
 84 | ### Resources
 85 | 
 86 | 0. Experiment logs: [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhc44qdRNJTsXLIU-2w), [BaiduYun](http://pan.baidu.com/s/1mhFYejI)
 87 | 
 88 | If the automatic "fetch_data" fails, you may manually download resources from:
 89 | 
 90 | 0. Pre-compiled caffe mex (Windows):
 91 |     - [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhc456RlstMF-4wHr1g), [BaiduYun](http://pan.baidu.com/s/1i4OlG7z)
 92 | 0. Demo R-FCN model:
 93 |     - [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhc486Tyvkf3koU7R7w), [BaiduYun](http://pan.baidu.com/s/1o77gFXo)
 94 | 0. ImageNet-pretrained networks:
 95 |     - ResNet-50 net [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhc46RPYjtbdbNwPJ_w), [BaiduYun](http://pan.baidu.com/s/1kVm4ly3)
 96 |     - ResNet-101 net [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhc47z4S7O5Ql6W_0-g), [BaiduYun](http://pan.baidu.com/s/1nvgu1pJ)
 97 | 0. Pre-computed region proposals:
 98 |     - [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhc49StWpgPo2GPEB_A), [BaiduYun](http://pan.baidu.com/s/1hrAJ5re)
 99 | 
100 | 
101 | 


--------------------------------------------------------------------------------
/functions/rfcn/rfcn_prepare_image_roidb.m:
--------------------------------------------------------------------------------
  1 | function [image_roidb, bbox_means, bbox_stds] = rfcn_prepare_image_roidb(conf, imdbs, roidbs, bbox_means, bbox_stds)
  2 | % [image_roidb, bbox_means, bbox_stds] = rfcn_prepare_image_roidb(conf, imdbs, roidbs, bbox_means, bbox_stds)
  3 | % --------------------------------------------------------
  4 | % R-FCN implementation
  5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
  6 | % Copyright (c) 2016, Jifeng Dai
  7 | % Licensed under The MIT License [see LICENSE for details]
  8 | % --------------------------------------------------------
  9 | % Merges one or more imdb/roidb pairs into a single per-image struct array and appends bbox regression targets.
 10 |     if ~exist('bbox_means', 'var'), bbox_means = []; end
 11 |     % bbox_means and bbox_stds are optional; default each one independently so that
 12 |     % a call that passes bbox_means but omits bbox_stds does not hit an undefined variable
 13 |     if ~exist('bbox_stds', 'var'), bbox_stds = []; end
 14 |     
 15 |     if ~iscell(imdbs)
 16 |         imdbs = {imdbs};
 17 |         roidbs = {roidbs};
 18 |     end
 19 | 
 20 |     imdbs = imdbs(:);
 21 |     roidbs = roidbs(:);
 22 |     % one struct per image; 'image' and 'bbox_targets' start empty (bbox_targets is filled in below)
 23 |     image_roidb = cellfun(@(x, y) ... % x: one imdb, y: the roidb paired with it
 24 |                     arrayfun(@(z) ... % z: image index within x
 25 |                         struct('image_path', x.image_at(z), 'image_id', x.image_ids{z}, 'im_size', x.sizes(z, :), 'imdb_name', x.name, ...
 26 |                         'overlap', y.rois(z).overlap, 'boxes', y.rois(z).boxes, 'class', y.rois(z).class, 'image', [], 'bbox_targets', []), ...
 27 |                         [1:length(x.image_ids)]', 'UniformOutput', true), imdbs, roidbs, 'UniformOutput', false);
 28 |     
 29 |     image_roidb = cat(1, image_roidb{:});
 30 |     
 31 |     % enhance roidb to contain bounding-box regression targets
 32 |     [image_roidb, bbox_means, bbox_stds] = append_bbox_regression_targets(conf, image_roidb, bbox_means, bbox_stds);
 33 | end
 34 | 
 35 | function [image_roidb, means, stds] = append_bbox_regression_targets(conf, image_roidb, means, stds)
 36 |     % Computes per-image bbox targets, drops images without valid samples, and normalizes the targets.
 37 |     % means and stds -- (k+1) * 4, include background class; they are computed here when passed in empty
 38 |     num_images = length(image_roidb);
 39 |     % Infer number of classes from the number of columns in gt_overlaps
 40 |     if conf.bbox_class_agnostic
 41 |         num_classes = 1;
 42 |     else
 43 |         num_classes = size(image_roidb(1).overlap, 2);
 44 |     end
 45 |     
 46 |     valid_imgs = true(num_images, 1);
 47 |     for i = 1:num_images
 48 |        rois = image_roidb(i).boxes; 
 49 |        [image_roidb(i).bbox_targets, valid_imgs(i)] = ...
 50 |            compute_targets(conf, rois, image_roidb(i).overlap);
 51 |     end
 52 |     if ~all(valid_imgs)
 53 |         image_roidb = image_roidb(valid_imgs);
 54 |         num_images = length(image_roidb);
 55 |         fprintf('Warning: rfcn_prepare_image_roidb: filter out %d images, which contains zero valid samples\n', sum(~valid_imgs));
 56 |     end
 57 |         
 58 |     if ~(exist('means', 'var') && ~isempty(means) && exist('stds', 'var') && ~isempty(stds))
 59 |         % Compute values needed for means and stds
 60 |         % var(x) = E(x^2) - E(x)^2
 61 |         class_counts = zeros(num_classes, 1) + eps; % eps avoids 0/0 for classes without samples
 62 |         sums = zeros(num_classes, 4);
 63 |         squared_sums = zeros(num_classes, 4);
 64 |         for i = 1:num_images
 65 |            targets = image_roidb(i).bbox_targets;
 66 |            for cls = 1:num_classes
 67 |               cls_inds = find(targets(:, 1) == cls);
 68 |               if ~isempty(cls_inds)
 69 |                  class_counts(cls) = class_counts(cls) + length(cls_inds); 
 70 |                  sums(cls, :) = sums(cls, :) + sum(targets(cls_inds, 2:end), 1);
 71 |                  squared_sums(cls, :) = squared_sums(cls, :) + sum(targets(cls_inds, 2:end).^2, 1);
 72 |               end
 73 |            end
 74 |         end
 75 |         % rounding can make E(x^2) - E(x)^2 slightly negative; clamp at 0 so the root stays real
 76 |         means = bsxfun(@rdivide, sums, class_counts);
 77 |         stds = max(bsxfun(@minus, bsxfun(@rdivide, squared_sums, class_counts), means.^2), 0).^0.5;
 78 |         
 79 |         % add background class
 80 |         means = [0, 0, 0, 0; means]; 
 81 |         stds = [0, 0, 0, 0; stds];
 82 |     end
 83 |     
 84 |     % Normalize targets (row cls+1 of means/stds belongs to class cls; a zero std would yield Inf/NaN here)
 85 |     for i = 1:num_images
 86 |         targets = image_roidb(i).bbox_targets;
 87 |         for cls = 1:num_classes
 88 |             cls_inds = find(targets(:, 1) == cls);
 89 |             if ~isempty(cls_inds)
 90 |                 image_roidb(i).bbox_targets(cls_inds, 2:end) = ...
 91 |                     bsxfun(@minus, image_roidb(i).bbox_targets(cls_inds, 2:end), means(cls+1, :));
 92 |                 image_roidb(i).bbox_targets(cls_inds, 2:end) = ...
 93 |                     bsxfun(@rdivide, image_roidb(i).bbox_targets(cls_inds, 2:end), stds(cls+1, :));
 94 |             end
 95 |         end
 96 |     end
 97 | end
 98 | 
 99 | 
100 | function [bbox_targets, is_valid] = compute_targets(conf, rois, overlap)
101 | % Returns an N x 5 single matrix [label, 4 regression values] for the rois of
102 | % one image, plus a flag telling whether the image holds at least one
103 | % foreground or background sample.
104 | 
105 |     [best_iou, best_cls] = max(full(overlap), [], 2);
106 | 
107 |     % keep the boxes in single precision
108 |     rois = single(rois);
109 |     
110 |     bbox_targets = zeros(size(rois, 1), 5, 'single');
111 |     
112 |     % rows whose best overlap is exactly 1 are the ground-truth boxes
113 |     gt_rows = find(best_iou == 1);
114 |     
115 |     if ~isempty(gt_rows)
116 |         % rows overlapping a gt box enough to receive a regression target
117 |         ex_rows = find(best_iou >= conf.bbox_thresh);
118 | 
119 |         % IoU of every example row against every gt row
120 |         iou_ex_gt = boxoverlap(rois(ex_rows, :), rois(gt_rows, :));
121 | 
122 |         % each example is matched to the gt box it overlaps most; that
123 |         % maximum must agree with the overlap stored in the roidb
124 |         [row_best, matched_gt] = max(iou_ex_gt, [], 2);
125 |         assert(all(abs(row_best - best_iou(ex_rows)) < 10^-4));
126 | 
127 |         matched_boxes = rois(gt_rows(matched_gt), :);
128 |         example_boxes = rois(ex_rows, :);
129 | 
130 |         deltas = rfcn_bbox_transform(example_boxes, matched_boxes);
131 | 
132 |         % class-agnostic mode stores 0/1 in the first column instead of the class index
133 |         first_col = best_cls(ex_rows);
134 |         if conf.bbox_class_agnostic
135 |             first_col = first_col > 0;
136 |         end
137 |         bbox_targets(ex_rows, :) = [first_col, deltas];
138 |     end
139 |     
140 |     % foreground: overlap >= fg_thresh
141 |     fg_mask = best_iou >= conf.fg_thresh;
142 |     % background: overlap within [bg_thresh_lo, bg_thresh_hi)
143 |     bg_mask = best_iou >= conf.bg_thresh_lo & best_iou < conf.bg_thresh_hi;
144 |     
145 |     % an image without a single fg or bg sample is reported as invalid so
146 |     % that the caller can filter it out
147 |     is_valid = any(fg_mask | bg_mask);
148 | end
149 | 


--------------------------------------------------------------------------------
/functions/rfcn/rfcn_get_minibatch.m:
--------------------------------------------------------------------------------
  1 | function net_inputs = rfcn_get_minibatch(conf, image_roidb)
  2 | % net_inputs = rfcn_get_minibatch(conf, image_roidb)
  3 | % --------------------------------------------------------
  4 | % R-FCN implementation
  5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
  6 | % Copyright (c) 2016, Jifeng Dai
  7 | % Licensed under The MIT License [see LICENSE for details]
  8 | % --------------------------------------------------------
  9 | % Returns {im_blob, rois_blob, labels_blob, bbox_targets_blob, bbox_loss_blob} for one minibatch.
 10 |     num_images = length(image_roidb);
 11 |     % class-agnostic regression uses a single class; otherwise there is one
 12 |     % class per column of the overlap matrix
 13 |     num_classes = 1;
 14 |     if ~conf.bbox_class_agnostic
 15 |         num_classes = size(image_roidb(1).overlap, 2);
 16 |     end
 17 |     % draw one random scale index per image of the batch
 18 |     random_scale_inds = randi(length(conf.scales), num_images, 1);
 19 |     
 20 |     % a non-positive batch_size leaves the number of sampled rois unbounded
 21 |     rois_per_image = inf;
 22 |     fg_rois_per_image = inf;
 23 |     if conf.batch_size > 0
 24 |         assert(mod(conf.batch_size, num_images) == 0, ...
 25 |             sprintf('num_images %d must divide BATCH_SIZE %d', num_images, conf.batch_size));
 26 | 
 27 |         rois_per_image = conf.batch_size / num_images;
 28 |         fg_rois_per_image = round(rois_per_image * conf.fg_fraction);
 29 |     end
 30 |     
 31 |     % image blob plus the scale factor reported for each image
 32 |     [im_blob, im_scales] = get_image_blob(conf, image_roidb, random_scale_inds);
 33 |     
 34 |     % empty single-precision accumulators for rois, labels, targets and loss weights
 35 |     rois_blob = zeros(0, 5, 'single');
 36 |     labels_blob = zeros(0, 1, 'single');
 37 |     bbox_targets_blob = zeros(0, 4 * (num_classes+1), 'single');
 38 |     bbox_loss_blob = zeros(size(bbox_targets_blob), 'single');
 39 |     
 40 |     for img = 1:num_images
 41 |         [img_labels, ~, img_rois, img_targets, img_loss] = ...
 42 |             sample_rois(conf, image_roidb(img), fg_rois_per_image, rois_per_image);
 43 |         
 44 |         % rois: convert with rfcn_map_im_rois_to_feat_rois and prepend the image index
 45 |         feat_rois = rfcn_map_im_rois_to_feat_rois(conf, img_rois, im_scales(img));
 46 |         rois_blob = [rois_blob; img * ones(size(feat_rois, 1), 1), feat_rois];
 47 |         
 48 |         % labels, bbox targets and bbox loss weights
 49 |         labels_blob = [labels_blob; img_labels];
 50 |         bbox_targets_blob = [bbox_targets_blob; img_targets];
 51 |         bbox_loss_blob = [bbox_loss_blob; img_loss];
 52 |     end
 53 |     
 54 |     % reorder everything for caffe's c++ memory layout, [num, channels, height, width];
 55 |     % the image additionally gets its channel order reversed and rows/columns swapped
 56 |     to_caffe = @(m) single(permute(m, [3, 4, 2, 1]));
 57 |     im_blob = single(permute(im_blob(:, :, [3, 2, 1], :), [2, 1, 3, 4]));
 58 |     rois_blob = to_caffe(rois_blob - 1); % to c's index (start from 0)
 59 |     labels_blob = to_caffe(labels_blob);
 60 |     bbox_targets_blob = to_caffe(bbox_targets_blob);
 61 |     bbox_loss_blob = to_caffe(bbox_loss_blob);
 62 |     
 63 |     % none of the blobs may be empty
 64 |     assert(~isempty(im_blob));
 65 |     assert(~isempty(rois_blob));
 66 |     assert(~isempty(labels_blob));
 67 |     assert(~isempty(bbox_targets_blob));
 68 |     assert(~isempty(bbox_loss_blob));
 69 |     
 70 |     % the cell order below is the order in which the blobs are returned to the caller
 71 |     net_inputs = {im_blob, rois_blob, labels_blob, bbox_targets_blob, bbox_loss_blob};
 72 | end
 73 | 
 74 | %% Read every roidb image, run it through prep_im_for_blob at its sampled scale, and stack the results into one blob.
 75 | function [im_blob, im_scales] = get_image_blob(conf, images, random_scale_inds)
 76 |     % im_scales(k) is the scale factor prep_im_for_blob reports for image k
 77 |     num_images = length(images);
 78 |     im_scales = nan(num_images, 1);
 79 |     processed_ims = cell(num_images, 1);
 80 |     for k = 1:num_images
 81 |         raw = imread(images(k).image_path);
 82 |         scale_target = conf.scales(random_scale_inds(k));
 83 |         
 84 |         % store the processed image and its scale factor straight into the outputs
 85 |         [processed_ims{k}, im_scales(k)] = ...
 86 |             prep_im_for_blob(raw, conf.image_means, scale_target, conf.max_size);
 87 |     end
 88 |     
 89 |     % im_list_to_blob combines the per-image results into the returned blob
 90 |     im_blob = im_list_to_blob(processed_ims);
 91 | end
 92 | 
 93 | %% Draw a random set of foreground and background ROIs for one image.
 94 | function [labels, overlaps, rois, bbox_targets, bbox_loss_weights] = sample_rois(conf, image_roidb, fg_rois_per_image, rois_per_image)
 95 | % Outputs are restricted to the sampled rows, foreground rows first; the
 96 | % labels of the background rows are set to 0.
 97 | 
 98 |     % best overlap per roi and the class (column) achieving it
 99 |     [overlaps, labels] = max(image_roidb(1).overlap, [], 2);
100 |     rois = image_roidb(1).boxes;
101 |     
102 |     % --- foreground: overlap >= fg_thresh ---
103 |     fg_pool = find(overlaps >= conf.fg_thresh);
104 |     % never ask for more foreground ROIs than the image provides
105 |     num_fg = min(fg_rois_per_image, length(fg_pool));
106 |     % sample without replacement
107 |     if ~isempty(fg_pool)
108 |        fg_pool = fg_pool(randperm(length(fg_pool), num_fg));
109 |     end
110 |     
111 |     % --- background: overlap within [bg_thresh_lo, bg_thresh_hi) ---
112 |     bg_pool = find(overlaps >= conf.bg_thresh_lo & overlaps < conf.bg_thresh_hi);
113 |     % fill the rest of the quota with background, capped by what is available
114 |     num_bg = min(rois_per_image - num_fg, length(bg_pool));
115 |     % sample without replacement
116 |     if ~isempty(bg_pool)
117 |        bg_pool = bg_pool(randperm(length(bg_pool), num_bg));
118 |     end
119 |     
120 |     % selected rows: foreground first, then background
121 |     keep_inds = [fg_pool; bg_pool];
122 |     rois = rois(keep_inds, :);
123 |     overlaps = overlaps(keep_inds);
124 |     labels = labels(keep_inds);
125 |     % everything after the foreground entries is background -> label 0
126 |     labels((num_fg+1):end) = 0;
127 |     
128 |     % sanity check against the class column stored with the regression targets
129 |     stored_cls = image_roidb.bbox_targets(keep_inds, 1);
130 |     if conf.bbox_class_agnostic
131 |         assert(all((labels>0) == stored_cls));
132 |     else
133 |         assert(all(labels == stored_cls));
134 |     end
135 |     
136 |     % number of classes = number of columns of the overlap matrix
137 |     num_classes = size(image_roidb(1).overlap, 2);
138 |     
139 |     [bbox_targets, bbox_loss_weights] = get_bbox_regression_labels(conf, ...
140 |         image_roidb.bbox_targets(keep_inds, :), num_classes);
141 |     
142 | end
143 | 
144 | function [bbox_targets, bbox_loss_weights] = get_bbox_regression_labels(conf, bbox_target_data, num_classes)
145 | %% Expand compact regression targets into the per-class layout used by the network.
146 |  % bbox_target_data is N x 5: column 1 is the class (0 = no target), columns 2:5
147 |  % are the regression values. Row i of bbox_targets holds those 4 values in
148 |  % columns 4*cls+1 .. 4*cls+4 and zeros elsewhere; bbox_loss_weights is 1 in
149 |  % exactly those columns.
150 | % Both outputs are N x 4*(num_classes+1) single matrices.
151 |     if conf.bbox_class_agnostic
152 |         num_classes = 1; % class-agnostic: the layout has room for class 0 and class 1 only
153 |     end
154 |     
155 |     cls_col = bbox_target_data(:, 1);
156 |     out_size = [length(cls_col), 4 * (num_classes+1)];
157 |     bbox_targets = zeros(out_size, 'single');
158 |     bbox_loss_weights = zeros(out_size, 'single');
159 |     
160 |     % visit only the rows that carry a regression target
161 |     for row = reshape(find(cls_col > 0), 1, [])
162 |        cols = 4 * cls_col(row) + (1:4);
163 |        bbox_targets(row, cols) = bbox_target_data(row, 2:end);
164 |        bbox_loss_weights(row, cols) = 1;
165 |     end
166 | end
167 | 
168 | 
169 | 
170 | 


--------------------------------------------------------------------------------
/functions/rfcn/rfcn_test.m:
--------------------------------------------------------------------------------
  1 | function mAP = rfcn_test(conf, imdb, roidb, varargin)
  2 | % mAP = rfcn_test(conf, imdb, roidb, varargin)
  3 | % --------------------------------------------------------
  4 | % R-FCN implementation
  5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
  6 | % Copyright (c) 2016, Jifeng Dai
  7 | % Licensed under The MIT License [see LICENSE for details]
  8 | % --------------------------------------------------------
  9 | % Detects on every imdb image (or loads cached per-class boxes), evaluates them, and returns the mean AP in percent (NaN if the evaluation returns nothing).
 10 | %% inputs (name/value options: net_def_file, net_file, cache_name, suffix, ignore_cache)
 11 |     ip = inputParser;
 12 |     ip.addRequired('conf',                              @isstruct);
 13 |     ip.addRequired('imdb',                              @isstruct);
 14 |     ip.addRequired('roidb',                             @isstruct);
 15 |     ip.addParamValue('net_def_file',    '',             @ischar);
 16 |     ip.addParamValue('net_file',        '',             @ischar);
 17 |     ip.addParamValue('cache_name',      '',             @ischar);
 18 |     ip.addParamValue('suffix',          '',             @ischar);
 19 |     ip.addParamValue('ignore_cache',    false,          @islogical);
 20 |     
 21 |     ip.parse(conf, imdb, roidb, varargin{:});
 22 |     opts = ip.Results;
 23 |     
 24 | 
 25 | %%  set cache dir
 26 |     cache_dir = fullfile(pwd, 'output', 'rfcn_cachedir', opts.cache_name, imdb.name);
 27 |     mkdir_if_missing(cache_dir);
 28 | 
 29 | %%  init log
 30 |     timestamp = datestr(datevec(now()), 'yyyymmdd_HHMMSS');
 31 |     mkdir_if_missing(fullfile(cache_dir, 'log'));
 32 |     log_file = fullfile(cache_dir, 'log', ['test_', timestamp, '.txt']);
 33 |     diary(log_file);
 34 |     diary_guard = onCleanup(@() diary('off')); % stop logging even if an error escapes this function
 35 |     num_images = length(imdb.image_ids);
 36 |     num_classes = imdb.num_classes;
 37 |     
 38 |     try
 39 |       aboxes = cell(num_classes, 1);
 40 |       if opts.ignore_cache
 41 |           error('rfcn_test:ignoreCache', 'ignore_cache is set, skipping cached detections');
 42 |       end
 43 |       for i = 1:num_classes
 44 |         load(fullfile(cache_dir, [imdb.classes{i} '_boxes_' imdb.name opts.suffix]));
 45 |         aboxes{i} = boxes;
 46 |       end
 47 |     catch    
 48 | %%      testing (reached when the cache is ignored or cannot be loaded)
 49 |         % init caffe net
 50 |         caffe_log_file_base = fullfile(cache_dir, 'caffe_log');
 51 |         caffe.init_log(caffe_log_file_base);
 52 |         caffe_net = caffe.Net(opts.net_def_file, 'test');
 53 |         caffe_net.copy_from(opts.net_file);
 54 | 
 55 |         % set random seed
 56 |         prev_rng = seed_rand(conf.rng_seed);
 57 |         caffe.set_random_seed(conf.rng_seed);
 58 | 
 59 |         % set gpu/cpu
 60 |         if conf.use_gpu
 61 |             caffe.set_mode_gpu();
 62 |         else
 63 |             caffe.set_mode_cpu();
 64 |         end             
 65 | 
 66 |         % determine the maximum number of rois in testing 
 67 |         max_rois_num_in_gpu = 10000;
 68 | 
 69 |         disp('opts:');
 70 |         disp(opts);
 71 |         disp('conf:');
 72 |         disp(conf);
 73 |         
 74 |         %heuristic: keep an average of 160 detections per class per images prior to NMS
 75 |         max_per_set = 160 * num_images;
 76 |         % heuristic: keep at most 400 detection per class per image prior to NMS
 77 |         max_per_image = 400;
 78 |         % detection threshold for each class (this is adaptively set based on the max_per_set constraint)
 79 |         thresh = -inf * ones(num_classes, 1);
 80 |         % the max_per_set constraint is enforced by keep_top_k, which raises
 81 |         % thresh(j) and discards lower-scoring detections
 82 |         % all detections are collected into:
 83 |         %    all_boxes[cls][image] = N x 5 array of detections in
 84 |         %    (x1, y1, x2, y2, score)
 85 |         aboxes = cell(num_classes, 1);
 86 |         box_inds = cell(num_classes, 1);
 87 |         for i = 1:num_classes
 88 |             aboxes{i} = cell(length(imdb.image_ids), 1);
 89 |             box_inds{i} = cell(length(imdb.image_ids), 1);
 90 |         end
 91 | 
 92 |         count = 0;
 93 |         t_start = tic;
 94 |         for i = 1:num_images
 95 |             count = count + 1;
 96 |             fprintf('%s: test (%s) %d/%d ', procid(), imdb.name, count, num_images);
 97 |             th = tic;
 98 |             d = roidb.rois(i);
 99 |             im = imread(imdb.image_at(i));
100 |             
101 |             [boxes, scores] = rfcn_im_detect(conf, caffe_net, im, d.boxes(~d.gt, :), max_rois_num_in_gpu);
102 | 
103 |             for j = 1:num_classes
104 |                 inds = find(scores(:, j) > thresh(j));
105 |                 if ~isempty(inds)
106 |                     [~, ord] = sort(scores(inds, j), 'descend');
107 |                     ord = ord(1:min(length(ord), max_per_image));
108 |                     inds = inds(ord);
109 |                     cls_boxes = boxes(inds, (1+(j-1)*4):((j)*4));
110 |                     cls_scores = scores(inds, j);
111 |                     aboxes{j}{i} = [aboxes{j}{i}; cat(2, single(cls_boxes), single(cls_scores))];
112 |                     box_inds{j}{i} = [box_inds{j}{i}; inds];
113 |                 else
114 |                     aboxes{j}{i} = [aboxes{j}{i}; zeros(0, 5, 'single')];
115 |                     % box_inds{j}{i} stays as it is: nothing passed the threshold
116 |                 end
117 |             end
118 |             
119 |             fprintf(' time %.3fs\n', toc(th)); 
120 | 
121 |             if mod(count, 1000) == 0
122 |                 for j = 1:num_classes
123 |                 [aboxes{j}, box_inds{j}, thresh(j)] = ...
124 |                     keep_top_k(aboxes{j}, box_inds{j}, i, max_per_set, thresh(j));
125 |                 end
126 |                 disp(thresh);
127 |             end    
128 |         end
129 | 
130 |         for j = 1:num_classes
131 |             [aboxes{j}, box_inds{j}, thresh(j)] = ...
132 |                 keep_top_k(aboxes{j}, box_inds{j}, num_images, max_per_set, thresh(j));
133 |         end
134 |         disp(thresh);
135 | 
136 |         for i = 1:num_classes
137 |             % thresh(i) comes from the final keep_top_k pass above; the former
138 |             % top_scores list was never filled, so its threshold update could
139 |             % not fire and has been removed
140 | 
141 | 
142 |             % go back through and prune out detections below the found threshold
143 |             for j = 1:length(imdb.image_ids)
144 |                 if ~isempty(aboxes{i}{j})
145 |                     I = find(aboxes{i}{j}(:,end) < thresh(i));
146 |                     aboxes{i}{j}(I,:) = [];
147 |                     box_inds{i}{j}(I,:) = [];
148 |                 end
149 |             end
150 | 
151 |             save_file = fullfile(cache_dir, [imdb.classes{i} '_boxes_' imdb.name opts.suffix]);
152 |             boxes = aboxes{i};
153 |             inds = box_inds{i};
154 |             save(save_file, 'boxes', 'inds');
155 |             clear boxes inds;
156 |         end
157 |         fprintf('test all images in %f seconds.\n', toc(t_start));
158 |         
159 |         caffe.reset_all(); 
160 |         rng(prev_rng);
161 |     end
162 | 
163 |     % ------------------------------------------------------------------------
164 |     % Perform AP evaluation
165 |     % ------------------------------------------------------------------------
166 | 
167 |     if isequal(imdb.eval_func, @imdb_eval_voc)
168 |         new_parpool();
169 |         parfor model_ind = 1:num_classes
170 |           cls = imdb.classes{model_ind};
171 |           res(model_ind) = imdb.eval_func(cls, aboxes{model_ind}, imdb, opts.cache_name, opts.suffix);
172 |         end
173 |     else
174 |     % ilsvrc
175 |         res = imdb.eval_func(aboxes, imdb, opts.cache_name, opts.suffix);
176 |     end
177 | 
178 |     if ~isempty(res)
179 |         fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
180 |         fprintf('Results:\n');
181 |         aps = [res(:).ap]' * 100;
182 |         disp(aps);
183 |         disp(mean(aps));
184 |         fprintf('~~~~~~~~~~~~~~~~~~~~\n');
185 |         mAP = mean(aps);
186 |     else
187 |         mAP = nan;
188 |     end
189 |     
190 |     diary off;
191 | end
192 | 
193 | 
194 | % ------------------------------------------------------------------------
195 | function [boxes, box_inds, thresh] = keep_top_k(boxes, box_inds, end_at, top_k, thresh)
196 | % ------------------------------------------------------------------------
197 |     % Set thresh to the top_k-th best score over images 1..end_at and drop every lower-scoring box.
198 |     pooled = cat(1, boxes{1:end_at});
199 |     if ~isempty(pooled)
200 |         ranked = sort(pooled(:, end), 'descend');
201 |         thresh = ranked(min(numel(ranked), top_k));
202 |         for k = 1:end_at
203 |             dets = boxes{k};
204 |             if isempty(dets)
205 |                 continue;
206 |             end
207 |             survivors = find(dets(:, end) >= thresh);
208 |             boxes{k} = dets(survivors, :);
209 |             box_inds{k} = box_inds{k}(survivors);
210 |         end
211 |     end
212 | end


--------------------------------------------------------------------------------
/imdb/roidb_from_voc.m:
--------------------------------------------------------------------------------
  1 | function roidb = roidb_from_voc(imdb, varargin)
  2 | % roidb = roidb_from_voc(imdb, 'Name', value, ...)
  3 | %   Builds a regions-of-interest database from an imdb image
  4 | %   database, using precomputed proposal files (selective search, edge box
  5 | %   and/or self proposals) located under rootDir. The result is cached on disk.
  6 | %   Options: exclude_difficult_samples, with_selective_search, with_edge_box, with_self_proposal, rootDir, extension.
  7 | %   Inspired by Andrea Vedaldi's MKL imdb and roidb code.
  8 | 
  9 | % AUTORIGHTS
 10 | % ---------------------------------------------------------
 11 | % Copyright (c) 2014, Ross Girshick
 12 | % 
 13 | % This file is part of the R-CNN code and is available 
 14 | % under the terms of the Simplified BSD License provided in 
 15 | % LICENSE. Please retain this notice and LICENSE if you use 
 16 | % this file (or any portion of it) in your project.
 17 | % ---------------------------------------------------------
 18 | 
 19 | ip = inputParser;
 20 | ip.addRequired('imdb', @isstruct);
 21 | ip.addParamValue('exclude_difficult_samples',       true,   @islogical);
 22 | ip.addParamValue('with_selective_search',           false,  @islogical);
 23 | ip.addParamValue('with_edge_box',                   false,  @islogical);
 24 | ip.addParamValue('with_self_proposal',              false,  @islogical);
 25 | ip.addParamValue('rootDir',                         '.',    @ischar);
 26 | ip.addParamValue('extension',                       '',     @ischar);
 27 | ip.parse(imdb, varargin{:});
 28 | opts = ip.Results;
 29 | 
 30 | roidb.name = imdb.name;
 31 | if ~isempty(opts.extension)
 32 |     opts.extension = ['_', opts.extension];
 33 | end
 34 | regions_file_ss = fullfile(opts.rootDir, sprintf('/data/selective_search_data/%s%s.mat', roidb.name, opts.extension));
 35 | regions_file_eb = fullfile(opts.rootDir, sprintf('/data/edge_box_data/%s%s.mat', roidb.name, opts.extension));
 36 | regions_file_sp = fullfile(opts.rootDir, sprintf('/data/self_proposal_data/%s%s.mat', roidb.name, opts.extension));
 37 | % each enabled proposal source adds a tag to the cache file name and must have its file on disk
 38 | cache_file_ss = [];
 39 | cache_file_eb = [];
 40 | cache_file_sp = [];
 41 | if opts.with_selective_search 
 42 |     cache_file_ss = 'ss_';
 43 |     if ~exist(regions_file_ss, 'file')
 44 |         error('roidb_from_voc:: cannot find %s', regions_file_ss);
 45 |     end
 46 | end
 47 | 
 48 | if opts.with_edge_box 
 49 |     cache_file_eb = 'eb_';
 50 |     if ~exist(regions_file_eb, 'file')
 51 |         error('roidb_from_voc:: cannot find %s', regions_file_eb);
 52 |     end
 53 | end
 54 | 
 55 | if opts.with_self_proposal 
 56 |     cache_file_sp = 'sp_';
 57 |     if ~exist(regions_file_sp, 'file')
 58 |         error('roidb_from_voc:: cannot find %s', regions_file_sp);
 59 |     end
 60 | end
 61 | 
 62 | cache_file = fullfile(opts.rootDir, ['/imdb/cache/roidb_' cache_file_ss cache_file_eb cache_file_sp imdb.name opts.extension]);
 63 | if imdb.flip
 64 |     cache_file = [cache_file '_flip'];
 65 | end
 66 | if opts.exclude_difficult_samples
 67 |     cache_file = [cache_file '_easy'];
 68 | end
 69 | cache_file = [cache_file, '.mat'];
 70 | try
 71 |   load(cache_file);
 72 | catch
 73 |   VOCopts = imdb.details.VOCopts;
 74 | 
 75 |   addpath(fullfile(VOCopts.datadir, 'VOCcode')); 
 76 | 
 77 |   roidb.name = imdb.name;
 78 | 
 79 |   fprintf('Loading region proposals...');
 80 |   regions = [];
 81 |   if opts.with_selective_search
 82 |         regions = load_proposals(regions_file_ss, regions);
 83 |   end
 84 |   if opts.with_edge_box
 85 |         regions = load_proposals(regions_file_eb, regions);
 86 |   end
 87 |   if opts.with_self_proposal
 88 |         regions = load_proposals(regions_file_sp, regions);
 89 |   end
 90 |   fprintf('done\n');
 91 |   if isempty(regions)
 92 |       fprintf('Warning: no windows proposal is loaded !\n');
 93 |       regions.boxes = cell(length(imdb.image_ids), 1);
 94 |       if imdb.flip
 95 |             regions.images = imdb.image_ids(1:2:end);
 96 |       else
 97 |             regions.images = imdb.image_ids;
 98 |       end
 99 |   end
100 | 
101 |   if ~imdb.flip
102 |       for i = 1:length(imdb.image_ids)
103 |         tic_toc_print('roidb (%s): %d/%d\n', roidb.name, i, length(imdb.image_ids));
104 |         try
105 |           voc_rec = PASreadrecord(sprintf(VOCopts.annopath, imdb.image_ids{i}));
106 |         catch
107 |           voc_rec = [];
108 |         end
109 | 
110 |         [~, image_name1] = fileparts(imdb.image_ids{i});
111 |         [~, image_name2] = fileparts(regions.images{i});
112 |         assert(strcmp(image_name1, image_name2));
113 |             
114 |         roidb.rois(i) = attach_proposals(voc_rec, regions.boxes{i}, imdb.class_to_id, opts.exclude_difficult_samples, false, false);
115 |       end
116 |   else
117 |       for i = 1:length(imdb.image_ids)/2
118 |         tic_toc_print('roidb (%s): %d/%d\n', roidb.name, i, length(imdb.image_ids)/2);
119 |         try
120 |           voc_rec = PASreadrecord(sprintf(VOCopts.annopath, imdb.image_ids{i*2-1}));
121 |         catch
122 |           voc_rec = [];
123 |         end
124 |         % entries 2i-1 and 2i are an image and its flipped copy; proposals come either one set per pair or one set per entry
125 |         if length(regions.images) == length(imdb.image_ids) / 2
126 |             [~, image_name1] = fileparts(imdb.image_ids{i*2-1});
127 |             [~, image_name2] = fileparts(regions.images{i});
128 |             assert(strcmp(image_name1, image_name2));
129 |             assert(imdb.flip_from(i*2) == i*2-1);
130 |             
131 |             roidb.rois(i*2-1) = attach_proposals(voc_rec, regions.boxes{i}, imdb.class_to_id, opts.exclude_difficult_samples, false, false);
132 |             roidb.rois(i*2) = attach_proposals(voc_rec, regions.boxes{i}, imdb.class_to_id, opts.exclude_difficult_samples, true, true);
133 |         elseif length(regions.images) == length(imdb.image_ids)
134 |             [~, image_name1] = fileparts(imdb.image_ids{i*2-1});
135 |             [~, image_name2] = fileparts(regions.images{i*2-1});
136 |             assert(strcmp(image_name1, image_name2));
137 | 
138 |             [~, image_name1] = fileparts(imdb.image_ids{i*2});
139 |             [~, image_name2] = fileparts(regions.images{i*2});
140 |             assert(strcmp(image_name1, image_name2));
141 | 
142 |             assert(imdb.flip_from(i*2) == i*2-1);
143 | 
144 |             roidb.rois(i*2-1) = attach_proposals(voc_rec, regions.boxes{i*2-1}, imdb.class_to_id, opts.exclude_difficult_samples, false, false);
145 |             roidb.rois(i*2) = attach_proposals(voc_rec, regions.boxes{i*2}, imdb.class_to_id, opts.exclude_difficult_samples, false, true);
146 |         else
147 |             error('The number of images in region proposals and imdb do not match.');
148 |         end
149 |       end
150 |   end
151 | 
152 |   rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 
153 | 
154 |   fprintf('Saving roidb to cache...');
155 |   save(cache_file, 'roidb', '-v7.3');
156 |   fprintf('done\n');
157 | end
158 | 
159 | 
160 | % ------------------------------------------------------------------------
function rec = attach_proposals(voc_rec, boxes, class_to_id, exclude_difficult_samples, flip_proposal, flip_gt)
% ------------------------------------------------------------------------
% rec = attach_proposals(voc_rec, boxes, class_to_id, exclude_difficult_samples, flip_proposal, flip_gt)
%   Builds one roidb entry by stacking the ground-truth boxes of an image
%   on top of its region proposals.
%
%   voc_rec   annotation record; may be [] (no 'objects' field), in which
%             case the entry contains proposals only. Its imgsize(1) is
%             used as the image width when flipping.
%   boxes     proposals, one per row, in [y1 x1 y2 x2] order (may be empty)
%   class_to_id
%             map from class name to class index; must support
%             .values(keys) and .Count
%   exclude_difficult_samples
%             when true, objects flagged 'difficult' are dropped
%   flip_proposal / flip_gt
%             mirror the proposals / the ground-truth boxes horizontally
%
%   rec has the fields
%           gt: logical column, true for ground-truth rows
%      overlap: [num_rows x num_classes single] max overlap with the
%               ground truth of each class
%        boxes: [num_rows x 4 single], ground truth first, then proposals
%         feat: [] (placeholder)
%        class: [num_rows x 1 uint8], 0 for proposals

% change selective search order from [y1 x1 y2 x2] to [x1 y1 x2 y2]
if ~isempty(boxes)
    boxes = boxes(:, [2 1 4 3]);
    if flip_proposal
        % the flip needs the image width; fail with a readable message
        % instead of an obscure field-access error on an empty record
        if ~isfield(voc_rec, 'imgsize')
            error('attach_proposals:missingImageSize', ...
                'Cannot flip proposals: the annotation record has no image size.');
        end
        boxes(:, [1, 3]) = voc_rec.imgsize(1) + 1 - boxes(:, [3, 1]);
    end
end

% select the annotated objects that count as ground truth
valid_objects = [];
if isfield(voc_rec, 'objects') && ~isempty(voc_rec.objects)
  if exclude_difficult_samples
      keep = ~cat(1, voc_rec.objects(:).difficult);
  else
      keep = 1:length(voc_rec.objects(:));
  end
  valid_objects = voc_rec.objects(keep);
end

if ~isempty(valid_objects)
  gt_boxes = cat(1, valid_objects.bbox);
  if flip_gt
    gt_boxes(:, [1, 3]) = voc_rec.imgsize(1) + 1 - gt_boxes(:, [3, 1]);
  end
  gt_classes = class_to_id.values({valid_objects.class});
  gt_classes = cat(1, gt_classes{:});
else
  % no annotation, or every object was filtered out: nothing to flip or
  % look up, the entry consists of the proposals alone
  gt_boxes = [];
  gt_classes = [];
end
all_boxes = cat(1, gt_boxes, boxes);
num_gt_boxes = size(gt_boxes, 1);
num_boxes = size(boxes, 1);

rec.gt = cat(1, true(num_gt_boxes, 1), false(num_boxes, 1));
rec.overlap = zeros(num_gt_boxes+num_boxes, class_to_id.Count, 'single');
for i = 1:num_gt_boxes
  % keep, per class, the best overlap of every row with any gt box of that class
  rec.overlap(:, gt_classes(i)) = ...
      max(rec.overlap(:, gt_classes(i)), boxoverlap(all_boxes, gt_boxes(i, :)));
end
rec.boxes = single(all_boxes);
rec.feat = [];
rec.class = uint8(cat(1, gt_classes, zeros(num_boxes, 1)));
209 | 
210 | % ------------------------------------------------------------------------
function regions = load_proposals(proposal_file, regions)
% ------------------------------------------------------------------------
% regions = load_proposals(proposal_file, regions)
%   Loads a proposal file and merges it into the proposals loaded so far.
%
%   proposal_file  MAT-file providing the variables 'images' and 'boxes'
%   regions        [] for the first file; otherwise the result of a
%                  previous call
%
%   With an empty 'regions' the loaded struct is returned as is. Otherwise
%   the new file must list exactly the same images, and its boxes are
%   appended (as double) below the existing boxes of each image.
if isempty(regions)
    regions = load(proposal_file);
    return;
end

regions_more = load(proposal_file);

% compare the counts first so a length mismatch is reported as a
% mismatching image list, not as a size error from the comparison itself
same_images = numel(regions.images) == numel(regions_more.images) && ...
    all(strcmp(regions.images(:), regions_more.images(:)));
if ~same_images
    error('load_proposals: %s has a different image list from the other proposals.\n', proposal_file);
end

regions.boxes = cellfun(@(x, y) [double(x); double(y)], regions.boxes(:), regions_more.boxes(:), 'UniformOutput', false);
222 | 


--------------------------------------------------------------------------------
/functions/rfcn/rfcn_train.m:
--------------------------------------------------------------------------------
  1 | function save_model_path = rfcn_train(conf, imdb_train, roidb_train, varargin)
  2 | % save_model_path = rfcn_train(conf, imdb_train, roidb_train, varargin)
  3 | % --------------------------------------------------------
  4 | % R-FCN implementation
  5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
  6 | % Copyright (c) 2016, Jifeng Dai
  7 | % Licensed under The MIT License [see LICENSE for details]
  8 | % --------------------------------------------------------
  9 | 
 10 | %% inputs
 11 |     ip = inputParser;
 12 |     ip.addRequired('conf',                              @isstruct);
 13 |     ip.addRequired('imdb_train',                        @iscell);
 14 |     ip.addRequired('roidb_train',                       @iscell);
 15 |     ip.addParamValue('do_val',          false,          @isscalar);
 16 |     ip.addParamValue('imdb_val',        struct(),       @isstruct);
 17 |     ip.addParamValue('roidb_val',       struct(),       @isstruct);
 18 |     ip.addParamValue('val_iters',       500,            @isscalar); 
 19 |     ip.addParamValue('val_interval',    5000,           @isscalar); 
 20 |     ip.addParamValue('snapshot_interval',...
 21 |                                         10000,          @isscalar);
 22 |     ip.addParamValue('solver_def_file', fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-50L_res3a', 'solver_80k120k_lr1_3.prototxt'), ...
 23 |                                                         @isstr);
 24 |     ip.addParamValue('net_file',        fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-50L', 'ResNet-50-model.caffemodel'), ...
 25 |                                                         @isstr);
 26 |     ip.addParamValue('cache_name',      'ResNet-50L_res3a', ...
 27 |                                                         @isstr);
 28 |     ip.addParamValue('caffe_version',   'Unkonwn',      @isstr);
 29 |     
 30 |     ip.parse(conf, imdb_train, roidb_train, varargin{:});
 31 |     opts = ip.Results;
 32 |     
 33 | %% try to find trained model
 34 |     imdbs_name = cell2mat(cellfun(@(x) x.name, imdb_train, 'UniformOutput', false));
 35 |     cache_dir = fullfile(pwd, 'output', 'rfcn_cachedir', opts.cache_name, imdbs_name);
 36 |     save_model_path = fullfile(cache_dir, 'final');
 37 |     if exist(save_model_path, 'file')
 38 |         return;
 39 |     end
 40 |     
 41 | %% init
 42 |     % set random seed
 43 |     prev_rng = seed_rand(conf.rng_seed);
 44 |     caffe.set_random_seed(conf.rng_seed);
 45 |     
 46 |     % init caffe solver
 47 |     mkdir_if_missing(cache_dir);
 48 |     caffe_log_file_base = fullfile(cache_dir, 'caffe_log');
 49 |     caffe.init_log(caffe_log_file_base);
 50 |     caffe_solver = caffe.Solver(opts.solver_def_file);
 51 |     caffe_solver.net.copy_from(opts.net_file);
 52 | 
 53 |     % init log
 54 |     timestamp = datestr(datevec(now()), 'yyyymmdd_HHMMSS');
 55 |     mkdir_if_missing(fullfile(cache_dir, 'log'));
 56 |     log_file = fullfile(cache_dir, 'log', ['train_', timestamp, '.txt']);
 57 |     diary(log_file);
 58 | 
 59 |     % set gpu/cpu
 60 |     if conf.use_gpu
 61 |         caffe.set_mode_gpu();
 62 |     else
 63 |         caffe.set_mode_cpu();
 64 |     end
 65 |     
 66 |     
 67 |     disp('conf:');
 68 |     disp(conf);
 69 |     disp('opts:');
 70 |     disp(opts);
 71 |     
 72 | %% making tran/val data
 73 |     fprintf('Preparing training data...');
 74 |     [image_roidb_train, bbox_means, bbox_stds] = rfcn_prepare_image_roidb(conf, opts.imdb_train, opts.roidb_train);
 75 |     fprintf('Done.\n');
 76 |     
 77 |     if opts.do_val
 78 |         fprintf('Preparing validation data...');
 79 |         [image_roidb_val] = rfcn_prepare_image_roidb(conf, opts.imdb_val, opts.roidb_val, bbox_means, bbox_stds);
 80 |         fprintf('Done.\n');
 81 | 
 82 |         % fix validation data
 83 |         shuffled_inds_val = generate_random_minibatch([], image_roidb_val, conf.ims_per_batch);
 84 |         shuffled_inds_val = shuffled_inds_val(randperm(length(shuffled_inds_val), opts.val_iters));
 85 |     end
 86 |     
 87 | %% training
 88 |     shuffled_inds = [];
 89 |     train_results = [];  
 90 |     val_results = [];  
 91 |     iter_ = caffe_solver.iter();
 92 |     max_iter = caffe_solver.max_iter();
 93 |     
 94 |     p = new_parpool(1);
 95 |     parfor i=1:1
 96 |         seed_rand(conf.rng_seed);
 97 |     end
 98 |     [shuffled_inds, sub_db_inds] = generate_random_minibatch(shuffled_inds, image_roidb_train, conf.ims_per_batch);
 99 |     parHandle = parfeval(p, @rfcn_get_minibatch, 1, conf, image_roidb_train(sub_db_inds));
100 |     tic
101 |     while (iter_ < max_iter)
102 |         caffe_solver.net.set_phase('train');
103 | 
104 |         % generate minibatch training data
105 |         % gather date
106 |         [~, net_inputs] = fetchNext(parHandle);
107 | 
108 |         % generate minibatch training data
109 |         % generate data asynchronously 
110 |         [shuffled_inds, sub_db_inds] = generate_random_minibatch(shuffled_inds, image_roidb_train, conf.ims_per_batch);
111 |         parHandle = parfeval(p, @rfcn_get_minibatch, 1, conf, image_roidb_train(sub_db_inds));
112 | 
113 |         caffe_solver.net.reshape_as_input(net_inputs);
114 | 
115 |         % one iter SGD update
116 |         caffe_solver.net.set_input_data(net_inputs);
117 |         caffe_solver.step(1);
118 |         
119 |         rst = caffe_solver.net.get_output();
120 |         train_results = parse_rst(train_results, rst);
121 |             
122 |         % do valdiation per val_interval iterations
123 |         if ~mod(iter_, opts.val_interval)
124 |             if opts.do_val
125 |                 caffe_solver.net.set_phase('test');                
126 |                 for i = 1:length(shuffled_inds_val)
127 |                     sub_db_inds = shuffled_inds_val{i};
128 |                     net_inputs = rfcn_get_minibatch(conf, image_roidb_val(sub_db_inds));
129 |                     caffe_solver.net.reshape_as_input(net_inputs);
130 |                     
131 |                     caffe_solver.net.forward(net_inputs);
132 |                     
133 |                     rst = caffe_solver.net.get_output();
134 |                     val_results = parse_rst(val_results, rst);
135 |                 end
136 |             end
137 |             
138 |             show_state(iter_, train_results, val_results);
139 |             toc;tic;
140 |             train_results = [];
141 |             val_results = [];
142 |             diary; diary; % flush diary
143 |         end
144 |         
145 |         % snapshot
146 |         if ~mod(iter_, opts.snapshot_interval)
147 |             snapshot(caffe_solver, bbox_means, bbox_stds, cache_dir, sprintf('iter_%d', iter_));
148 |         end
149 |         
150 |         iter_ = caffe_solver.iter();
151 |     end
152 |     
153 |     % final snapshot
154 |     snapshot(caffe_solver, bbox_means, bbox_stds, cache_dir, sprintf('iter_%d', iter_));
155 |     save_model_path = snapshot(caffe_solver, bbox_means, bbox_stds, cache_dir, 'final');
156 | 
157 |     diary off;
158 |     caffe.reset_all(); 
159 |     rng(prev_rng);
160 | end
161 | 
function [shuffled_inds, sub_inds] = generate_random_minibatch(shuffled_inds, image_roidb_train, ims_per_batch)
% [shuffled_inds, sub_inds] = generate_random_minibatch(shuffled_inds, image_roidb_train, ims_per_batch)
%   Maintains a queue of minibatches (a cell array of index vectors).
%   When the queue is empty it is refilled with a new random grouping of
%   all images; when a second output is requested, the first minibatch is
%   popped from the queue and returned as sub_inds.

    if isempty(shuffled_inds)
        % every minibatch holds only wide or only tall images,
        % to save gpu memory
        is_wide = arrayfun(@(entry) entry.im_size(2) >= entry.im_size(1), image_roidb_train, 'UniformOutput', true);

        wide_batches = random_full_batches(find(is_wide), ims_per_batch);
        tall_batches = random_full_batches(find(~is_wide), ims_per_batch);

        % mix both kinds of minibatches, one column per minibatch
        all_batches = [wide_batches, tall_batches];
        batch_order = randperm(size(all_batches, 2));
        shuffled_inds = num2cell(all_batches(:, batch_order), 1);
    end

    if nargout <= 1
        % caller only wants the (possibly refilled) queue
        return;
    end

    % pop the next minibatch
    sub_inds = shuffled_inds{1};
    assert(length(sub_inds) == ims_per_batch);
    shuffled_inds(1) = [];
end

function batches = random_full_batches(candidate_inds, ims_per_batch)
% Randomly keeps as many candidates as fill whole minibatches and arranges
% them as an ims_per_batch-by-N matrix (one minibatch per column).
    num_kept = floor(length(candidate_inds) / ims_per_batch) * ims_per_batch;
    picked = candidate_inds(randperm(length(candidate_inds), num_kept));
    batches = reshape(picked, ims_per_batch, []);
end
197 | 
function model_path = snapshot(caffe_solver, bbox_means, bbox_stds, cache_dir, file_name)
% model_path = snapshot(caffe_solver, bbox_means, bbox_stds, cache_dir, file_name)
%   Saves the solver's net to fullfile(cache_dir, file_name) with
%   bbox_means and bbox_stds merged into the 'rfcn_bbox' layer, then puts
%   the layer's original parameters back into the net.
    layer_name = 'rfcn_bbox';

    % untouched copies, written back after saving
    trained_weights = caffe_solver.net.params(layer_name, 1).get_data();
    trained_biases = caffe_solver.net.params(layer_name, 2).get_data();

    % number of consecutive layer outputs that share one mean/std entry
    copies_per_target = size(trained_weights, 4) / numel(bbox_means);

    std_per_output = spread_over_outputs(bbox_stds, copies_per_target);
    mean_per_output = spread_over_outputs(bbox_means, copies_per_target);

    % merge bbox_means, bbox_stds into the model:
    %   weights = weights * stds;  bias = bias * stds + means
    merged_weights = bsxfun(@times, trained_weights, std_per_output);
    merged_biases = trained_biases .* std_per_output(:) + mean_per_output(:);

    caffe_solver.net.set_params_data(layer_name, 1, merged_weights);
    caffe_solver.net.set_params_data(layer_name, 2, merged_biases);

    model_path = fullfile(cache_dir, file_name);
    caffe_solver.net.save(model_path);
    fprintf('Saved as %s\n', model_path);

    % restore net to original state
    caffe_solver.net.set_params_data(layer_name, 1, trained_weights);
    caffe_solver.net.set_params_data(layer_name, 2, trained_biases);
end

function per_output = spread_over_outputs(stats, copies)
% Flattens 'stats' row by row, repeats every entry 'copies' times in a row
% and returns the result as a 1-by-1-by-1-by-N array.
    row_major = stats';
    row_major = row_major(:);
    repeated = repmat(row_major, [1, copies])';
    per_output = permute(repeated(:), [4, 3, 2, 1]);
end
234 | 
function show_state(iter, train_results, val_results)
% show_state(iter, train_results, val_results)
%   Prints the mean accuracy and the mean classification / regression
%   losses accumulated in train_results and, when val_results is given and
%   non-empty, the same figures for the validation results.
    train_acc = mean(train_results.accuarcy.data);
    train_cls = mean(train_results.loss_cls.data);
    train_reg = mean(train_results.loss_bbox.data);

    fprintf('\n------------------------- Iteration %d -------------------------\n', iter);
    fprintf('Training : accuracy %.3g, loss (cls %.3g, reg %.3g)\n', ...
        train_acc, train_cls, train_reg);

    % nothing more to report without validation results
    if ~exist('val_results', 'var') || isempty(val_results)
        return;
    end

    val_acc = mean(val_results.accuarcy.data);
    val_cls = mean(val_results.loss_cls.data);
    val_reg = mean(val_results.loss_bbox.data);
    fprintf('Testing  : accuracy %.3g, loss (cls %.3g, reg %.3g)\n', ...
        val_acc, val_cls, val_reg);
end
248 | 


--------------------------------------------------------------------------------