├── utils ├── cpu_cores.m ├── mkdir_if_missing.m ├── xVOChash_lookup.m ├── RectLTWH2LTRB.m ├── RectLTRB2LTWH.m ├── new_parpool.m ├── xVOCap.m ├── xVOChash_init.m ├── vis_label.m ├── prep_im_for_blob_size.m ├── im_list_to_blob.m ├── symbolic_link.m ├── parse_rst.m ├── procid.m ├── tic_toc_print.m ├── auto_select_gpu.m ├── seed_rand.m ├── active_caffe_mex.m ├── prep_im_for_blob.m ├── boxoverlap.m ├── subsample_images.m ├── subsample_images_per_class.m └── showboxes.m ├── data └── demo │ ├── 000166.jpg │ ├── 001852.jpg │ ├── 002597.jpg │ ├── 004030.jpg │ ├── 005225.jpg │ ├── 000166_boxes.mat │ ├── 001852_boxes.mat │ ├── 002597_boxes.mat │ ├── 004030_boxes.mat │ └── 005225_boxes.mat ├── experiments ├── +Dataset │ ├── private │ │ ├── voc2007_devkit.m │ │ └── voc2012_devkit.m │ ├── voc2007_test_ss.m │ ├── voc0712_trainval_ss.m │ ├── voc2007_test_sp.m │ └── voc0712_trainval_sp.m ├── +Model │ ├── ResNet50_for_RFCN_VOC0712.m │ ├── ResNet101_for_RFCN_VOC0712.m │ ├── ResNet50_for_RFCN_VOC0712_OHEM.m │ └── ResNet101_for_RFCN_VOC0712_OHEM.m ├── script_rfcn_VOC0712_ResNet50_OHEM_ss.m ├── script_rfcn_VOC0712_ResNet50_rpn.m ├── script_rfcn_VOC0712_ResNet101_OHEM_ss.m ├── script_rfcn_VOC0712_ResNet101_rpn.m ├── script_rfcn_VOC0712_ResNet50_OHEM_rpn.m ├── script_rfcn_VOC0712_ResNet101_OHEM_rpn.m └── script_rfcn_demo.m ├── .gitmodules ├── imdb ├── get_voc_opts.m ├── imdb_eval_voc.m ├── imdb_from_voc.m └── roidb_from_voc.m ├── models └── rfcn_prototxts │ ├── ResNet-101L_res3a │ └── solver_80k110k_lr1_3.prototxt │ ├── ResNet-50L_res3a │ └── solver_80k110k_lr1_3.prototxt │ ├── ResNet-101L_OHEM_res3a │ └── solver_80k110k_lr1_3.prototxt │ └── ResNet-50L_OHEM_res3a │ └── solver_80k110k_lr1_3.prototxt ├── .gitattributes ├── fetch_data ├── fetch_region_proposals.m ├── fetch_caffe_mex_windows_vs2013_cuda75.m ├── fetch_model_ResNet50.m ├── fetch_model_ResNet101.m └── fetch_demo_model_ResNet101.m ├── functions ├── rfcn │ ├── rfcn_map_im_rois_to_feat_rois.m │ ├── rfcn_bbox_transform.m │ ├── rfcn_bbox_transform_inv.m │ ├── rfcn_config_ohem.m │ ├── rfcn_config_simple.m │ ├── rfcn_im_detect.m │ ├── rfcn_prepare_image_roidb.m │ ├── rfcn_get_minibatch.m │ ├── rfcn_test.m │ └── rfcn_train.m └── nms │ ├── nms_multiclass.m │ ├── nvmex.m │ ├── nms.m │ ├── nms_mex.cpp │ ├── nms_multiclass_mex.cpp │ └── nms_gpu_mex.cu ├── rfcn_build.m ├── .gitignore ├── startup.m ├── LICENSE └── README.md /utils/cpu_cores.m: -------------------------------------------------------------------------------- 1 | function num = cpu_cores() 2 | num = feature('numcores'); 3 | end -------------------------------------------------------------------------------- /data/demo/000166.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/000166.jpg -------------------------------------------------------------------------------- /data/demo/001852.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/001852.jpg -------------------------------------------------------------------------------- /data/demo/002597.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/002597.jpg -------------------------------------------------------------------------------- /data/demo/004030.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/004030.jpg -------------------------------------------------------------------------------- /data/demo/005225.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/005225.jpg -------------------------------------------------------------------------------- /data/demo/000166_boxes.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/000166_boxes.mat -------------------------------------------------------------------------------- /data/demo/001852_boxes.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/001852_boxes.mat -------------------------------------------------------------------------------- /data/demo/002597_boxes.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/002597_boxes.mat -------------------------------------------------------------------------------- /data/demo/004030_boxes.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/004030_boxes.mat -------------------------------------------------------------------------------- /data/demo/005225_boxes.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijifeng001/R-FCN/HEAD/data/demo/005225_boxes.mat -------------------------------------------------------------------------------- /experiments/+Dataset/private/voc2007_devkit.m: -------------------------------------------------------------------------------- 1 | function path = voc2007_devkit() 2 | path = './datasets/VOCdevkit2007'; 3 | end -------------------------------------------------------------------------------- /experiments/+Dataset/private/voc2012_devkit.m: -------------------------------------------------------------------------------- 1 | function path = voc2012_devkit() 2 | path = './datasets/VOCdevkit2012'; 3 | end -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "external/caffe"] 2 | path = external/caffe 3 | url = https://github.com/daijifeng001/caffe-rfcn.git 4 | branch = faster-R-CNN 5 | -------------------------------------------------------------------------------- /utils/mkdir_if_missing.m: -------------------------------------------------------------------------------- 1 | function made = mkdir_if_missing(path) 2 | made = false; 3 | if exist(path, 'dir') == 0 4 | mkdir(path); 5 | made = true; 6 | end 7 | -------------------------------------------------------------------------------- /utils/xVOChash_lookup.m: -------------------------------------------------------------------------------- 1 | function ind = xVOChash_lookup(hash,s) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | hsize=numel(hash.key); 5 | h=mod(str2double(s([4 6:end])),hsize)+1; 6 | ind=hash.val{h}(strmatch(s,hash.key{h},'exact')); 7 | -------------------------------------------------------------------------------- /utils/RectLTWH2LTRB.m: -------------------------------------------------------------------------------- 1 | function [ rectsLTRB ] = RectLTWH2LTRB(rectsLTWH) 2 | %rects (l, t, r, b) to (l, t, w, h) 3 | 4 | rectsLTRB = [rectsLTWH(:, 1), rectsLTWH(:, 2), rectsLTWH(:, 1)+rectsLTWH(:,3)-1, rectsLTWH(:,2)+rectsLTWH(:,4)-1]; 5 | end 6 | 7 | -------------------------------------------------------------------------------- /utils/RectLTRB2LTWH.m: -------------------------------------------------------------------------------- 1 | function [ rectsLTWH ] = RectLTRB2LTWH( rectsLTRB ) 2 | %rects (l, t, r, b) to (l, t, w, h) 3 | 4 | rectsLTWH = [rectsLTRB(:, 1), rectsLTRB(:, 2), rectsLTRB(:, 3)-rectsLTRB(:,1)+1, rectsLTRB(:,4)-rectsLTRB(:,2)+1]; 5 | end 6 | 7 | -------------------------------------------------------------------------------- /utils/new_parpool.m: -------------------------------------------------------------------------------- 1 | function p = new_parpool(number) 2 | 3 | if ~exist('number', 'var') 4 | number = cpu_cores(); 5 | end 6 | 7 | if ~isempty(gcp('nocreate')) 8 | delete(gcp); 9 | end 10 | p = parpool(number); 11 | end -------------------------------------------------------------------------------- /utils/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /imdb/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /utils/xVOChash_init.m: -------------------------------------------------------------------------------- 1 | function hash = xVOChash_init(strs) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | hsize=4999; 5 | hash.key=cell(hsize,1); 6 | hash.val=cell(hsize,1); 7 | 8 | for i=1:numel(strs) 9 | s=strs{i}; 10 | h=mod(str2double(s([4 6:end])),hsize)+1; 11 | j=numel(hash.key{h})+1; 12 | hash.key{h}{j}=strs{i}; 13 | hash.val{h}(j)=i; 14 | end 15 | 16 | -------------------------------------------------------------------------------- /utils/vis_label.m: -------------------------------------------------------------------------------- 1 | function vis_label(imdb, roidb) 2 | 3 | rois = roidb.rois; 4 | for iIM = 1:length(rois) 5 | im = imread(imdb.image_at(iIM)); 6 | boxes = arrayfun(@(x) rois(iIM).boxes(rois(iIM).class == x, :), 1:length(imdb.classes), 'UniformOutput', false); 7 | legends = imdb.classes; 8 | showboxes(im, boxes, legends); 9 | pause; 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /models/rfcn_prototxts/ResNet-101L_res3a/solver_80k110k_lr1_3.prototxt: -------------------------------------------------------------------------------- 1 | net: "./models/rfcn_prototxts/ResNet-101L_res3a/train_val.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 80000 6 | display: 20 7 | max_iter: 110000 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | snapshot: 0 12 | #debug_info: true 13 | 14 | -------------------------------------------------------------------------------- /models/rfcn_prototxts/ResNet-50L_res3a/solver_80k110k_lr1_3.prototxt: -------------------------------------------------------------------------------- 1 | net: "./models/rfcn_prototxts/ResNet-50L_res3a/train_val.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 80000 6 | display: 20 7 | max_iter: 110000 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | snapshot: 0 12 | #debug_info: true 13 | 14 | -------------------------------------------------------------------------------- /models/rfcn_prototxts/ResNet-101L_OHEM_res3a/solver_80k110k_lr1_3.prototxt: -------------------------------------------------------------------------------- 1 | net: "./models/rfcn_prototxts/ResNet-101L_OHEM_res3a/train_val.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 80000 6 | display: 20 7 | max_iter: 110000 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | snapshot: 0 12 | #debug_info: true 13 | 14 | -------------------------------------------------------------------------------- /models/rfcn_prototxts/ResNet-50L_OHEM_res3a/solver_80k110k_lr1_3.prototxt: -------------------------------------------------------------------------------- 1 | net: "./models/rfcn_prototxts/ResNet-50L_OHEM_res3a/train_val.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 80000 6 | display: 20 7 | max_iter: 110000 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | snapshot: 0 12 | #debug_info: true 13 | 14 | -------------------------------------------------------------------------------- /utils/prep_im_for_blob_size.m: -------------------------------------------------------------------------------- 1 | function im_scale = prep_im_for_blob_size(im_size, target_size, max_size) 2 | 3 | im_size_min = min(im_size(1:2)); 4 | im_size_max = max(im_size(1:2)); 5 | im_scale = double(target_size) / im_size_min; 6 | 7 | % Prevent the biggest axis from being more than MAX_SIZE 8 | if round(im_scale * im_size_max) > max_size 9 | im_scale = double(max_size) / double(im_size_max); 10 | end 11 | end -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /utils/im_list_to_blob.m: -------------------------------------------------------------------------------- 1 | function blob = im_list_to_blob(ims) 2 | max_shape = max(cell2mat(cellfun(@size, ims(:), 'UniformOutput', false)), [], 1); 3 | assert(all(cellfun(@(x) size(x, 3), ims, 'UniformOutput', true) == 3)); 4 | num_images = length(ims); 5 | blob = zeros(max_shape(1), max_shape(2), 3, num_images, 'single'); 6 | 7 | for i = 1:length(ims) 8 | im = ims{i}; 9 | blob(1:size(im, 1), 1:size(im, 2), :, i) = im; 10 | end 11 | end -------------------------------------------------------------------------------- /utils/symbolic_link.m: -------------------------------------------------------------------------------- 1 | function symbolic_link(link, target) 2 | % symbolic_link(link, target) 3 | % -------------------------------------------------------- 4 | % Faster R-CNN 5 | % Copyright (c) 2015, Shaoqing Ren 6 | % Licensed under The MIT License [see LICENSE for details] 7 | % -------------------------------------------------------- 8 | 9 | if ispc() 10 | system(sprintf('mklink /J %s %s', link, target)); 11 | else 12 | system(sprintf('ln -s %s %s', link, target)); 13 | end 14 | 15 | end 16 | -------------------------------------------------------------------------------- /fetch_data/fetch_region_proposals.m: -------------------------------------------------------------------------------- 1 | 2 | cur_dir = pwd; 3 | cd(fileparts(mfilename('fullpath'))); 4 | 5 | try 6 | fprintf('Downloading region proposals...\n'); 7 | urlwrite('https://onedrive.live.com/download?resid=F371D9563727B96F!91965&authkey=!AErVqYD6NhjxAfw', ... 8 | 'proposals.zip'); 9 | 10 | fprintf('Unzipping...\n'); 11 | unzip('proposals.zip', '..'); 12 | 13 | fprintf('Done.\n'); 14 | delete('proposals.zip'); 15 | catch 16 | fprintf('Error in downloading, please try links in README.md https://github.com/daijifeng001/R-FCN'); 17 | end 18 | 19 | cd(cur_dir); 20 | -------------------------------------------------------------------------------- /fetch_data/fetch_caffe_mex_windows_vs2013_cuda75.m: -------------------------------------------------------------------------------- 1 | 2 | cur_dir = pwd; 3 | cd(fileparts(mfilename('fullpath'))); 4 | 5 | try 6 | fprintf('Downloading caffe_mex...\n'); 7 | urlwrite('https://onedrive.live.com/download?resid=F371D9563727B96F!91961&authkey=!AOkZbLTBfuMB69Y', ... 8 | 'caffe_mex.zip'); 9 | 10 | fprintf('Unzipping...\n'); 11 | unzip('caffe_mex.zip', '..'); 12 | 13 | fprintf('Done.\n'); 14 | delete('caffe_mex.zip'); 15 | catch 16 | fprintf('Error in downloading, please try links in README.md https://github.com/daijifeng001/R-FCN'); 17 | end 18 | 19 | cd(cur_dir); 20 | -------------------------------------------------------------------------------- /fetch_data/fetch_model_ResNet50.m: -------------------------------------------------------------------------------- 1 | 2 | cur_dir = pwd; 3 | cd(fileparts(mfilename('fullpath'))); 4 | 5 | try 6 | fprintf('Downloading model_ResNet-50L...\n'); 7 | urlwrite('https://onedrive.live.com/download?resid=F371D9563727B96F!91962&authkey=!AET2I7W3WzcDyf8', ... 8 | 'models_ResNet-50L.zip'); 9 | 10 | fprintf('Unzipping...\n'); 11 | unzip('models_ResNet-50L.zip', '..'); 12 | 13 | fprintf('Done.\n'); 14 | delete('models_ResNet-50L.zip'); 15 | catch 16 | fprintf('Error in downloading, please try links in README.md https://github.com/daijifeng001/R-FCN'); 17 | end 18 | 19 | cd(cur_dir); 20 | -------------------------------------------------------------------------------- /fetch_data/fetch_model_ResNet101.m: -------------------------------------------------------------------------------- 1 | 2 | cur_dir = pwd; 3 | cd(fileparts(mfilename('fullpath'))); 4 | 5 | try 6 | fprintf('Downloading model_ResNet-101L...\n'); 7 | urlwrite('https://onedrive.live.com/download?resid=F371D9563727B96F!91963&authkey=!AM-EuzuUJelv9Po', ... 8 | 'models_ResNet-101L.zip'); 9 | 10 | fprintf('Unzipping...\n'); 11 | unzip('models_ResNet-101L.zip', '..'); 12 | 13 | fprintf('Done.\n'); 14 | delete('models_ResNet-101L.zip'); 15 | catch 16 | fprintf('Error in downloading, please try links in README.md https://github.com/daijifeng001/R-FCN'); 17 | end 18 | 19 | cd(cur_dir); 20 | -------------------------------------------------------------------------------- /experiments/+Model/ResNet50_for_RFCN_VOC0712.m: -------------------------------------------------------------------------------- 1 | function model = ResNet50_for_RFCN_VOC0712(model) 2 | % ResNet 50layers (finetuned from res3a) 3 | 4 | model.solver_def_file = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-50L_res3a', 'solver_80k110k_lr1_3.prototxt'); 5 | model.test_net_def_file = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-50L_res3a', 'test.prototxt'); 6 | 7 | model.net_file = fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-50L', 'ResNet-50-model.caffemodel'); 8 | model.mean_image = fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-50L', 'mean_image'); 9 | 10 | end -------------------------------------------------------------------------------- /experiments/+Model/ResNet101_for_RFCN_VOC0712.m: -------------------------------------------------------------------------------- 1 | function model = ResNet101_for_RFCN_VOC0712(model) 2 | % ResNet 101layers (finetuned from res3a) 3 | 4 | model.solver_def_file = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-101L_res3a', 'solver_80k110k_lr1_3.prototxt'); 5 | model.test_net_def_file = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-101L_res3a', 'test.prototxt'); 6 | 7 | model.net_file = fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-101L', 'ResNet-101-model.caffemodel'); 8 | model.mean_image = fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-101L', 'mean_image'); 9 | 10 | end -------------------------------------------------------------------------------- /utils/parse_rst.m: -------------------------------------------------------------------------------- 1 | function results = parse_rst(results, rst) 2 | % results = parse_rst(results, rst) 3 | % -------------------------------------------------------- 4 | % Faster R-CNN 5 | % Copyright (c) 2015, Shaoqing Ren 6 | % Licensed under The MIT License [see LICENSE for details] 7 | % -------------------------------------------------------- 8 | 9 | if isempty(results) 10 | for i = 1:length(rst) 11 | results.(rst(i).blob_name).data = []; 12 | end 13 | end 14 | 15 | for i = 1:length(rst) 16 | results.(rst(i).blob_name).data = [results.(rst(i).blob_name).data; rst(i).data(:)]; 17 | end 18 | end -------------------------------------------------------------------------------- /fetch_data/fetch_demo_model_ResNet101.m: -------------------------------------------------------------------------------- 1 | 2 | cur_dir = pwd; 3 | cd(fileparts(mfilename('fullpath'))); 4 | 5 | try 6 | fprintf('Downloading demo_models_ResNet-101L...\n'); 7 | urlwrite('https://onedrive.live.com/download?resid=F371D9563727B96F!91964&authkey=!AOk8r5H95KFO0e8', ... 8 | 'demo_models_ResNet-101L.zip'); 9 | 10 | fprintf('Unzipping...\n'); 11 | unzip('demo_models_ResNet-101L.zip', '..'); 12 | 13 | fprintf('Done.\n'); 14 | delete('demo_models_ResNet-101L.zip'); 15 | catch 16 | fprintf('Error in downloading, please try links in README.md https://github.com/daijifeng001/R-FCN'); 17 | end 18 | 19 | cd(cur_dir); 20 | -------------------------------------------------------------------------------- /utils/procid.m: -------------------------------------------------------------------------------- 1 | function s = procid() 2 | % Returns a string identifying the process. 3 | 4 | % AUTORIGHTS 5 | % ------------------------------------------------------- 6 | % Copyright (C) 2009-2012 Ross Girshick 7 | % 8 | % This file is part of the voc-releaseX code 9 | % (http://people.cs.uchicago.edu/~rbg/latent/) 10 | % and is available under the terms of an MIT-like license 11 | % provided in COPYING. Please retain this notice and 12 | % COPYING if you use this file (or a portion of it) in 13 | % your project. 14 | % ------------------------------------------------------- 15 | 16 | d = pwd(); 17 | i = strfind(d, filesep); 18 | d = d(i(end)+1:end); 19 | s = d; 20 | -------------------------------------------------------------------------------- /experiments/+Model/ResNet50_for_RFCN_VOC0712_OHEM.m: -------------------------------------------------------------------------------- 1 | function model = ResNet50_for_RFCN_VOC0712_OHEM(model) 2 | % ResNet 50layers with OHEM training (finetuned from res3a) 3 | 4 | model.solver_def_file = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-50L_OHEM_res3a', 'solver_80k110k_lr1_3.prototxt'); 5 | model.test_net_def_file = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-50L_OHEM_res3a', 'test.prototxt'); 6 | 7 | model.net_file = fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-50L', 'ResNet-50-model.caffemodel'); 8 | model.mean_image = fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-50L', 'mean_image'); 9 | 10 | end -------------------------------------------------------------------------------- /experiments/+Model/ResNet101_for_RFCN_VOC0712_OHEM.m: -------------------------------------------------------------------------------- 1 | function model = ResNet101_for_RFCN_VOC0712_OHEM(model) 2 | % ResNet 101layers with OHEM training (finetuned from res3a) 3 | 4 | model.solver_def_file = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-101L_OHEM_res3a', 'solver_80k110k_lr1_3.prototxt'); 5 | model.test_net_def_file = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-101L_OHEM_res3a', 'test.prototxt'); 6 | 7 | model.net_file = fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-101L', 'ResNet-101-model.caffemodel'); 8 | model.mean_image = fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-101L', 'mean_image'); 9 | 10 | end -------------------------------------------------------------------------------- /functions/rfcn/rfcn_map_im_rois_to_feat_rois.m: -------------------------------------------------------------------------------- 1 | function [feat_rois] = rfcn_map_im_rois_to_feat_rois(conf, im_rois, im_scale_factor) 2 | % [feat_rois] = rfcn_map_im_rois_to_feat_rois(conf, im_rois, im_scale_factor) 3 | % -------------------------------------------------------- 4 | % R-FCN implementation 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | % Copyright (c) 2016, Jifeng Dai 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | %% Map a ROI in image-pixel coordinates to a ROI in feature coordinates. 11 | % in matlab's index (start from 1) 12 | 13 | feat_rois = round((im_rois-1) * im_scale_factor) + 1; 14 | 15 | %feat_rois = round((im_rois-1) * im_scale_factor / single(conf.feat_stride)) + 1; 16 | 17 | end -------------------------------------------------------------------------------- /utils/tic_toc_print.m: -------------------------------------------------------------------------------- 1 | function tic_toc_print(fmt, varargin) 2 | % Print only after 1 second has passed since the last print. 3 | % Arguments are the same as for fprintf. 4 | 5 | % AUTORIGHTS 6 | % ------------------------------------------------------- 7 | % Copyright (C) 2009-2012 Ross Girshick 8 | % 9 | % This file is part of the voc-releaseX code 10 | % (http://people.cs.uchicago.edu/~rbg/latent/) 11 | % and is available under the terms of an MIT-like license 12 | % provided in COPYING. Please retain this notice and 13 | % COPYING if you use this file (or a portion of it) in 14 | % your project. 15 | % ------------------------------------------------------- 16 | 17 | persistent th; 18 | 19 | if isempty(th) 20 | th = tic(); 21 | end 22 | 23 | if toc(th) > 1 24 | fprintf(fmt, varargin{:}); 25 | drawnow; 26 | th = tic(); 27 | end 28 | -------------------------------------------------------------------------------- /rfcn_build.m: -------------------------------------------------------------------------------- 1 | function rfcn_build() 2 | % rfcn_build() 3 | % -------------------------------------------------------- 4 | % R-FCN implementation 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | % Copyright (c) 2016, Jifeng Dai 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | % Compile nms_mex 11 | if ~exist('nms_mex', 'file') 12 | fprintf('Compiling nms_mex\n'); 13 | 14 | mex -O -outdir bin ... 15 | CXXFLAGS="\$CXXFLAGS -std=c++11" ... 16 | -largeArrayDims ... 17 | functions/nms/nms_mex.cpp ... 18 | -output nms_mex; 19 | end 20 | 21 | if ~exist('nms_gpu_mex', 'file') 22 | fprintf('Compiling nms_gpu_mex\n'); 23 | addpath(fullfile(pwd, 'functions', 'nms')); 24 | nvmex('functions/nms/nms_gpu_mex.cu', 'bin'); 25 | delete('nms_gpu_mex.o'); 26 | end 27 | 28 | 29 | -------------------------------------------------------------------------------- /utils/auto_select_gpu.m: -------------------------------------------------------------------------------- 1 | function gpu_id = auto_select_gpu() 2 | % gpu_id = auto_select_gpu() 3 | % Select the gpu which has the maximum free memory 4 | % -------------------------------------------------------- 5 | % Faster R-CNN 6 | % Copyright (c) 2015, Shaoqing Ren 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | % deselects all GPU devices 11 | gpuDevice([]); 12 | 13 | maxFreeMemory = 0; 14 | for i = 1:gpuDeviceCount 15 | g = gpuDevice(i); 16 | freeMemory = g.FreeMemory(); 17 | fprintf('GPU %d: free memory %d\n', i, freeMemory); 18 | if freeMemory > maxFreeMemory 19 | maxFreeMemory = freeMemory; 20 | gpu_id = i; 21 | end 22 | end 23 | fprintf('Use GPU %d\n', gpu_id); 24 | 25 | % deselects all GPU devices 26 | gpuDevice([]); 27 | end 28 | -------------------------------------------------------------------------------- /experiments/+Dataset/voc2007_test_ss.m: -------------------------------------------------------------------------------- 1 | function dataset = voc2007_test_ss(dataset, usage, use_flip) 2 | % Pascal voc 2007 test set with selective search 3 | % set opts.imdb_train opts.roidb_train 4 | % or set opts.imdb_test opts.roidb_train 5 | 6 | % change to point to your devkit install 7 | devkit = voc2007_devkit(); 8 | 9 | switch usage 10 | case {'train'} 11 | dataset.imdb_train = { imdb_from_voc(devkit, 'test', '2007', use_flip) }; 12 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x, 'with_selective_search', true), dataset.imdb_train, 'UniformOutput', false); 13 | case {'test'} 14 | dataset.imdb_test = imdb_from_voc(devkit, 'test', '2007', use_flip) ; 15 | dataset.roidb_test = dataset.imdb_test.roidb_func(dataset.imdb_test, 'with_selective_search', true); 16 | otherwise 17 | error('usage = ''train'' or ''test'''); 18 | end 19 | 20 | end -------------------------------------------------------------------------------- /utils/seed_rand.m: -------------------------------------------------------------------------------- 1 | function prev_rng = seed_rand(seed) 2 | % seed_rand - Set random number generator to a fixed seed. 3 | % prev_rng = seed_rand(seed) 4 | % 5 | % Strategic use ensures that results are reproducible. 6 | % 7 | % To restore the previous rng after calling this do: 8 | % rng(prev_rng); 9 | 10 | % AUTORIGHTS 11 | % --------------------------------------------------------- 12 | % Copyright (c) 2014, Ross Girshick 13 | % 14 | % This file is part of the R-CNN code and is available 15 | % under the terms of the Simplified BSD License provided in 16 | % LICENSE. Please retain this notice and LICENSE if you use 17 | % this file (or any portion of it) in your project. 18 | % --------------------------------------------------------- 19 | 20 | if nargin < 1 21 | % This value works best for me. 22 | seed = 3; 23 | % Just kidding, of course ;-). 24 | end 25 | 26 | prev_rng = rng; 27 | rng(seed, 'twister') 28 | -------------------------------------------------------------------------------- /experiments/+Dataset/voc0712_trainval_ss.m: -------------------------------------------------------------------------------- 1 | function dataset = voc0712_trainval_ss(dataset, usage, use_flip) 2 | % Pascal voc 0712 trainval set with selective search 3 | % set opts.imdb_train opts.roidb_train 4 | % or set opts.imdb_test opts.roidb_train 5 | 6 | % change to point to your devkit install 7 | devkit2007 = voc2007_devkit(); 8 | devkit2012 = voc2012_devkit(); 9 | 10 | switch usage 11 | case {'train'} 12 | dataset.imdb_train = { imdb_from_voc(devkit2007, 'trainval', '2007', use_flip), ... 13 | imdb_from_voc(devkit2012, 'trainval', '2012', use_flip)}; 14 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x, 'with_selective_search', true), dataset.imdb_train, 'UniformOutput', false); 15 | case {'test'} 16 | error('only supports one source test currently'); 17 | otherwise 18 | error('usage = ''train'' or ''test'''); 19 | end 20 | 21 | end -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # User Ingore 12 | models/fast_rcnn_prototxts/ 13 | models/pre_trained_model/ 14 | models/rpn_prototxts/ 15 | data/ 16 | datasets/ 17 | output/ 18 | cachedir/ 19 | imdb/cache 20 | bin/ 21 | external/caffe/matlab 22 | fetch_data/*.zip 23 | *.caffemodel 24 | *.mat 25 | 26 | # Windows Installer files 27 | *.cab 28 | *.msi 29 | *.msm 30 | *.msp 31 | 32 | # Windows shortcuts 33 | *.lnk 34 | 35 | # ========================= 36 | # Operating System Files 37 | # ========================= 38 | 39 | # OSX 40 | # ========================= 41 | 42 | .DS_Store 43 | .AppleDouble 44 | .LSOverride 45 | 46 | # Thumbnails 47 | ._* 48 | 49 | # Files that might appear on external disk 50 | .Spotlight-V100 51 | .Trashes 52 | 53 | # Directories potentially created on remote AFP share 54 | .AppleDB 55 | .AppleDesktop 56 | Network Trash Folder 57 | Temporary Items 58 | .apdisk 59 | -------------------------------------------------------------------------------- /experiments/+Dataset/voc2007_test_sp.m: -------------------------------------------------------------------------------- 1 | function dataset = voc2007_test_sp(dataset, usage, use_flip, extension) 2 | % Pascal voc 2007 test set with *pre-computed* RPN proposals (trained with ResNet50 or ResNet101) 3 | % extension = "resnet50" or "resnet101" for specifying pre-computed RPN proposals 4 | % set opts.imdb_train opts.roidb_train 5 | 6 | 7 | % change to point to your devkit install 8 | devkit = voc2007_devkit(); 9 | 10 | switch usage 11 | case {'train'} 12 | dataset.imdb_train = { imdb_from_voc(devkit, 'test', '2007', use_flip) }; 13 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x, 'with_self_proposal', true, 'extension', extension), dataset.imdb_train, 'UniformOutput', false); 14 | case {'test'} 15 | dataset.imdb_test = imdb_from_voc(devkit, 'test', '2007', use_flip); 16 | dataset.roidb_test = dataset.imdb_test.roidb_func(dataset.imdb_test, 'with_self_proposal', true, 'extension', extension); 17 | otherwise 18 | error('usage = ''train'' or ''test'''); 19 | end 20 | 21 | end -------------------------------------------------------------------------------- /utils/active_caffe_mex.m: -------------------------------------------------------------------------------- 1 | function active_caffe_mex(gpu_id, caffe_version) 2 | % active_caffe_mex(gpu_id, caffe_version) 3 | % -------------------------------------------------------- 4 | % Faster R-CNN 5 | % Copyright (c) 2015, Shaoqing Ren 6 | % Licensed under The MIT License [see LICENSE for details] 7 | % -------------------------------------------------------- 8 | 9 | % set gpu in matlab 10 | gpuDevice(gpu_id); 11 | 12 | if ~exist('caffe_version', 'var') || isempty(caffe_version) 13 | caffe_version = 'caffe'; 14 | end 15 | cur_dir = pwd; 16 | caffe_dir = fullfile(pwd, 'external', 'caffe', 'matlab', caffe_version); 17 | 18 | if ~exist(caffe_dir, 'dir') 19 | warning('Specified caffe folder (%s) is not exist, change to default one (%s)', ... 20 | caffe_dir, fullfile(pwd, 'external', 'caffe', 'matlab')); 21 | caffe_dir = fullfile(pwd, 'external', 'caffe', 'matlab'); 22 | end 23 | 24 | addpath(genpath(caffe_dir)); 25 | cd(caffe_dir); 26 | caffe.set_device(gpu_id-1); 27 | cd(cur_dir); 28 | end 29 | -------------------------------------------------------------------------------- /experiments/+Dataset/voc0712_trainval_sp.m: -------------------------------------------------------------------------------- 1 | function dataset = voc0712_trainval_sp(dataset, usage, use_flip, extension) 2 | % Pascal voc 0712 trainval set with *pre-computed* RPN proposals (trained with ResNet50 or ResNet101) 3 | % extension = "resnet50" or "resnet101" for specifying pre-computed RPN proposals 4 | % set opts.imdb_train opts.roidb_train 5 | 6 | % change to point to your devkit install 7 | devkit2007 = voc2007_devkit(); 8 | devkit2012 = voc2012_devkit(); 9 | 10 | switch usage 11 | case {'train'} 12 | dataset.imdb_train = { imdb_from_voc(devkit2007, 'trainval', '2007', use_flip), ... 13 | imdb_from_voc(devkit2012, 'trainval', '2012', use_flip)}; 14 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x, 'with_self_proposal', true, 'extension', extension), dataset.imdb_train, 'UniformOutput', false); 15 | case {'test'} 16 | error('only supports one source test currently'); 17 | otherwise 18 | error('usage = ''train'' or ''test'''); 19 | end 20 | 21 | end -------------------------------------------------------------------------------- /functions/rfcn/rfcn_bbox_transform.m: -------------------------------------------------------------------------------- 1 | function [regression_label] = rfcn_bbox_transform(ex_boxes, gt_boxes) 2 | % [regression_label] = rfcn_bbox_transform(ex_boxes, gt_boxes) 3 | % -------------------------------------------------------- 4 | % R-FCN implementation 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | % Copyright (c) 2016, Jifeng Dai 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | ex_widths = ex_boxes(:, 3) - ex_boxes(:, 1) + 1; 11 | ex_heights = ex_boxes(:, 4) - ex_boxes(:, 2) + 1; 12 | ex_ctr_x = ex_boxes(:, 1) + 0.5 * (ex_widths - 1); 13 | ex_ctr_y = ex_boxes(:, 2) + 0.5 * (ex_heights - 1); 14 | 15 | gt_widths = gt_boxes(:, 3) - gt_boxes(:, 1) + 1; 16 | gt_heights = gt_boxes(:, 4) - gt_boxes(:, 2) + 1; 17 | gt_ctr_x = gt_boxes(:, 1) + 0.5 * (gt_widths - 1); 18 | gt_ctr_y = gt_boxes(:, 2) + 0.5 * (gt_heights - 1); 19 | 20 | targets_dx = (gt_ctr_x - ex_ctr_x) ./ (ex_widths+eps); 21 | targets_dy = (gt_ctr_y - ex_ctr_y) ./ (ex_heights+eps); 22 | targets_dw = log(gt_widths ./ ex_widths); 23 | targets_dh = log(gt_heights ./ ex_heights); 24 | 25 | regression_label = [targets_dx, targets_dy, targets_dw, targets_dh]; 26 | end -------------------------------------------------------------------------------- /startup.m: -------------------------------------------------------------------------------- 1 | function startup() 2 | % startup() 3 | % -------------------------------------------------------- 4 | % R-FCN implementation 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | % Copyright (c) 2016, Jifeng Dai 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | curdir = fileparts(mfilename('fullpath')); 11 | addpath(genpath(fullfile(curdir, 'utils'))); 12 | addpath(genpath(fullfile(curdir, 'functions'))); 13 | addpath(genpath(fullfile(curdir, 'bin'))); 14 | addpath(genpath(fullfile(curdir, 'experiments'))); 15 | addpath(genpath(fullfile(curdir, 'imdb'))); 16 | 17 | mkdir_if_missing(fullfile(curdir, 'datasets')); 18 | 19 | mkdir_if_missing(fullfile(curdir, 'external')); 20 | 21 | caffe_path = fullfile(curdir, 'external', 'caffe', 'matlab'); 22 | if exist(caffe_path, 'dir') == 0 23 | error('matcaffe is missing from external/caffe/matlab; See README.md'); 24 | end 25 | addpath(genpath(caffe_path)); 26 | 27 | mkdir_if_missing(fullfile(curdir, 'imdb', 'cache')); 28 | 29 | mkdir_if_missing(fullfile(curdir, 'output')); 30 | 31 | mkdir_if_missing(fullfile(curdir, 'models')); 32 | 33 | fprintf('rfcn startup done\n'); 34 | end 35 | -------------------------------------------------------------------------------- /utils/prep_im_for_blob.m: -------------------------------------------------------------------------------- 1 | function [im, im_scale] = prep_im_for_blob(im, im_means, target_size, max_size) 2 | im = single(im); 3 | 4 | if ~isa(im, 'gpuArray') 5 | try 6 | im = bsxfun(@minus, im, im_means); 7 | catch 8 | im_means = imresize(im_means, [size(im, 1), size(im, 2)], 'bilinear', 'antialiasing', false); 9 | im = bsxfun(@minus, im, im_means); 10 | end 11 | im_scale = prep_im_for_blob_size(size(im), target_size, max_size); 12 | 13 | target_size = round([size(im, 1), size(im, 2)] * im_scale); 14 | im = imresize(im, target_size, 'bilinear', 'antialiasing', false); 15 | else 16 | % for im as gpuArray 17 | try 18 | im = bsxfun(@minus, im, im_means); 19 | catch 20 | im_means_scale = max(double(size(im, 1)) / size(im_means, 1), double(size(im, 2)) / size(im_means, 2)); 21 | im_means = imresize(im_means, im_means_scale); 22 | y_start = floor((size(im_means, 1) - size(im, 1)) / 2) + 1; 23 | x_start = floor((size(im_means, 2) - size(im, 2)) / 2) + 1; 24 | im_means = im_means(y_start:(y_start+size(im, 1)-1), x_start:(x_start+size(im, 2)-1)); 25 | im = bsxfun(@minus, im, im_means); 26 | end 27 | 28 | im_scale = prep_im_for_blob_size(size(im), target_size, max_size); 29 | im = imresize(im, im_scale); 30 | end 31 | end -------------------------------------------------------------------------------- /utils/boxoverlap.m: -------------------------------------------------------------------------------- 1 | function o = boxoverlap(a, b) 2 | % Compute the symmetric intersection over union overlap between a set of 3 | % bounding boxes in a and a single bounding box in b. 4 | % 5 | % a a matrix where each row specifies a bounding box 6 | % b a matrix where each row specifies a bounding box 7 | 8 | % AUTORIGHTS 9 | % ------------------------------------------------------- 10 | % Copyright (C) 2011-2012 Ross Girshick 11 | % Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick 12 | % 13 | % This file is part of the voc-releaseX code 14 | % (http://people.cs.uchicago.edu/~rbg/latent/) 15 | % and is available under the terms of an MIT-like license 16 | % provided in COPYING. Please retain this notice and 17 | % COPYING if you use this file (or a portion of it) in 18 | % your project. 19 | % ------------------------------------------------------- 20 | 21 | o = cell(1, size(b, 1)); 22 | for i = 1:size(b, 1) 23 | x1 = max(a(:,1), b(i,1)); 24 | y1 = max(a(:,2), b(i,2)); 25 | x2 = min(a(:,3), b(i,3)); 26 | y2 = min(a(:,4), b(i,4)); 27 | 28 | w = x2-x1+1; 29 | h = y2-y1+1; 30 | inter = w.*h; 31 | aarea = (a(:,3)-a(:,1)+1) .* (a(:,4)-a(:,2)+1); 32 | barea = (b(i,3)-b(i,1)+1) * (b(i,4)-b(i,2)+1); 33 | % intersection over union overlap 34 | o{i} = inter ./ (aarea+barea-inter); 35 | % set invalid entries to 0 overlap 36 | o{i}(w <= 0) = 0; 37 | o{i}(h <= 0) = 0; 38 | end 39 | 40 | o = cell2mat(o); 41 | -------------------------------------------------------------------------------- /functions/rfcn/rfcn_bbox_transform_inv.m: -------------------------------------------------------------------------------- 1 | function [pred_boxes] = rfcn_bbox_transform_inv(boxes, box_deltas) 2 | % [pred_boxes] = rfcn_bbox_transform_inv(boxes, box_deltas) 3 | % -------------------------------------------------------- 4 | % R-FCN implementation 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | % Copyright (c) 2016, Jifeng Dai 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | src_w = double(boxes(:, 3) - boxes(:, 1) + 1); 11 | src_h = double(boxes(:, 4) - boxes(:, 2) + 1); 12 | src_ctr_x = double(boxes(:, 1) + 0.5*(src_w-1)); 13 | src_ctr_y = double(boxes(:, 2) + 0.5*(src_h-1)); 14 | 15 | dst_ctr_x = double(box_deltas(:, 1:4:end)); 16 | dst_ctr_y = double(box_deltas(:, 2:4:end)); 17 | dst_scl_x = double(box_deltas(:, 3:4:end)); 18 | dst_scl_y = double(box_deltas(:, 4:4:end)); 19 | 20 | pred_ctr_x = bsxfun(@plus, bsxfun(@times, dst_ctr_x, src_w), src_ctr_x); 21 | pred_ctr_y = bsxfun(@plus, bsxfun(@times, dst_ctr_y, src_h), src_ctr_y); 22 | pred_w = bsxfun(@times, exp(dst_scl_x), src_w); 23 | pred_h = bsxfun(@times, exp(dst_scl_y), src_h); 24 | pred_boxes = zeros(size(box_deltas), 'single'); 25 | pred_boxes(:, 1:4:end) = pred_ctr_x - 0.5*(pred_w-1); 26 | pred_boxes(:, 2:4:end) = pred_ctr_y - 0.5*(pred_h-1); 27 | pred_boxes(:, 3:4:end) = pred_ctr_x + 0.5*(pred_w-1); 28 | pred_boxes(:, 4:4:end) = pred_ctr_y + 0.5*(pred_h-1); 29 | end -------------------------------------------------------------------------------- /utils/subsample_images.m: -------------------------------------------------------------------------------- 1 | function [imdbs, roidbs] = subsample_images(imdbs, roidbs, max_num_neg_images, seed) 2 | 3 | if ~exist('seed', 'var') 4 | seed = 6; 5 | end 6 | 7 | % class_num = cellfun(@(x) length(x.class_ids), imdbs, 'UniformOutput', true); 8 | % assert(length(unique(class_num)) == 1); 9 | % class_num = unique(class_num); 10 | 11 | rois = cellfun(@(x) x.rois(:), roidbs, 'UniformOutput', false); 12 | rois_combine = cell2mat(rois(:)); 13 | 14 | % fix the random seed for repeatability 15 | prev_rng = seed_rand(seed); 16 | inds = randperm(length(rois_combine), max_num_neg_images); 17 | inds = sort(inds); 18 | 19 | img_idx_start = 1; 20 | for i = 1:length(imdbs) 21 | imdb_img_num = length(imdbs{i}.image_ids); 22 | img_idx_end = img_idx_start + imdb_img_num - 1; 23 | inds_start = find(inds >= img_idx_start, 1, 'first'); 24 | inds_end = find(inds <= img_idx_end, 1, 'last'); 25 | 26 | inds_sub = inds(inds_start:inds_end); 27 | inds_sub = inds_sub - img_idx_start + 1; 28 | 29 | imdbs{i}.image_ids = imdbs{i}.image_ids(inds_sub); 30 | imdbs{i}.sizes = imdbs{i}.sizes(inds_sub, :); 31 | if isfield(imdbs{i}, 'image_dir') 32 | imdbs{i}.image_at = @(x) ... 33 | sprintf('%s/%s.%s', imdbs{i}.image_dir, imdbs{i}.image_ids{x}, imdbs{i}.extension); 34 | else 35 | imdbs{i}.image_at = @(x) ... 36 | sprintf('%s/%s.%s', imdbs{i}.imagedir, imdbs{i}.image_ids{x}, imdbs{i}.extension); 37 | end 38 | roidbs{i}.rois = roidbs{i}.rois(inds_sub); 39 | 40 | img_idx_start = img_idx_start + imdb_img_num; 41 | end 42 | 43 | % restore previous rng 44 | rng(prev_rng); 45 | 46 | end -------------------------------------------------------------------------------- /functions/nms/nms_multiclass.m: -------------------------------------------------------------------------------- 1 | function picks = nms_multiclass(boxes, overlap) 2 | % top = nms(boxes, overlap) 3 | % Non-maximum suppression. (FAST VERSION) 4 | % Greedily select high-scoring detections and skip detections 5 | % that are significantly covered by a previously selected 6 | % detection. 7 | % 8 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m), 9 | % but an inner loop has been eliminated to significantly speed it 10 | % up in the case of a large number of boxes 11 | 12 | % Copyright (C) 2011-12 by Tomasz Malisiewicz 13 | % All rights reserved. 14 | % 15 | % This file is part of the Exemplar-SVM library and is made 16 | % available under the terms of the MIT license (see COPYING file). 17 | % Project homepage: https://github.com/quantombone/exemplarsvm 18 | 19 | 20 | if isempty(boxes) 21 | picks = {}; 22 | return; 23 | end 24 | 25 | if size(boxes, 1) < 10000 26 | picks = nms_multiclass_mex(double(boxes), double(overlap)); 27 | return; 28 | end 29 | 30 | x1 = boxes(:,1); 31 | y1 = boxes(:,2); 32 | x2 = boxes(:,3); 33 | y2 = boxes(:,4); 34 | 35 | area = (x2-x1+1) .* (y2-y1+1); 36 | 37 | picks = cell(size(boxes, 2)-4, 1); 38 | for iS = 5:size(boxes, 2) 39 | s = boxes(:,iS); 40 | [~, I] = sort(s); 41 | 42 | pick = s*0; 43 | counter = 1; 44 | while ~isempty(I) 45 | last = length(I); 46 | i = I(last); 47 | pick(counter) = i; 48 | counter = counter + 1; 49 | 50 | xx1 = max(x1(i), x1(I(1:last-1))); 51 | yy1 = max(y1(i), y1(I(1:last-1))); 52 | xx2 = min(x2(i), x2(I(1:last-1))); 53 | yy2 = min(y2(i), y2(I(1:last-1))); 54 | 55 | w = max(0.0, xx2-xx1+1); 56 | h = max(0.0, yy2-yy1+1); 57 | 58 | inter = w.*h; 59 | o = inter ./ (area(i) + area(I(1:last-1)) - inter); 60 | 61 | I = I(o<=overlap); 62 | end 63 | 64 | pick = pick(1:(counter-1)); 65 | picks{iS-4} = pick; 66 | end 67 | -------------------------------------------------------------------------------- /functions/nms/nvmex.m: -------------------------------------------------------------------------------- 1 | function nvmex(cuFileName, outDir) 2 | %NVMEX Compiles and links a CUDA file for MATLAB usage 3 | % NVMEX(FILENAME) will create a MEX-File (also with the name FILENAME) by 4 | % invoking the CUDA compiler, nvcc, and then linking with the MEX 5 | % function in MATLAB. 6 | 7 | if ispc % Windows 8 | Host_Compiler_Location = '-ccbin "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\x86_amd64"'; 9 | CUDA_INC_Location = ['"' getenv('CUDA_PATH') '\include"']; 10 | CUDA_SAMPLES_Location =['"' getenv('NVCUDASAMPLES6_5_ROOT') '\common\inc"']; 11 | PIC_Option = ''; 12 | if ( strcmp(computer('arch'),'win32') ==1) 13 | machine_str = ' --machine 32 '; 14 | CUDA_LIB_Location = ['"' getenv('CUDA_PATH') '\lib\Win32"']; 15 | elseif ( strcmp(computer('arch'),'win64') ==1) 16 | machine_str = ' --machine 64 '; 17 | CUDA_LIB_Location = ['"' getenv('CUDA_PATH') '\lib\x64"']; 18 | end 19 | NVCC = 'nvcc'; 20 | else % Mac and Linux (assuming gcc is on the path) 21 | CUDA_INC_Location = '/usr/local/cuda/include'; 22 | CUDA_SAMPLES_Location = '/usr/local/cuda/samples/common/inc'; 23 | Host_Compiler_Location = ' '; 24 | PIC_Option = ' --compiler-options -fPIC '; 25 | machine_str = []; 26 | CUDA_LIB_Location = '/usr/local/cuda/lib64'; 27 | NVCC = '/usr/local/cuda/bin/nvcc'; 28 | end 29 | % !!! End of things to modify !!! 30 | [~, filename] = fileparts(cuFileName); 31 | nvccCommandLine = [ ... 32 | NVCC ' --compile ' Host_Compiler_Location ' ' ... 33 | '-o ' filename '.o ' ... 34 | machine_str PIC_Option ... 35 | ' -I' '"' matlabroot '/extern/include "' ... 36 | ' -I' CUDA_INC_Location ' -I' CUDA_SAMPLES_Location ... 37 | ' "' cuFileName '" ' 38 | ]; 39 | mexCommandLine = ['mex ' '-outdir ' outDir ' ' filename '.o' ' -L' CUDA_LIB_Location ' -lcudart']; 40 | disp(nvccCommandLine); 41 | warning off; 42 | status = system(nvccCommandLine); 43 | warning on; 44 | if status < 0 45 | error 'Error invoking nvcc'; 46 | end 47 | disp(mexCommandLine); 48 | eval(mexCommandLine); 49 | end 50 | -------------------------------------------------------------------------------- /functions/nms/nms.m: -------------------------------------------------------------------------------- 1 | function pick = nms(boxes, overlap, use_gpu) 2 | % top = nms(boxes, overlap) 3 | % Non-maximum suppression. (FAST VERSION) 4 | % Greedily select high-scoring detections and skip detections 5 | % that are significantly covered by a previously selected 6 | % detection. 7 | % 8 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m), 9 | % but an inner loop has been eliminated to significantly speed it 10 | % up in the case of a large number of boxes 11 | 12 | % Copyright (C) 2011-12 by Tomasz Malisiewicz 13 | % All rights reserved. 14 | % 15 | % This file is part of the Exemplar-SVM library and is made 16 | % available under the terms of the MIT license (see COPYING file). 17 | % Project homepage: https://github.com/quantombone/exemplarsvm 18 | 19 | 20 | if isempty(boxes) 21 | pick = []; 22 | return; 23 | end 24 | 25 | if ~exist('use_gpu', 'var') 26 | use_gpu = false; 27 | end 28 | 29 | if use_gpu 30 | s = boxes(:, end); 31 | if ~issorted(s(end:-1:1)) 32 | [~, I] = sort(s, 'descend'); 33 | boxes = boxes(I, :); 34 | pick = nms_gpu_mex(single(boxes)', double(overlap)); 35 | pick = I(pick); 36 | else 37 | pick = nms_gpu_mex(single(boxes)', double(overlap)); 38 | end 39 | return; 40 | end 41 | 42 | if size(boxes, 1) < 1000000 43 | pick = nms_mex(double(boxes), double(overlap)); 44 | return; 45 | end 46 | 47 | x1 = boxes(:,1); 48 | y1 = boxes(:,2); 49 | x2 = boxes(:,3); 50 | y2 = boxes(:,4); 51 | s = boxes(:,end); 52 | 53 | area = (x2-x1+1) .* (y2-y1+1); 54 | [vals, I] = sort(s); 55 | 56 | pick = s*0; 57 | counter = 1; 58 | while ~isempty(I) 59 | last = length(I); 60 | i = I(last); 61 | pick(counter) = i; 62 | counter = counter + 1; 63 | 64 | xx1 = max(x1(i), x1(I(1:last-1))); 65 | yy1 = max(y1(i), y1(I(1:last-1))); 66 | xx2 = min(x2(i), x2(I(1:last-1))); 67 | yy2 = min(y2(i), y2(I(1:last-1))); 68 | 69 | w = max(0.0, xx2-xx1+1); 70 | h = max(0.0, yy2-yy1+1); 71 | 72 | inter = w.*h; 73 | o = inter ./ (area(i) + area(I(1:last-1)) - inter); 74 | 75 | I = I(find(o<=overlap)); 76 | end 77 | 78 | pick = pick(1:(counter-1)); 79 | -------------------------------------------------------------------------------- /utils/subsample_images_per_class.m: -------------------------------------------------------------------------------- 1 | function [imdbs, roidbs] = subsample_images_per_class(imdbs, roidbs, max_per_class_image_num, seed) 2 | 3 | if ~exist('seed', 'var') 4 | seed = 6; 5 | end 6 | 7 | class_num = cellfun(@(x) length(x.class_ids), imdbs, 'UniformOutput', true); 8 | assert(length(unique(class_num)) == 1); 9 | class_num = unique(class_num); 10 | 11 | rois = cellfun(@(x) x.rois, roidbs, 'UniformOutput', false); 12 | rois_combine = cell2mat(rois(:)); 13 | rois_combine_class = arrayfun(@(x) x.class, rois_combine, 'UniformOutput', false); 14 | 15 | %% select images with max_image_num 16 | 17 | % fix the random seed for repeatability 18 | prev_rng = seed_rand(seed); 19 | inds = cell(class_num, 1); 20 | rois_combine_length = length(rois_combine); 21 | valid_idxs = cell(class_num, 1); 22 | parfor i = 1:class_num 23 | valid_idxs{i} = cellfun(@(x) any(x == i), rois_combine_class, 'UniformOutput', false); 24 | valid_idxs{i} = cell2mat(valid_idxs{i}); 25 | end 26 | 27 | for i = 1:class_num 28 | valid_num = sum(valid_idxs{i}); 29 | 30 | num = min(valid_num, max_per_class_image_num); 31 | inds{i} = 1:rois_combine_length; 32 | inds{i} = inds{i}(valid_idxs{i}); 33 | inds{i} = inds{i}(randperm(length(inds{i}), num)); 34 | end 35 | 36 | inds = cell2mat(inds')'; 37 | inds = unique(inds); 38 | 39 | % restore previous rng 40 | rng(prev_rng); 41 | 42 | img_idx_start = 1; 43 | for i = 1:length(imdbs) 44 | imdb_img_num = length(imdbs{i}.image_ids); 45 | img_idx_end = img_idx_start + imdb_img_num - 1; 46 | inds_start = find(inds >= img_idx_start, 1, 'first'); 47 | inds_end = find(inds <= img_idx_end, 1, 'last'); 48 | 49 | inds_sub = inds(inds_start:inds_end); 50 | inds_sub = inds_sub - img_idx_start + 1; 51 | 52 | imdbs{i}.image_ids = imdbs{i}.image_ids(inds_sub); 53 | imdbs{i}.sizes = imdbs{i}.sizes(inds_sub, :); 54 | if isfield(imdbs{i}, 'image_dir') 55 | imdbs{i}.image_at = @(x) ... 56 | sprintf('%s/%s.%s', imdbs{i}.image_dir, imdbs{i}.image_ids{x}, imdbs{i}.extension); 57 | else 58 | imdbs{i}.image_at = @(x) ... 59 | sprintf('%s/%s.%s', imdbs{i}.imagedir, imdbs{i}.image_ids{x}, imdbs{i}.extension); 60 | end 61 | roidbs{i}.rois = roidbs{i}.rois(inds_sub); 62 | 63 | img_idx_start = img_idx_start + imdb_img_num; 64 | end 65 | 66 | 67 | -------------------------------------------------------------------------------- /experiments/script_rfcn_VOC0712_ResNet50_OHEM_ss.m: -------------------------------------------------------------------------------- 1 | function script_rfcn_VOC0712_ResNet50_OHEM_ss() 2 | % script_rfcn_VOC0712_ResNet50_OHEM_ss() 3 | % RFCN training and testing with OHEM using ResNet50 model and selective 4 | % search proposals 5 | % -------------------------------------------------------- 6 | % R-FCN implementation 7 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 8 | % Copyright (c) 2016, Jifeng Dai 9 | % Licensed under The MIT License [see LICENSE for details] 10 | % -------------------------------------------------------- 11 | 12 | clc; 13 | clear mex; 14 | clear is_valid_handle; % to clear init_key 15 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); 16 | %% -------------------- CONFIG -------------------- 17 | opts.caffe_version = 'caffe_rfcn'; 18 | opts.gpu_id = auto_select_gpu; 19 | active_caffe_mex(opts.gpu_id, opts.caffe_version); 20 | 21 | % model 22 | model = Model.ResNet50_for_RFCN_VOC0712_OHEM(); 23 | % cache name 24 | opts.cache_name = 'rfcn_VOC0712_ResNet50_OHEM_ss'; 25 | % config 26 | conf = rfcn_config_ohem('image_means', model.mean_image); 27 | % train/test data 28 | fprintf('Loading dataset...') 29 | dataset = []; 30 | dataset = Dataset.voc0712_trainval_ss(dataset, 'train', conf.use_flipped); 31 | dataset = Dataset.voc2007_test_ss(dataset, 'test', false); 32 | fprintf('Done.\n'); 33 | 34 | % do validation, or not 35 | opts.do_val = true; 36 | 37 | %% -------------------- TRAINING -------------------- 38 | 39 | opts.rfcn_model = rfcn_train(conf, dataset.imdb_train, dataset.roidb_train, ... 40 | 'do_val', opts.do_val, ... 41 | 'imdb_val', dataset.imdb_test, ... 42 | 'roidb_val', dataset.roidb_test, ... 43 | 'solver_def_file', model.solver_def_file, ... 44 | 'net_file', model.net_file, ... 45 | 'cache_name', opts.cache_name, ... 46 | 'caffe_version', opts.caffe_version); 47 | assert(exist(opts.rfcn_model, 'file') ~= 0, 'not found trained model'); 48 | 49 | %% -------------------- TESTING -------------------- 50 | rfcn_test(conf, dataset.imdb_test, dataset.roidb_test, ... 51 | 'net_def_file', model.test_net_def_file, ... 52 | 'net_file', opts.rfcn_model, ... 53 | 'cache_name', opts.cache_name,... 54 | 'ignore_cache', true); 55 | 56 | end 57 | -------------------------------------------------------------------------------- /experiments/script_rfcn_VOC0712_ResNet50_rpn.m: -------------------------------------------------------------------------------- 1 | function script_rfcn_VOC0712_ResNet50_rpn() 2 | % script_rfcn_VOC0712_ResNet50_rpn() 3 | % RFCN training and testing with OHEM using ResNet50 model and RPN proposals 4 | % -------------------------------------------------------- 5 | % R-FCN implementation 6 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 7 | % Copyright (c) 2016, Jifeng Dai 8 | % Licensed under The MIT License [see LICENSE for details] 9 | % -------------------------------------------------------- 10 | 11 | 12 | clc; 13 | clear mex; 14 | clear is_valid_handle; % to clear init_key 15 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); 16 | %% -------------------- CONFIG -------------------- 17 | opts.caffe_version = 'caffe_rfcn'; 18 | opts.gpu_id = auto_select_gpu; 19 | active_caffe_mex(opts.gpu_id, opts.caffe_version); 20 | 21 | % model 22 | model = Model.ResNet50_for_RFCN_VOC0712(); 23 | % cache name 24 | opts.cache_name = 'rfcn_VOC0712_ResNet50_rpn_resnet50'; 25 | % config 26 | conf = rfcn_config_simple('image_means', model.mean_image); 27 | % train/test data 28 | fprintf('Loading dataset...') 29 | dataset = []; 30 | dataset = Dataset.voc0712_trainval_sp(dataset, 'train', conf.use_flipped, 'resnet50'); 31 | dataset = Dataset.voc2007_test_sp(dataset, 'test', false, 'resnet50'); 32 | fprintf('Done.\n'); 33 | 34 | % do validation, or not 35 | opts.do_val = true; 36 | 37 | %% -------------------- TRAINING -------------------- 38 | 39 | opts.rfcn_model = rfcn_train(conf, dataset.imdb_train, dataset.roidb_train, ... 40 | 'do_val', opts.do_val, ... 41 | 'imdb_val', dataset.imdb_test, ... 42 | 'roidb_val', dataset.roidb_test, ... 43 | 'solver_def_file', model.solver_def_file, ... 44 | 'net_file', model.net_file, ... 45 | 'cache_name', opts.cache_name, ... 46 | 'caffe_version', opts.caffe_version); 47 | assert(exist(opts.rfcn_model, 'file') ~= 0, 'not found trained model'); 48 | 49 | %% -------------------- TESTING -------------------- 50 | rfcn_test(conf, dataset.imdb_test, dataset.roidb_test, ... 51 | 'net_def_file', model.test_net_def_file, ... 52 | 'net_file', opts.rfcn_model, ... 53 | 'cache_name', opts.cache_name,... 54 | 'ignore_cache', true); 55 | 56 | end 57 | -------------------------------------------------------------------------------- /experiments/script_rfcn_VOC0712_ResNet101_OHEM_ss.m: -------------------------------------------------------------------------------- 1 | function script_rfcn_VOC0712_ResNet101_OHEM_ss() 2 | % script_rfcn_VOC0712_ResNet101_OHEM_ss() 3 | % RFCN training and testing with OHEM using ResNet101 model and selective 4 | % search proposals 5 | % -------------------------------------------------------- 6 | % R-FCN implementation 7 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 8 | % Copyright (c) 2016, Jifeng Dai 9 | % Licensed under The MIT License [see LICENSE for details] 10 | % -------------------------------------------------------- 11 | 12 | clc; 13 | clear mex; 14 | clear is_valid_handle; % to clear init_key 15 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); 16 | %% -------------------- CONFIG -------------------- 17 | opts.caffe_version = 'caffe_rfcn'; 18 | opts.gpu_id = auto_select_gpu; 19 | active_caffe_mex(opts.gpu_id, opts.caffe_version); 20 | 21 | % model 22 | model = Model.ResNet101_for_RFCN_VOC0712_OHEM(); 23 | % cache name 24 | opts.cache_name = 'rfcn_VOC0712_ResNet101_OHEM_ss'; 25 | % config 26 | conf = rfcn_config_ohem('image_means', model.mean_image); 27 | % train/test data 28 | fprintf('Loading dataset...') 29 | dataset = []; 30 | dataset = Dataset.voc0712_trainval_ss(dataset, 'train', conf.use_flipped); 31 | dataset = Dataset.voc2007_test_ss(dataset, 'test', false); 32 | fprintf('Done.\n'); 33 | 34 | % do validation, or not 35 | opts.do_val = true; 36 | 37 | %% -------------------- TRAINING -------------------- 38 | 39 | opts.rfcn_model = rfcn_train(conf, dataset.imdb_train, dataset.roidb_train, ... 40 | 'do_val', opts.do_val, ... 41 | 'imdb_val', dataset.imdb_test, ... 42 | 'roidb_val', dataset.roidb_test, ... 43 | 'solver_def_file', model.solver_def_file, ... 44 | 'net_file', model.net_file, ... 45 | 'cache_name', opts.cache_name, ... 46 | 'caffe_version', opts.caffe_version); 47 | assert(exist(opts.rfcn_model, 'file') ~= 0, 'not found trained model'); 48 | 49 | %% -------------------- TESTING -------------------- 50 | rfcn_test(conf, dataset.imdb_test, dataset.roidb_test, ... 51 | 'net_def_file', model.test_net_def_file, ... 52 | 'net_file', opts.rfcn_model, ... 53 | 'cache_name', opts.cache_name,... 54 | 'ignore_cache', true); 55 | 56 | end 57 | -------------------------------------------------------------------------------- /experiments/script_rfcn_VOC0712_ResNet101_rpn.m: -------------------------------------------------------------------------------- 1 | function script_rfcn_VOC0712_ResNet101_rpn() 2 | % script_rfcn_VOC0712_ResNet101_rpn() 3 | % RFCN training and testing with OHEM using ResNet101 model and RPN 4 | % proposals 5 | % -------------------------------------------------------- 6 | % R-FCN implementation 7 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 8 | % Copyright (c) 2016, Jifeng Dai 9 | % Licensed under The MIT License [see LICENSE for details] 10 | % -------------------------------------------------------- 11 | 12 | 13 | clc; 14 | clear mex; 15 | clear is_valid_handle; % to clear init_key 16 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); 17 | %% -------------------- CONFIG -------------------- 18 | opts.caffe_version = 'caffe_rfcn'; 19 | opts.gpu_id = auto_select_gpu; 20 | active_caffe_mex(opts.gpu_id, opts.caffe_version); 21 | 22 | % model 23 | model = Model.ResNet101_for_RFCN_VOC0712(); 24 | % cache name 25 | opts.cache_name = 'rfcn_VOC0712_ResNet101_rpn_resnet101'; 26 | % config 27 | conf = rfcn_config_simple('image_means', model.mean_image); 28 | % train/test data 29 | fprintf('Loading dataset...') 30 | dataset = []; 31 | dataset = Dataset.voc0712_trainval_sp(dataset, 'train', conf.use_flipped, 'resnet101'); 32 | dataset = Dataset.voc2007_test_sp(dataset, 'test', false, 'resnet101'); 33 | fprintf('Done.\n'); 34 | 35 | % do validation, or not 36 | opts.do_val = true; 37 | 38 | %% -------------------- TRAINING -------------------- 39 | 40 | opts.rfcn_model = rfcn_train(conf, dataset.imdb_train, dataset.roidb_train, ... 41 | 'do_val', opts.do_val, ... 42 | 'imdb_val', dataset.imdb_test, ... 43 | 'roidb_val', dataset.roidb_test, ... 44 | 'solver_def_file', model.solver_def_file, ... 45 | 'net_file', model.net_file, ... 46 | 'cache_name', opts.cache_name, ... 47 | 'caffe_version', opts.caffe_version); 48 | assert(exist(opts.rfcn_model, 'file') ~= 0, 'not found trained model'); 49 | 50 | %% -------------------- TESTING -------------------- 51 | rfcn_test(conf, dataset.imdb_test, dataset.roidb_test, ... 52 | 'net_def_file', model.test_net_def_file, ... 53 | 'net_file', opts.rfcn_model, ... 54 | 'cache_name', opts.cache_name,... 55 | 'ignore_cache', true); 56 | 57 | end 58 | -------------------------------------------------------------------------------- /experiments/script_rfcn_VOC0712_ResNet50_OHEM_rpn.m: -------------------------------------------------------------------------------- 1 | function script_rfcn_VOC0712_ResNet50_OHEM_rpn() 2 | % script_rfcn_VOC0712_ResNet50_OHEM_rpn() 3 | % RFCN training and testing with OHEM using ResNet50 model and RPN proposals 4 | % -------------------------------------------------------- 5 | % R-FCN implementation 6 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 7 | % Copyright (c) 2016, Jifeng Dai 8 | % Licensed under The MIT License [see LICENSE for details] 9 | % -------------------------------------------------------- 10 | 11 | 12 | clc; 13 | clear mex; 14 | clear is_valid_handle; % to clear init_key 15 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); 16 | %% -------------------- CONFIG -------------------- 17 | opts.caffe_version = 'caffe_rfcn'; 18 | opts.gpu_id = auto_select_gpu; 19 | active_caffe_mex(opts.gpu_id, opts.caffe_version); 20 | 21 | % model 22 | model = Model.ResNet50_for_RFCN_VOC0712_OHEM(); 23 | % cache name 24 | opts.cache_name = 'rfcn_VOC0712_ResNet50_OHEM_rpn_resnet50'; 25 | % config 26 | conf = rfcn_config_ohem('image_means', model.mean_image); 27 | % train/test data 28 | fprintf('Loading dataset...') 29 | dataset = []; 30 | dataset = Dataset.voc0712_trainval_sp(dataset, 'train', conf.use_flipped, 'resnet50'); 31 | dataset = Dataset.voc2007_test_sp(dataset, 'test', false, 'resnet50'); 32 | fprintf('Done.\n'); 33 | 34 | % do validation, or not 35 | opts.do_val = true; 36 | 37 | %% -------------------- TRAINING -------------------- 38 | 39 | opts.rfcn_model = rfcn_train(conf, dataset.imdb_train, dataset.roidb_train, ... 40 | 'do_val', opts.do_val, ... 41 | 'imdb_val', dataset.imdb_test, ... 42 | 'roidb_val', dataset.roidb_test, ... 43 | 'solver_def_file', model.solver_def_file, ... 44 | 'net_file', model.net_file, ... 45 | 'cache_name', opts.cache_name, ... 46 | 'caffe_version', opts.caffe_version); 47 | assert(exist(opts.rfcn_model, 'file') ~= 0, 'not found trained model'); 48 | 49 | %% -------------------- TESTING -------------------- 50 | rfcn_test(conf, dataset.imdb_test, dataset.roidb_test, ... 51 | 'net_def_file', model.test_net_def_file, ... 52 | 'net_file', opts.rfcn_model, ... 53 | 'cache_name', opts.cache_name,... 54 | 'ignore_cache', true); 55 | 56 | end 57 | -------------------------------------------------------------------------------- /experiments/script_rfcn_VOC0712_ResNet101_OHEM_rpn.m: -------------------------------------------------------------------------------- 1 | function script_rfcn_VOC0712_ResNet101_OHEM_rpn() 2 | % script_rfcn_VOC0712_ResNet101_OHEM_rpn() 3 | % RFCN training and testing with OHEM using ResNet101 model and RPN 4 | % proposals 5 | % -------------------------------------------------------- 6 | % R-FCN implementation 7 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 8 | % Copyright (c) 2016, Jifeng Dai 9 | % Licensed under The MIT License [see LICENSE for details] 10 | % -------------------------------------------------------- 11 | 12 | 13 | clc; 14 | clear mex; 15 | clear is_valid_handle; % to clear init_key 16 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); 17 | %% -------------------- CONFIG -------------------- 18 | opts.caffe_version = 'caffe_rfcn'; 19 | opts.gpu_id = auto_select_gpu; 20 | active_caffe_mex(opts.gpu_id, opts.caffe_version); 21 | 22 | % model 23 | model = Model.ResNet101_for_RFCN_VOC0712_OHEM(); 24 | % cache name 25 | opts.cache_name = 'rfcn_VOC0712_ResNet101_OHEM_rpn_resnet101'; 26 | % config 27 | conf = rfcn_config_ohem('image_means', model.mean_image); 28 | % train/test data 29 | fprintf('Loading dataset...') 30 | dataset = []; 31 | dataset = Dataset.voc0712_trainval_sp(dataset, 'train', conf.use_flipped, 'resnet101'); 32 | dataset = Dataset.voc2007_test_sp(dataset, 'test', false, 'resnet101'); 33 | fprintf('Done.\n'); 34 | 35 | % do validation, or not 36 | opts.do_val = true; 37 | 38 | %% -------------------- TRAINING -------------------- 39 | 40 | opts.rfcn_model = rfcn_train(conf, dataset.imdb_train, dataset.roidb_train, ... 41 | 'do_val', opts.do_val, ... 42 | 'imdb_val', dataset.imdb_test, ... 43 | 'roidb_val', dataset.roidb_test, ... 44 | 'solver_def_file', model.solver_def_file, ... 45 | 'net_file', model.net_file, ... 46 | 'cache_name', opts.cache_name, ... 47 | 'caffe_version', opts.caffe_version); 48 | assert(exist(opts.rfcn_model, 'file') ~= 0, 'not found trained model'); 49 | 50 | %% -------------------- TESTING -------------------- 51 | rfcn_test(conf, dataset.imdb_test, dataset.roidb_test, ... 52 | 'net_def_file', model.test_net_def_file, ... 53 | 'net_file', opts.rfcn_model, ... 54 | 'cache_name', opts.cache_name,... 55 | 'ignore_cache', true); 56 | 57 | end 58 | -------------------------------------------------------------------------------- /utils/showboxes.m: -------------------------------------------------------------------------------- 1 | function showboxes(im, boxes, legends, color_conf) 2 | % Draw bounding boxes on top of an image. 3 | % showboxes(im, boxes) 4 | % 5 | % ------------------------------------------------------- 6 | 7 | fix_width = 800; 8 | if isa(im, 'gpuArray') 9 | im = gather(im); 10 | end 11 | imsz = size(im); 12 | scale = fix_width / imsz(2); 13 | im = imresize(im, scale); 14 | 15 | if size(boxes{1}, 2) >= 5 16 | boxes = cellfun(@(x) [x(:, 1:4) * scale, x(:, 5)], boxes, 'UniformOutput', false); 17 | else 18 | boxes = cellfun(@(x) x(:, 1:4) * scale, boxes, 'UniformOutput', false); 19 | end 20 | 21 | if ~exist('color_conf', 'var') 22 | color_conf = 'default'; 23 | end 24 | 25 | image(im); 26 | axis image; 27 | axis off; 28 | set(gcf, 'Color', 'white'); 29 | 30 | valid_boxes = cellfun(@(x) ~isempty(x), boxes, 'UniformOutput', true); 31 | valid_boxes_num = sum(valid_boxes); 32 | 33 | if valid_boxes_num > 0 34 | switch color_conf 35 | case 'default' 36 | colors_candidate = colormap('hsv'); 37 | colors_candidate = colors_candidate(1:(floor(size(colors_candidate, 1)/valid_boxes_num)):end, :); 38 | colors_candidate = mat2cell(colors_candidate, ones(size(colors_candidate, 1), 1))'; 39 | colors = cell(size(valid_boxes)); 40 | colors(valid_boxes) = colors_candidate(1:sum(valid_boxes)); 41 | case 'voc' 42 | colors_candidate = colormap('hsv'); 43 | colors_candidate = colors_candidate(1:(floor(size(colors_candidate, 1)/20)):end, :); 44 | colors_candidate = mat2cell(colors_candidate, ones(size(colors_candidate, 1), 1))'; 45 | colors = colors_candidate; 46 | end 47 | 48 | 49 | for i = 1:length(boxes) 50 | if isempty(boxes{i}) 51 | continue; 52 | end 53 | 54 | for j = 1:size(boxes{i}) 55 | box = boxes{i}(j, 1:4); 56 | if size(boxes{i}, 2) >= 5 57 | score = boxes{i}(j, end); 58 | linewidth = 2 + min(max(score, 0), 1) * 2; 59 | rectangle('Position', RectLTRB2LTWH(box), 'LineWidth', linewidth, 'EdgeColor', colors{i}); 60 | label = sprintf('%s : %.3f', legends{i}, score); 61 | text(double(box(1))+2, double(box(2)), label, 'BackgroundColor', 'w'); 62 | else 63 | linewidth = 2; 64 | rectangle('Position', RectLTRB2LTWH(box), 'LineWidth', linewidth, 'EdgeColor', colors{i}); 65 | label = sprintf('%s(%d)', legends{i}, i); 66 | text(double(box(1))+2, double(box(2)), label, 'BackgroundColor', 'w'); 67 | end 68 | end 69 | 70 | end 71 | end 72 | end 73 | 74 | function [ rectsLTWH ] = RectLTRB2LTWH( rectsLTRB ) 75 | %rects (l, t, r, b) to (l, t, w, h) 76 | 77 | rectsLTWH = [rectsLTRB(:, 1), rectsLTRB(:, 2), rectsLTRB(:, 3)-rectsLTRB(:,1)+1, rectsLTRB(:,4)-rectsLTRB(2)+1]; 78 | end 79 | 80 | -------------------------------------------------------------------------------- /functions/rfcn/rfcn_config_ohem.m: -------------------------------------------------------------------------------- 1 | function conf = rfcn_config_ohem(varargin) 2 | % conf = rfcn_config(varargin) 3 | % -------------------------------------------------------- 4 | % R-FCN implementation 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | % Copyright (c) 2016, Jifeng Dai 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | % 10 | ip = inputParser; 11 | 12 | %% training 13 | % whether use gpu 14 | ip.addParamValue('use_gpu', gpuDeviceCount > 0, ... 15 | @islogical); 16 | % Image scales -- the short edge of input image 17 | ip.addParamValue('scales', 600, @ismatrix); 18 | % Max pixel size of a scaled input image 19 | ip.addParamValue('max_size', 1000, @isscalar); 20 | % Images per batch 21 | ip.addParamValue('ims_per_batch', 2, @isscalar); 22 | % Minibatch size, set as -1 if using all the rois 23 | ip.addParamValue('batch_size', -1, @isscalar); 24 | % Fraction of minibatch that is foreground labeled (class > 0), 25 | % which is disabled when batch_size = -1 26 | ip.addParamValue('fg_fraction', -1, @isscalar); 27 | % Overlap threshold for a ROI to be considered foreground (if >= fg_thresh) 28 | ip.addParamValue('fg_thresh', 0.5, @isscalar); 29 | % Overlap threshold for a ROI to be considered background (class = 0 if 30 | % overlap in [bg_thresh_lo, bg_thresh_hi)) 31 | ip.addParamValue('bg_thresh_hi', 0.5, @isscalar); 32 | ip.addParamValue('bg_thresh_lo', 0.0, @isscalar); 33 | % mean image, in RGB order 34 | ip.addParamValue('image_means', 128, @ismatrix); 35 | % Use horizontally-flipped images during training? 36 | ip.addParamValue('use_flipped', true, @islogical); 37 | % Vaild training sample (IoU > bbox_thresh) for bounding box regresion 38 | ip.addParamValue('bbox_thresh', 0.5, @isscalar); 39 | % Whether to perform class agnostic bbox regression 40 | ip.addParamValue('bbox_class_agnostic', true, @islogical); 41 | 42 | % random seed 43 | ip.addParamValue('rng_seed', 6, @isscalar); 44 | 45 | 46 | %% testing 47 | ip.addParamValue('test_scales', 600, @isscalar); 48 | ip.addParamValue('test_max_size', 1000, @isscalar); 49 | ip.addParamValue('test_nms', 0.3, @isscalar); 50 | ip.addParamValue('test_binary', false, @islogical); 51 | 52 | ip.parse(varargin{:}); 53 | conf = ip.Results; 54 | 55 | % if image_means is a file, load it 56 | if ischar(conf.image_means) 57 | s = load(conf.image_means); 58 | s_fieldnames = fieldnames(s); 59 | assert(length(s_fieldnames) == 1); 60 | conf.image_means = s.(s_fieldnames{1}); 61 | end 62 | 63 | end -------------------------------------------------------------------------------- /functions/rfcn/rfcn_config_simple.m: -------------------------------------------------------------------------------- 1 | function conf = rfcn_config_simple(varargin) 2 | % conf = rfcn_config(varargin) 3 | % -------------------------------------------------------- 4 | % R-FCN implementation 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | % Copyright (c) 2016, Jifeng Dai 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | % 10 | ip = inputParser; 11 | 12 | %% training 13 | % whether use gpu 14 | ip.addParamValue('use_gpu', gpuDeviceCount > 0, ... 15 | @islogical); 16 | % Image scales -- the short edge of input image 17 | ip.addParamValue('scales', 600, @ismatrix); 18 | % Max pixel size of a scaled input image 19 | ip.addParamValue('max_size', 1000, @isscalar); 20 | % Images per batch 21 | ip.addParamValue('ims_per_batch', 2, @isscalar); 22 | % Minibatch size, set as -1 if using all the rois 23 | ip.addParamValue('batch_size', 256, @isscalar); 24 | % Fraction of minibatch that is foreground labeled (class > 0), 25 | % which is disabled when batch_size = -1 26 | ip.addParamValue('fg_fraction', 0.25, @isscalar); 27 | % Overlap threshold for a ROI to be considered foreground (if >= fg_thresh) 28 | ip.addParamValue('fg_thresh', 0.5, @isscalar); 29 | % Overlap threshold for a ROI to be considered background (class = 0 if 30 | % overlap in [bg_thresh_lo, bg_thresh_hi)) 31 | ip.addParamValue('bg_thresh_hi', 0.5, @isscalar); 32 | ip.addParamValue('bg_thresh_lo', 0.1, @isscalar); 33 | % mean image, in RGB order 34 | ip.addParamValue('image_means', 128, @ismatrix); 35 | % Use horizontally-flipped images during training? 36 | ip.addParamValue('use_flipped', true, @islogical); 37 | % Vaild training sample (IoU > bbox_thresh) for bounding box regresion 38 | ip.addParamValue('bbox_thresh', 0.5, @isscalar); 39 | % Whether to perform class agnostic bbox regression 40 | ip.addParamValue('bbox_class_agnostic', true, @islogical); 41 | 42 | % random seed 43 | ip.addParamValue('rng_seed', 6, @isscalar); 44 | 45 | 46 | %% testing 47 | ip.addParamValue('test_scales', 600, @isscalar); 48 | ip.addParamValue('test_max_size', 1000, @isscalar); 49 | ip.addParamValue('test_nms', 0.3, @isscalar); 50 | ip.addParamValue('test_binary', false, @islogical); 51 | 52 | ip.parse(varargin{:}); 53 | conf = ip.Results; 54 | 55 | % if image_means is a file, load it 56 | if ischar(conf.image_means) 57 | s = load(conf.image_means); 58 | s_fieldnames = fieldnames(s); 59 | assert(length(s_fieldnames) == 1); 60 | conf.image_means = s.(s_fieldnames{1}); 61 | end 62 | 63 | end -------------------------------------------------------------------------------- /imdb/imdb_eval_voc.m: -------------------------------------------------------------------------------- 1 | function res = imdb_eval_voc(cls, boxes, imdb, cache_name, suffix) 2 | % res = imdb_eval_voc(cls, boxes, imdb, suffix) 3 | % Use the VOCdevkit to evaluate detections specified in boxes 4 | % for class cls against the ground-truth boxes in the image 5 | % database imdb. Results files are saved with an optional 6 | % suffix. 7 | 8 | % AUTORIGHTS 9 | % --------------------------------------------------------- 10 | % Copyright (c) 2014, Ross Girshick 11 | % 12 | % This file is part of the R-CNN code and is available 13 | % under the terms of the Simplified BSD License provided in 14 | % LICENSE. Please retain this notice and LICENSE if you use 15 | % this file (or any portion of it) in your project. 16 | % --------------------------------------------------------- 17 | 18 | % Add a random string ("salt") to the end of the results file name 19 | % to prevent concurrent evaluations from clobbering each other 20 | use_res_salt = true; 21 | % Delete results files after computing APs 22 | rm_res = true; 23 | % comp4 because we use outside data (ILSVRC2012) 24 | comp_id = 'comp4'; 25 | % draw each class curve 26 | draw_curve = true; 27 | 28 | % save results 29 | if ~exist('suffix', 'var') || isempty(suffix) || strcmp(suffix, '') 30 | suffix = ''; 31 | else 32 | if suffix(1) ~= '_' 33 | suffix = ['_' suffix]; 34 | end 35 | end 36 | 37 | conf.cache_dir = fullfile('output', 'rfcn_cachedir', cache_name, imdb.name); 38 | VOCopts = imdb.details.VOCopts; 39 | image_ids = imdb.image_ids; 40 | test_set = VOCopts.testset; 41 | year = VOCopts.dataset(4:end); 42 | 43 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 44 | 45 | if use_res_salt 46 | prev_rng = rng; 47 | rng shuffle; 48 | salt = sprintf('%d', randi(100000)); 49 | res_id = [comp_id '-' salt]; 50 | rng(prev_rng); 51 | else 52 | res_id = comp_id; 53 | end 54 | res_fn = sprintf(VOCopts.detrespath, res_id, cls); 55 | 56 | % write out detections in PASCAL format and score 57 | fid = fopen(res_fn, 'w'); 58 | for i = 1:length(image_ids); 59 | bbox = boxes{i}; 60 | keep = nms(bbox, 0.3); 61 | bbox = bbox(keep,:); 62 | for j = 1:size(bbox,1) 63 | fprintf(fid, '%s %f %.3f %.3f %.3f %.3f\n', image_ids{i}, bbox(j,end), bbox(j,1:4)); 64 | end 65 | end 66 | fclose(fid); 67 | 68 | recall = []; 69 | prec = []; 70 | ap = 0; 71 | ap_auc = 0; 72 | 73 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 74 | if do_eval 75 | % Bug in VOCevaldet requires that tic has been called first 76 | tic; 77 | [recall, prec, ap] = VOCevaldet(VOCopts, res_id, cls, draw_curve); 78 | ap_auc = xVOCap(recall, prec); 79 | 80 | % force plot limits 81 | ylim([0 1]); 82 | xlim([0 1]); 83 | 84 | print(gcf, '-djpeg', '-r0', ... 85 | fullfile(conf.cache_dir, [cls '_pr_' imdb.name suffix '.jpg'])); 86 | end 87 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 88 | 89 | save(fullfile(conf.cache_dir, [cls '_pr_' imdb.name suffix]), ... 90 | 'recall', 'prec', 'ap', 'ap_auc'); 91 | 92 | res.recall = recall; 93 | res.prec = prec; 94 | res.ap = ap; 95 | res.ap_auc = ap_auc; 96 | if rm_res 97 | delete(res_fn); 98 | end 99 | 100 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 101 | -------------------------------------------------------------------------------- /functions/nms/nms_mex.cpp: -------------------------------------------------------------------------------- 1 | #include "mex.h" 2 | #ifdef _MSC_VER 3 | #include 4 | #include 5 | #endif 6 | #include 7 | #include 8 | using namespace std; 9 | 10 | struct score { 11 | double s; 12 | int idx; 13 | bool operator() (score i, score j) { return (i.idx < j.idx);} 14 | } score; 15 | 16 | template 17 | void nms(const mxArray *input_boxes, double overlap, vector &vPick, int &nPick) 18 | { 19 | int nSample = (int)mxGetM(input_boxes); 20 | int nDim_boxes = (int)mxGetN(input_boxes); 21 | 22 | T *pBoxes = (T*)mxGetData(input_boxes); 23 | 24 | vector vArea(nSample); 25 | for (int i = 0; i < nSample; ++i) 26 | { 27 | vArea[i] = double(pBoxes[2*nSample + i] - pBoxes[0*nSample + i] + 1) 28 | * (pBoxes[3*nSample + i] - pBoxes[1*nSample + i] + 1); 29 | if (vArea[i] < 0) 30 | mexErrMsgTxt("Boxes area must >= 0"); 31 | } 32 | 33 | std::multimap scores; 34 | for (int i = 0; i < nSample; ++i) 35 | scores.insert(std::pair(pBoxes[4*nSample + i], i)); 36 | 37 | nPick = 0; 38 | 39 | do 40 | { 41 | int last = scores.rbegin()->second; 42 | vPick[nPick] = last; 43 | nPick += 1; 44 | 45 | for (typename std::multimap::iterator it = scores.begin(); it != scores.end();) 46 | { 47 | int it_idx = it->second; 48 | T xx1 = max(pBoxes[0*nSample + last], pBoxes[0*nSample + it_idx]); 49 | T yy1 = max(pBoxes[1*nSample + last], pBoxes[1*nSample + it_idx]); 50 | T xx2 = min(pBoxes[2*nSample + last], pBoxes[2*nSample + it_idx]); 51 | T yy2 = min(pBoxes[3*nSample + last], pBoxes[3*nSample + it_idx]); 52 | 53 | double w = max(T(0.0), xx2-xx1+1), h = max(T(0.0), yy2-yy1+1); 54 | 55 | double ov = w*h / (vArea[last] + vArea[it_idx] - w*h); 56 | 57 | if (ov > overlap) 58 | { 59 | it = scores.erase(it); 60 | } 61 | else 62 | { 63 | it++; 64 | } 65 | } 66 | 67 | } while (scores.size() != 0); 68 | } 69 | 70 | 71 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 72 | { 73 | if (nrhs != 2) 74 | mexErrMsgTxt("Wrong number of inputs"); 75 | if (nlhs != 1) 76 | mexErrMsgTxt("One output"); 77 | 78 | const mxArray *input_boxes = prhs[0]; 79 | if (mxGetClassID(input_boxes) != mxDOUBLE_CLASS && mxGetClassID(input_boxes) != mxSINGLE_CLASS) 80 | mexErrMsgTxt("Input boxes must be Double or Single"); 81 | 82 | const mxArray *input_overlap = prhs[1]; 83 | if (mxGetClassID(input_overlap) != mxDOUBLE_CLASS ) 84 | mexErrMsgTxt("Input overlap must be Double"); 85 | 86 | double overlap = mxGetScalar(input_overlap); 87 | 88 | int nSample = (int)mxGetM(input_boxes); 89 | int nDim_boxes = (int)mxGetN(input_boxes); 90 | 91 | if (nSample * nDim_boxes == 0) 92 | { 93 | plhs[0] = mxCreateNumericMatrix(0, 0, mxDOUBLE_CLASS, mxREAL); 94 | return; 95 | } 96 | 97 | if (nDim_boxes != 5) 98 | mexErrMsgTxt("nms_mex boxes must has 5 columns"); 99 | 100 | 101 | int nPick = 0; 102 | vector vPick(nSample); 103 | if(mxGetClassID(input_boxes) == mxDOUBLE_CLASS) 104 | nms(input_boxes, overlap, vPick, nPick); 105 | else 106 | nms(input_boxes, overlap, vPick, nPick); 107 | 108 | plhs[0] = mxCreateNumericMatrix(nPick, 1, mxDOUBLE_CLASS, mxREAL); 109 | double *pRst = mxGetPr(plhs[0]); 110 | for (int i = 0; i < nPick; ++i) 111 | pRst[i] = vPick[i] + 1; 112 | } 113 | -------------------------------------------------------------------------------- /imdb/imdb_from_voc.m: -------------------------------------------------------------------------------- 1 | function imdb = imdb_from_voc(root_dir, image_set, year, flip) 2 | % imdb = imdb_from_voc(root_dir, image_set, year) 3 | % Builds an image database for the PASCAL VOC devkit located 4 | % at root_dir using the image_set and year. 5 | % 6 | % Inspired by Andrea Vedaldi's MKL imdb and roidb code. 7 | 8 | % AUTORIGHTS 9 | % --------------------------------------------------------- 10 | % Copyright (c) 2014, Ross Girshick 11 | % 12 | % This file is part of the R-CNN code and is available 13 | % under the terms of the Simplified BSD License provided in 14 | % LICENSE. Please retain this notice and LICENSE if you use 15 | % this file (or any portion of it) in your project. 16 | % --------------------------------------------------------- 17 | 18 | %imdb.name = 'voc_train_2007' 19 | %imdb.image_dir = '/work4/rbg/VOC2007/VOCdevkit/VOC2007/JPEGImages/' 20 | %imdb.extension = '.jpg' 21 | %imdb.image_ids = {'000001', ... } 22 | %imdb.sizes = [numimages x 2] 23 | %imdb.classes = {'aeroplane', ... } 24 | %imdb.num_classes 25 | %imdb.class_to_id 26 | %imdb.class_ids 27 | %imdb.eval_func = pointer to the function that evaluates detections 28 | %imdb.roidb_func = pointer to the function that returns regions of interest 29 | 30 | if nargin < 4 31 | flip = false; 32 | end 33 | 34 | cache_file = ['./imdb/cache/imdb_voc_' year '_' image_set]; 35 | if flip 36 | cache_file = [cache_file, '_flip']; 37 | end 38 | try 39 | load(cache_file); 40 | catch 41 | VOCopts = get_voc_opts(root_dir); 42 | VOCopts.testset = image_set; 43 | 44 | imdb.name = ['voc_' year '_' image_set]; 45 | imdb.image_dir = fileparts(VOCopts.imgpath); 46 | imdb.image_ids = textread(sprintf(VOCopts.imgsetpath, image_set), '%s'); 47 | imdb.extension = 'jpg'; 48 | imdb.flip = flip; 49 | if flip 50 | image_at = @(i) sprintf('%s/%s.%s', imdb.image_dir, imdb.image_ids{i}, imdb.extension); 51 | flip_image_at = @(i) sprintf('%s/%s_flip.%s', imdb.image_dir, imdb.image_ids{i}, imdb.extension); 52 | for i = 1:length(imdb.image_ids) 53 | if ~exist(flip_image_at(i), 'file') 54 | im = imread(image_at(i)); 55 | imwrite(fliplr(im), flip_image_at(i)); 56 | end 57 | end 58 | img_num = length(imdb.image_ids)*2; 59 | image_ids = imdb.image_ids; 60 | imdb.image_ids(1:2:img_num) = image_ids; 61 | imdb.image_ids(2:2:img_num) = cellfun(@(x) [x, '_flip'], image_ids, 'UniformOutput', false); 62 | imdb.flip_from = zeros(img_num, 1); 63 | imdb.flip_from(2:2:img_num) = 1:2:img_num; 64 | end 65 | imdb.classes = VOCopts.classes; 66 | imdb.num_classes = length(imdb.classes); 67 | imdb.class_to_id = ... 68 | containers.Map(imdb.classes, 1:imdb.num_classes); 69 | imdb.class_ids = 1:imdb.num_classes; 70 | 71 | % private VOC details 72 | imdb.details.VOCopts = VOCopts; 73 | 74 | % VOC specific functions for evaluation and region of interest DB 75 | imdb.eval_func = @imdb_eval_voc; 76 | imdb.roidb_func = @roidb_from_voc; 77 | imdb.image_at = @(i) ... 78 | sprintf('%s/%s.%s', imdb.image_dir, imdb.image_ids{i}, imdb.extension); 79 | 80 | for i = 1:length(imdb.image_ids) 81 | tic_toc_print('imdb (%s): %d/%d\n', imdb.name, i, length(imdb.image_ids)); 82 | info = imfinfo(sprintf(VOCopts.imgpath, imdb.image_ids{i})); 83 | imdb.sizes(i, :) = [info.Height info.Width]; 84 | end 85 | 86 | fprintf('Saving imdb to cache...'); 87 | save(cache_file, 'imdb', '-v7.3'); 88 | fprintf('done\n'); 89 | end 90 | -------------------------------------------------------------------------------- /experiments/script_rfcn_demo.m: -------------------------------------------------------------------------------- 1 | function script_rfcn_demo() 2 | % script_rfcn_demo() 3 | % A demo of R-FCN for object detection using ResNet101 model and RPN 4 | % proposals 5 | % -------------------------------------------------------- 6 | % R-FCN implementation 7 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 8 | % Copyright (c) 2016, Jifeng Dai 9 | % Licensed under The MIT License [see LICENSE for details] 10 | % -------------------------------------------------------- 11 | 12 | clc; 13 | clear mex; 14 | clear is_valid_handle; % to clear init_key 15 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); 16 | %% -------------------- CONFIG -------------------- 17 | opts.caffe_version = 'caffe_rfcn'; 18 | opts.gpu_id = auto_select_gpu; 19 | opts.use_gpu = true; 20 | opts.max_rois_num_in_gpu = 5000; 21 | active_caffe_mex(opts.gpu_id, opts.caffe_version); 22 | classes = {'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair',... 23 | 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', ... 24 | 'sheep', 'sofa', 'train', 'tvmonitor'}; 25 | 26 | demo_dir = fullfile(pwd, 'data', 'demo'); 27 | 28 | % conf 29 | 30 | conf = rfcn_config_ohem('image_means',... 31 | fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-101L', 'mean_image')); 32 | 33 | %% -------------------- INIT MODEL ----------------- 34 | rfcn_net_def = fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-101L_OHEM_res3a', 'test.prototxt'); 35 | rfcn_net = fullfile(pwd, 'output', 'rfcn_demo', ... 36 | 'rfcn_VOC0712_ResNet101_OHEM_rpn_resnet101','final'); 37 | 38 | caffe_net = caffe.Net(rfcn_net_def, 'test'); 39 | caffe_net.copy_from(rfcn_net); 40 | 41 | % set gpu/cpu 42 | if opts.use_gpu 43 | caffe.set_mode_gpu(); 44 | else 45 | caffe.set_mode_cpu(); 46 | end 47 | %% -------------------- WARM UP -------------------- 48 | % the first run will be slower; use an empty image to warm up 49 | for j = 1:2 % we warm up 2 times 50 | im = uint8(ones(375, 500, 3)*128); 51 | proposals = repmat([1,1,400,275], [2000, 1]); 52 | proposals = proposals+100*rand(size(proposals)); 53 | [boxes, scores] = rfcn_im_detect(conf, caffe_net, im, proposals, opts.max_rois_num_in_gpu); 54 | end 55 | 56 | %% -------------------- TESTING -------------------- 57 | im_names = {'000166', '001852', '002597', '004030', '005225'}; 58 | running_time = zeros(length(im_names), 1); 59 | for j = 1:length(im_names) 60 | im = imread(fullfile(demo_dir, [im_names{j}, '.jpg'])); 61 | proposals = load(fullfile(demo_dir, [im_names{j}, '_boxes.mat'])); 62 | proposals = single(proposals.boxes); 63 | tic 64 | [boxes, scores] = rfcn_im_detect(conf, caffe_net, im, proposals, opts.max_rois_num_in_gpu); 65 | th = toc; 66 | fprintf('%s, (%dx%d): time %.3fs\n', im_names{j}, size(im, 1), size(im, 2), th); 67 | running_time(j) = th; 68 | boxes_cell = cell(length(classes), 1); 69 | thres = 0.6; 70 | for i = 1:length(boxes_cell) 71 | boxes_cell{i} = [boxes(:, (1+(i-1)*4):(i*4)), scores(:, i)]; 72 | boxes_cell{i} = boxes_cell{i}(nms(boxes_cell{i}, 0.3), :); 73 | 74 | I = boxes_cell{i}(:, 5) >= thres; 75 | boxes_cell{i} = boxes_cell{i}(I, :); 76 | end 77 | figure(j); 78 | showboxes(im, boxes_cell, classes, 'voc'); 79 | pause(0.1); 80 | end 81 | fprintf('mean time: %.3fs\n', mean(running_time)); 82 | end 83 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Faster R-CNN 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2015 Microsoft Corporation 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | 25 | ************************************************************************ 26 | 27 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION 28 | 29 | This project, Faster R-CNN, incorporates material from the project(s) listed below (collectively, "Third Party Code"). Microsoft is not the original author of the Third Party Code. The original copyright notice and license under which Microsoft received such Third Party Code are set out below. This Third Party Code is licensed to you under their original license terms set forth below. Microsoft reserves all other rights not expressly granted, whether by implication, estoppel or otherwise. 30 | 31 | 1. Caffe, version 0.9, (https://github.com/BVLC/caffe/) 32 | 33 | COPYRIGHT 34 | 35 | All contributions by the University of California: 36 | Copyright (c) 2014, 2015, The Regents of the University of California (Regents) 37 | All rights reserved. 38 | 39 | All other contributions: 40 | Copyright (c) 2014, 2015, the respective contributors 41 | All rights reserved. 42 | 43 | Caffe uses a shared copyright model: each contributor holds copyright over their contributions to Caffe. The project versioning records all such contribution and copyright details. If a contributor wants to further mark their specific copyright on a particular contribution, they should indicate their copyright solely in the commit message of the change when it is committed. 44 | 45 | The BSD 2-Clause License 46 | 47 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 48 | 49 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 50 | 51 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 52 | 53 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 54 | 55 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION********** 56 | 57 | 58 | -------------------------------------------------------------------------------- /functions/nms/nms_multiclass_mex.cpp: -------------------------------------------------------------------------------- 1 | #include "mex.h" 2 | #ifdef WIN32 3 | #include 4 | #include 5 | #else 6 | #include 7 | #endif 8 | #include 9 | #include 10 | #include 11 | using namespace std; 12 | 13 | struct score { 14 | double s; 15 | int idx; 16 | bool operator() (score i, score j) { return (i.idx < j.idx);} 17 | } score; 18 | 19 | template 20 | void nms(const mxArray *input_boxes, int iScoreIdx, double overlap, const vector &vArea, vector &vPick, int &nPick) 21 | { 22 | int nSample = (int)mxGetM(input_boxes); 23 | int nDim_boxes = (int)mxGetN(input_boxes); 24 | 25 | T *pBoxes = (T*)mxGetData(input_boxes); 26 | 27 | //vector vArea(nSample); 28 | //for (int i = 0; i < nSample; ++i) 29 | //{ 30 | // vArea[i] = double(pBoxes[2*nSample + i] - pBoxes[0*nSample + i] + 1) 31 | // * (pBoxes[3*nSample + i] - pBoxes[1*nSample + i] + 1); 32 | // if (vArea[i] < 0) 33 | // mexErrMsgTxt("Boxes area must >= 0"); 34 | //} 35 | 36 | std::multimap scores; 37 | for (int i = 0; i < nSample; ++i) 38 | scores.insert(std::pair(pBoxes[iScoreIdx*nSample + i], i)); 39 | 40 | nPick = 0; 41 | 42 | do 43 | { 44 | int last = scores.rbegin()->second; 45 | vPick[nPick] = last; 46 | nPick += 1; 47 | 48 | for (typename std::multimap::iterator it = scores.begin(); it != scores.end();) 49 | { 50 | int it_idx = it->second; 51 | T xx1 = std::max(pBoxes[0*nSample + last], pBoxes[0*nSample + it_idx]); 52 | T yy1 = std::max(pBoxes[1*nSample + last], pBoxes[1*nSample + it_idx]); 53 | T xx2 = std::min(pBoxes[2*nSample + last], pBoxes[2*nSample + it_idx]); 54 | T yy2 = std::min(pBoxes[3*nSample + last], pBoxes[3*nSample + it_idx]); 55 | 56 | double w = max(0.0, xx2-xx1+1), h = max(0.0, yy2-yy1+1); 57 | 58 | double ov = w*h / (vArea[last] + vArea[it_idx] - w*h); 59 | 60 | if (ov > overlap) 61 | { 62 | #ifdef WIN32 63 | it = scores.erase(it); 64 | #else 65 | typename std::multimap::iterator save=it; ++save; 66 | scores.erase(it); 67 | it=save; 68 | #endif 69 | } 70 | else 71 | { 72 | it++; 73 | } 74 | } 75 | 76 | } while (scores.size() != 0); 77 | } 78 | 79 | 80 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, mxArray *prhs[]) 81 | { 82 | if (nrhs != 2) 83 | mexErrMsgTxt("Wrong number of inputs"); 84 | if (nlhs != 1) 85 | mexErrMsgTxt("One output"); 86 | 87 | const mxArray *input_boxes = prhs[0]; 88 | if (mxGetClassID(input_boxes) != mxDOUBLE_CLASS && mxGetClassID(input_boxes) != mxSINGLE_CLASS) 89 | mexErrMsgTxt("Input boxes must be Double or Single"); 90 | 91 | const mxArray *input_overlap = prhs[1]; 92 | if (mxGetClassID(input_overlap) != mxDOUBLE_CLASS ) 93 | mexErrMsgTxt("Input overlap must be Double"); 94 | 95 | double overlap = mxGetScalar(input_overlap); 96 | 97 | int nSample = (int)mxGetM(input_boxes); 98 | int nDim_boxes = (int)mxGetN(input_boxes); 99 | 100 | if (nSample * nDim_boxes == 0) 101 | { 102 | plhs[0] = mxCreateNumericMatrix(0, 0, mxDOUBLE_CLASS, mxREAL); 103 | return; 104 | } 105 | 106 | if (nDim_boxes < 5) 107 | mexErrMsgTxt("nms_mex boxes must has least 5 columns"); 108 | 109 | vector vArea(nSample); 110 | if(mxGetClassID(input_boxes) == mxDOUBLE_CLASS) 111 | { 112 | double *pBoxes = (double*)mxGetData(input_boxes); 113 | for (int i = 0; i < nSample; ++i) 114 | { 115 | vArea[i] = double(pBoxes[2*nSample + i] - pBoxes[0*nSample + i] + 1) 116 | * (pBoxes[3*nSample + i] - pBoxes[1*nSample + i] + 1); 117 | if (vArea[i] < 0) 118 | mexErrMsgTxt("Boxes area must >= 0"); 119 | } 120 | } 121 | else 122 | { 123 | if(mxGetClassID(input_boxes) == mxDOUBLE_CLASS) 124 | { 125 | float *pBoxes = (float*)mxGetData(input_boxes); 126 | for (int i = 0; i < nSample; ++i) 127 | { 128 | vArea[i] = double(pBoxes[2*nSample + i] - pBoxes[0*nSample + i] + 1) 129 | * (pBoxes[3*nSample + i] - pBoxes[1*nSample + i] + 1); 130 | if (vArea[i] < 0) 131 | mexErrMsgTxt("Boxes area must >= 0"); 132 | } 133 | } 134 | } 135 | 136 | vector nPick(nDim_boxes - 4, 0); 137 | vector > vPicks(nDim_boxes - 4); 138 | plhs[0] = mxCreateCellMatrix_730(nDim_boxes - 4, 1); 139 | 140 | #pragma omp parallel for ordered schedule(dynamic) 141 | for (int i = 0; i < vPicks.size(); ++i) 142 | { 143 | vPicks[i].resize(nSample); 144 | 145 | if(mxGetClassID(input_boxes) == mxDOUBLE_CLASS) 146 | nms(input_boxes, i+4, overlap, vArea, vPicks[i], nPick[i]); 147 | else 148 | nms(input_boxes, i+4, overlap, vArea, vPicks[i], nPick[i]); 149 | 150 | mxArray *mxPick = mxCreateNumericMatrix(nPick[i], 1, mxDOUBLE_CLASS, mxREAL); 151 | double *pRst = mxGetPr(mxPick); 152 | for (int j = 0; j < nPick[i]; ++j) 153 | pRst[j] = vPicks[i][j] + 1; 154 | 155 | mxSetCell(plhs[0], i, mxPick); 156 | } 157 | 158 | } -------------------------------------------------------------------------------- /functions/rfcn/rfcn_im_detect.m: -------------------------------------------------------------------------------- 1 | function [pred_boxes, scores] = rfcn_im_detect(conf, caffe_net, im, boxes, max_rois_num_in_gpu) 2 | % [pred_boxes, scores] = rfcn_im_detect(conf, caffe_net, im, boxes, max_rois_num_in_gpu) 3 | % -------------------------------------------------------- 4 | % R-FCN implementation 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | % Copyright (c) 2016, Jifeng Dai 7 | % Licensed under The MIT License [see LICENSE for details] 8 | 9 | [im_blob, rois_blob, ~] = get_blobs(conf, im, boxes); 10 | 11 | % When mapping from image ROIs to feature map ROIs, there's some aliasing 12 | % (some distinct image ROIs get mapped to the same feature ROI). 13 | % Here, we identify duplicate feature ROIs, so we only compute features 14 | % on the unique subset. 15 | [~, index, inv_index] = unique(rois_blob, 'rows'); 16 | rois_blob = rois_blob(index, :); 17 | boxes = boxes(index, :); 18 | 19 | % permute data into caffe c++ memory, thus [num, channels, height, width] 20 | im_blob = im_blob(:, :, [3, 2, 1], :); % from rgb to brg 21 | im_blob = permute(im_blob, [2, 1, 3, 4]); 22 | im_blob = single(im_blob); 23 | rois_blob = rois_blob - 1; % to c's index (start from 0) 24 | rois_blob = permute(rois_blob, [3, 4, 2, 1]); 25 | rois_blob = single(rois_blob); 26 | 27 | total_rois = size(rois_blob, 4); 28 | total_scores = cell(ceil(total_rois / max_rois_num_in_gpu), 1); 29 | total_box_deltas = cell(ceil(total_rois / max_rois_num_in_gpu), 1); 30 | for i = 1:ceil(total_rois / max_rois_num_in_gpu) 31 | 32 | sub_ind_start = 1 + (i-1) * max_rois_num_in_gpu; 33 | sub_ind_end = min(total_rois, i * max_rois_num_in_gpu); 34 | sub_rois_blob = rois_blob(:, :, :, sub_ind_start:sub_ind_end); 35 | 36 | net_inputs = {im_blob, sub_rois_blob}; 37 | 38 | % Reshape net's input blobs 39 | caffe_net.reshape_as_input(net_inputs); 40 | caffe_net.forward(net_inputs); 41 | 42 | if conf.test_binary 43 | % simulate binary logistic regression 44 | scores = caffe_net.blobs('cls_score').get_data(); 45 | scores = squeeze(scores)'; 46 | % Return scores as fg - bg 47 | scores = bsxfun(@minus, scores, scores(:, 1)); 48 | else 49 | % use softmax estimated probabilities 50 | scores = caffe_net.blobs('cls_prob').get_data(); 51 | scores = squeeze(scores)'; 52 | end 53 | 54 | % Apply bounding-box regression deltas 55 | box_deltas = caffe_net.blobs('bbox_pred').get_data(); 56 | box_deltas = squeeze(box_deltas)'; 57 | 58 | total_scores{i} = scores; 59 | total_box_deltas{i} = box_deltas; 60 | end 61 | 62 | scores = cell2mat(total_scores); 63 | box_deltas = cell2mat(total_box_deltas); 64 | 65 | pred_boxes = rfcn_bbox_transform_inv(boxes, box_deltas); 66 | pred_boxes = clip_boxes(pred_boxes, size(im, 2), size(im, 1)); 67 | 68 | % Map scores and predictions back to the original set of boxes 69 | scores = scores(inv_index, :); 70 | pred_boxes = pred_boxes(inv_index, :); 71 | 72 | % remove scores and boxes for back-ground 73 | pred_boxes = pred_boxes(:, 5:end); 74 | scores = scores(:, 2:end); 75 | if conf.bbox_class_agnostic 76 | pred_boxes = repmat(pred_boxes, [1, size(scores,2)]); 77 | end 78 | end 79 | 80 | function [data_blob, rois_blob, im_scale_factors] = get_blobs(conf, im, rois) 81 | [data_blob, im_scale_factors] = get_image_blob(conf, im); 82 | rois_blob = get_rois_blob(conf, rois, im_scale_factors); 83 | end 84 | 85 | function [blob, im_scales] = get_image_blob(conf, im) 86 | [ims, im_scales] = arrayfun(@(x) prep_im_for_blob(im, conf.image_means, x, conf.test_max_size), conf.test_scales, 'UniformOutput', false); 87 | im_scales = cell2mat(im_scales); 88 | blob = im_list_to_blob(ims); 89 | end 90 | 91 | function [rois_blob] = get_rois_blob(conf, im_rois, im_scale_factors) 92 | [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, im_scale_factors); 93 | rois_blob = single([levels, feat_rois]); 94 | end 95 | 96 | function [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, scales) 97 | im_rois = single(im_rois); 98 | 99 | if length(scales) > 1 100 | widths = im_rois(:, 3) - im_rois(:, 1) + 1; 101 | heights = im_rois(:, 4) - im_rois(:, 2) + 1; 102 | 103 | areas = widths .* heights; 104 | scaled_areas = bsxfun(@times, areas(:), scales(:)'.^2); 105 | [~, levels] = min(abs(scaled_areas - 224.^2), [], 2); 106 | else 107 | levels = ones(size(im_rois, 1), 1); 108 | end 109 | 110 | feat_rois = round(bsxfun(@times, im_rois-1, scales(levels))) + 1; 111 | end 112 | 113 | function boxes = clip_boxes(boxes, im_width, im_height) 114 | % x1 >= 1 & <= im_width 115 | boxes(:, 1:4:end) = max(min(boxes(:, 1:4:end), im_width), 1); 116 | % y1 >= 1 & <= im_height 117 | boxes(:, 2:4:end) = max(min(boxes(:, 2:4:end), im_height), 1); 118 | % x2 >= 1 & <= im_width 119 | boxes(:, 3:4:end) = max(min(boxes(:, 3:4:end), im_width), 1); 120 | % y2 >= 1 & <= im_height 121 | boxes(:, 4:4:end) = max(min(boxes(:, 4:4:end), im_height), 1); 122 | end -------------------------------------------------------------------------------- /functions/nms/nms_gpu_mex.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Example of how to use the mxGPUArray API in a MEX file. This example shows 3 | * how to write a MEX function that takes a gpuArray input and returns a 4 | * gpuArray output, e.g. B=mexFunction(A). 5 | * 6 | * Copyright 2012 The MathWorks, Inc. 7 | */ 8 | 9 | #include "mex.h" 10 | #include 11 | #include 12 | 13 | #define DIVUP(m,n) ((m)/(n)+((m)%(n)>0)) 14 | int const threadsPerBlock = (sizeof(unsigned long long) * 8); 15 | 16 | /* 17 | * Device code 18 | */ 19 | __device__ inline float devIoU(float const * const a, float const * const b) 20 | { 21 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 22 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 23 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 24 | float interS = width * height; 25 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 26 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 27 | return interS / (Sa + Sb - interS); 28 | } 29 | 30 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thres, const float *dev_boxes, unsigned long long *dev_mask) 31 | { 32 | const int row_start = blockIdx.y, col_start = blockIdx.x; 33 | const int row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock), col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | //if (row_start > col_start) return; 36 | 37 | __shared__ float block_boxes[threadsPerBlock * 5]; 38 | if (threadIdx.x < col_size) 39 | { 40 | block_boxes[threadIdx.x * 5 + 0] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 41 | block_boxes[threadIdx.x * 5 + 1] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 42 | block_boxes[threadIdx.x * 5 + 2] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 44 | block_boxes[threadIdx.x * 5 + 4] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 45 | } 46 | __syncthreads(); 47 | 48 | if (threadIdx.x < row_size) 49 | { 50 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 51 | const float *cur_box = dev_boxes + cur_box_idx * 5; 52 | int i = 0; 53 | unsigned long long t = 0; 54 | int start = 0; 55 | if (row_start == col_start) start = threadIdx.x + 1; 56 | for (i = start; i < col_size; i++) 57 | { 58 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thres) 59 | { 60 | t |= 1ULL << i; 61 | } 62 | } 63 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 64 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 65 | } 66 | } 67 | 68 | /* 69 | * Host code 70 | */ 71 | void mexFunction(int nlhs, mxArray *plhs[], 72 | int nrhs, const mxArray *prhs[]) 73 | { 74 | 75 | /* Declare all variables.*/ 76 | mxArray const *boxes, *ov_thres; 77 | float *boxes_host = NULL; 78 | float *boxes_dev = NULL; 79 | unsigned long long *mask_dev = NULL; 80 | 81 | /* Throw an error if the input is not a array. */ 82 | if (nrhs != 2) { 83 | mexErrMsgTxt("nms_gpu_mex::need 2 inputs"); 84 | } 85 | 86 | boxes = prhs[0]; 87 | if (mxGetClassID(boxes) != mxSINGLE_CLASS) { 88 | mexErrMsgTxt("nms_gpu_mex::input boxes must be single"); 89 | } 90 | 91 | ov_thres = prhs[1]; 92 | if (mxGetClassID(ov_thres) != mxDOUBLE_CLASS) { 93 | mexErrMsgTxt("nms_gpu_mex::input boxes must be double"); 94 | } 95 | 96 | float nms_overlap_thres = (float)mxGetScalar(ov_thres); 97 | 98 | int boxes_dim = mxGetM(boxes); 99 | int boxes_num = mxGetN(boxes); 100 | if (boxes_dim != 5) 101 | { 102 | mexErrMsgTxt("nms_gpu_mex::input boxes's row must be 5"); 103 | } 104 | 105 | boxes_host = (float *)(mxGetPr(boxes)); 106 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 107 | 108 | cudaMalloc(&boxes_dev, mxGetNumberOfElements(boxes) * sizeof(float)); 109 | cudaMemcpy(boxes_dev, boxes_host, mxGetNumberOfElements(boxes) * sizeof(float), cudaMemcpyHostToDevice); 110 | 111 | /* Create a GPUArray to hold the result and get its underlying pointer. */ 112 | cudaMalloc(&mask_dev, boxes_num * col_blocks * sizeof(unsigned long long)); 113 | 114 | 115 | /* 116 | * Call the kernel using the CUDA runtime API. We are using a 1-d grid here, 117 | * and it would be possible for the number of elements to be too large for 118 | * the grid. For this example we are not guarding against this possibility. 119 | */ 120 | 121 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), DIVUP(boxes_num, threadsPerBlock)); 122 | dim3 threads(threadsPerBlock); 123 | nms_kernel << > >(boxes_num, nms_overlap_thres, boxes_dev, mask_dev); 124 | 125 | std::vector mask_host(boxes_num * col_blocks); 126 | cudaMemcpy(&mask_host[0], mask_dev, sizeof(unsigned long long) * boxes_num * col_blocks, cudaMemcpyDeviceToHost); 127 | 128 | std::vector remv(col_blocks); 129 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 130 | 131 | std::vector keep; 132 | keep.reserve(boxes_num); 133 | for (int i = 0; i < boxes_num; i++) 134 | { 135 | int nblock = i / threadsPerBlock; 136 | int inblock = i % threadsPerBlock; 137 | 138 | if (!(remv[nblock] & (1ULL << inblock))) 139 | { 140 | keep.push_back(i + 1); // to matlab's index 141 | 142 | unsigned long long *p = &mask_host[0] + i * col_blocks; 143 | for (int j = nblock; j < col_blocks; j++) 144 | { 145 | remv[j] |= p[j]; 146 | } 147 | } 148 | } 149 | 150 | /* Wrap the result up as a MATLAB cpuArray for return. */ 151 | mwSize dims[4] = { (int)keep.size(), 1, 1, 1 }; 152 | plhs[0] = mxCreateNumericArray(4, dims, mxINT32_CLASS, mxREAL); 153 | 154 | int *output = (int *)(mxGetPr(plhs[0])); 155 | memcpy(output, &keep[0], (int)keep.size() * sizeof(int)); 156 | 157 | 158 | cudaFree(boxes_dev); 159 | cudaFree(mask_dev); 160 | } 161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # *R-FCN*: Object Detection via Region-based Fully Convolutional Networks 2 | 3 | By Jifeng Dai, Yi Li, Kaiming He, Jian Sun 4 | 5 | **It is highly recommended to use the [deformable R-FCN](https://github.com/msracver/Deformable-ConvNets) implemented in MXNet, which significantly increases the accuracy at very low extra computational overhead.** 6 | 7 | *A [python version of R-FCN](https://github.com/Orpine/py-R-FCN) is available, which supports end-to-end training/inference of R-FCN for object detection.* 8 | 9 | 10 | ### Introduction 11 | 12 | **R-FCN** is a region-based object detection framework leveraging deep fully-convolutional networks, which is accurate and efficient. In contrast to previous region-based detectors such as Fast/Faster R-CNN that apply a costly per-region sub-network hundreds of times, our region-based detector is fully convolutional with almost all computation shared on the entire image. R-FCN can natually adopt powerful fully convolutional image classifier backbones, such as [ResNets](https://github.com/KaimingHe/deep-residual-networks), for object detection. 13 | 14 | R-FCN was initially described in a [NIPS 2016 paper](https://arxiv.org/abs/1605.06409). 15 | 16 | This code has been tested on Windows 7/8 64 bit, Windows Server 2012 R2, and Ubuntu 14.04, with Matlab 2014a. 17 | 18 | ### License 19 | 20 | R-FCN is released under the MIT License (refer to the LICENSE file for details). 21 | 22 | ### Citing R-FCN 23 | 24 | If you find R-FCN useful in your research, please consider citing: 25 | 26 | @article{dai16rfcn, 27 | Author = {Jifeng Dai, Yi Li, Kaiming He, Jian Sun}, 28 | Title = {{R-FCN}: Object Detection via Region-based Fully Convolutional Networks}, 29 | Journal = {arXiv preprint arXiv:1605.06409}, 30 | Year = {2016} 31 | } 32 | 33 | ### Main Results 34 | 35 | | | training data | test data | mAP | time/img (K40) | time/img (Titian X)| 36 | |-------------------|:-------------------:|:---------------------:|:-----:|:--------------:|:------------------:| 37 | |R-FCN, ResNet-50 | VOC 07+12 trainval | VOC 07 test | 77.4% | 0.12sec | 0.09sec | 38 | |R-FCN, ResNet-101 | VOC 07+12 trainval | VOC 07 test | 79.5% | 0.17sec | 0.12sec | 39 | 40 | 41 | ### Requirements: software 42 | 43 | 0. `Caffe` build for R-FCN (included in this repository, see `external/caffe`) 44 | - If you are using Windows, you may download a compiled mex file by running `fetch_data/fetch_caffe_mex_windows_vs2013_cuda75.m` 45 | - If you are using Linux or you want to compile for Windows, please recompile [our Caffe branch](https://github.com/daijifeng001/caffe-rfcn). 46 | 0. MATLAB 2014a or later 47 | 48 | 49 | ### Requirements: hardware 50 | 51 | GPU: Titan, Titan X, K40, K80. 52 | 53 | ### Demo 54 | 0. Run `fetch_data/fetch_caffe_mex_windows_vs2013_cuda75.m` to download a compiled Caffe mex (for Windows only). 55 | 0. Run `fetch_data/fetch_demo_model_ResNet101.m` to download a R-FCN model using ResNet-101 net (trained on VOC 07+12 trainval). 56 | 0. Run `rfcn_build.m`. 57 | 0. Run `startup.m`. 58 | 0. Run `experiments/script_rfcn_demo.m` to apply the R-FCN model on demo images. 59 | 60 | ### Preparation for Training & Testing 61 | 0. Run `fetch_data/fetch_caffe_mex_windows_vs2013_cuda75.m` to download a compiled Caffe mex (for Windows only). 62 | 0. Run `fetch_data/fetch_model_ResNet50.m` to download an ImageNet-pre-trained ResNet-50 net. 63 | 0. Run `fetch_data/fetch_model_ResNet101.m` to download an ImageNet-pre-trained ResNet-101 net. 64 | 0. Run `fetch_data/fetch_region_proposals.m` to download the pre-computed region proposals. 65 | 0. Download VOC 2007 and 2012 data to ./datasets. 66 | 0. Run `rfcn_build.m`. 67 | 0. Run `startup.m`. 68 | 69 | 70 | ### Training & Testing 71 | 0. Run `experiments/script_rfcn_VOC0712_ResNet50_OHEM_ss.m` to train a model using ResNet-50 net with online hard example mining (OHEM), leveraging selective search proposals. The accuracy should be ~75.4% in mAP. 72 | - **Note**: the training time is ~13 hours on Titian X. 73 | 0. Run `experiments/script_rfcn_VOC0712_ResNet50_OHEM_rpn.m` to train a model using ResNet-50 net with OHEM, leveraging RPN proposals (using ResNet-50 net). The accuracy should be ~77.4% in mAP. 74 | - **Note**: the training time is ~13 hours on Titian X. 75 | 0. Run `experiments/script_rfcn_VOC0712_ResNet101_OHEM_rpn.m` to train a model using ResNet-101 net with OHEM, leveraging RPN proposals (using ResNet-101 net). The accuracy should be ~79.5% in mAP. 76 | - **Note**: the training time is ~19 hours on Titian X. 77 | 0. Check other scripts in `./experiments` for more settings. 78 | 79 | **Note:** 80 | - In all the experiments, training is performed on VOC 07+12 trainval, and testing is performed on VOC 07 test. 81 | - Results are subject to some random variations. We have run 'experiments/script_rfcn_VOC0712_ResNet50_OHEM_rpn.m' for 5 times, the results are 77.1%, 77.3%, 77.7%, 77.9%, and 77.0%. The mean is 77.4%, and the std is 0.39%. 82 | - Running time is not recorded in the test log (which is slower), but instead in an optimized implementation. 83 | 84 | ### Resources 85 | 86 | 0. Experiment logs: [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhc44qdRNJTsXLIU-2w), [BaiduYun](http://pan.baidu.com/s/1mhFYejI) 87 | 88 | If the automatic "fetch_data" fails, you may manually download resouces from: 89 | 90 | 0. Pre-complied caffe mex (Windows): 91 | - [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhc456RlstMF-4wHr1g), [BaiduYun](http://pan.baidu.com/s/1i4OlG7z) 92 | 0. Demo R-FCN model: 93 | - [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhc486Tyvkf3koU7R7w), [BaiduYun](http://pan.baidu.com/s/1o77gFXo) 94 | 0. ImageNet-pretrained networks: 95 | - ResNet-50 net [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhc46RPYjtbdbNwPJ_w), [BaiduYun](http://pan.baidu.com/s/1kVm4ly3) 96 | - ResNet-101 net [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhc47z4S7O5Ql6W_0-g), [BaiduYun](http://pan.baidu.com/s/1nvgu1pJ) 97 | 0. Pre-computed region proposals: 98 | - [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhc49StWpgPo2GPEB_A), [BaiduYun](http://pan.baidu.com/s/1hrAJ5re) 99 | 100 | 101 | -------------------------------------------------------------------------------- /functions/rfcn/rfcn_prepare_image_roidb.m: -------------------------------------------------------------------------------- 1 | function [image_roidb, bbox_means, bbox_stds] = rfcn_prepare_image_roidb(conf, imdbs, roidbs, bbox_means, bbox_stds) 2 | % [image_roidb] = rfcn_prepare_image_roidb(conf, imdbs, roidbs, cache_img) 3 | % -------------------------------------------------------- 4 | % R-FCN implementation 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | % Copyright (c) 2016, Jifeng Dai 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | if ~exist('bbox_means', 'var') 11 | bbox_means = []; 12 | bbox_stds = []; 13 | end 14 | 15 | if ~iscell(imdbs) 16 | imdbs = {imdbs}; 17 | roidbs = {roidbs}; 18 | end 19 | 20 | imdbs = imdbs(:); 21 | roidbs = roidbs(:); 22 | 23 | image_roidb = cellfun(@(x, y) ... // @(imdbs, roidbs) 24 | arrayfun(@(z) ... //@([1:length(x.image_ids)]) 25 | struct('image_path', x.image_at(z), 'image_id', x.image_ids{z}, 'im_size', x.sizes(z, :), 'imdb_name', x.name, ... 26 | 'overlap', y.rois(z).overlap, 'boxes', y.rois(z).boxes, 'class', y.rois(z).class, 'image', [], 'bbox_targets', []), ... 27 | [1:length(x.image_ids)]', 'UniformOutput', true), imdbs, roidbs, 'UniformOutput', false); 28 | 29 | image_roidb = cat(1, image_roidb{:}); 30 | 31 | % enhance roidb to contain bounding-box regression targets 32 | [image_roidb, bbox_means, bbox_stds] = append_bbox_regression_targets(conf, image_roidb, bbox_means, bbox_stds); 33 | end 34 | 35 | function [image_roidb, means, stds] = append_bbox_regression_targets(conf, image_roidb, means, stds) 36 | % means and stds -- (k+1) * 4, include background class 37 | 38 | num_images = length(image_roidb); 39 | % Infer number of classes from the number of columns in gt_overlaps 40 | if conf.bbox_class_agnostic 41 | num_classes = 1; 42 | else 43 | num_classes = size(image_roidb(1).overlap, 2); 44 | end 45 | 46 | valid_imgs = true(num_images, 1); 47 | for i = 1:num_images 48 | rois = image_roidb(i).boxes; 49 | [image_roidb(i).bbox_targets, valid_imgs(i)] = ... 50 | compute_targets(conf, rois, image_roidb(i).overlap); 51 | end 52 | if ~all(valid_imgs) 53 | image_roidb = image_roidb(valid_imgs); 54 | num_images = length(image_roidb); 55 | fprintf('Warning: rfcn_prepare_image_roidb: filter out %d images, which contains zero valid samples\n', sum(~valid_imgs)); 56 | end 57 | 58 | if ~(exist('means', 'var') && ~isempty(means) && exist('stds', 'var') && ~isempty(stds)) 59 | % Compute values needed for means and stds 60 | % var(x) = E(x^2) - E(x)^2 61 | class_counts = zeros(num_classes, 1) + eps; 62 | sums = zeros(num_classes, 4); 63 | squared_sums = zeros(num_classes, 4); 64 | for i = 1:num_images 65 | targets = image_roidb(i).bbox_targets; 66 | for cls = 1:num_classes 67 | cls_inds = find(targets(:, 1) == cls); 68 | if ~isempty(cls_inds) 69 | class_counts(cls) = class_counts(cls) + length(cls_inds); 70 | sums(cls, :) = sums(cls, :) + sum(targets(cls_inds, 2:end), 1); 71 | squared_sums(cls, :) = squared_sums(cls, :) + sum(targets(cls_inds, 2:end).^2, 1); 72 | end 73 | end 74 | end 75 | 76 | means = bsxfun(@rdivide, sums, class_counts); 77 | stds = (bsxfun(@minus, bsxfun(@rdivide, squared_sums, class_counts), means.^2)).^0.5; 78 | 79 | % add background class 80 | means = [0, 0, 0, 0; means]; 81 | stds = [0, 0, 0, 0; stds]; 82 | end 83 | 84 | % Normalize targets 85 | for i = 1:num_images 86 | targets = image_roidb(i).bbox_targets; 87 | for cls = 1:num_classes 88 | cls_inds = find(targets(:, 1) == cls); 89 | if ~isempty(cls_inds) 90 | image_roidb(i).bbox_targets(cls_inds, 2:end) = ... 91 | bsxfun(@minus, image_roidb(i).bbox_targets(cls_inds, 2:end), means(cls+1, :)); 92 | image_roidb(i).bbox_targets(cls_inds, 2:end) = ... 93 | bsxfun(@rdivide, image_roidb(i).bbox_targets(cls_inds, 2:end), stds(cls+1, :)); 94 | end 95 | end 96 | end 97 | end 98 | 99 | 100 | function [bbox_targets, is_valid] = compute_targets(conf, rois, overlap) 101 | 102 | overlap = full(overlap); 103 | 104 | [max_overlaps, max_labels] = max(overlap, [], 2); 105 | 106 | % ensure ROIs are floats 107 | rois = single(rois); 108 | 109 | bbox_targets = zeros(size(rois, 1), 5, 'single'); 110 | 111 | % Indices of ground-truth ROIs 112 | gt_inds = find(max_overlaps == 1); 113 | 114 | if ~isempty(gt_inds) 115 | % Indices of examples for which we try to make predictions 116 | ex_inds = find(max_overlaps >= conf.bbox_thresh); 117 | 118 | % Get IoU overlap between each ex ROI and gt ROI 119 | ex_gt_overlaps = boxoverlap(rois(ex_inds, :), rois(gt_inds, :)); 120 | 121 | assert(all(abs(max(ex_gt_overlaps, [], 2) - max_overlaps(ex_inds)) < 10^-4)); 122 | 123 | % Find which gt ROI each ex ROI has max overlap with: 124 | % this will be the ex ROI's gt target 125 | [~, gt_assignment] = max(ex_gt_overlaps, [], 2); 126 | gt_rois = rois(gt_inds(gt_assignment), :); 127 | ex_rois = rois(ex_inds, :); 128 | 129 | [regression_label] = rfcn_bbox_transform(ex_rois, gt_rois); 130 | 131 | if conf.bbox_class_agnostic 132 | bbox_targets(ex_inds, :) = [max_labels(ex_inds)>0, regression_label]; 133 | else 134 | bbox_targets(ex_inds, :) = [max_labels(ex_inds), regression_label]; 135 | end 136 | end 137 | 138 | % Select foreground ROIs as those with >= fg_thresh overlap 139 | is_fg = max_overlaps >= conf.fg_thresh; 140 | % Select background ROIs as those within [bg_thresh_lo, bg_thresh_hi) 141 | is_bg = max_overlaps < conf.bg_thresh_hi & max_overlaps >= conf.bg_thresh_lo; 142 | 143 | % check if there is any fg or bg sample. If no, filter out this image 144 | is_valid = true; 145 | if ~any(is_fg | is_bg) 146 | is_valid = false; 147 | end 148 | end 149 | -------------------------------------------------------------------------------- /functions/rfcn/rfcn_get_minibatch.m: -------------------------------------------------------------------------------- 1 | function net_inputs = rfcn_get_minibatch(conf, image_roidb) 2 | % net_inputs = rfcn_get_minibatch(conf, image_roidb) 3 | % -------------------------------------------------------- 4 | % R-FCN implementation 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | % Copyright (c) 2016, Jifeng Dai 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | num_images = length(image_roidb); 11 | if conf.bbox_class_agnostic 12 | num_classes = 1; 13 | else 14 | % Infer number of classes from the number of columns in gt_overlaps 15 | num_classes = size(image_roidb(1).overlap, 2); 16 | end 17 | % Sample random scales to use for each image in this batch 18 | random_scale_inds = randi(length(conf.scales), num_images, 1); 19 | 20 | if conf.batch_size > 0 21 | assert(mod(conf.batch_size, num_images) == 0, ... 22 | sprintf('num_images %d must divide BATCH_SIZE %d', num_images, conf.batch_size)); 23 | 24 | rois_per_image = conf.batch_size / num_images; 25 | fg_rois_per_image = round(rois_per_image * conf.fg_fraction); 26 | else 27 | rois_per_image = inf; 28 | fg_rois_per_image = inf; 29 | end 30 | 31 | % Get the input image blob 32 | [im_blob, im_scales] = get_image_blob(conf, image_roidb, random_scale_inds); 33 | 34 | % build the region of interest and label blobs 35 | rois_blob = zeros(0, 5, 'single'); 36 | labels_blob = zeros(0, 1, 'single'); 37 | bbox_targets_blob = zeros(0, 4 * (num_classes+1), 'single'); 38 | bbox_loss_blob = zeros(size(bbox_targets_blob), 'single'); 39 | 40 | for i = 1:num_images 41 | [labels, ~, im_rois, bbox_targets, bbox_loss] = ... 42 | sample_rois(conf, image_roidb(i), fg_rois_per_image, rois_per_image); 43 | 44 | % Add to ROIs blob 45 | feat_rois = rfcn_map_im_rois_to_feat_rois(conf, im_rois, im_scales(i)); 46 | batch_ind = i * ones(size(feat_rois, 1), 1); 47 | rois_blob_this_image = [batch_ind, feat_rois]; 48 | rois_blob = [rois_blob; rois_blob_this_image]; 49 | 50 | % Add to labels, bbox targets, and bbox loss blobs 51 | labels_blob = [labels_blob; labels]; 52 | bbox_targets_blob = [bbox_targets_blob; bbox_targets]; 53 | bbox_loss_blob = [bbox_loss_blob; bbox_loss]; 54 | end 55 | 56 | % permute data into caffe c++ memory, thus [num, channels, height, width] 57 | im_blob = im_blob(:, :, [3, 2, 1], :); % from rgb to brg 58 | im_blob = single(permute(im_blob, [2, 1, 3, 4])); 59 | rois_blob = rois_blob - 1; % to c's index (start from 0) 60 | rois_blob = single(permute(rois_blob, [3, 4, 2, 1])); 61 | labels_blob = single(permute(labels_blob, [3, 4, 2, 1])); 62 | bbox_targets_blob = single(permute(bbox_targets_blob, [3, 4, 2, 1])); 63 | bbox_loss_blob = single(permute(bbox_loss_blob, [3, 4, 2, 1])); 64 | 65 | assert(~isempty(im_blob)); 66 | assert(~isempty(rois_blob)); 67 | assert(~isempty(labels_blob)); 68 | assert(~isempty(bbox_targets_blob)); 69 | assert(~isempty(bbox_loss_blob)); 70 | 71 | net_inputs = {im_blob, rois_blob, labels_blob, bbox_targets_blob, bbox_loss_blob}; 72 | end 73 | 74 | %% Build an input blob from the images in the roidb at the specified scales. 75 | function [im_blob, im_scales] = get_image_blob(conf, images, random_scale_inds) 76 | 77 | num_images = length(images); 78 | processed_ims = cell(num_images, 1); 79 | im_scales = nan(num_images, 1); 80 | for i = 1:num_images 81 | im = imread(images(i).image_path); 82 | target_size = conf.scales(random_scale_inds(i)); 83 | 84 | [im, im_scale] = prep_im_for_blob(im, conf.image_means, target_size, conf.max_size); 85 | 86 | im_scales(i) = im_scale; 87 | processed_ims{i} = im; 88 | end 89 | 90 | im_blob = im_list_to_blob(processed_ims); 91 | end 92 | 93 | %% Generate a random sample of ROIs comprising foreground and background examples. 94 | function [labels, overlaps, rois, bbox_targets, bbox_loss_weights] = sample_rois(conf, image_roidb, fg_rois_per_image, rois_per_image) 95 | 96 | [overlaps, labels] = max(image_roidb(1).overlap, [], 2); 97 | % labels = image_roidb(1).max_classes; 98 | % overlaps = image_roidb(1).max_overlaps; 99 | rois = image_roidb(1).boxes; 100 | 101 | % Select foreground ROIs as those with >= FG_THRESH overlap 102 | fg_inds = find(overlaps >= conf.fg_thresh); 103 | % Guard against the case when an image has fewer than fg_rois_per_image 104 | % foreground ROIs 105 | fg_rois_per_this_image = min(fg_rois_per_image, length(fg_inds)); 106 | % Sample foreground regions without replacement 107 | if ~isempty(fg_inds) 108 | fg_inds = fg_inds(randperm(length(fg_inds), fg_rois_per_this_image)); 109 | end 110 | 111 | % Select background ROIs as those within [BG_THRESH_LO, BG_THRESH_HI) 112 | bg_inds = find(overlaps < conf.bg_thresh_hi & overlaps >= conf.bg_thresh_lo); 113 | % Compute number of background ROIs to take from this image (guarding 114 | % against there being fewer than desired) 115 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image; 116 | bg_rois_per_this_image = min(bg_rois_per_this_image, length(bg_inds)); 117 | % Sample foreground regions without replacement 118 | if ~isempty(bg_inds) 119 | bg_inds = bg_inds(randperm(length(bg_inds), bg_rois_per_this_image)); 120 | end 121 | % The indices that we're selecting (both fg and bg) 122 | keep_inds = [fg_inds; bg_inds]; 123 | % Select sampled values from various arrays 124 | labels = labels(keep_inds); 125 | % Clamp labels for the background ROIs to 0 126 | labels((fg_rois_per_this_image+1):end) = 0; 127 | overlaps = overlaps(keep_inds); 128 | rois = rois(keep_inds, :); 129 | 130 | if conf.bbox_class_agnostic 131 | assert(all((labels>0) == image_roidb.bbox_targets(keep_inds, 1))); 132 | else 133 | assert(all(labels == image_roidb.bbox_targets(keep_inds, 1))); 134 | end 135 | 136 | % Infer number of classes from the number of columns in gt_overlaps 137 | num_classes = size(image_roidb(1).overlap, 2); 138 | 139 | [bbox_targets, bbox_loss_weights] = get_bbox_regression_labels(conf, ... 140 | image_roidb.bbox_targets(keep_inds, :), num_classes); 141 | 142 | end 143 | 144 | function [bbox_targets, bbox_loss_weights] = get_bbox_regression_labels(conf, bbox_target_data, num_classes) 145 | %% Bounding-box regression targets are stored in a compact form in the roidb. 146 | % This function expands those targets into the 4-of-4*(num_classes+1) representation used 147 | % by the network (i.e. only one class has non-zero targets). 148 | % The loss weights are similarly expanded. 149 | % Return (N, (num_classes+1) * 4, 1, 1) blob of regression targets 150 | % Return (N, (num_classes+1 * 4, 1, 1) blob of loss weights 151 | if conf.bbox_class_agnostic 152 | num_classes = 1; 153 | end 154 | 155 | clss = bbox_target_data(:, 1); 156 | bbox_targets = zeros(length(clss), 4 * (num_classes+1), 'single'); 157 | bbox_loss_weights = zeros(size(bbox_targets), 'single'); 158 | inds = find(clss > 0); 159 | for i = 1:length(inds) 160 | ind = inds(i); 161 | cls = clss(ind); 162 | targets_inds = (1+cls*4):((cls+1)*4); 163 | bbox_targets(ind, targets_inds) = bbox_target_data(ind, 2:end); 164 | bbox_loss_weights(ind, targets_inds) = 1; 165 | end 166 | end 167 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /functions/rfcn/rfcn_test.m: -------------------------------------------------------------------------------- 1 | function mAP = rfcn_test(conf, imdb, roidb, varargin) 2 | % mAP = rfcn_test(conf, imdb, roidb, varargin) 3 | % -------------------------------------------------------- 4 | % R-FCN implementation 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | % Copyright (c) 2016, Jifeng Dai 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | %% inputs 11 | ip = inputParser; 12 | ip.addRequired('conf', @isstruct); 13 | ip.addRequired('imdb', @isstruct); 14 | ip.addRequired('roidb', @isstruct); 15 | ip.addParamValue('net_def_file', '', @isstr); 16 | ip.addParamValue('net_file', '', @isstr); 17 | ip.addParamValue('cache_name', '', @isstr); 18 | ip.addParamValue('suffix', '', @isstr); 19 | ip.addParamValue('ignore_cache', false, @islogical); 20 | 21 | ip.parse(conf, imdb, roidb, varargin{:}); 22 | opts = ip.Results; 23 | 24 | 25 | %% set cache dir 26 | cache_dir = fullfile(pwd, 'output', 'rfcn_cachedir', opts.cache_name, imdb.name); 27 | mkdir_if_missing(cache_dir); 28 | 29 | %% init log 30 | timestamp = datestr(datevec(now()), 'yyyymmdd_HHMMSS'); 31 | mkdir_if_missing(fullfile(cache_dir, 'log')); 32 | log_file = fullfile(cache_dir, 'log', ['test_', timestamp, '.txt']); 33 | diary(log_file); 34 | 35 | num_images = length(imdb.image_ids); 36 | num_classes = imdb.num_classes; 37 | 38 | try 39 | aboxes = cell(num_classes, 1); 40 | if opts.ignore_cache 41 | throw(''); 42 | end 43 | for i = 1:num_classes 44 | load(fullfile(cache_dir, [imdb.classes{i} '_boxes_' imdb.name opts.suffix])); 45 | aboxes{i} = boxes; 46 | end 47 | catch 48 | %% testing 49 | % init caffe net 50 | caffe_log_file_base = fullfile(cache_dir, 'caffe_log'); 51 | caffe.init_log(caffe_log_file_base); 52 | caffe_net = caffe.Net(opts.net_def_file, 'test'); 53 | caffe_net.copy_from(opts.net_file); 54 | 55 | % set random seed 56 | prev_rng = seed_rand(conf.rng_seed); 57 | caffe.set_random_seed(conf.rng_seed); 58 | 59 | % set gpu/cpu 60 | if conf.use_gpu 61 | caffe.set_mode_gpu(); 62 | else 63 | caffe.set_mode_cpu(); 64 | end 65 | 66 | % determine the maximum number of rois in testing 67 | max_rois_num_in_gpu = 10000; 68 | 69 | disp('opts:'); 70 | disp(opts); 71 | disp('conf:'); 72 | disp(conf); 73 | 74 | %heuristic: keep an average of 160 detections per class per images prior to NMS 75 | max_per_set = 160 * num_images; 76 | % heuristic: keep at most 400 detection per class per image prior to NMS 77 | max_per_image = 400; 78 | % detection thresold for each class (this is adaptively set based on the max_per_set constraint) 79 | thresh = -inf * ones(num_classes, 1); 80 | % top_scores will hold one minheap of scores per class (used to enforce the max_per_set constraint) 81 | top_scores = cell(num_classes, 1); 82 | % all detections are collected into: 83 | % all_boxes[cls][image] = N x 5 array of detections in 84 | % (x1, y1, x2, y2, score) 85 | aboxes = cell(num_classes, 1); 86 | box_inds = cell(num_classes, 1); 87 | for i = 1:num_classes 88 | aboxes{i} = cell(length(imdb.image_ids), 1); 89 | box_inds{i} = cell(length(imdb.image_ids), 1); 90 | end 91 | 92 | count = 0; 93 | t_start = tic; 94 | for i = 1:num_images 95 | count = count + 1; 96 | fprintf('%s: test (%s) %d/%d ', procid(), imdb.name, count, num_images); 97 | th = tic; 98 | d = roidb.rois(i); 99 | im = imread(imdb.image_at(i)); 100 | 101 | [boxes, scores] = rfcn_im_detect(conf, caffe_net, im, d.boxes(~d.gt, :), max_rois_num_in_gpu); 102 | 103 | for j = 1:num_classes 104 | inds = find(scores(:, j) > thresh(j)); 105 | if ~isempty(inds) 106 | [~, ord] = sort(scores(inds, j), 'descend'); 107 | ord = ord(1:min(length(ord), max_per_image)); 108 | inds = inds(ord); 109 | cls_boxes = boxes(inds, (1+(j-1)*4):((j)*4)); 110 | cls_scores = scores(inds, j); 111 | aboxes{j}{i} = [aboxes{j}{i}; cat(2, single(cls_boxes), single(cls_scores))]; 112 | box_inds{j}{i} = [box_inds{j}{i}; inds]; 113 | else 114 | aboxes{j}{i} = [aboxes{j}{i}; zeros(0, 5, 'single')]; 115 | box_inds{j}{i} = box_inds{j}{i}; 116 | end 117 | end 118 | 119 | fprintf(' time %.3fs\n', toc(th)); 120 | 121 | if mod(count, 1000) == 0 122 | for j = 1:num_classes 123 | [aboxes{j}, box_inds{j}, thresh(j)] = ... 124 | keep_top_k(aboxes{j}, box_inds{j}, i, max_per_set, thresh(j)); 125 | end 126 | disp(thresh); 127 | end 128 | end 129 | 130 | for j = 1:num_classes 131 | [aboxes{j}, box_inds{j}, thresh(j)] = ... 132 | keep_top_k(aboxes{j}, box_inds{j}, i, max_per_set, thresh(j)); 133 | end 134 | disp(thresh); 135 | 136 | for i = 1:num_classes 137 | top_scores{i} = sort(top_scores{i}, 'descend'); 138 | if (length(top_scores{i}) > max_per_set) 139 | thresh(i) = top_scores{i}(max_per_set); 140 | end 141 | 142 | % go back through and prune out detections below the found threshold 143 | for j = 1:length(imdb.image_ids) 144 | if ~isempty(aboxes{i}{j}) 145 | I = find(aboxes{i}{j}(:,end) < thresh(i)); 146 | aboxes{i}{j}(I,:) = []; 147 | box_inds{i}{j}(I,:) = []; 148 | end 149 | end 150 | 151 | save_file = fullfile(cache_dir, [imdb.classes{i} '_boxes_' imdb.name opts.suffix]); 152 | boxes = aboxes{i}; 153 | inds = box_inds{i}; 154 | save(save_file, 'boxes', 'inds'); 155 | clear boxes inds; 156 | end 157 | fprintf('test all images in %f seconds.\n', toc(t_start)); 158 | 159 | caffe.reset_all(); 160 | rng(prev_rng); 161 | end 162 | 163 | % ------------------------------------------------------------------------ 164 | % Peform AP evaluation 165 | % ------------------------------------------------------------------------ 166 | 167 | if isequal(imdb.eval_func, @imdb_eval_voc) 168 | new_parpool(); 169 | parfor model_ind = 1:num_classes 170 | cls = imdb.classes{model_ind}; 171 | res(model_ind) = imdb.eval_func(cls, aboxes{model_ind}, imdb, opts.cache_name, opts.suffix); 172 | end 173 | else 174 | % ilsvrc 175 | res = imdb.eval_func(aboxes, imdb, opts.cache_name, opts.suffix); 176 | end 177 | 178 | if ~isempty(res) 179 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 180 | fprintf('Results:\n'); 181 | aps = [res(:).ap]' * 100; 182 | disp(aps); 183 | disp(mean(aps)); 184 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 185 | mAP = mean(aps); 186 | else 187 | mAP = nan; 188 | end 189 | 190 | diary off; 191 | end 192 | 193 | 194 | % ------------------------------------------------------------------------ 195 | function [boxes, box_inds, thresh] = keep_top_k(boxes, box_inds, end_at, top_k, thresh) 196 | % ------------------------------------------------------------------------ 197 | % Keep top K 198 | X = cat(1, boxes{1:end_at}); 199 | if isempty(X) 200 | return; 201 | end 202 | scores = sort(X(:,end), 'descend'); 203 | thresh = scores(min(length(scores), top_k)); 204 | for image_index = 1:end_at 205 | if ~isempty(boxes{image_index}) 206 | bbox = boxes{image_index}; 207 | keep = find(bbox(:,end) >= thresh); 208 | boxes{image_index} = bbox(keep,:); 209 | box_inds{image_index} = box_inds{image_index}(keep); 210 | end 211 | end 212 | end -------------------------------------------------------------------------------- /imdb/roidb_from_voc.m: -------------------------------------------------------------------------------- 1 | function roidb = roidb_from_voc(imdb, varargin) 2 | % roidb = roidb_from_voc(imdb, rootDir) 3 | % Builds an regions of interest database from imdb image 4 | % database. Uses precomputed selective search boxes available 5 | % in the R-CNN data package. 6 | % 7 | % Inspired by Andrea Vedaldi's MKL imdb and roidb code. 8 | 9 | % AUTORIGHTS 10 | % --------------------------------------------------------- 11 | % Copyright (c) 2014, Ross Girshick 12 | % 13 | % This file is part of the R-CNN code and is available 14 | % under the terms of the Simplified BSD License provided in 15 | % LICENSE. Please retain this notice and LICENSE if you use 16 | % this file (or any portion of it) in your project. 17 | % --------------------------------------------------------- 18 | 19 | ip = inputParser; 20 | ip.addRequired('imdb', @isstruct); 21 | ip.addParamValue('exclude_difficult_samples', true, @islogical); 22 | ip.addParamValue('with_selective_search', false, @islogical); 23 | ip.addParamValue('with_edge_box', false, @islogical); 24 | ip.addParamValue('with_self_proposal', false, @islogical); 25 | ip.addParamValue('rootDir', '.', @ischar); 26 | ip.addParamValue('extension', '', @ischar); 27 | ip.parse(imdb, varargin{:}); 28 | opts = ip.Results; 29 | 30 | roidb.name = imdb.name; 31 | if ~isempty(opts.extension) 32 | opts.extension = ['_', opts.extension]; 33 | end 34 | regions_file_ss = fullfile(opts.rootDir, sprintf('/data/selective_search_data/%s%s.mat', roidb.name, opts.extension)); 35 | regions_file_eb = fullfile(opts.rootDir, sprintf('/data/edge_box_data/%s%s.mat', roidb.name, opts.extension)); 36 | regions_file_sp = fullfile(opts.rootDir, sprintf('/data/self_proposal_data/%s%s.mat', roidb.name, opts.extension)); 37 | 38 | cache_file_ss = []; 39 | cache_file_eb = []; 40 | cache_file_sp = []; 41 | if opts.with_selective_search 42 | cache_file_ss = 'ss_'; 43 | if~exist(regions_file_ss, 'file') 44 | error('roidb_from_ilsvrc:: cannot find %s', regions_file_ss); 45 | end 46 | end 47 | 48 | if opts.with_edge_box 49 | cache_file_eb = 'eb_'; 50 | if ~exist(regions_file_eb, 'file') 51 | error('roidb_from_ilsvrc:: cannot find %s', regions_file_eb); 52 | end 53 | end 54 | 55 | if opts.with_self_proposal 56 | cache_file_sp = 'sp_'; 57 | if ~exist(regions_file_sp, 'file') 58 | error('roidb_from_ilsvrc:: cannot find %s', regions_file_sp); 59 | end 60 | end 61 | 62 | cache_file = fullfile(opts.rootDir, ['/imdb/cache/roidb_' cache_file_ss cache_file_eb cache_file_sp imdb.name opts.extension]); 63 | if imdb.flip 64 | cache_file = [cache_file '_flip']; 65 | end 66 | if opts.exclude_difficult_samples 67 | cache_file = [cache_file '_easy']; 68 | end 69 | cache_file = [cache_file, '.mat']; 70 | try 71 | load(cache_file); 72 | catch 73 | VOCopts = imdb.details.VOCopts; 74 | 75 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 76 | 77 | roidb.name = imdb.name; 78 | 79 | fprintf('Loading region proposals...'); 80 | regions = []; 81 | if opts.with_selective_search 82 | regions = load_proposals(regions_file_ss, regions); 83 | end 84 | if opts.with_edge_box 85 | regions = load_proposals(regions_file_eb, regions); 86 | end 87 | if opts.with_self_proposal 88 | regions = load_proposals(regions_file_sp, regions); 89 | end 90 | fprintf('done\n'); 91 | if isempty(regions) 92 | fprintf('Warrning: no windows proposal is loaded !\n'); 93 | regions.boxes = cell(length(imdb.image_ids), 1); 94 | if imdb.flip 95 | regions.images = imdb.image_ids(1:2:end); 96 | else 97 | regions.images = imdb.image_ids; 98 | end 99 | end 100 | 101 | if ~imdb.flip 102 | for i = 1:length(imdb.image_ids) 103 | tic_toc_print('roidb (%s): %d/%d\n', roidb.name, i, length(imdb.image_ids)); 104 | try 105 | voc_rec = PASreadrecord(sprintf(VOCopts.annopath, imdb.image_ids{i})); 106 | catch 107 | voc_rec = []; 108 | end 109 | 110 | [~, image_name1] = fileparts(imdb.image_ids{i}); 111 | [~, image_name2] = fileparts(regions.images{i}); 112 | assert(strcmp(image_name1, image_name2)); 113 | 114 | roidb.rois(i) = attach_proposals(voc_rec, regions.boxes{i}, imdb.class_to_id, opts.exclude_difficult_samples, false, false); 115 | end 116 | else 117 | for i = 1:length(imdb.image_ids)/2 118 | tic_toc_print('roidb (%s): %d/%d\n', roidb.name, i, length(imdb.image_ids)/2); 119 | try 120 | voc_rec = PASreadrecord(sprintf(VOCopts.annopath, imdb.image_ids{i*2-1})); 121 | catch 122 | voc_rec = []; 123 | end 124 | 125 | if length(regions.images) == length(imdb.image_ids) / 2 126 | [~, image_name1] = fileparts(imdb.image_ids{i*2-1}); 127 | [~, image_name2] = fileparts(regions.images{i}); 128 | assert(strcmp(image_name1, image_name2)); 129 | assert(imdb.flip_from(i*2) == i*2-1); 130 | 131 | roidb.rois(i*2-1) = attach_proposals(voc_rec, regions.boxes{i}, imdb.class_to_id, opts.exclude_difficult_samples, false, false); 132 | roidb.rois(i*2) = attach_proposals(voc_rec, regions.boxes{i}, imdb.class_to_id, opts.exclude_difficult_samples, true, true); 133 | elseif length(regions.images) == length(imdb.image_ids) 134 | [~, image_name1] = fileparts(imdb.image_ids{i*2-1}); 135 | [~, image_name2] = fileparts(regions.images{i*2-1}); 136 | assert(strcmp(image_name1, image_name2)); 137 | 138 | [~, image_name1] = fileparts(imdb.image_ids{i*2}); 139 | [~, image_name2] = fileparts(regions.images{i*2}); 140 | assert(strcmp(image_name1, image_name2)); 141 | 142 | assert(imdb.flip_from(i*2) == i*2-1); 143 | 144 | roidb.rois(i*2-1) = attach_proposals(voc_rec, regions.boxes{i*2-1}, imdb.class_to_id, opts.exclude_difficult_samples, false, false); 145 | roidb.rois(i*2) = attach_proposals(voc_rec, regions.boxes{i*2}, imdb.class_to_id, opts.exclude_difficult_samples, false, true); 146 | else 147 | error('The number of images in region proposals and imdb do not match.'); 148 | end 149 | end 150 | end 151 | 152 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 153 | 154 | fprintf('Saving roidb to cache...'); 155 | save(cache_file, 'roidb', '-v7.3'); 156 | fprintf('done\n'); 157 | end 158 | 159 | 160 | % ------------------------------------------------------------------------ 161 | function rec = attach_proposals(voc_rec, boxes, class_to_id, exclude_difficult_samples, flip_proposal, flip_gt) 162 | % ------------------------------------------------------------------------ 163 | 164 | % change selective search order from [y1 x1 y2 x2] to [x1 y1 x2 y2] 165 | if ~isempty(boxes) 166 | boxes = boxes(:, [2 1 4 3]); 167 | if flip_proposal 168 | boxes(:, [1, 3]) = voc_rec.imgsize(1) + 1 - boxes(:, [3, 1]); 169 | end 170 | end 171 | 172 | % gt: [2108x1 double] 173 | % overlap: [2108x20 single] 174 | % dataset: 'voc_2007_trainval' 175 | % boxes: [2108x4 single] 176 | % feat: [2108x9216 single] 177 | % class: [2108x1 uint8] 178 | if isfield(voc_rec, 'objects') 179 | if exclude_difficult_samples 180 | valid_objects = ~cat(1, voc_rec.objects(:).difficult); 181 | else 182 | valid_objects = 1:length(voc_rec.objects(:)); 183 | end 184 | gt_boxes = cat(1, voc_rec.objects(valid_objects).bbox); 185 | if flip_gt 186 | gt_boxes(:, [1, 3]) = voc_rec.imgsize(1) + 1 - gt_boxes(:, [3, 1]); 187 | end 188 | all_boxes = cat(1, gt_boxes, boxes); 189 | gt_classes = class_to_id.values({voc_rec.objects(valid_objects).class}); 190 | gt_classes = cat(1, gt_classes{:}); 191 | num_gt_boxes = size(gt_boxes, 1); 192 | else 193 | gt_boxes = []; 194 | all_boxes = boxes; 195 | gt_classes = []; 196 | num_gt_boxes = 0; 197 | end 198 | num_boxes = size(boxes, 1); 199 | 200 | rec.gt = cat(1, true(num_gt_boxes, 1), false(num_boxes, 1)); 201 | rec.overlap = zeros(num_gt_boxes+num_boxes, class_to_id.Count, 'single'); 202 | for i = 1:num_gt_boxes 203 | rec.overlap(:, gt_classes(i)) = ... 204 | max(rec.overlap(:, gt_classes(i)), boxoverlap(all_boxes, gt_boxes(i, :))); 205 | end 206 | rec.boxes = single(all_boxes); 207 | rec.feat = []; 208 | rec.class = uint8(cat(1, gt_classes, zeros(num_boxes, 1))); 209 | 210 | % ------------------------------------------------------------------------ 211 | function regions = load_proposals(proposal_file, regions) 212 | % ------------------------------------------------------------------------ 213 | if isempty(regions) 214 | regions = load(proposal_file); 215 | else 216 | regions_more = load(proposal_file); 217 | if ~all(cellfun(@(x, y) strcmp(x, y), regions.images(:), regions_more.images(:), 'UniformOutput', true)) 218 | error('roidb_from_ilsvrc: %s is has different images list with other proposals.\n', proposal_file); 219 | end 220 | regions.boxes = cellfun(@(x, y) [double(x); double(y)], regions.boxes(:), regions_more.boxes(:), 'UniformOutput', false); 221 | end 222 | -------------------------------------------------------------------------------- /functions/rfcn/rfcn_train.m: -------------------------------------------------------------------------------- 1 | function save_model_path = rfcn_train(conf, imdb_train, roidb_train, varargin) 2 | % save_model_path = rfcn_train(conf, imdb_train, roidb_train, varargin) 3 | % -------------------------------------------------------- 4 | % R-FCN implementation 5 | % Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | % Copyright (c) 2016, Jifeng Dai 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | %% inputs 11 | ip = inputParser; 12 | ip.addRequired('conf', @isstruct); 13 | ip.addRequired('imdb_train', @iscell); 14 | ip.addRequired('roidb_train', @iscell); 15 | ip.addParamValue('do_val', false, @isscalar); 16 | ip.addParamValue('imdb_val', struct(), @isstruct); 17 | ip.addParamValue('roidb_val', struct(), @isstruct); 18 | ip.addParamValue('val_iters', 500, @isscalar); 19 | ip.addParamValue('val_interval', 5000, @isscalar); 20 | ip.addParamValue('snapshot_interval',... 21 | 10000, @isscalar); 22 | ip.addParamValue('solver_def_file', fullfile(pwd, 'models', 'rfcn_prototxts', 'ResNet-50L_res3a', 'solver_80k120k_lr1_3.prototxt'), ... 23 | @isstr); 24 | ip.addParamValue('net_file', fullfile(pwd, 'models', 'pre_trained_models', 'ResNet-50L', 'ResNet-50-model.caffemodel'), ... 25 | @isstr); 26 | ip.addParamValue('cache_name', 'ResNet-50L_res3a', ... 27 | @isstr); 28 | ip.addParamValue('caffe_version', 'Unkonwn', @isstr); 29 | 30 | ip.parse(conf, imdb_train, roidb_train, varargin{:}); 31 | opts = ip.Results; 32 | 33 | %% try to find trained model 34 | imdbs_name = cell2mat(cellfun(@(x) x.name, imdb_train, 'UniformOutput', false)); 35 | cache_dir = fullfile(pwd, 'output', 'rfcn_cachedir', opts.cache_name, imdbs_name); 36 | save_model_path = fullfile(cache_dir, 'final'); 37 | if exist(save_model_path, 'file') 38 | return; 39 | end 40 | 41 | %% init 42 | % set random seed 43 | prev_rng = seed_rand(conf.rng_seed); 44 | caffe.set_random_seed(conf.rng_seed); 45 | 46 | % init caffe solver 47 | mkdir_if_missing(cache_dir); 48 | caffe_log_file_base = fullfile(cache_dir, 'caffe_log'); 49 | caffe.init_log(caffe_log_file_base); 50 | caffe_solver = caffe.Solver(opts.solver_def_file); 51 | caffe_solver.net.copy_from(opts.net_file); 52 | 53 | % init log 54 | timestamp = datestr(datevec(now()), 'yyyymmdd_HHMMSS'); 55 | mkdir_if_missing(fullfile(cache_dir, 'log')); 56 | log_file = fullfile(cache_dir, 'log', ['train_', timestamp, '.txt']); 57 | diary(log_file); 58 | 59 | % set gpu/cpu 60 | if conf.use_gpu 61 | caffe.set_mode_gpu(); 62 | else 63 | caffe.set_mode_cpu(); 64 | end 65 | 66 | 67 | disp('conf:'); 68 | disp(conf); 69 | disp('opts:'); 70 | disp(opts); 71 | 72 | %% making tran/val data 73 | fprintf('Preparing training data...'); 74 | [image_roidb_train, bbox_means, bbox_stds] = rfcn_prepare_image_roidb(conf, opts.imdb_train, opts.roidb_train); 75 | fprintf('Done.\n'); 76 | 77 | if opts.do_val 78 | fprintf('Preparing validation data...'); 79 | [image_roidb_val] = rfcn_prepare_image_roidb(conf, opts.imdb_val, opts.roidb_val, bbox_means, bbox_stds); 80 | fprintf('Done.\n'); 81 | 82 | % fix validation data 83 | shuffled_inds_val = generate_random_minibatch([], image_roidb_val, conf.ims_per_batch); 84 | shuffled_inds_val = shuffled_inds_val(randperm(length(shuffled_inds_val), opts.val_iters)); 85 | end 86 | 87 | %% training 88 | shuffled_inds = []; 89 | train_results = []; 90 | val_results = []; 91 | iter_ = caffe_solver.iter(); 92 | max_iter = caffe_solver.max_iter(); 93 | 94 | p = new_parpool(1); 95 | parfor i=1:1 96 | seed_rand(conf.rng_seed); 97 | end 98 | [shuffled_inds, sub_db_inds] = generate_random_minibatch(shuffled_inds, image_roidb_train, conf.ims_per_batch); 99 | parHandle = parfeval(p, @rfcn_get_minibatch, 1, conf, image_roidb_train(sub_db_inds)); 100 | tic 101 | while (iter_ < max_iter) 102 | caffe_solver.net.set_phase('train'); 103 | 104 | % generate minibatch training data 105 | % gather date 106 | [~, net_inputs] = fetchNext(parHandle); 107 | 108 | % generate minibatch training data 109 | % generate data asynchronously 110 | [shuffled_inds, sub_db_inds] = generate_random_minibatch(shuffled_inds, image_roidb_train, conf.ims_per_batch); 111 | parHandle = parfeval(p, @rfcn_get_minibatch, 1, conf, image_roidb_train(sub_db_inds)); 112 | 113 | caffe_solver.net.reshape_as_input(net_inputs); 114 | 115 | % one iter SGD update 116 | caffe_solver.net.set_input_data(net_inputs); 117 | caffe_solver.step(1); 118 | 119 | rst = caffe_solver.net.get_output(); 120 | train_results = parse_rst(train_results, rst); 121 | 122 | % do valdiation per val_interval iterations 123 | if ~mod(iter_, opts.val_interval) 124 | if opts.do_val 125 | caffe_solver.net.set_phase('test'); 126 | for i = 1:length(shuffled_inds_val) 127 | sub_db_inds = shuffled_inds_val{i}; 128 | net_inputs = rfcn_get_minibatch(conf, image_roidb_val(sub_db_inds)); 129 | caffe_solver.net.reshape_as_input(net_inputs); 130 | 131 | caffe_solver.net.forward(net_inputs); 132 | 133 | rst = caffe_solver.net.get_output(); 134 | val_results = parse_rst(val_results, rst); 135 | end 136 | end 137 | 138 | show_state(iter_, train_results, val_results); 139 | toc;tic; 140 | train_results = []; 141 | val_results = []; 142 | diary; diary; % flush diary 143 | end 144 | 145 | % snapshot 146 | if ~mod(iter_, opts.snapshot_interval) 147 | snapshot(caffe_solver, bbox_means, bbox_stds, cache_dir, sprintf('iter_%d', iter_)); 148 | end 149 | 150 | iter_ = caffe_solver.iter(); 151 | end 152 | 153 | % final snapshot 154 | snapshot(caffe_solver, bbox_means, bbox_stds, cache_dir, sprintf('iter_%d', iter_)); 155 | save_model_path = snapshot(caffe_solver, bbox_means, bbox_stds, cache_dir, 'final'); 156 | 157 | diary off; 158 | caffe.reset_all(); 159 | rng(prev_rng); 160 | end 161 | 162 | function [shuffled_inds, sub_inds] = generate_random_minibatch(shuffled_inds, image_roidb_train, ims_per_batch) 163 | 164 | % shuffle training data per batch 165 | if isempty(shuffled_inds) 166 | % make sure each minibatch, only has horizontal images or vertical 167 | % images, to save gpu memory 168 | 169 | hori_image_inds = arrayfun(@(x) x.im_size(2) >= x.im_size(1), image_roidb_train, 'UniformOutput', true); 170 | vert_image_inds = ~hori_image_inds; 171 | hori_image_inds = find(hori_image_inds); 172 | vert_image_inds = find(vert_image_inds); 173 | 174 | % random perm 175 | lim = floor(length(hori_image_inds) / ims_per_batch) * ims_per_batch; 176 | hori_image_inds = hori_image_inds(randperm(length(hori_image_inds), lim)); 177 | lim = floor(length(vert_image_inds) / ims_per_batch) * ims_per_batch; 178 | vert_image_inds = vert_image_inds(randperm(length(vert_image_inds), lim)); 179 | 180 | % combine sample for each ims_per_batch 181 | hori_image_inds = reshape(hori_image_inds, ims_per_batch, []); 182 | vert_image_inds = reshape(vert_image_inds, ims_per_batch, []); 183 | 184 | shuffled_inds = [hori_image_inds, vert_image_inds]; 185 | shuffled_inds = shuffled_inds(:, randperm(size(shuffled_inds, 2))); 186 | 187 | shuffled_inds = num2cell(shuffled_inds, 1); 188 | end 189 | 190 | if nargout > 1 191 | % generate minibatch training data 192 | sub_inds = shuffled_inds{1}; 193 | assert(length(sub_inds) == ims_per_batch); 194 | shuffled_inds(1) = []; 195 | end 196 | end 197 | 198 | function model_path = snapshot(caffe_solver, bbox_means, bbox_stds, cache_dir, file_name) 199 | bbox_pred_layer_name = 'rfcn_bbox'; 200 | weights = caffe_solver.net.params(bbox_pred_layer_name, 1).get_data(); 201 | biase = caffe_solver.net.params(bbox_pred_layer_name, 2).get_data(); 202 | weights_back = weights; 203 | biase_back = biase; 204 | 205 | rep_time = size(weights, 4)/length(bbox_means(:)); 206 | 207 | bbox_stds_flatten = bbox_stds'; 208 | bbox_stds_flatten = bbox_stds_flatten(:); 209 | bbox_stds_flatten = repmat(bbox_stds_flatten, [1,rep_time])'; 210 | bbox_stds_flatten = bbox_stds_flatten(:); 211 | bbox_stds_flatten = permute(bbox_stds_flatten, [4,3,2,1]); 212 | 213 | bbox_means_flatten = bbox_means'; 214 | bbox_means_flatten = bbox_means_flatten(:); 215 | bbox_means_flatten = repmat(bbox_means_flatten, [1,rep_time])'; 216 | bbox_means_flatten = bbox_means_flatten(:); 217 | bbox_means_flatten = permute(bbox_means_flatten, [4,3,2,1]); 218 | 219 | % merge bbox_means, bbox_stds into the model 220 | weights = bsxfun(@times, weights, bbox_stds_flatten); % weights = weights * stds; 221 | biase = biase .* bbox_stds_flatten(:) + bbox_means_flatten(:); % bias = bias * stds + means; 222 | 223 | caffe_solver.net.set_params_data(bbox_pred_layer_name, 1, weights); 224 | caffe_solver.net.set_params_data(bbox_pred_layer_name, 2, biase); 225 | 226 | model_path = fullfile(cache_dir, file_name); 227 | caffe_solver.net.save(model_path); 228 | fprintf('Saved as %s\n', model_path); 229 | 230 | % restore net to original state 231 | caffe_solver.net.set_params_data(bbox_pred_layer_name, 1, weights_back); 232 | caffe_solver.net.set_params_data(bbox_pred_layer_name, 2, biase_back); 233 | end 234 | 235 | function show_state(iter, train_results, val_results) 236 | fprintf('\n------------------------- Iteration %d -------------------------\n', iter); 237 | fprintf('Training : accuracy %.3g, loss (cls %.3g, reg %.3g)\n', ... 238 | mean(train_results.accuarcy.data), ... 239 | mean(train_results.loss_cls.data), ... 240 | mean(train_results.loss_bbox.data)); 241 | if exist('val_results', 'var') && ~isempty(val_results) 242 | fprintf('Testing : accuracy %.3g, loss (cls %.3g, reg %.3g)\n', ... 243 | mean(val_results.accuarcy.data), ... 244 | mean(val_results.loss_cls.data), ... 245 | mean(val_results.loss_bbox.data)); 246 | end 247 | end 248 | --------------------------------------------------------------------------------