├── .gitattributes ├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── experiments ├── +Dataset │ ├── private │ │ ├── voc0712_devkit.m │ │ ├── voc2007_devkit.m │ │ └── voc2012_devkit.m │ ├── voc0712_trainval.m │ ├── voc0712_trainval_ss.m │ ├── voc0712plus_trainval.m │ ├── voc0712plus_trainval_ss.m │ ├── voc2007_test.m │ ├── voc2007_test_ss.m │ ├── voc2007_trainval.m │ ├── voc2007_trainval_ss.m │ ├── voc2012_test.m │ └── voc2012_trainval.m ├── +Faster_RCNN_Train │ ├── do_fast_rcnn_test.m │ ├── do_fast_rcnn_train.m │ ├── do_proposal_test.m │ ├── do_proposal_train.m │ ├── gather_rpn_fast_rcnn_models.m │ └── set_cache_folder.m ├── +Model │ ├── VGG16_for_Fast_RCNN_VOC0712.m │ ├── VGG16_for_Fast_RCNN_VOC2007.m │ ├── VGG16_for_Faster_RCNN_VOC0712.m │ ├── VGG16_for_Faster_RCNN_VOC0712plus.m │ ├── VGG16_for_Faster_RCNN_VOC2007.m │ ├── VGG16_for_Faster_RCNN_VOC2012.m │ ├── ZF_for_Fast_RCNN_VOC0712.m │ ├── ZF_for_Fast_RCNN_VOC2007.m │ ├── ZF_for_Faster_RCNN_VOC0712.m │ └── ZF_for_Faster_RCNN_VOC2007.m ├── script_fast_rcnn_VOC0712_VGG16.m ├── script_fast_rcnn_VOC0712_ZF.m ├── script_fast_rcnn_VOC2007_VGG16.m ├── script_fast_rcnn_VOC2007_ZF.m ├── script_faster_rcnn_VOC0712_VGG16.m ├── script_faster_rcnn_VOC0712_ZF.m ├── script_faster_rcnn_VOC0712plus_VGG16.m ├── script_faster_rcnn_VOC2007_VGG16.m ├── script_faster_rcnn_VOC2007_ZF.m ├── script_faster_rcnn_VOC2012_VGG16.m └── script_faster_rcnn_demo.m ├── faster_rcnn_build.m ├── fetch_data ├── fetch_caffe_mex_windows_vs2013_cuda65.m ├── fetch_faster_rcnn_final_model.m ├── fetch_model_VGG16.m └── fetch_model_ZF.m ├── functions ├── fast_rcnn │ ├── fast_rcnn_bbox_transform.m │ ├── fast_rcnn_bbox_transform_inv.m │ ├── fast_rcnn_config.m │ ├── fast_rcnn_conv_feat_detect.m │ ├── fast_rcnn_generate_sliding_windows.m │ ├── fast_rcnn_get_minibatch.m │ ├── fast_rcnn_im_detect.m │ ├── fast_rcnn_map_im_rois_to_feat_rois.m │ ├── fast_rcnn_prepare_image_roidb.m │ ├── fast_rcnn_test.m │ └── fast_rcnn_train.m ├── nms │ ├── nms.m │ ├── 
nms_gpu_mex.cu │ ├── nms_mex.cpp │ ├── nms_multiclass.m │ ├── nms_multiclass_mex.cpp │ └── nvmex.m └── rpn │ ├── proposal_calc_output_size.m │ ├── proposal_config.m │ ├── proposal_generate_anchors.m │ ├── proposal_generate_minibatch.m │ ├── proposal_im_detect.m │ ├── proposal_locate_anchors.m │ ├── proposal_prepare_image_roidb.m │ ├── proposal_test.m │ ├── proposal_train.m │ └── proposal_visual_debug.m ├── imdb ├── get_voc_opts.m ├── imdb_eval_voc.m ├── imdb_from_voc.m ├── roidb_from_proposal.m └── roidb_from_voc.m ├── startup.m └── utils ├── RectLTRB2LTWH.m ├── RectLTWH2LTRB.m ├── active_caffe_mex.m ├── auto_select_gpu.m ├── boxoverlap.m ├── im_list_to_blob.m ├── mkdir_if_missing.m ├── parse_rst.m ├── prep_im_for_blob.m ├── prep_im_for_blob_size.m ├── procid.m ├── seed_rand.m ├── showboxes.m ├── subsample_images.m ├── subsample_images_per_class.m ├── symbolic_link.m ├── tic_toc_print.m ├── vis_label.m ├── xVOCap.m ├── xVOChash_init.m └── xVOChash_lookup.m /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # User Ignore 12 | models/ 13 | data/ 14 | datasets/ 15 | external/caffe/matlab 16 | output/ 17 | cachedir/ 18 | imdb/cache 19 | 
*.caffemodel 20 | *.mat 21 | 22 | # Windows Installer files 23 | *.cab 24 | *.msi 25 | *.msm 26 | *.msp 27 | 28 | # Windows shortcuts 29 | *.lnk 30 | 31 | # ========================= 32 | # Operating System Files 33 | # ========================= 34 | 35 | # OSX 36 | # ========================= 37 | 38 | .DS_Store 39 | .AppleDouble 40 | .LSOverride 41 | 42 | # Thumbnails 43 | ._* 44 | 45 | # Files that might appear on external disk 46 | .Spotlight-V100 47 | .Trashes 48 | 49 | # Directories potentially created on remote AFP share 50 | .AppleDB 51 | .AppleDesktop 52 | Network Trash Folder 53 | Temporary Items 54 | .apdisk 55 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "external/caffe"] 2 | path = external/caffe 3 | url = https://github.com/ShaoqingRen/caffe.git 4 | branch = faster-R-CNN 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Faster R-CNN 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2015 Microsoft Corporation 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | 25 | ************************************************************************ 26 | 27 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION 28 | 29 | This project, Faster R-CNN, incorporates material from the project(s) listed below (collectively, "Third Party Code"). Microsoft is not the original author of the Third Party Code. The original copyright notice and license under which Microsoft received such Third Party Code are set out below. This Third Party Code is licensed to you under their original license terms set forth below. Microsoft reserves all other rights not expressly granted, whether by implication, estoppel or otherwise. 30 | 31 | 1. Caffe, version 0.9, (https://github.com/BVLC/caffe/) 32 | 33 | COPYRIGHT 34 | 35 | All contributions by the University of California: 36 | Copyright (c) 2014, 2015, The Regents of the University of California (Regents) 37 | All rights reserved. 38 | 39 | All other contributions: 40 | Copyright (c) 2014, 2015, the respective contributors 41 | All rights reserved. 42 | 43 | Caffe uses a shared copyright model: each contributor holds copyright over their contributions to Caffe. The project versioning records all such contribution and copyright details. If a contributor wants to further mark their specific copyright on a particular contribution, they should indicate their copyright solely in the commit message of the change when it is committed. 
44 | 45 | The BSD 2-Clause License 46 | 47 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 48 | 49 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 50 | 51 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 52 | 53 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
54 | 55 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION********** 56 | 57 | 58 | -------------------------------------------------------------------------------- /experiments/+Dataset/private/voc0712_devkit.m: -------------------------------------------------------------------------------- 1 | function path = voc0712_devkit() 2 | path = './datasets/VOCdevkit0712'; 3 | end -------------------------------------------------------------------------------- /experiments/+Dataset/private/voc2007_devkit.m: -------------------------------------------------------------------------------- 1 | function path = voc2007_devkit() 2 | path = './datasets/VOCdevkit2007'; 3 | end -------------------------------------------------------------------------------- /experiments/+Dataset/private/voc2012_devkit.m: -------------------------------------------------------------------------------- 1 | function path = voc2012_devkit() 2 | path = './datasets/VOCdevkit2012'; 3 | end -------------------------------------------------------------------------------- /experiments/+Dataset/voc0712_trainval.m: -------------------------------------------------------------------------------- 1 | function dataset = voc0712_trainval(dataset, usage, use_flip) 2 | % Pascal voc 0712 trainval set 3 | % set opts.imdb_train opts.roidb_train 4 | % or set opts.imdb_test opts.roidb_train 5 | 6 | % change to point to your devkit install 7 | devkit2007 = voc2007_devkit(); 8 | devkit2012 = voc2012_devkit(); 9 | 10 | switch usage 11 | case {'train'} 12 | dataset.imdb_train = { imdb_from_voc(devkit2007, 'trainval', '2007', use_flip), ... 
13 | imdb_from_voc(devkit2012, 'trainval', '2012', use_flip)}; 14 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x), dataset.imdb_train, 'UniformOutput', false); 15 | case {'test'} 16 | error('only supports one source test currently'); 17 | otherwise 18 | error('usage = ''train'' or ''test'''); 19 | end 20 | 21 | end -------------------------------------------------------------------------------- /experiments/+Dataset/voc0712_trainval_ss.m: -------------------------------------------------------------------------------- 1 | function dataset = voc0712_trainval_ss(dataset, usage, use_flip) 2 | % Pascal voc 0712 trainval set with selective search 3 | % set opts.imdb_train opts.roidb_train 4 | % or set opts.imdb_test opts.roidb_train 5 | 6 | % change to point to your devkit install 7 | devkit2007 = voc2007_devkit(); 8 | devkit2012 = voc2012_devkit(); 9 | 10 | switch usage 11 | case {'train'} 12 | dataset.imdb_train = { imdb_from_voc(devkit2007, 'trainval', '2007', use_flip), ... 13 | imdb_from_voc(devkit2012, 'trainval', '2012', use_flip)}; 14 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x, 'with_selective_search', true), dataset.imdb_train, 'UniformOutput', false); 15 | case {'test'} 16 | error('only supports one source test currently'); 17 | otherwise 18 | error('usage = ''train'' or ''test'''); 19 | end 20 | 21 | end -------------------------------------------------------------------------------- /experiments/+Dataset/voc0712plus_trainval.m: -------------------------------------------------------------------------------- 1 | function dataset = voc0712plus_trainval(dataset, usage, use_flip) 2 | % Pascal voc 0712 trainval set 3 | % set opts.imdb_train opts.roidb_train 4 | % or set opts.imdb_test opts.roidb_train 5 | 6 | % change to point to your devkit install 7 | devkit2007 = voc2007_devkit(); 8 | devkit2012 = voc2012_devkit(); 9 | 10 | switch usage 11 | case {'train'} 12 | dataset.imdb_train = { imdb_from_voc(devkit2012, 'trainval', '2012', use_flip), ... 
13 | imdb_from_voc(devkit2007, 'trainval', '2007', use_flip), ... 14 | imdb_from_voc(devkit2007, 'test', '2007', use_flip)}; 15 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x), dataset.imdb_train, 'UniformOutput', false); 16 | case {'test'} 17 | error('only supports one source test currently'); 18 | otherwise 19 | error('usage = ''train'' or ''test'''); 20 | end 21 | 22 | end -------------------------------------------------------------------------------- /experiments/+Dataset/voc0712plus_trainval_ss.m: -------------------------------------------------------------------------------- 1 | function dataset = voc0712plus_trainval_ss(dataset, usage, use_flip) 2 | % Pascal voc 0712 trainval set with selective search 3 | % set opts.imdb_train opts.roidb_train 4 | % or set opts.imdb_test opts.roidb_train 5 | 6 | % change to point to your devkit install 7 | devkit2007 = voc2007_devkit(); 8 | devkit2012 = voc2012_devkit(); 9 | 10 | switch usage 11 | case {'train'} 12 | dataset.imdb_train = { imdb_from_voc(devkit2012, 'trainval', '2012', use_flip), ... 13 | imdb_from_voc(devkit2007, 'trainval', '2007', use_flip), ... 
14 | imdb_from_voc(devkit2007, 'test', '2007', use_flip)}; 15 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x, 'with_selective_search', true), dataset.imdb_train, 'UniformOutput', false); 16 | case {'test'} 17 | error('only supports one source test currently'); 18 | otherwise 19 | error('usage = ''train'' or ''test'''); 20 | end 21 | 22 | end -------------------------------------------------------------------------------- /experiments/+Dataset/voc2007_test.m: -------------------------------------------------------------------------------- 1 | function dataset = voc2007_test(dataset, usage, use_flip) 2 | % Pascal voc 2007 test set 3 | % set opts.imdb_train opts.roidb_train 4 | % or set opts.imdb_test opts.roidb_test 5 | 6 | % change to point to your devkit install 7 | devkit = voc2007_devkit(); 8 | 9 | switch usage 10 | case {'train'} 11 | dataset.imdb_train = { imdb_from_voc(devkit, 'test', '2007', use_flip) }; 12 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x), dataset.imdb_train, 'UniformOutput', false); 13 | case {'test'} 14 | dataset.imdb_test = imdb_from_voc(devkit, 'test', '2007', use_flip) ; 15 | dataset.roidb_test = dataset.imdb_test.roidb_func(dataset.imdb_test); 16 | otherwise 17 | error('usage = ''train'' or ''test'''); 18 | end 19 | 20 | end -------------------------------------------------------------------------------- /experiments/+Dataset/voc2007_test_ss.m: -------------------------------------------------------------------------------- 1 | function dataset = voc2007_test_ss(dataset, usage, use_flip) 2 | % Pascal voc 2007 test set with selective search 3 | % set opts.imdb_train opts.roidb_train 4 | % or set opts.imdb_test opts.roidb_test 5 | 6 | % change to point to your devkit install 7 | devkit = voc2007_devkit(); 8 | 9 | switch usage 10 | case {'train'} 11 | dataset.imdb_train = { imdb_from_voc(devkit, 'test', '2007', use_flip) }; 12 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x, 'with_selective_search', true), 
dataset.imdb_train, 'UniformOutput', false); 13 | case {'test'} 14 | dataset.imdb_test = imdb_from_voc(devkit, 'test', '2007', use_flip) ; 15 | dataset.roidb_test = dataset.imdb_test.roidb_func(dataset.imdb_test, 'with_selective_search', true); 16 | otherwise 17 | error('usage = ''train'' or ''test'''); 18 | end 19 | 20 | end -------------------------------------------------------------------------------- /experiments/+Dataset/voc2007_trainval.m: -------------------------------------------------------------------------------- 1 | function dataset = voc2007_trainval(dataset, usage, use_flip) 2 | % Pascal voc 2007 trainval set 3 | % set opts.imdb_train opts.roidb_train 4 | % or set opts.imdb_test opts.roidb_test 5 | 6 | % change to point to your devkit install 7 | devkit = voc2007_devkit(); 8 | 9 | switch usage 10 | case {'train'} 11 | dataset.imdb_train = { imdb_from_voc(devkit, 'trainval', '2007', use_flip) }; 12 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x), dataset.imdb_train, 'UniformOutput', false); 13 | case {'test'} 14 | dataset.imdb_test = imdb_from_voc(devkit, 'trainval', '2007', use_flip) ; 15 | dataset.roidb_test = dataset.imdb_test.roidb_func(dataset.imdb_test); 16 | otherwise 17 | error('usage = ''train'' or ''test'''); 18 | end 19 | 20 | end -------------------------------------------------------------------------------- /experiments/+Dataset/voc2007_trainval_ss.m: -------------------------------------------------------------------------------- 1 | function dataset = voc2007_trainval_ss(dataset, usage, use_flip) 2 | % Pascal voc 2007 trainval set with selective search 3 | % set opts.imdb_train opts.roidb_train 4 | % or set opts.imdb_test opts.roidb_test 5 | 6 | % change to point to your devkit install 7 | devkit = voc2007_devkit(); 8 | 9 | switch usage 10 | case {'train'} 11 | dataset.imdb_train = { imdb_from_voc(devkit, 'trainval', '2007', use_flip) }; 12 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x, 'with_selective_search', true), 
dataset.imdb_train, 'UniformOutput', false); 13 | case {'test'} 14 | dataset.imdb_test = imdb_from_voc(devkit, 'trainval', '2007', use_flip) ; 15 | dataset.roidb_test = dataset.imdb_test.roidb_func(dataset.imdb_test, 'with_selective_search', true); 16 | otherwise 17 | error('usage = ''train'' or ''test'''); 18 | end 19 | 20 | end -------------------------------------------------------------------------------- /experiments/+Dataset/voc2012_test.m: -------------------------------------------------------------------------------- 1 | function dataset = voc2012_test(dataset, usage, use_flip) 2 | % Pascal voc 2012 test set 3 | % set opts.imdb_train opts.roidb_train 4 | % or set opts.imdb_test opts.roidb_test 5 | 6 | % change to point to your devkit install 7 | devkit = voc2012_devkit(); 8 | 9 | switch usage 10 | case {'train'} 11 | dataset.imdb_train = { imdb_from_voc(devkit, 'test', '2012', use_flip) }; 12 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x), dataset.imdb_train, 'UniformOutput', false); 13 | case {'test'} 14 | dataset.imdb_test = imdb_from_voc(devkit, 'test', '2012', use_flip) ; 15 | dataset.roidb_test = dataset.imdb_test.roidb_func(dataset.imdb_test); 16 | otherwise 17 | error('usage = ''train'' or ''test'''); 18 | end 19 | 20 | end -------------------------------------------------------------------------------- /experiments/+Dataset/voc2012_trainval.m: -------------------------------------------------------------------------------- 1 | function dataset = voc2012_trainval(dataset, usage, use_flip) 2 | % Pascal voc 2012 trainval set 3 | % set opts.imdb_train opts.roidb_train 4 | % or set opts.imdb_test opts.roidb_test 5 | 6 | % change to point to your devkit install 7 | devkit = voc2012_devkit(); 8 | 9 | switch usage 10 | case {'train'} 11 | dataset.imdb_train = { imdb_from_voc(devkit, 'trainval', '2012', use_flip) }; 12 | dataset.roidb_train = cellfun(@(x) x.roidb_func(x), dataset.imdb_train, 'UniformOutput', false); 13 | case {'test'} 14 | 
dataset.imdb_test = imdb_from_voc(devkit, 'trainval', '2012', use_flip) ; 15 | dataset.roidb_test = dataset.imdb_test.roidb_func(dataset.imdb_test); 16 | otherwise 17 | error('usage = ''train'' or ''test'''); 18 | end 19 | 20 | end -------------------------------------------------------------------------------- /experiments/+Faster_RCNN_Train/do_fast_rcnn_test.m: -------------------------------------------------------------------------------- 1 | function mAP = do_fast_rcnn_test(conf, model_stage, imdb, roidb, ignore_cache) 2 | if ~exist('ignore_cache', 'var') 3 | ignore_cache = false; 4 | end 5 | 6 | mAP = fast_rcnn_test(conf, imdb, roidb, ... 7 | 'net_def_file', model_stage.test_net_def_file, ... 8 | 'net_file', model_stage.output_model_file, ... 9 | 'cache_name', model_stage.cache_name, ... 10 | 'ignore_cache', ignore_cache); 11 | end 12 | -------------------------------------------------------------------------------- /experiments/+Faster_RCNN_Train/do_fast_rcnn_train.m: -------------------------------------------------------------------------------- 1 | function model_stage = do_fast_rcnn_train(conf, dataset, model_stage, do_val) 2 | if ~do_val 3 | dataset.imdb_test = struct(); 4 | dataset.roidb_test = struct(); 5 | end 6 | 7 | model_stage.output_model_file = fast_rcnn_train(conf, dataset.imdb_train, dataset.roidb_train, ... 8 | 'do_val', do_val, ... 9 | 'imdb_val', dataset.imdb_test, ... 10 | 'roidb_val', dataset.roidb_test, ... 11 | 'solver_def_file', model_stage.solver_def_file, ... 12 | 'net_file', model_stage.init_net_file, ... 13 | 'cache_name', model_stage.cache_name); 14 | end -------------------------------------------------------------------------------- /experiments/+Faster_RCNN_Train/do_proposal_test.m: -------------------------------------------------------------------------------- 1 | function roidb_new = do_proposal_test(conf, model_stage, imdb, roidb) 2 | aboxes = proposal_test(conf, imdb, ... 
3 | 'net_def_file', model_stage.test_net_def_file, ... 4 | 'net_file', model_stage.output_model_file, ... 5 | 'cache_name', model_stage.cache_name); 6 | 7 | aboxes = boxes_filter(aboxes, model_stage.nms.per_nms_topN, model_stage.nms.nms_overlap_thres, model_stage.nms.after_nms_topN, conf.use_gpu); 8 | 9 | roidb_regions = make_roidb_regions(aboxes, imdb.image_ids); 10 | 11 | roidb_new = roidb_from_proposal(imdb, roidb, roidb_regions, ... 12 | 'keep_raw_proposal', false); 13 | end 14 | 15 | function aboxes = boxes_filter(aboxes, per_nms_topN, nms_overlap_thres, after_nms_topN, use_gpu) 16 | % keep at most per_nms_topN rows of each cell to speed up nms; size(x, 1) counts rows, whereas length(x) is the largest dimension and over-indexes matrices with fewer rows than columns 17 | if per_nms_topN > 0 18 | aboxes = cellfun(@(x) x(1:min(size(x, 1), per_nms_topN), :), aboxes, 'UniformOutput', false); 19 | end 20 | % do nms (skipped unless 0 < nms_overlap_thres < 1) 21 | if nms_overlap_thres > 0 && nms_overlap_thres < 1 22 | if use_gpu 23 | for i = 1:length(aboxes) 24 | aboxes{i} = aboxes{i}(nms(aboxes{i}, nms_overlap_thres, use_gpu), :); 25 | end 26 | else 27 | parfor i = 1:length(aboxes) 28 | aboxes{i} = aboxes{i}(nms(aboxes{i}, nms_overlap_thres), :); 29 | end 30 | end 31 | end 32 | aver_boxes_num = mean(cellfun(@(x) size(x, 1), aboxes, 'UniformOutput', true)); 33 | fprintf('aver_boxes_num = %d, select top %d\n', round(aver_boxes_num), after_nms_topN); 34 | if after_nms_topN > 0 35 | aboxes = cellfun(@(x) x(1:min(size(x, 1), after_nms_topN), :), aboxes, 'UniformOutput', false); 36 | end 37 | end 38 | 39 | function regions = make_roidb_regions(aboxes, images) 40 | regions.boxes = aboxes; 41 | regions.images = images; 42 | end 43 | -------------------------------------------------------------------------------- /experiments/+Faster_RCNN_Train/do_proposal_train.m: -------------------------------------------------------------------------------- 1 | function model_stage = do_proposal_train(conf, dataset, model_stage, do_val) 2 | if ~do_val 3 | dataset.imdb_test = struct(); 4 | dataset.roidb_test = struct(); 5 | end 6 | 7 | model_stage.output_model_file = proposal_train(conf, 
dataset.imdb_train, dataset.roidb_train, ... 8 | 'do_val', do_val, ... 9 | 'imdb_val', dataset.imdb_test, ... 10 | 'roidb_val', dataset.roidb_test, ... 11 | 'solver_def_file', model_stage.solver_def_file, ... 12 | 'net_file', model_stage.init_net_file, ... 13 | 'cache_name', model_stage.cache_name); 14 | end 15 | -------------------------------------------------------------------------------- /experiments/+Faster_RCNN_Train/gather_rpn_fast_rcnn_models.m: -------------------------------------------------------------------------------- 1 | function gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset) 2 | cachedir = fullfile(pwd, 'output', 'faster_rcnn_final', model.final_model.cache_name); 3 | mkdir_if_missing(cachedir); 4 | 5 | % find latest model for rpn and fast rcnn 6 | [rpn_test_net_def_file, rpn_output_model_file] = find_last_output_model_file(model.stage1_rpn, model.stage2_rpn); 7 | [fast_rcnn_test_net_def_file, fast_rcnn_output_model_file] = find_last_output_model_file(model.stage1_fast_rcnn, model.stage2_fast_rcnn); 8 | 9 | % check whether feature shared and find the indexs of shared layers 10 | [is_share_feature, last_shared_output_blob_name, shared_layer_names, shared_layer_idx] = ... 11 | check_proposal_fast_rcnn_model(rpn_test_net_def_file, rpn_output_model_file, ... 
12 | fast_rcnn_test_net_def_file, fast_rcnn_output_model_file); 13 | 14 | proposal_detection_model.classes = dataset.imdb_test.classes; 15 | proposal_detection_model.image_means = conf_proposal.image_means; 16 | proposal_detection_model.conf_proposal = conf_proposal; 17 | proposal_detection_model.conf_detection = conf_fast_rcnn; 18 | 19 | % copy rpn and fast rcnn models into cachedir 20 | [~, test_net_proposal_name, test_net_proposal_ext] = fileparts(rpn_test_net_def_file); 21 | proposal_detection_model.proposal_net_def = ['proposal_', test_net_proposal_name, test_net_proposal_ext]; 22 | [~, proposal_model_name, proposal_model_ext] = fileparts(rpn_output_model_file); 23 | proposal_detection_model.proposal_net = ['proposal_', proposal_model_name, proposal_model_ext]; 24 | [~, test_net_fast_rcnn_name, test_net_fast_rcnn_ext] = fileparts(fast_rcnn_test_net_def_file); 25 | proposal_detection_model.detection_net_def = ['detection_', test_net_fast_rcnn_name, test_net_fast_rcnn_ext]; 26 | [~, fast_rcnn_model_name, fast_rcnn_model_ext] = fileparts(fast_rcnn_output_model_file); 27 | proposal_detection_model.detection_net = ['detection_', fast_rcnn_model_name, fast_rcnn_model_ext]; 28 | 29 | copyfile(rpn_test_net_def_file, fullfile(cachedir, proposal_detection_model.proposal_net_def)); 30 | copyfile(rpn_output_model_file, fullfile(cachedir, proposal_detection_model.proposal_net)); 31 | copyfile(fast_rcnn_test_net_def_file, fullfile(cachedir, proposal_detection_model.detection_net_def)); 32 | copyfile(fast_rcnn_output_model_file, fullfile(cachedir, proposal_detection_model.detection_net)); 33 | 34 | proposal_detection_model.is_share_feature = is_share_feature; 35 | if is_share_feature 36 | proposal_detection_model.last_shared_layer_idx = max(shared_layer_idx); 37 | proposal_detection_model.last_shared_layer_detection = ... 38 | shared_layer_names{proposal_detection_model.last_shared_layer_idx}; 39 | proposal_detection_model.last_shared_output_blob_name = ... 
40 | last_shared_output_blob_name; 41 | fprintf('please modify %s file for sharing conv layers with proposal model (delete layers until %s)\n', ... 42 | proposal_detection_model.detection_net_def, proposal_detection_model.last_shared_layer_detection); 43 | end 44 | 45 | save(fullfile(cachedir, 'model'), 'proposal_detection_model'); 46 | end 47 | 48 | function [is_share_feature, last_shared_output_blob_name, shared_layer_names, shared_layer_idx] = check_proposal_fast_rcnn_model(proposal_model_net, proposal_model_bin, ... 49 | fast_rcnn_model_net, fast_rcnn_model_bin) 50 | % load both nets, then walk them layer by layer and collect the leading layers whose names and parameter data match 51 | rpn_net = caffe.Net(proposal_model_net, 'test'); 52 | rpn_net.copy_from(proposal_model_bin); 53 | 54 | fast_rcnn_net = caffe.Net(fast_rcnn_model_net, 'test'); 55 | fast_rcnn_net.copy_from(fast_rcnn_model_bin); 56 | 57 | share_layer = true; 58 | shared_layer_idx = []; 59 | shared_layer_names = {}; 60 | last_shared_output_blob_name = ''; % default so this output is assigned even when no layer is shared 61 | for i = 1:min(length(rpn_net.layer_names), length(fast_rcnn_net.layer_names)) 62 | if ~strcmp(rpn_net.layer_names{i}, fast_rcnn_net.layer_names{i}) 63 | break; 64 | end 65 | 66 | rpn_layer_name = rpn_net.layer_names{i}; 67 | fast_rcnn_layer_name = fast_rcnn_net.layer_names{i}; 68 | rpn_layer = rpn_net.layers(rpn_layer_name); 69 | fast_rcnn_layer = fast_rcnn_net.layers(fast_rcnn_layer_name); 70 | 71 | for j = 1:min(length(rpn_layer.params), length(fast_rcnn_layer.params)) 72 | if ~isequal(rpn_net.params(rpn_layer_name, j).get_data(), fast_rcnn_net.params(fast_rcnn_layer_name, j).get_data()) 73 | share_layer = false; 74 | end 75 | end 76 | 77 | if ~share_layer 78 | break; 79 | else 80 | shared_layer_idx(end+1) = i; 81 | shared_layer_names{end+1} = rpn_layer_name; 82 | last_shared_output_blob_name = rpn_net.blob_names{rpn_net.top_id_vecs{i}}; 83 | end 84 | end 85 | 86 | is_share_feature = false; 87 | if ~isempty(shared_layer_idx) 88 | is_share_feature = true; 89 | end 90 | 91 | caffe.reset_all(); 92 | end 93 | 94 | function [test_net_def_file, output_model_file] = 
find_last_output_model_file(stage1, stage2) 95 | if isfield(stage2, 'output_model_file') && exist(stage2.output_model_file, 'file') 96 | output_model_file = stage2.output_model_file; 97 | test_net_def_file = stage2.test_net_def_file; 98 | return; 99 | end 100 | if isfield(stage1, 'output_model_file') && exist(stage1.output_model_file, 'file') 101 | output_model_file = stage1.output_model_file; 102 | test_net_def_file = stage1.test_net_def_file; 103 | return; 104 | end 105 | error('find_last_output_model_file:: no trained models'); 106 | end -------------------------------------------------------------------------------- /experiments/+Faster_RCNN_Train/set_cache_folder.m: -------------------------------------------------------------------------------- 1 | function model = set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model) 2 | % model = set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model) 3 | % -------------------------------------------------------- 4 | % Faster R-CNN 5 | % Copyright (c) 2015, Shaoqing Ren 6 | % Licensed under The MIT License [see LICENSE for details] 7 | % -------------------------------------------------------- 8 | 9 | model.stage1_rpn.cache_name = [cache_base_proposal, '_stage1_rpn']; 10 | 11 | model.stage1_fast_rcnn.cache_name = ... 12 | [cache_base_proposal, ... 13 | strrep(sprintf('_top%d_nms%g_top%d', model.stage1_rpn.nms.per_nms_topN, ... 14 | model.stage1_rpn.nms.nms_overlap_thres, model.stage1_rpn.nms.after_nms_topN), '.', '_'), ... 15 | cache_base_fast_rcnn, '_stage1_fast_rcnn']; 16 | 17 | model.stage2_rpn.cache_name = ... 18 | [cache_base_proposal, ... 19 | strrep(sprintf('_top%d_nms%g_top%d', model.stage2_rpn.nms.per_nms_topN, ... 20 | model.stage2_rpn.nms.nms_overlap_thres, model.stage2_rpn.nms.after_nms_topN), '.', '_'), ... 21 | cache_base_fast_rcnn, '_stage2_rpn']; 22 | 23 | model.stage2_fast_rcnn.cache_name = ... 24 | [cache_base_proposal, ... 
25 | strrep(sprintf('_top%d_nms%g_top%d', model.stage2_rpn.nms.per_nms_topN, ... 26 | model.stage2_rpn.nms.nms_overlap_thres, model.stage2_rpn.nms.after_nms_topN), '.', '_'), ... 27 | cache_base_fast_rcnn, '_stage2_fast_rcnn']; 28 | 29 | model.final_model.cache_name = [cache_base_proposal, cache_base_fast_rcnn]; 30 | end -------------------------------------------------------------------------------- /experiments/+Model/VGG16_for_Fast_RCNN_VOC0712.m: -------------------------------------------------------------------------------- 1 | function model = VGG16_for_Fast_RCNN_VOC0712(model) 2 | % VGG 16layers (only finetuned from conv3_1) 3 | 4 | model.solver_def_file = fullfile(pwd, 'models', 'fast_rcnn_prototxts', 'vgg_16layers_conv3_1', 'solver_30k60k.prototxt'); 5 | model.test_net_def_file = fullfile(pwd, 'models', 'fast_rcnn_prototxts', 'vgg_16layers_conv3_1', 'test.prototxt'); 6 | 7 | model.net_file = fullfile(pwd, 'models', 'pre_trained_models', 'vgg_16layers', 'vgg16.caffemodel'); 8 | model.mean_image = fullfile(pwd, 'models', 'pre_trained_models', 'vgg_16layers', 'mean_image'); 9 | 10 | end -------------------------------------------------------------------------------- /experiments/+Model/VGG16_for_Fast_RCNN_VOC2007.m: -------------------------------------------------------------------------------- 1 | function model = VGG16_for_Fast_RCNN_VOC2007(model) 2 | % VGG 16layers (only finetuned from conv3_1) 3 | 4 | model.solver_def_file = fullfile(pwd, 'models', 'fast_rcnn_prototxts', 'vgg_16layers_conv3_1', 'solver_30k40k.prototxt'); 5 | model.test_net_def_file = fullfile(pwd, 'models', 'fast_rcnn_prototxts', 'vgg_16layers_conv3_1', 'test.prototxt'); 6 | 7 | model.net_file = fullfile(pwd, 'models', 'pre_trained_models', 'vgg_16layers', 'vgg16.caffemodel'); 8 | model.mean_image = fullfile(pwd, 'models', 'pre_trained_models', 'vgg_16layers', 'mean_image'); 9 | 10 | end -------------------------------------------------------------------------------- 
/experiments/+Model/VGG16_for_Faster_RCNN_VOC0712.m:
--------------------------------------------------------------------------------
function model = VGG16_for_Faster_RCNN_VOC0712(model)
% model = VGG16_for_Faster_RCNN_VOC0712(model)
% Writes into MODEL the pre-trained weight / mean-image paths, the feature
% stride, the solver and test prototxt paths of the four training stages,
% the NMS settings of both RPN stages and the NMS settings of the final test.
% All paths are rooted at <current directory>/models.
% Original note: VGG 16layers (only finetuned from conv3_1)

    models_dir  = fullfile(pwd, 'models');
    weights_dir = fullfile(models_dir, 'pre_trained_models', 'vgg_16layers');
    rpn_conv31  = fullfile(models_dir, 'rpn_prototxts', 'vgg_16layers_conv3_1');
    rpn_fc6     = fullfile(models_dir, 'rpn_prototxts', 'vgg_16layers_fc6');
    frcn_conv31 = fullfile(models_dir, 'fast_rcnn_prototxts', 'vgg_16layers_conv3_1');
    frcn_fc6    = fullfile(models_dir, 'fast_rcnn_prototxts', 'vgg_16layers_fc6');

    model.mean_image           = fullfile(weights_dir, 'mean_image');
    model.pre_trained_net_file = fullfile(weights_dir, 'vgg16.caffemodel');
    % Stride in input image pixels at the last conv layer
    model.feat_stride          = 16;

    %% stage 1 rpn, initialized from the pre-trained network
    model.stage1_rpn.solver_def_file   = fullfile(rpn_conv31, 'solver_60k80k.prototxt');
    model.stage1_rpn.test_net_def_file = fullfile(rpn_conv31, 'test.prototxt');
    model.stage1_rpn.init_net_file     = model.pre_trained_net_file;

    % rpn test setting
    model.stage1_rpn.nms.per_nms_topN      = -1;
    model.stage1_rpn.nms.nms_overlap_thres = 0.7;
    model.stage1_rpn.nms.after_nms_topN    = 2000;

    %% stage 1 fast rcnn, initialized from the pre-trained network
    model.stage1_fast_rcnn.solver_def_file   = fullfile(frcn_conv31, 'solver_30k60k.prototxt');
    model.stage1_fast_rcnn.test_net_def_file = fullfile(frcn_conv31, 'test.prototxt');
    model.stage1_fast_rcnn.init_net_file     = model.pre_trained_net_file;

    %% stage 2 rpn, only finetune fc layers
    model.stage2_rpn.solver_def_file   = fullfile(rpn_fc6, 'solver_60k80k.prototxt');
    model.stage2_rpn.test_net_def_file = fullfile(rpn_fc6, 'test.prototxt');

    % rpn test setting
    model.stage2_rpn.nms.per_nms_topN      = -1;
    model.stage2_rpn.nms.nms_overlap_thres = 0.7;
    model.stage2_rpn.nms.after_nms_topN    = 2000;

    %% stage 2 fast rcnn, only finetune fc layers
    model.stage2_fast_rcnn.solver_def_file   = fullfile(frcn_fc6, 'solver_30k60k.prototxt');
    model.stage2_fast_rcnn.test_net_def_file = fullfile(frcn_fc6, 'test.prototxt');

    %% final test
    model.final_test.nms.per_nms_topN      = 6000; % to speed up nms
    model.final_test.nms.nms_overlap_thres = 0.7;
    model.final_test.nms.after_nms_topN    = 300;
end
--------------------------------------------------------------------------------
/experiments/+Model/VGG16_for_Faster_RCNN_VOC0712plus.m:
--------------------------------------------------------------------------------
function model = VGG16_for_Faster_RCNN_VOC0712plus(model)
% model = VGG16_for_Faster_RCNN_VOC0712plus(model)
% Writes into MODEL the pre-trained weight / mean-image paths, the feature
% stride, the solver and test prototxt paths of the four training stages,
% the NMS settings of both RPN stages and the NMS settings of the final test.
% All paths are rooted at <current directory>/models.
% Original note: VGG 16layers (only finetuned from conv3_1)

    models_dir  = fullfile(pwd, 'models');
    weights_dir = fullfile(models_dir, 'pre_trained_models', 'vgg_16layers');
    rpn_conv31  = fullfile(models_dir, 'rpn_prototxts', 'vgg_16layers_conv3_1');
    rpn_fc6     = fullfile(models_dir, 'rpn_prototxts', 'vgg_16layers_fc6');
    frcn_conv31 = fullfile(models_dir, 'fast_rcnn_prototxts', 'vgg_16layers_conv3_1');
    frcn_fc6    = fullfile(models_dir, 'fast_rcnn_prototxts', 'vgg_16layers_fc6');

    model.mean_image           = fullfile(weights_dir, 'mean_image');
    model.pre_trained_net_file = fullfile(weights_dir, 'vgg16.caffemodel');
    % Stride in input image pixels at the last conv layer
    model.feat_stride          = 16;

    %% stage 1 rpn, initialized from the pre-trained network
    model.stage1_rpn.solver_def_file   = fullfile(rpn_conv31, 'solver_60k80k.prototxt');
    model.stage1_rpn.test_net_def_file = fullfile(rpn_conv31, 'test.prototxt');
    model.stage1_rpn.init_net_file     = model.pre_trained_net_file;

    % rpn test setting
    model.stage1_rpn.nms.per_nms_topN      = -1;
    model.stage1_rpn.nms.nms_overlap_thres = 0.7;
    model.stage1_rpn.nms.after_nms_topN    = 2000;

    %% stage 1 fast rcnn, initialized from the pre-trained network
    model.stage1_fast_rcnn.solver_def_file   = fullfile(frcn_conv31, 'solver_40k100k.prototxt');
    model.stage1_fast_rcnn.test_net_def_file = fullfile(frcn_conv31, 'test.prototxt');
    model.stage1_fast_rcnn.init_net_file     = model.pre_trained_net_file;

    %% stage 2 rpn, only finetune fc layers
    model.stage2_rpn.solver_def_file   = fullfile(rpn_fc6, 'solver_60k80k.prototxt');
    model.stage2_rpn.test_net_def_file = fullfile(rpn_fc6, 'test.prototxt');

    % rpn test setting
    model.stage2_rpn.nms.per_nms_topN      = -1;
    model.stage2_rpn.nms.nms_overlap_thres = 0.7;
    model.stage2_rpn.nms.after_nms_topN    = 2000;

    %% stage 2 fast rcnn, only finetune fc layers
    model.stage2_fast_rcnn.solver_def_file   = fullfile(frcn_fc6, 'solver_40k100k.prototxt');
    model.stage2_fast_rcnn.test_net_def_file = fullfile(frcn_fc6, 'test.prototxt');

    %% final test
    model.final_test.nms.per_nms_topN      = 6000; % to speed up nms
    model.final_test.nms.nms_overlap_thres = 0.7;
    model.final_test.nms.after_nms_topN    = 300;
end
--------------------------------------------------------------------------------
/experiments/+Model/VGG16_for_Faster_RCNN_VOC2007.m:
--------------------------------------------------------------------------------
function model = VGG16_for_Faster_RCNN_VOC2007(model)
% model = VGG16_for_Faster_RCNN_VOC2007(model)
% Writes into MODEL the pre-trained weight / mean-image paths, the feature
% stride, the solver and test prototxt paths of the four training stages,
% the NMS settings of both RPN stages and the NMS settings of the final test.
% All paths are rooted at <current directory>/models.
% Original note: VGG 16layers (only finetuned from conv3_1)

    models_dir  = fullfile(pwd, 'models');
    weights_dir = fullfile(models_dir, 'pre_trained_models', 'vgg_16layers');
    rpn_conv31  = fullfile(models_dir, 'rpn_prototxts', 'vgg_16layers_conv3_1');
    rpn_fc6     = fullfile(models_dir, 'rpn_prototxts', 'vgg_16layers_fc6');
    frcn_conv31 = fullfile(models_dir, 'fast_rcnn_prototxts', 'vgg_16layers_conv3_1');
    frcn_fc6    = fullfile(models_dir, 'fast_rcnn_prototxts', 'vgg_16layers_fc6');

    model.mean_image           = fullfile(weights_dir, 'mean_image');
    model.pre_trained_net_file = fullfile(weights_dir, 'vgg16.caffemodel');
    % Stride in input image pixels at the last conv layer
    model.feat_stride          = 16;

    %% stage 1 rpn, initialized from the pre-trained network
    model.stage1_rpn.solver_def_file   = fullfile(rpn_conv31, 'solver_60k80k.prototxt');
    model.stage1_rpn.test_net_def_file = fullfile(rpn_conv31, 'test.prototxt');
    model.stage1_rpn.init_net_file     = model.pre_trained_net_file;

    % rpn test setting
    model.stage1_rpn.nms.per_nms_topN      = -1;
    model.stage1_rpn.nms.nms_overlap_thres = 0.7;
    model.stage1_rpn.nms.after_nms_topN    = 2000;

    %% stage 1 fast rcnn, initialized from the pre-trained network
    model.stage1_fast_rcnn.solver_def_file   = fullfile(frcn_conv31, 'solver_30k40k.prototxt');
    model.stage1_fast_rcnn.test_net_def_file = fullfile(frcn_conv31, 'test.prototxt');
    model.stage1_fast_rcnn.init_net_file     = model.pre_trained_net_file;

    %% stage 2 rpn, only finetune fc layers
    model.stage2_rpn.solver_def_file   = fullfile(rpn_fc6, 'solver_60k80k.prototxt');
    model.stage2_rpn.test_net_def_file = fullfile(rpn_fc6, 'test.prototxt');

    % rpn test setting
    model.stage2_rpn.nms.per_nms_topN      = -1;
    model.stage2_rpn.nms.nms_overlap_thres = 0.7;
    model.stage2_rpn.nms.after_nms_topN    = 2000;

    %% stage 2 fast rcnn, only finetune fc layers
    model.stage2_fast_rcnn.solver_def_file   = fullfile(frcn_fc6, 'solver_30k40k.prototxt');
    model.stage2_fast_rcnn.test_net_def_file = fullfile(frcn_fc6, 'test.prototxt');

    %% final test
    model.final_test.nms.per_nms_topN      = 6000; % to speed up nms
    model.final_test.nms.nms_overlap_thres = 0.7;
    model.final_test.nms.after_nms_topN    = 300;
end
--------------------------------------------------------------------------------
/experiments/+Model/VGG16_for_Faster_RCNN_VOC2012.m:
--------------------------------------------------------------------------------
function model = VGG16_for_Faster_RCNN_VOC2012(model)
% model = VGG16_for_Faster_RCNN_VOC2012(model)
% Writes into MODEL the pre-trained weight / mean-image paths, the feature
% stride, the solver and test prototxt paths of the four training stages,
% the NMS settings of both RPN stages and the NMS settings of the final test.
% All paths are rooted at <current directory>/models.
% Original note: VGG 16layers (only finetuned from conv3_1)

    models_dir  = fullfile(pwd, 'models');
    weights_dir = fullfile(models_dir, 'pre_trained_models', 'vgg_16layers');
    rpn_conv31  = fullfile(models_dir, 'rpn_prototxts', 'vgg_16layers_conv3_1');
    rpn_fc6     = fullfile(models_dir, 'rpn_prototxts', 'vgg_16layers_fc6');
    frcn_conv31 = fullfile(models_dir, 'fast_rcnn_prototxts', 'vgg_16layers_conv3_1');
    frcn_fc6    = fullfile(models_dir, 'fast_rcnn_prototxts', 'vgg_16layers_fc6');

    model.mean_image           = fullfile(weights_dir, 'mean_image');
    model.pre_trained_net_file = fullfile(weights_dir, 'vgg16.caffemodel');
    % Stride in input image pixels at the last conv layer
    model.feat_stride          = 16;

    %% stage 1 rpn, initialized from the pre-trained network
    model.stage1_rpn.solver_def_file   = fullfile(rpn_conv31, 'solver_60k80k.prototxt');
    model.stage1_rpn.test_net_def_file = fullfile(rpn_conv31, 'test.prototxt');
    model.stage1_rpn.init_net_file     = model.pre_trained_net_file;

    % rpn test setting
    model.stage1_rpn.nms.per_nms_topN      = -1;
    model.stage1_rpn.nms.nms_overlap_thres = 0.7;
    model.stage1_rpn.nms.after_nms_topN    = 2000;

    %% stage 1 fast rcnn, initialized from the pre-trained network
    model.stage1_fast_rcnn.solver_def_file   = fullfile(frcn_conv31, 'solver_30k40k.prototxt');
    model.stage1_fast_rcnn.test_net_def_file = fullfile(frcn_conv31, 'test.prototxt');
    model.stage1_fast_rcnn.init_net_file     = model.pre_trained_net_file;

    %% stage 2 rpn, only finetune fc layers
    model.stage2_rpn.solver_def_file   = fullfile(rpn_fc6, 'solver_60k80k.prototxt');
    model.stage2_rpn.test_net_def_file = fullfile(rpn_fc6, 'test.prototxt');

    % rpn test setting
    model.stage2_rpn.nms.per_nms_topN      = -1;
    model.stage2_rpn.nms.nms_overlap_thres = 0.7;
    model.stage2_rpn.nms.after_nms_topN    = 2000;

    %% stage 2 fast rcnn, only finetune fc layers
    model.stage2_fast_rcnn.solver_def_file   = fullfile(frcn_fc6, 'solver_30k40k.prototxt');
    model.stage2_fast_rcnn.test_net_def_file = fullfile(frcn_fc6, 'test.prototxt');

    %% final test
    model.final_test.nms.per_nms_topN      = 6000; % to speed up nms
    model.final_test.nms.nms_overlap_thres = 0.7;
    model.final_test.nms.after_nms_topN    = 300;
end
--------------------------------------------------------------------------------
/experiments/+Model/ZF_for_Fast_RCNN_VOC0712.m:
--------------------------------------------------------------------------------
function model = ZF_for_Fast_RCNN_VOC0712(model)
% model = ZF_for_Fast_RCNN_VOC0712(model)
% Stores in MODEL the solver / test prototxt paths and the pre-trained
% weight / mean-image paths for Fast R-CNN with the ZF network.
% All paths are rooted at <current directory>/models.

    models_dir   = fullfile(pwd, 'models');
    prototxt_dir = fullfile(models_dir, 'fast_rcnn_prototxts', 'ZF');
    weights_dir  = fullfile(models_dir, 'pre_trained_models', 'ZF');

    model.solver_def_file   = fullfile(prototxt_dir, 'solver_30k60k.prototxt');
    model.test_net_def_file = fullfile(prototxt_dir, 'test.prototxt');

    model.net_file          = fullfile(weights_dir, 'ZF.caffemodel');
    model.mean_image        = fullfile(weights_dir, 'mean_image');

end
--------------------------------------------------------------------------------
/experiments/+Model/ZF_for_Fast_RCNN_VOC2007.m:
--------------------------------------------------------------------------------
function model = ZF_for_Fast_RCNN_VOC2007(model)
% model = ZF_for_Fast_RCNN_VOC2007(model)
% Stores in MODEL the solver / test prototxt paths and the pre-trained
% weight / mean-image paths for Fast R-CNN with the ZF network.
% All paths are rooted at <current directory>/models.

    models_dir   = fullfile(pwd, 'models');
    prototxt_dir = fullfile(models_dir, 'fast_rcnn_prototxts', 'ZF');
    weights_dir  = fullfile(models_dir, 'pre_trained_models', 'ZF');

    model.solver_def_file   = fullfile(prototxt_dir, 'solver_30k40k.prototxt');
    model.test_net_def_file = fullfile(prototxt_dir, 'test.prototxt');

    model.net_file          = fullfile(weights_dir, 'ZF.caffemodel');
    model.mean_image        = fullfile(weights_dir, 'mean_image');

end
--------------------------------------------------------------------------------
/experiments/+Model/ZF_for_Faster_RCNN_VOC0712.m:
--------------------------------------------------------------------------------
function model = ZF_for_Faster_RCNN_VOC0712(model)
% model = ZF_for_Faster_RCNN_VOC0712(model)
% Writes into MODEL the pre-trained weight / mean-image paths, the feature
% stride, the solver and test prototxt paths of the four training stages,
% the NMS settings of both RPN stages and the NMS settings of the final test.
% All paths are rooted at <current directory>/models.

    models_dir  = fullfile(pwd, 'models');
    weights_dir = fullfile(models_dir, 'pre_trained_models', 'ZF');
    rpn_zf      = fullfile(models_dir, 'rpn_prototxts', 'ZF');
    rpn_fc6     = fullfile(models_dir, 'rpn_prototxts', 'ZF_fc6');
    frcn_zf     = fullfile(models_dir, 'fast_rcnn_prototxts', 'ZF');
    frcn_fc6    = fullfile(models_dir, 'fast_rcnn_prototxts', 'ZF_fc6');

    model.mean_image           = fullfile(weights_dir, 'mean_image');
    model.pre_trained_net_file = fullfile(weights_dir, 'ZF.caffemodel');
    % Stride in input image pixels at the last conv layer
    model.feat_stride          = 16;

    %% stage 1 rpn, initialized from the pre-trained network
    model.stage1_rpn.solver_def_file   = fullfile(rpn_zf, 'solver_60k80k.prototxt');
    model.stage1_rpn.test_net_def_file = fullfile(rpn_zf, 'test.prototxt');
    model.stage1_rpn.init_net_file     = model.pre_trained_net_file;

    % rpn test setting
    model.stage1_rpn.nms.per_nms_topN      = -1;
    model.stage1_rpn.nms.nms_overlap_thres = 0.7;
    model.stage1_rpn.nms.after_nms_topN    = 2000;

    %% stage 1 fast rcnn, initialized from the pre-trained network
    model.stage1_fast_rcnn.solver_def_file   = fullfile(frcn_zf, 'solver_30k60k.prototxt');
    model.stage1_fast_rcnn.test_net_def_file = fullfile(frcn_zf, 'test.prototxt');
    model.stage1_fast_rcnn.init_net_file     = model.pre_trained_net_file;

    %% stage 2 rpn, only finetune fc layers
    model.stage2_rpn.solver_def_file   = fullfile(rpn_fc6, 'solver_60k80k.prototxt');
    model.stage2_rpn.test_net_def_file = fullfile(rpn_fc6, 'test.prototxt');

    % rpn test setting
    model.stage2_rpn.nms.per_nms_topN      = -1;
    model.stage2_rpn.nms.nms_overlap_thres = 0.7;
    model.stage2_rpn.nms.after_nms_topN    = 2000;

    %% stage 2 fast rcnn, only finetune fc layers
    model.stage2_fast_rcnn.solver_def_file   = fullfile(frcn_fc6, 'solver_30k60k.prototxt');
    model.stage2_fast_rcnn.test_net_def_file = fullfile(frcn_fc6, 'test.prototxt');

    %% final test
    model.final_test.nms.per_nms_topN      = 6000; % to speed up nms
    model.final_test.nms.nms_overlap_thres = 0.7;
    model.final_test.nms.after_nms_topN    = 300;
end
--------------------------------------------------------------------------------
/experiments/+Model/ZF_for_Faster_RCNN_VOC2007.m:
--------------------------------------------------------------------------------
function model = ZF_for_Faster_RCNN_VOC2007(model)
% model = ZF_for_Faster_RCNN_VOC2007(model)
% Writes into MODEL the pre-trained weight / mean-image paths, the feature
% stride, the solver and test prototxt paths of the four training stages,
% the NMS settings of both RPN stages and the NMS settings of the final test.
% All paths are rooted at <current directory>/models.

    models_dir  = fullfile(pwd, 'models');
    weights_dir = fullfile(models_dir, 'pre_trained_models', 'ZF');
    rpn_zf      = fullfile(models_dir, 'rpn_prototxts', 'ZF');
    rpn_fc6     = fullfile(models_dir, 'rpn_prototxts', 'ZF_fc6');
    frcn_zf     = fullfile(models_dir, 'fast_rcnn_prototxts', 'ZF');
    frcn_fc6    = fullfile(models_dir, 'fast_rcnn_prototxts', 'ZF_fc6');

    model.mean_image           = fullfile(weights_dir, 'mean_image');
    model.pre_trained_net_file = fullfile(weights_dir, 'ZF.caffemodel');
    % Stride in input image pixels at the last conv layer
    model.feat_stride          = 16;

    %% stage 1 rpn, initialized from the pre-trained network
    model.stage1_rpn.solver_def_file   = fullfile(rpn_zf, 'solver_60k80k.prototxt');
    model.stage1_rpn.test_net_def_file = fullfile(rpn_zf, 'test.prototxt');
    model.stage1_rpn.init_net_file     = model.pre_trained_net_file;

    % rpn test setting
    model.stage1_rpn.nms.per_nms_topN      = -1;
    model.stage1_rpn.nms.nms_overlap_thres = 0.7;
    model.stage1_rpn.nms.after_nms_topN    = 2000;

    %% stage 1 fast rcnn, initialized from the pre-trained network
    model.stage1_fast_rcnn.solver_def_file   = fullfile(frcn_zf, 'solver_30k40k.prototxt');
    model.stage1_fast_rcnn.test_net_def_file = fullfile(frcn_zf, 'test.prototxt');
    model.stage1_fast_rcnn.init_net_file     = model.pre_trained_net_file;

    %% stage 2 rpn, only finetune fc layers
    model.stage2_rpn.solver_def_file   = fullfile(rpn_fc6, 'solver_60k80k.prototxt');
    model.stage2_rpn.test_net_def_file = fullfile(rpn_fc6, 'test.prototxt');

    % rpn test setting
    model.stage2_rpn.nms.per_nms_topN      = -1;
    model.stage2_rpn.nms.nms_overlap_thres = 0.7;
    model.stage2_rpn.nms.after_nms_topN    = 2000;

    %% stage 2 fast rcnn, only finetune fc layers
    model.stage2_fast_rcnn.solver_def_file   = fullfile(frcn_fc6, 'solver_30k40k.prototxt');
    model.stage2_fast_rcnn.test_net_def_file = fullfile(frcn_fc6, 'test.prototxt');

    %% final test
    model.final_test.nms.per_nms_topN      = 6000; % to speed up nms
    model.final_test.nms.nms_overlap_thres = 0.7;
    model.final_test.nms.after_nms_topN    = 300;
end
--------------------------------------------------------------------------------
/experiments/script_fast_rcnn_VOC0712_VGG16.m:
--------------------------------------------------------------------------------
function script_fast_rcnn_VOC0712_VGG16()
% script_fast_rcnn_VOC0712_VGG16()
% Fast rcnn training and testing with VGG16 model
% --------------------------------------------------------
% Fast R-CNN
% Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------

    clc;
    clear mex;
    clear is_valid_handle; % to clear init_key
    % run the startup script located one directory above this file's folder
    run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));

    %% -------------------- CONFIG --------------------
    opts.caffe_version = 'caffe_faster_rcnn';
    opts.gpu_id        = auto_select_gpu;
    active_caffe_mex(opts.gpu_id, opts.caffe_version);

    % model paths, cache name and Fast R-CNN configuration
    model           = Model.VGG16_for_Fast_RCNN_VOC0712();
    opts.cache_name = 'fast_rcnn_VOC0712_VGG16';
    conf            = fast_rcnn_config('image_means', model.mean_image);

    % train/test data
    dataset = [];
    dataset = Dataset.voc0712_trainval_ss(dataset, 'train', conf.use_flipped);
    dataset = Dataset.voc2007_test_ss(dataset, 'test', false);

    % do validation, or not
    opts.do_val = true;

    %% -------------------- TRAINING --------------------
    train_options = { ...
        'do_val',          opts.do_val, ...
        'imdb_val',        dataset.imdb_test, ...
        'roidb_val',       dataset.roidb_test, ...
        'solver_def_file', model.solver_def_file, ...
        'net_file',        model.net_file, ...
        'cache_name',      opts.cache_name};
    opts.fast_rcnn_model = fast_rcnn_train(conf, dataset.imdb_train, dataset.roidb_train, ...
        train_options{:});
    trained_model_found = exist(opts.fast_rcnn_model, 'file') ~= 0;
    assert(trained_model_found, 'not found trained model');

    %% -------------------- TESTING --------------------
    test_options = { ...
        'net_def_file', model.test_net_def_file, ...
        'net_file',     opts.fast_rcnn_model, ...
        'cache_name',   opts.cache_name};
    fast_rcnn_test(conf, dataset.imdb_test, dataset.roidb_test, test_options{:});

end

--------------------------------------------------------------------------------
/experiments/script_fast_rcnn_VOC0712_ZF.m:
--------------------------------------------------------------------------------
function script_fast_rcnn_VOC0712_ZF()
% script_fast_rcnn_VOC0712_ZF()
% Fast rcnn training and testing with Zeiler & Fergus model
% --------------------------------------------------------
% Fast R-CNN
% Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------

    clc;
    clear mex;
    clear is_valid_handle; % to clear init_key
    % run the startup script located one directory above this file's folder
    run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));

    %% -------------------- CONFIG --------------------
    opts.caffe_version = 'caffe_faster_rcnn';
    opts.gpu_id        = auto_select_gpu;
    active_caffe_mex(opts.gpu_id, opts.caffe_version);

    % model paths, cache name and Fast R-CNN configuration
    model           = Model.ZF_for_Fast_RCNN_VOC0712();
    opts.cache_name = 'fast_rcnn_VOC0712_ZF';
    conf            = fast_rcnn_config('image_means', model.mean_image);

    % train/test data
    dataset = [];
    dataset = Dataset.voc0712_trainval_ss(dataset, 'train', conf.use_flipped);
    dataset = Dataset.voc2007_test_ss(dataset, 'test', false);

    % do validation, or not
    opts.do_val = true;

    %% -------------------- TRAINING --------------------
    train_options = { ...
        'do_val',          opts.do_val, ...
        'imdb_val',        dataset.imdb_test, ...
        'roidb_val',       dataset.roidb_test, ...
        'solver_def_file', model.solver_def_file, ...
        'net_file',        model.net_file, ...
        'cache_name',      opts.cache_name};
    opts.fast_rcnn_model = fast_rcnn_train(conf, dataset.imdb_train, dataset.roidb_train, ...
        train_options{:});
    trained_model_found = exist(opts.fast_rcnn_model, 'file') ~= 0;
    assert(trained_model_found, 'not found trained model');

    %% -------------------- TESTING --------------------
    test_options = { ...
        'net_def_file', model.test_net_def_file, ...
        'net_file',     opts.fast_rcnn_model, ...
        'cache_name',   opts.cache_name};
    fast_rcnn_test(conf, dataset.imdb_test, dataset.roidb_test, test_options{:});

end

--------------------------------------------------------------------------------
/experiments/script_fast_rcnn_VOC2007_VGG16.m:
--------------------------------------------------------------------------------
function script_fast_rcnn_VOC2007_VGG16()
% script_fast_rcnn_VOC2007_VGG16()
% Fast rcnn training and testing with VGG16 model
% --------------------------------------------------------
% Fast R-CNN
% Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------

    clc;
    clear mex;
    clear is_valid_handle; % to clear init_key
    % run the startup script located one directory above this file's folder
    run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));

    %% -------------------- CONFIG --------------------
    opts.caffe_version = 'caffe_faster_rcnn';
    opts.gpu_id        = auto_select_gpu;
    active_caffe_mex(opts.gpu_id, opts.caffe_version);

    % model paths, cache name and Fast R-CNN configuration
    model           = Model.VGG16_for_Fast_RCNN_VOC2007();
    opts.cache_name = 'fast_rcnn_VOC2007_VGG16';
    conf            = fast_rcnn_config('image_means', model.mean_image);

    % train/test data
    dataset = [];
    dataset = Dataset.voc2007_trainval_ss(dataset, 'train', conf.use_flipped);
    dataset = Dataset.voc2007_test_ss(dataset, 'test', false);

    % do validation, or not
    opts.do_val = true;

    %% -------------------- TRAINING --------------------
    train_options = { ...
        'do_val',          opts.do_val, ...
        'imdb_val',        dataset.imdb_test, ...
        'roidb_val',       dataset.roidb_test, ...
        'solver_def_file', model.solver_def_file, ...
        'net_file',        model.net_file, ...
        'cache_name',      opts.cache_name};
    opts.fast_rcnn_model = fast_rcnn_train(conf, dataset.imdb_train, dataset.roidb_train, ...
        train_options{:});
    trained_model_found = exist(opts.fast_rcnn_model, 'file') ~= 0;
    assert(trained_model_found, 'not found trained model');

    %% -------------------- TESTING --------------------
    test_options = { ...
        'net_def_file', model.test_net_def_file, ...
        'net_file',     opts.fast_rcnn_model, ...
        'cache_name',   opts.cache_name};
    fast_rcnn_test(conf, dataset.imdb_test, dataset.roidb_test, test_options{:});

end

--------------------------------------------------------------------------------
/experiments/script_fast_rcnn_VOC2007_ZF.m:
--------------------------------------------------------------------------------
function script_fast_rcnn_VOC2007_ZF()
% script_fast_rcnn_VOC2007_ZF()
% Fast rcnn training and testing with Zeiler & Fergus model
% --------------------------------------------------------
% Fast R-CNN
% Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------

    clc;
    clear mex;
    clear is_valid_handle; % to clear init_key
    % run the startup script located one directory above this file's folder
    run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));

    %% -------------------- CONFIG --------------------
    opts.caffe_version = 'caffe_faster_rcnn';
    opts.gpu_id        = auto_select_gpu;
    active_caffe_mex(opts.gpu_id, opts.caffe_version);

    % model paths, cache name and Fast R-CNN configuration
    model           = Model.ZF_for_Fast_RCNN_VOC2007();
    opts.cache_name = 'fast_rcnn_VOC2007_ZF';
    conf            = fast_rcnn_config('image_means', model.mean_image);

    % train/test data
    dataset = [];
    dataset = Dataset.voc2007_trainval_ss(dataset, 'train', conf.use_flipped);
    dataset = Dataset.voc2007_test_ss(dataset, 'test', false);

    % do validation, or not
    opts.do_val = true;

    %% -------------------- TRAINING --------------------
    train_options = { ...
        'do_val',          opts.do_val, ...
        'imdb_val',        dataset.imdb_test, ...
        'roidb_val',       dataset.roidb_test, ...
        'solver_def_file', model.solver_def_file, ...
        'net_file',        model.net_file, ...
        'cache_name',      opts.cache_name};
    opts.fast_rcnn_model = fast_rcnn_train(conf, dataset.imdb_train, dataset.roidb_train, ...
        train_options{:});
    trained_model_found = exist(opts.fast_rcnn_model, 'file') ~= 0;
    assert(trained_model_found, 'not found trained model');

    %% -------------------- TESTING --------------------
    test_options = { ...
        'net_def_file', model.test_net_def_file, ...
        'net_file',     opts.fast_rcnn_model, ...
        'cache_name',   opts.cache_name};
    fast_rcnn_test(conf, dataset.imdb_test, dataset.roidb_test, test_options{:});

end

--------------------------------------------------------------------------------
/experiments/script_faster_rcnn_VOC0712_VGG16.m:
--------------------------------------------------------------------------------
function script_faster_rcnn_VOC0712_VGG16()
% script_faster_rcnn_VOC0712_VGG16()
% Faster rcnn training and testing with VGG16 model
% --------------------------------------------------------
% Faster R-CNN
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------

    clc;
    clear mex;
    clear is_valid_handle; % to clear init_key
    % run the startup script located one directory above this file's folder
    run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));

    %% -------------------- CONFIG --------------------
    opts.caffe_version = 'caffe_faster_rcnn';
    opts.gpu_id        = auto_select_gpu;
    active_caffe_mex(opts.gpu_id, opts.caffe_version);

    % do validation, or not
    opts.do_val = true;
    % model
    model = Model.VGG16_for_Faster_RCNN_VOC0712;
    % cache base
    cache_base_proposal  = 'faster_rcnn_VOC0712_vgg_16layers';
    cache_base_fast_rcnn = '';
    % train/test data
    dataset     = [];
    use_flipped = true;
    dataset     = Dataset.voc0712_trainval(dataset, 'train', use_flipped);
    dataset     = Dataset.voc2007_test(dataset, 'test', false);

    %% -------------------- TRAIN --------------------
    % conf
    conf_proposal  = proposal_config('image_means', model.mean_image, 'feat_stride', model.feat_stride);
    conf_fast_rcnn = fast_rcnn_config('image_means', model.mean_image);
    % set cache folder for each stage
    model = Faster_RCNN_Train.set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model);
    % generate anchors and pre-calculate output size of rpn network
    [conf_proposal.anchors, conf_proposal.output_width_map, conf_proposal.output_height_map] ...
        = proposal_prepare_anchors(conf_proposal, model.stage1_rpn.cache_name, model.stage1_rpn.test_net_def_file);

    % Proposal test with the (now complete) proposal configuration bound in;
    % conf_proposal is not modified after this point.
    run_proposal_test = @(rpn_model, imdb, roidb) ...
        Faster_RCNN_Train.do_proposal_test(conf_proposal, rpn_model, imdb, roidb);

    %% stage one proposal
    fprintf('\n***************\nstage one proposal \n***************\n');
    % train
    model.stage1_rpn = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage1_rpn, opts.do_val);
    % test: regenerate the train and test roidbs with the stage-1 RPN
    dataset.roidb_train = cellfun(@(imdb, roidb) run_proposal_test(model.stage1_rpn, imdb, roidb), ...
        dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
    dataset.roidb_test  = run_proposal_test(model.stage1_rpn, dataset.imdb_test, dataset.roidb_test);

    %% stage one fast rcnn
    fprintf('\n***************\nstage one fast rcnn\n***************\n');
    % train
    model.stage1_fast_rcnn = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage1_fast_rcnn, opts.do_val);
    % test
    opts.mAP = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage1_fast_rcnn, dataset.imdb_test, dataset.roidb_test);

    %% stage two proposal
    % net proposal
    fprintf('\n***************\nstage two proposal\n***************\n');
    % train, starting from the stage-1 Fast R-CNN output model
    model.stage2_rpn.init_net_file = model.stage1_fast_rcnn.output_model_file;
    model.stage2_rpn = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage2_rpn, opts.do_val);
    % test: regenerate the train and test roidbs with the stage-2 RPN
    dataset.roidb_train = cellfun(@(imdb, roidb) run_proposal_test(model.stage2_rpn, imdb, roidb), ...
        dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
    dataset.roidb_test  = run_proposal_test(model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);

    %% stage two fast rcnn
    fprintf('\n***************\nstage two fast rcnn\n***************\n');
    % train, starting from the stage-1 Fast R-CNN output model
    model.stage2_fast_rcnn.init_net_file = model.stage1_fast_rcnn.output_model_file;
    model.stage2_fast_rcnn = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage2_fast_rcnn, opts.do_val);

    %% final test
    fprintf('\n***************\nfinal test\n***************\n');

    % switch the stage-2 RPN to the final-test NMS settings before proposing
    model.stage2_rpn.nms = model.final_test.nms;
    dataset.roidb_test   = run_proposal_test(model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
    opts.final_mAP       = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage2_fast_rcnn, dataset.imdb_test, dataset.roidb_test);

    % save final models, for outside tester
    Faster_RCNN_Train.gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset);
end

function [anchors, output_width_map, output_height_map] = proposal_prepare_anchors(conf, cache_name, test_net_def_file)
% Computes the RPN output-size maps for the given test net definition, then
% generates the anchors for CACHE_NAME with scales 8, 16 and 32.
    [output_width_map, output_height_map] = proposal_calc_output_size(conf, test_net_def_file);
    anchor_scales = 2.^(3:5);
    anchors = proposal_generate_anchors(cache_name, 'scales', anchor_scales);
end
--------------------------------------------------------------------------------
/experiments/script_faster_rcnn_VOC0712_ZF.m:
--------------------------------------------------------------------------------
function script_faster_rcnn_VOC0712_ZF()
% script_faster_rcnn_VOC0712_ZF()
% Faster rcnn training and testing with Zeiler & Fergus model
% --------------------------------------------------------
% Faster R-CNN
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------

    clc;
    clear mex;
    clear is_valid_handle; % to clear init_key
    % run the startup script located one directory above this file's folder
    run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));

    %% -------------------- CONFIG --------------------
    opts.caffe_version = 'caffe_faster_rcnn';
    opts.gpu_id        = auto_select_gpu;
    active_caffe_mex(opts.gpu_id, opts.caffe_version);

    % do validation, or not
    opts.do_val = true;
    % model
    model = Model.ZF_for_Faster_RCNN_VOC0712;
    % cache base
    cache_base_proposal  = 'faster_rcnn_VOC0712_ZF';
    cache_base_fast_rcnn = '';
    % train/test data
    dataset     = [];
    use_flipped = true;
    dataset     = Dataset.voc0712_trainval(dataset, 'train', use_flipped);
    dataset     = Dataset.voc2007_test(dataset, 'test', false);

    %% -------------------- TRAIN --------------------
    % conf
    conf_proposal  = proposal_config('image_means', model.mean_image, 'feat_stride', model.feat_stride);
    conf_fast_rcnn = fast_rcnn_config('image_means', model.mean_image);
    % set cache folder for each stage
    model = Faster_RCNN_Train.set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model);
    % generate anchors and pre-calculate output size of rpn network
    [conf_proposal.anchors, conf_proposal.output_width_map, conf_proposal.output_height_map] ...
        = proposal_prepare_anchors(conf_proposal, model.stage1_rpn.cache_name, model.stage1_rpn.test_net_def_file);

    % Proposal test with the (now complete) proposal configuration bound in;
    % conf_proposal is not modified after this point.
    run_proposal_test = @(rpn_model, imdb, roidb) ...
        Faster_RCNN_Train.do_proposal_test(conf_proposal, rpn_model, imdb, roidb);

    %% stage one proposal
    fprintf('\n***************\nstage one proposal \n***************\n');
    % train
    model.stage1_rpn = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage1_rpn, opts.do_val);
    % test: regenerate the train and test roidbs with the stage-1 RPN
    dataset.roidb_train = cellfun(@(imdb, roidb) run_proposal_test(model.stage1_rpn, imdb, roidb), ...
        dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
    dataset.roidb_test  = run_proposal_test(model.stage1_rpn, dataset.imdb_test, dataset.roidb_test);

    %% stage one fast rcnn
    fprintf('\n***************\nstage one fast rcnn\n***************\n');
    % train
    model.stage1_fast_rcnn = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage1_fast_rcnn, opts.do_val);
    % test
    opts.mAP = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage1_fast_rcnn, dataset.imdb_test, dataset.roidb_test);

    %% stage two proposal
    % net proposal
    fprintf('\n***************\nstage two proposal\n***************\n');
    % train, starting from the stage-1 Fast R-CNN output model
    model.stage2_rpn.init_net_file = model.stage1_fast_rcnn.output_model_file;
    model.stage2_rpn = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage2_rpn, opts.do_val);
    % test: regenerate the train and test roidbs with the stage-2 RPN
    dataset.roidb_train = cellfun(@(imdb, roidb) run_proposal_test(model.stage2_rpn, imdb, roidb), ...
        dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
    dataset.roidb_test  = run_proposal_test(model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);

    %% stage two fast rcnn
    fprintf('\n***************\nstage two fast rcnn\n***************\n');
    % train, starting from the stage-1 Fast R-CNN output model
    model.stage2_fast_rcnn.init_net_file = model.stage1_fast_rcnn.output_model_file;
    model.stage2_fast_rcnn = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage2_fast_rcnn, opts.do_val);

    %% final test
    fprintf('\n***************\nfinal test\n***************\n');

    % switch the stage-2 RPN to the final-test NMS settings before proposing
    model.stage2_rpn.nms = model.final_test.nms;
    dataset.roidb_test   = run_proposal_test(model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
    opts.final_mAP       = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage2_fast_rcnn, dataset.imdb_test, dataset.roidb_test);

    % save final models, for outside tester
    Faster_RCNN_Train.gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset);
end

function [anchors, output_width_map, output_height_map] = proposal_prepare_anchors(conf, cache_name, test_net_def_file)
% Computes the RPN output-size maps for the given test net definition, then
% generates the anchors for CACHE_NAME with scales 8, 16 and 32.
    [output_width_map, output_height_map] = proposal_calc_output_size(conf, test_net_def_file);
    anchor_scales = 2.^(3:5);
    anchors = proposal_generate_anchors(cache_name, 'scales', anchor_scales);
end
--------------------------------------------------------------------------------
/experiments/script_faster_rcnn_VOC0712plus_VGG16.m:
--------------------------------------------------------------------------------
function script_faster_rcnn_VOC0712plus_VGG16()
% script_faster_rcnn_VOC0712plus_VGG16()
% Faster rcnn training and testing with VGG16 model
% --------------------------------------------------------
% Faster R-CNN
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------

clc;
clear mex;
clear is_valid_handle; % to clear init_key
run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));
%% -------------------- CONFIG --------------------
opts.caffe_version = 'caffe_faster_rcnn';
opts.gpu_id = auto_select_gpu;
active_caffe_mex(opts.gpu_id, opts.caffe_version);

% do validation, or
not 20 | opts.do_val = false; 21 | % model 22 | model = Model.VGG16_for_Faster_RCNN_VOC0712plus; 23 | % cache base 24 | cache_base_proposal = 'faster_rcnn_VOC0712plus_vgg_16layers'; 25 | cache_base_fast_rcnn = ''; 26 | % train/test data 27 | dataset = []; 28 | use_flipped = true; 29 | dataset = Dataset.voc0712plus_trainval(dataset, 'train', use_flipped); 30 | dataset = Dataset.voc2012_test(dataset, 'test', false); 31 | 32 | %% -------------------- TRAIN -------------------- 33 | % conf 34 | conf_proposal = proposal_config('image_means', model.mean_image, 'feat_stride', model.feat_stride); 35 | conf_fast_rcnn = fast_rcnn_config('image_means', model.mean_image); 36 | % set cache folder for each stage 37 | model = Faster_RCNN_Train.set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model); 38 | % generate anchors and pre-calculate output size of rpn network 39 | [conf_proposal.anchors, conf_proposal.output_width_map, conf_proposal.output_height_map] ... 40 | = proposal_prepare_anchors(conf_proposal, model.stage1_rpn.cache_name, model.stage1_rpn.test_net_def_file); 41 | 42 | %% stage one proposal 43 | fprintf('\n***************\nstage one proposal \n***************\n'); 44 | % train 45 | model.stage1_rpn = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage1_rpn, opts.do_val); 46 | % test 47 | dataset.roidb_train = cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false); 48 | dataset.roidb_test = Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, dataset.imdb_test, dataset.roidb_test); 49 | 50 | %% stage one fast rcnn 51 | fprintf('\n***************\nstage one fast rcnn\n***************\n'); 52 | % train 53 | model.stage1_fast_rcnn = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage1_fast_rcnn, opts.do_val); 54 | % test 55 | % opts.mAP = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, 
model.stage1_fast_rcnn, dataset.imdb_test, dataset.roidb_test); 56 | 57 | %% stage two proposal 58 | % net proposal 59 | fprintf('\n***************\nstage two proposal\n***************\n'); 60 | % train 61 | model.stage2_rpn.init_net_file = model.stage1_fast_rcnn.output_model_file; 62 | model.stage2_rpn = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage2_rpn, opts.do_val); 63 | % test 64 | dataset.roidb_train = cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false); 65 | dataset.roidb_test = Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test); 66 | 67 | %% stage two fast rcnn 68 | fprintf('\n***************\nstage two fast rcnn\n***************\n'); 69 | % train 70 | model.stage2_fast_rcnn.init_net_file = model.stage1_fast_rcnn.output_model_file; 71 | model.stage2_fast_rcnn = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage2_fast_rcnn, opts.do_val); 72 | 73 | %% final test 74 | fprintf('\n***************\nfinal test\n***************\n'); 75 | 76 | model.stage2_rpn.nms = model.final_test.nms; 77 | dataset.roidb_test = Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test); 78 | opts.final_mAP = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage2_fast_rcnn, dataset.imdb_test, dataset.roidb_test); 79 | 80 | % save final models, for outside tester 81 | Faster_RCNN_Train.gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset); 82 | end 83 | 84 | function [anchors, output_width_map, output_height_map] = proposal_prepare_anchors(conf, cache_name, test_net_def_file) 85 | [output_width_map, output_height_map] ... 86 | = proposal_calc_output_size(conf, test_net_def_file); 87 | anchors = proposal_generate_anchors(cache_name, ... 
88 | 'scales', 2.^[3:5]); 89 | end -------------------------------------------------------------------------------- /experiments/script_faster_rcnn_VOC2007_VGG16.m: -------------------------------------------------------------------------------- 1 | function script_faster_rcnn_VOC2007_VGG16() 2 | % script_faster_rcnn_VOC2007_VGG16() 3 | % Faster rcnn training and testing with VGG16 model 4 | % -------------------------------------------------------- 5 | % Faster R-CNN 6 | % Copyright (c) 2015, Shaoqing Ren 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | clc; 11 | clear mex; 12 | clear is_valid_handle; % to clear init_key 13 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); 14 | %% -------------------- CONFIG -------------------- 15 | opts.caffe_version = 'caffe_faster_rcnn'; 16 | opts.gpu_id = auto_select_gpu; 17 | active_caffe_mex(opts.gpu_id, opts.caffe_version); 18 | 19 | % do validation, or not 20 | opts.do_val = true; 21 | % model 22 | model = Model.VGG16_for_Faster_RCNN_VOC2007; 23 | % cache base 24 | cache_base_proposal = 'faster_rcnn_VOC2007_vgg_16layers'; 25 | cache_base_fast_rcnn = ''; 26 | % train/test data 27 | dataset = []; 28 | use_flipped = true; 29 | dataset = Dataset.voc2007_trainval(dataset, 'train', use_flipped); 30 | dataset = Dataset.voc2007_test(dataset, 'test', false); 31 | 32 | %% -------------------- TRAIN -------------------- 33 | % conf 34 | conf_proposal = proposal_config('image_means', model.mean_image, 'feat_stride', model.feat_stride); 35 | conf_fast_rcnn = fast_rcnn_config('image_means', model.mean_image); 36 | % set cache folder for each stage 37 | model = Faster_RCNN_Train.set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model); 38 | % generate anchors and pre-calculate output size of rpn network 39 | [conf_proposal.anchors, conf_proposal.output_width_map, conf_proposal.output_height_map] ... 
40 | = proposal_prepare_anchors(conf_proposal, model.stage1_rpn.cache_name, model.stage1_rpn.test_net_def_file); 41 | 42 | %% stage one proposal 43 | fprintf('\n***************\nstage one proposal \n***************\n'); 44 | % train 45 | model.stage1_rpn = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage1_rpn, opts.do_val); 46 | % test 47 | dataset.roidb_train = cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false); 48 | dataset.roidb_test = Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, dataset.imdb_test, dataset.roidb_test); 49 | 50 | %% stage one fast rcnn 51 | fprintf('\n***************\nstage one fast rcnn\n***************\n'); 52 | % train 53 | model.stage1_fast_rcnn = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage1_fast_rcnn, opts.do_val); 54 | % test 55 | opts.mAP = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage1_fast_rcnn, dataset.imdb_test, dataset.roidb_test); 56 | 57 | %% stage two proposal 58 | % net proposal 59 | fprintf('\n***************\nstage two proposal\n***************\n'); 60 | % train 61 | model.stage2_rpn.init_net_file = model.stage1_fast_rcnn.output_model_file; 62 | model.stage2_rpn = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage2_rpn, opts.do_val); 63 | % test 64 | dataset.roidb_train = cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false); 65 | dataset.roidb_test = Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test); 66 | 67 | %% stage two fast rcnn 68 | fprintf('\n***************\nstage two fast rcnn\n***************\n'); 69 | % train 70 | model.stage2_fast_rcnn.init_net_file = model.stage1_fast_rcnn.output_model_file; 71 | model.stage2_fast_rcnn = 
Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage2_fast_rcnn, opts.do_val); 72 | 73 | %% final test 74 | fprintf('\n***************\nfinal test\n***************\n'); 75 | 76 | model.stage2_rpn.nms = model.final_test.nms; 77 | dataset.roidb_test = Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test); 78 | opts.final_mAP = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage2_fast_rcnn, dataset.imdb_test, dataset.roidb_test); 79 | 80 | % save final models, for outside tester 81 | Faster_RCNN_Train.gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset); 82 | end 83 | 84 | function [anchors, output_width_map, output_height_map] = proposal_prepare_anchors(conf, cache_name, test_net_def_file) 85 | [output_width_map, output_height_map] ... 86 | = proposal_calc_output_size(conf, test_net_def_file); 87 | anchors = proposal_generate_anchors(cache_name, ... 88 | 'scales', 2.^[3:5]); 89 | end -------------------------------------------------------------------------------- /experiments/script_faster_rcnn_VOC2007_ZF.m: -------------------------------------------------------------------------------- 1 | function script_faster_rcnn_VOC2007_ZF() 2 | % script_faster_rcnn_VOC2007_ZF() 3 | % Faster rcnn training and testing with Zeiler & Fergus model 4 | % -------------------------------------------------------- 5 | % Faster R-CNN 6 | % Copyright (c) 2015, Shaoqing Ren 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | clc; 11 | clear mex; 12 | clear is_valid_handle; % to clear init_key 13 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); 14 | %% -------------------- CONFIG -------------------- 15 | opts.caffe_version = 'caffe_faster_rcnn'; 16 | opts.gpu_id = auto_select_gpu; 17 | active_caffe_mex(opts.gpu_id, opts.caffe_version); 18 | 19 | % do validation, or not 20 | 
opts.do_val = true; 21 | % model 22 | model = Model.ZF_for_Faster_RCNN_VOC2007; 23 | % cache base 24 | cache_base_proposal = 'faster_rcnn_VOC2007_ZF'; 25 | cache_base_fast_rcnn = ''; 26 | % train/test data 27 | dataset = []; 28 | use_flipped = true; 29 | dataset = Dataset.voc2007_trainval(dataset, 'train', use_flipped); 30 | dataset = Dataset.voc2007_test(dataset, 'test', false); 31 | 32 | %% -------------------- TRAIN -------------------- 33 | % conf 34 | conf_proposal = proposal_config('image_means', model.mean_image, 'feat_stride', model.feat_stride); 35 | conf_fast_rcnn = fast_rcnn_config('image_means', model.mean_image); 36 | % set cache folder for each stage 37 | model = Faster_RCNN_Train.set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model); 38 | % generate anchors and pre-calculate output size of rpn network 39 | [conf_proposal.anchors, conf_proposal.output_width_map, conf_proposal.output_height_map] ... 40 | = proposal_prepare_anchors(conf_proposal, model.stage1_rpn.cache_name, model.stage1_rpn.test_net_def_file); 41 | 42 | %% stage one proposal 43 | fprintf('\n***************\nstage one proposal \n***************\n'); 44 | % train 45 | model.stage1_rpn = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage1_rpn, opts.do_val); 46 | % test 47 | dataset.roidb_train = cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false); 48 | dataset.roidb_test = Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, dataset.imdb_test, dataset.roidb_test); 49 | 50 | %% stage one fast rcnn 51 | fprintf('\n***************\nstage one fast rcnn\n***************\n'); 52 | % train 53 | model.stage1_fast_rcnn = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage1_fast_rcnn, opts.do_val); 54 | % test 55 | opts.mAP = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage1_fast_rcnn, dataset.imdb_test, 
dataset.roidb_test); 56 | 57 | %% stage two proposal 58 | % net proposal 59 | fprintf('\n***************\nstage two proposal\n***************\n'); 60 | % train 61 | model.stage2_rpn.init_net_file = model.stage1_fast_rcnn.output_model_file; 62 | model.stage2_rpn = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage2_rpn, opts.do_val); 63 | % test 64 | dataset.roidb_train = cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false); 65 | dataset.roidb_test = Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test); 66 | 67 | %% stage two fast rcnn 68 | fprintf('\n***************\nstage two fast rcnn\n***************\n'); 69 | % train 70 | model.stage2_fast_rcnn.init_net_file = model.stage1_fast_rcnn.output_model_file; 71 | model.stage2_fast_rcnn = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage2_fast_rcnn, opts.do_val); 72 | 73 | %% final test 74 | fprintf('\n***************\nfinal test\n***************\n'); 75 | 76 | model.stage2_rpn.nms = model.final_test.nms; 77 | dataset.roidb_test = Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test); 78 | opts.final_mAP = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage2_fast_rcnn, dataset.imdb_test, dataset.roidb_test); 79 | 80 | % save final models, for outside tester 81 | Faster_RCNN_Train.gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset); 82 | end 83 | 84 | function [anchors, output_width_map, output_height_map] = proposal_prepare_anchors(conf, cache_name, test_net_def_file) 85 | [output_width_map, output_height_map] ... 86 | = proposal_calc_output_size(conf, test_net_def_file); 87 | anchors = proposal_generate_anchors(cache_name, ... 
88 | 'scales', 2.^[3:5]); 89 | end -------------------------------------------------------------------------------- /experiments/script_faster_rcnn_VOC2012_VGG16.m: -------------------------------------------------------------------------------- 1 | function script_faster_rcnn_VOC2012_VGG16() 2 | % script_faster_rcnn_VOC2012_VGG16() 3 | % Faster rcnn training and testing with VGG16 model 4 | % -------------------------------------------------------- 5 | % Faster R-CNN 6 | % Copyright (c) 2015, Shaoqing Ren 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | clc; 11 | clear mex; 12 | clear is_valid_handle; % to clear init_key 13 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); 14 | %% -------------------- CONFIG -------------------- 15 | opts.caffe_version = 'caffe_faster_rcnn'; 16 | opts.gpu_id = auto_select_gpu; 17 | active_caffe_mex(opts.gpu_id, opts.caffe_version); 18 | 19 | % do validation, or not 20 | opts.do_val = false; 21 | % model 22 | model = Model.VGG16_for_Faster_RCNN_VOC2012; 23 | % cache base 24 | cache_base_proposal = 'faster_rcnn_VOC2012_vgg_16layers'; 25 | cache_base_fast_rcnn = ''; 26 | % train/test data 27 | dataset = []; 28 | use_flipped = true; 29 | dataset = Dataset.voc2012_trainval(dataset, 'train', use_flipped); 30 | dataset = Dataset.voc2012_test(dataset, 'test', false); 31 | 32 | %% -------------------- TRAIN -------------------- 33 | % conf 34 | conf_proposal = proposal_config('image_means', model.mean_image, 'feat_stride', model.feat_stride); 35 | conf_fast_rcnn = fast_rcnn_config('image_means', model.mean_image); 36 | % set cache folder for each stage 37 | model = Faster_RCNN_Train.set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model); 38 | % generate anchors and pre-calculate output size of rpn network 39 | [conf_proposal.anchors, conf_proposal.output_width_map, conf_proposal.output_height_map] ... 
40 | = proposal_prepare_anchors(conf_proposal, model.stage1_rpn.cache_name, model.stage1_rpn.test_net_def_file); 41 | 42 | %% stage one proposal 43 | fprintf('\n***************\nstage one proposal \n***************\n'); 44 | % train 45 | model.stage1_rpn = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage1_rpn, opts.do_val); 46 | % test 47 | dataset.roidb_train = cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false); 48 | dataset.roidb_test = Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, dataset.imdb_test, dataset.roidb_test); 49 | 50 | %% stage one fast rcnn 51 | fprintf('\n***************\nstage one fast rcnn\n***************\n'); 52 | % train 53 | model.stage1_fast_rcnn = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage1_fast_rcnn, opts.do_val); 54 | % test 55 | % opts.mAP = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage1_fast_rcnn, dataset.imdb_test, dataset.roidb_test); 56 | 57 | %% stage two proposal 58 | % net proposal 59 | fprintf('\n***************\nstage two proposal\n***************\n'); 60 | % train 61 | model.stage2_rpn.init_net_file = model.stage1_fast_rcnn.output_model_file; 62 | model.stage2_rpn = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage2_rpn, opts.do_val); 63 | % test 64 | dataset.roidb_train = cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false); 65 | dataset.roidb_test = Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test); 66 | 67 | %% stage two fast rcnn 68 | fprintf('\n***************\nstage two fast rcnn\n***************\n'); 69 | % train 70 | model.stage2_fast_rcnn.init_net_file = model.stage1_fast_rcnn.output_model_file; 71 | model.stage2_fast_rcnn = 
Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage2_fast_rcnn, opts.do_val); 72 | 73 | %% final test 74 | fprintf('\n***************\nfinal test\n***************\n'); 75 | 76 | model.stage2_rpn.nms = model.final_test.nms; 77 | dataset.roidb_test = Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test); 78 | opts.final_mAP = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage2_fast_rcnn, dataset.imdb_test, dataset.roidb_test); 79 | 80 | % save final models, for outside tester 81 | Faster_RCNN_Train.gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset); 82 | end 83 | 84 | function [anchors, output_width_map, output_height_map] = proposal_prepare_anchors(conf, cache_name, test_net_def_file) 85 | [output_width_map, output_height_map] ... 86 | = proposal_calc_output_size(conf, test_net_def_file); 87 | anchors = proposal_generate_anchors(cache_name, ... 88 | 'scales', 2.^[3:5]); 89 | end -------------------------------------------------------------------------------- /experiments/script_faster_rcnn_demo.m: -------------------------------------------------------------------------------- 1 | function script_faster_rcnn_demo() 2 | close all; 3 | clc; 4 | clear mex; 5 | clear is_valid_handle; % to clear init_key 6 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); 7 | %% -------------------- CONFIG -------------------- 8 | opts.caffe_version = 'caffe_faster_rcnn'; 9 | opts.gpu_id = auto_select_gpu; 10 | active_caffe_mex(opts.gpu_id, opts.caffe_version); 11 | 12 | opts.per_nms_topN = 6000; 13 | opts.nms_overlap_thres = 0.7; 14 | opts.after_nms_topN = 300; 15 | opts.use_gpu = true; 16 | 17 | opts.test_scales = 600; 18 | 19 | %% -------------------- INIT_MODEL -------------------- 20 | model_dir = fullfile(pwd, 'output', 'faster_rcnn_final', 'faster_rcnn_VOC0712_vgg_16layers'); %% VGG-16 21 | %model_dir = fullfile(pwd, 'output', 'faster_rcnn_final', 
'faster_rcnn_VOC0712_ZF'); %% ZF 22 | proposal_detection_model = load_proposal_detection_model(model_dir); 23 | 24 | proposal_detection_model.conf_proposal.test_scales = opts.test_scales; 25 | proposal_detection_model.conf_detection.test_scales = opts.test_scales; 26 | if opts.use_gpu 27 | proposal_detection_model.conf_proposal.image_means = gpuArray(proposal_detection_model.conf_proposal.image_means); 28 | proposal_detection_model.conf_detection.image_means = gpuArray(proposal_detection_model.conf_detection.image_means); 29 | end 30 | 31 | % caffe.init_log(fullfile(pwd, 'caffe_log')); 32 | % proposal net 33 | rpn_net = caffe.Net(proposal_detection_model.proposal_net_def, 'test'); 34 | rpn_net.copy_from(proposal_detection_model.proposal_net); 35 | % fast rcnn net 36 | fast_rcnn_net = caffe.Net(proposal_detection_model.detection_net_def, 'test'); 37 | fast_rcnn_net.copy_from(proposal_detection_model.detection_net); 38 | 39 | % set gpu/cpu 40 | if opts.use_gpu 41 | caffe.set_mode_gpu(); 42 | else 43 | caffe.set_mode_cpu(); 44 | end 45 | 46 | %% -------------------- WARM UP -------------------- 47 | % the first run will be slower; use an empty image to warm up 48 | 49 | for j = 1:2 % we warm up 2 times 50 | im = uint8(ones(375, 500, 3)*128); 51 | if opts.use_gpu 52 | im = gpuArray(im); 53 | end 54 | [boxes, scores] = proposal_im_detect(proposal_detection_model.conf_proposal, rpn_net, im); 55 | aboxes = boxes_filter([boxes, scores], opts.per_nms_topN, opts.nms_overlap_thres, opts.after_nms_topN, opts.use_gpu); 56 | if proposal_detection_model.is_share_feature 57 | [boxes, scores] = fast_rcnn_conv_feat_detect(proposal_detection_model.conf_detection, fast_rcnn_net, im, ... 58 | rpn_net.blobs(proposal_detection_model.last_shared_output_blob_name), ... 59 | aboxes(:, 1:4), opts.after_nms_topN); 60 | else 61 | [boxes, scores] = fast_rcnn_im_detect(proposal_detection_model.conf_detection, fast_rcnn_net, im, ... 
62 | aboxes(:, 1:4), opts.after_nms_topN); 63 | end 64 | end 65 | 66 | %% -------------------- TESTING -------------------- 67 | im_names = {'001763.jpg', '004545.jpg', '000542.jpg', '000456.jpg', '001150.jpg'}; 68 | % these images can be downloaded with fetch_faster_rcnn_final_model.m 69 | 70 | running_time = []; 71 | for j = 1:length(im_names) 72 | 73 | im = imread(fullfile(pwd, im_names{j})); 74 | 75 | if opts.use_gpu 76 | im = gpuArray(im); 77 | end 78 | 79 | % test proposal 80 | th = tic(); 81 | [boxes, scores] = proposal_im_detect(proposal_detection_model.conf_proposal, rpn_net, im); 82 | t_proposal = toc(th); 83 | th = tic(); 84 | aboxes = boxes_filter([boxes, scores], opts.per_nms_topN, opts.nms_overlap_thres, opts.after_nms_topN, opts.use_gpu); 85 | t_nms = toc(th); 86 | 87 | % test detection 88 | th = tic(); 89 | if proposal_detection_model.is_share_feature 90 | [boxes, scores] = fast_rcnn_conv_feat_detect(proposal_detection_model.conf_detection, fast_rcnn_net, im, ... 91 | rpn_net.blobs(proposal_detection_model.last_shared_output_blob_name), ... 92 | aboxes(:, 1:4), opts.after_nms_topN); 93 | else 94 | [boxes, scores] = fast_rcnn_im_detect(proposal_detection_model.conf_detection, fast_rcnn_net, im, ... 95 | aboxes(:, 1:4), opts.after_nms_topN); 96 | end 97 | t_detection = toc(th); 98 | 99 | fprintf('%s (%dx%d): time %.3fs (resize+conv+proposal: %.3fs, nms+regionwise: %.3fs)\n', im_names{j}, ... 
100 | size(im, 2), size(im, 1), t_proposal + t_nms + t_detection, t_proposal, t_nms+t_detection); 101 | running_time(end+1) = t_proposal + t_nms + t_detection; 102 | 103 | % visualize 104 | classes = proposal_detection_model.classes; 105 | boxes_cell = cell(length(classes), 1); 106 | thres = 0.6; 107 | for i = 1:length(boxes_cell) 108 | boxes_cell{i} = [boxes(:, (1+(i-1)*4):(i*4)), scores(:, i)]; 109 | boxes_cell{i} = boxes_cell{i}(nms(boxes_cell{i}, 0.3), :); 110 | 111 | I = boxes_cell{i}(:, 5) >= thres; 112 | boxes_cell{i} = boxes_cell{i}(I, :); 113 | end 114 | figure(j); 115 | showboxes(im, boxes_cell, classes, 'voc'); 116 | pause(0.1); 117 | end 118 | fprintf('mean time: %.3fs\n', mean(running_time)); 119 | 120 | caffe.reset_all(); 121 | clear mex; 122 | 123 | end 124 | 125 | function proposal_detection_model = load_proposal_detection_model(model_dir) 126 | ld = load(fullfile(model_dir, 'model')); 127 | proposal_detection_model = ld.proposal_detection_model; 128 | clear ld; 129 | 130 | proposal_detection_model.proposal_net_def ... 131 | = fullfile(model_dir, proposal_detection_model.proposal_net_def); 132 | proposal_detection_model.proposal_net ... 133 | = fullfile(model_dir, proposal_detection_model.proposal_net); 134 | proposal_detection_model.detection_net_def ... 135 | = fullfile(model_dir, proposal_detection_model.detection_net_def); 136 | proposal_detection_model.detection_net ... 
137 | = fullfile(model_dir, proposal_detection_model.detection_net); 138 | 139 | end 140 | 141 | function aboxes = boxes_filter(aboxes, per_nms_topN, nms_overlap_thres, after_nms_topN, use_gpu) 142 | % keep at most per_nms_topN rows of aboxes before nms (to speed up nms), run nms, then keep at most after_nms_topN rows 143 | if per_nms_topN > 0 144 | aboxes = aboxes(1:min(size(aboxes, 1), per_nms_topN), :); % size(aboxes, 1) is the row count; length(aboxes) is the largest dimension and over-indexes when there are fewer rows than columns 145 | end 146 | % do nms (skipped unless 0 < nms_overlap_thres < 1) 147 | if nms_overlap_thres > 0 && nms_overlap_thres < 1 148 | aboxes = aboxes(nms(aboxes, nms_overlap_thres, use_gpu), :); 149 | end 150 | if after_nms_topN > 0 151 | aboxes = aboxes(1:min(size(aboxes, 1), after_nms_topN), :); % row count again, for the same reason 152 | end 153 | end 154 | -------------------------------------------------------------------------------- /faster_rcnn_build.m: -------------------------------------------------------------------------------- 1 | function faster_rcnn_build() 2 | % faster_rcnn_build() 3 | % -------------------------------------------------------- 4 | % Faster R-CNN 5 | % Copyright (c) 2015, Shaoqing Ren 6 | % Licensed under The MIT License [see LICENSE for details] 7 | % -------------------------------------------------------- 8 | 9 | % Compile nms_mex 10 | if ~exist('nms_mex', 'file') 11 | fprintf('Compiling nms_mex\n'); 12 | 13 | mex -O -outdir bin ... 14 | CXXFLAGS="\$CXXFLAGS -std=c++11" ... 15 | -largeArrayDims ... 16 | functions/nms/nms_mex.cpp ...
17 | -output nms_mex; 18 | end 19 | 20 | if ~exist('nms_gpu_mex', 'file') 21 | fprintf('Compiling nms_gpu_mex\n'); 22 | addpath(fullfile(pwd, 'functions', 'nms')); 23 | nvmex('functions/nms/nms_gpu_mex.cu', 'bin'); 24 | delete('nms_gpu_mex.o'); 25 | end 26 | 27 | 28 | -------------------------------------------------------------------------------- /fetch_data/fetch_caffe_mex_windows_vs2013_cuda65.m: -------------------------------------------------------------------------------- 1 | 2 | cur_dir = pwd; 3 | cd(fileparts(mfilename('fullpath'))); 4 | 5 | try 6 | fprintf('Downloading caffe_mex...\n'); 7 | urlwrite('https://onedrive.live.com/download?resid=36FEC490FBC32F1A!111&authkey=!AFVWFGTbViiX5tg&ithint=file%2czip', ... 8 | 'caffe_mex.zip'); 9 | 10 | fprintf('Unzipping...\n'); 11 | unzip('caffe_mex.zip', '..'); 12 | 13 | fprintf('Done.\n'); 14 | delete('caffe_mex.zip'); 15 | catch err % keep the exception so the reason for the failed download/unzip/delete step is reported 16 | fprintf('Error in downloading (%s), please try links in README.md https://github.com/ShaoqingRen/faster_rcnn\n', err.message); 17 | end 18 | 19 | cd(cur_dir); 20 | -------------------------------------------------------------------------------- /fetch_data/fetch_faster_rcnn_final_model.m: -------------------------------------------------------------------------------- 1 | 2 | cur_dir = pwd; 3 | cd(fileparts(mfilename('fullpath'))); 4 | 5 | try 6 | fprintf('Downloading faster_rcnn_final_model...\n'); 7 | urlwrite('https://onedrive.live.com/download?resid=D7AF52BADBA8A4BC!114&authkey=!AERHoxZ-iAx_j34&ithint=file%2czip', ...
8 | 'faster_rcnn_final_model.zip'); 9 | 10 | 11 | fprintf('Unzipping...\n'); 12 | unzip('faster_rcnn_final_model.zip', '..'); 13 | 14 | fprintf('Done.\n'); 15 | delete('faster_rcnn_final_model.zip'); 16 | catch err % keep the exception so the reason for the failed download/unzip/delete step is reported 17 | fprintf('Error in downloading (%s), please try links in README.md https://github.com/ShaoqingRen/faster_rcnn\n', err.message); 18 | end 19 | 20 | cd(cur_dir); 21 | -------------------------------------------------------------------------------- /fetch_data/fetch_model_VGG16.m: -------------------------------------------------------------------------------- 1 | 2 | cur_dir = pwd; 3 | cd(fileparts(mfilename('fullpath'))); 4 | 5 | try 6 | fprintf('Downloading model_VGG16...\n'); 7 | urlwrite('https://onedrive.live.com/download?resid=36FEC490FBC32F1A!114&authkey=!AE8uV9B07dREbhM&ithint=file%2czip', ... 8 | 'model_VGG16.zip'); 9 | 10 | fprintf('Unzipping...\n'); 11 | unzip('model_VGG16.zip', '..'); 12 | 13 | fprintf('Done.\n'); 14 | delete('model_VGG16.zip'); 15 | catch err % keep the exception so the reason for the failed download/unzip/delete step is reported 16 | fprintf('Error in downloading (%s), please try links in README.md https://github.com/ShaoqingRen/faster_rcnn\n', err.message); 17 | end 18 | 19 | cd(cur_dir); 20 | -------------------------------------------------------------------------------- /fetch_data/fetch_model_ZF.m: -------------------------------------------------------------------------------- 1 | 2 | cur_dir = pwd; 3 | cd(fileparts(mfilename('fullpath'))); 4 | 5 | try 6 | fprintf('Downloading model_ZF...\n'); 7 | urlwrite('https://onedrive.live.com/download?resid=36FEC490FBC32F1A!113&authkey=!AIzdm0sD_SmhUQ4&ithint=file%2czip', ...
8 | 'model_ZF.zip'); 9 | 10 | fprintf('Unzipping...\n'); 11 | unzip('model_ZF.zip', '..'); 12 | 13 | fprintf('Done.\n'); 14 | delete('model_ZF.zip'); 15 | catch 16 | fprintf('Error in downloading, please try links in README.md https://github.com/ShaoqingRen/faster_rcnn'); 17 | end 18 | 19 | cd(cur_dir); 20 | -------------------------------------------------------------------------------- /functions/fast_rcnn/fast_rcnn_bbox_transform.m: -------------------------------------------------------------------------------- 1 | function [regression_label] = fast_rcnn_bbox_transform(ex_boxes, gt_boxes) 2 | % [regression_label] = fast_rcnn_bbox_transform(ex_boxes, gt_boxes) 3 | % -------------------------------------------------------- 4 | % Fast R-CNN 5 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn) 6 | % Copyright (c) 2015, Shaoqing Ren 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | ex_widths = ex_boxes(:, 3) - ex_boxes(:, 1) + 1; 11 | ex_heights = ex_boxes(:, 4) - ex_boxes(:, 2) + 1; 12 | ex_ctr_x = ex_boxes(:, 1) + 0.5 * (ex_widths - 1); 13 | ex_ctr_y = ex_boxes(:, 2) + 0.5 * (ex_heights - 1); 14 | 15 | gt_widths = gt_boxes(:, 3) - gt_boxes(:, 1) + 1; 16 | gt_heights = gt_boxes(:, 4) - gt_boxes(:, 2) + 1; 17 | gt_ctr_x = gt_boxes(:, 1) + 0.5 * (gt_widths - 1); 18 | gt_ctr_y = gt_boxes(:, 2) + 0.5 * (gt_heights - 1); 19 | 20 | targets_dx = (gt_ctr_x - ex_ctr_x) ./ (ex_widths+eps); 21 | targets_dy = (gt_ctr_y - ex_ctr_y) ./ (ex_heights+eps); 22 | targets_dw = log(gt_widths ./ ex_widths); 23 | targets_dh = log(gt_heights ./ ex_heights); 24 | 25 | regression_label = [targets_dx, targets_dy, targets_dw, targets_dh]; 26 | end -------------------------------------------------------------------------------- /functions/fast_rcnn/fast_rcnn_bbox_transform_inv.m: -------------------------------------------------------------------------------- 1 | function 
[pred_boxes] = fast_rcnn_bbox_transform_inv(boxes, box_deltas) 2 | % [pred_boxes] = fast_rcnn_bbox_transform_inv(boxes, box_deltas) 3 | % -------------------------------------------------------- 4 | % Fast R-CNN 5 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn) 6 | % Copyright (c) 2015, Shaoqing Ren 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | src_w = double(boxes(:, 3) - boxes(:, 1) + 1); 11 | src_h = double(boxes(:, 4) - boxes(:, 2) + 1); 12 | src_ctr_x = double(boxes(:, 1) + 0.5*(src_w-1)); 13 | src_ctr_y = double(boxes(:, 2) + 0.5*(src_h-1)); 14 | 15 | dst_ctr_x = double(box_deltas(:, 1:4:end)); 16 | dst_ctr_y = double(box_deltas(:, 2:4:end)); 17 | dst_scl_x = double(box_deltas(:, 3:4:end)); 18 | dst_scl_y = double(box_deltas(:, 4:4:end)); 19 | 20 | pred_ctr_x = bsxfun(@plus, bsxfun(@times, dst_ctr_x, src_w), src_ctr_x); 21 | pred_ctr_y = bsxfun(@plus, bsxfun(@times, dst_ctr_y, src_h), src_ctr_y); 22 | pred_w = bsxfun(@times, exp(dst_scl_x), src_w); 23 | pred_h = bsxfun(@times, exp(dst_scl_y), src_h); 24 | pred_boxes = zeros(size(box_deltas), 'single'); 25 | pred_boxes(:, 1:4:end) = pred_ctr_x - 0.5*(pred_w-1); 26 | pred_boxes(:, 2:4:end) = pred_ctr_y - 0.5*(pred_h-1); 27 | pred_boxes(:, 3:4:end) = pred_ctr_x + 0.5*(pred_w-1); 28 | pred_boxes(:, 4:4:end) = pred_ctr_y + 0.5*(pred_h-1); 29 | end -------------------------------------------------------------------------------- /functions/fast_rcnn/fast_rcnn_config.m: -------------------------------------------------------------------------------- 1 | function conf = fast_rcnn_config(varargin) 2 | % conf = fast_rcnn_config(varargin) 3 | % Fast R-CNN configuration 4 | % -------------------------------------------------------- 5 | % Fast R-CNN 6 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn) 7 | % Copyright (c) 2015, Shaoqing Ren 8 | % Licensed 
under The MIT License [see LICENSE for details] 9 | % -------------------------------------------------------- 10 | % 11 | ip = inputParser; 12 | 13 | %% training 14 | % whether use gpu 15 | ip.addParamValue('use_gpu', gpuDeviceCount > 0, ... 16 | @islogical); 17 | % Image scales -- the short edge of input image 18 | ip.addParamValue('scales', 600, @ismatrix); 19 | % Max pixel size of a scaled input image 20 | ip.addParamValue('max_size', 1000, @isscalar); 21 | % Images per batch 22 | ip.addParamValue('ims_per_batch', 2, @isscalar); 23 | % Minibatch size 24 | ip.addParamValue('batch_size', 128, @isscalar); 25 | % Fraction of minibatch that is foreground labeled (class > 0) 26 | ip.addParamValue('fg_fraction', 0.25, @isscalar); 27 | % Overlap threshold for a ROI to be considered foreground (if >= fg_thresh) 28 | ip.addParamValue('fg_thresh', 0.5, @isscalar); 29 | % Overlap threshold for a ROI to be considered background (class = 0 if 30 | % overlap in [bg_thresh_lo, bg_thresh_hi)) 31 | ip.addParamValue('bg_thresh_hi', 0.5, @isscalar); 32 | ip.addParamValue('bg_thresh_lo', 0.1, @isscalar); 33 | % mean image, in RGB order 34 | ip.addParamValue('image_means', 128, @ismatrix); 35 | % Use horizontally-flipped images during training? 
36 | ip.addParamValue('use_flipped', true, @islogical); 37 | % Valid training sample (IoU >= bbox_thresh) for bounding box regression 38 | ip.addParamValue('bbox_thresh', 0.5, @isscalar); 39 | 40 | % random seed 41 | ip.addParamValue('rng_seed', 6, @isscalar); 42 | 43 | 44 | %% testing 45 | ip.addParamValue('test_scales', 600, @isscalar); 46 | ip.addParamValue('test_max_size', 1000, @isscalar); 47 | ip.addParamValue('test_nms', 0.3, @isscalar); 48 | ip.addParamValue('test_binary', false, @islogical); 49 | 50 | ip.parse(varargin{:}); 51 | conf = ip.Results; 52 | 53 | % if image_means is a file, load it 54 | if ischar(conf.image_means) 55 | s = load(conf.image_means); 56 | s_fieldnames = fieldnames(s); 57 | assert(length(s_fieldnames) == 1); 58 | conf.image_means = s.(s_fieldnames{1}); 59 | end 60 | end -------------------------------------------------------------------------------- /functions/fast_rcnn/fast_rcnn_conv_feat_detect.m: -------------------------------------------------------------------------------- 1 | function [pred_boxes, scores] = fast_rcnn_conv_feat_detect(conf, caffe_net, im, conv_feat_blob, boxes, max_rois_num_in_gpu) 2 | % [pred_boxes, scores] = fast_rcnn_conv_feat_detect(conf, caffe_net, im, conv_feat_blob, boxes, max_rois_num_in_gpu) 3 | % -------------------------------------------------------- 4 | % Fast R-CNN 5 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn) 6 | % Copyright (c) 2015, Shaoqing Ren 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | [rois_blob, ~] = get_blobs(conf, im, boxes); 11 | 12 | % permute data into caffe c++ memory, thus [num, channels, height, width] 13 | rois_blob = rois_blob - 1; % to c's index (start from 0) 14 | rois_blob = permute(rois_blob, [3, 4, 2, 1]); 15 | rois_blob = single(rois_blob); 16 | 17 | % set conv feature map as 'data' 18 | 
caffe_net.blobs('data').copy_data_from(conv_feat_blob); 19 | 20 | total_rois = size(rois_blob, 4); 21 | total_scores = cell(ceil(total_rois / max_rois_num_in_gpu), 1); 22 | total_box_deltas = cell(ceil(total_rois / max_rois_num_in_gpu), 1); 23 | for i = 1:ceil(total_rois / max_rois_num_in_gpu) 24 | 25 | sub_ind_start = 1 + (i-1) * max_rois_num_in_gpu; 26 | sub_ind_end = min(total_rois, i * max_rois_num_in_gpu); 27 | sub_rois_blob = rois_blob(:, :, :, sub_ind_start:sub_ind_end); 28 | 29 | % only set rois blob here 30 | net_inputs = {[], sub_rois_blob}; 31 | 32 | % Reshape net's input blobs 33 | caffe_net.reshape_as_input(net_inputs); 34 | output_blobs = caffe_net.forward(net_inputs); 35 | 36 | if conf.test_binary 37 | % simulate binary logistic regression 38 | scores = caffe_net.blobs('cls_score').get_data(); 39 | scores = squeeze(scores)'; 40 | % Return scores as fg - bg 41 | scores = bsxfun(@minus, scores, scores(:, 1)); 42 | else 43 | % use softmax estimated probabilities 44 | scores = output_blobs{2}; 45 | scores = squeeze(scores)'; 46 | end 47 | 48 | % Apply bounding-box regression deltas 49 | box_deltas = output_blobs{1}; 50 | box_deltas = squeeze(box_deltas)'; 51 | 52 | total_scores{i} = scores; 53 | total_box_deltas{i} = box_deltas; 54 | end 55 | 56 | scores = cell2mat(total_scores); 57 | box_deltas = cell2mat(total_box_deltas); 58 | 59 | pred_boxes = fast_rcnn_bbox_transform_inv(boxes, box_deltas); 60 | pred_boxes = clip_boxes(pred_boxes, size(im, 2), size(im, 1)); 61 | 62 | % remove scores and boxes for back-ground 63 | pred_boxes = pred_boxes(:, 5:end); 64 | scores = scores(:, 2:end); 65 | end 66 | 67 | function [rois_blob, im_scale_factors] = get_blobs(conf, im, rois) 68 | im_scale_factors = get_image_blob_scales(conf, im); 69 | rois_blob = get_rois_blob(conf, rois, im_scale_factors); 70 | end 71 | 72 | function im_scales = get_image_blob_scales(conf, im) 73 | im_scales = arrayfun(@(x) prep_im_for_blob_size(size(im), x, conf.test_max_size), 
conf.test_scales, 'UniformOutput', false); 74 | im_scales = cell2mat(im_scales); 75 | end 76 | 77 | function [rois_blob] = get_rois_blob(conf, im_rois, im_scale_factors) 78 | [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, im_scale_factors); 79 | rois_blob = single([levels, feat_rois]); 80 | end 81 | 82 | function [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, scales) 83 | im_rois = single(im_rois); 84 | 85 | if length(scales) > 1 86 | widths = im_rois(:, 3) - im_rois(:, 1) + 1; 87 | heights = im_rois(:, 4) - im_rois(:, 2) + 1; 88 | 89 | areas = widths .* heights; 90 | scaled_areas = bsxfun(@times, areas(:), scales(:)'.^2); 91 | [~, levels] = min(abs(scaled_areas - 224.^2), [], 2); % per-ROI index of the scale whose scaled area is closest to 224^2 (same rule as fast_rcnn_im_detect) 92 | else 93 | levels = ones(size(im_rois, 1), 1); 94 | end 95 | 96 | feat_rois = round(bsxfun(@times, im_rois-1, scales(levels))) + 1; 97 | end 98 | 99 | function boxes = clip_boxes(boxes, im_width, im_height) 100 | % x1 >= 1 & <= im_width 101 | boxes(:, 1:4:end) = max(min(boxes(:, 1:4:end), im_width), 1); 102 | % y1 >= 1 & <= im_height 103 | boxes(:, 2:4:end) = max(min(boxes(:, 2:4:end), im_height), 1); 104 | % x2 >= 1 & <= im_width 105 | boxes(:, 3:4:end) = max(min(boxes(:, 3:4:end), im_width), 1); 106 | % y2 >= 1 & <= im_height 107 | boxes(:, 4:4:end) = max(min(boxes(:, 4:4:end), im_height), 1); 108 | end 109 | -------------------------------------------------------------------------------- /functions/fast_rcnn/fast_rcnn_generate_sliding_windows.m: -------------------------------------------------------------------------------- 1 | function roidb = fast_rcnn_generate_sliding_windows(conf, imdb, roidb, roipool_in_size) 2 | % roidb = fast_rcnn_generate_sliding_windows(conf, imdb, roidb, roipool_in_size) 3 | % -------------------------------------------------------- 4 | % Fast R-CNN 5 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn) 6 | % Copyright (c) 2015, Shaoqing Ren 7 | % Licensed under The 
MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | regions.images = imdb.image_ids; 11 | 12 | im_sizes = imdb.sizes; 13 | regions.boxes = cellfun(@(x) generate_sliding_windows_one_image(conf, x, roipool_in_size), num2cell(im_sizes, 2), 'UniformOutput', false); 14 | 15 | roidb = roidb_from_proposal(imdb, roidb, regions); 16 | end 17 | 18 | function boxes = generate_sliding_windows_one_image(conf, im_size, roipool_in_size) 19 | im_scale = prep_im_for_blob_size(im_size, conf.scales, conf.max_size); 20 | im_size = round(im_size * im_scale); 21 | 22 | x1 = 1:conf.feat_stride:im_size(2); 23 | y1 = 1:conf.feat_stride:im_size(1); 24 | [x1, y1] = meshgrid(x1, y1); 25 | x1 = x1(:); 26 | y1 = y1(:); 27 | x2 = x1 + roipool_in_size * conf.feat_stride - 1; 28 | y2 = y1 + roipool_in_size * conf.feat_stride - 1; 29 | 30 | boxes = [x1, y1, x2, y2]; 31 | boxes = filter_boxes(im_size, boxes); 32 | 33 | boxes = bsxfun(@times, boxes-1, 1/im_scale) + 1; 34 | end 35 | 36 | function boxes = filter_boxes(im_size, boxes) 37 | valid_ind = boxes(:, 1) >= 1 & boxes(:, 1) <= im_size(2) & ... 38 | boxes(:, 2) >= 1 & boxes(:, 2) <= im_size(1) & ... 39 | boxes(:, 3) >= 1 & boxes(:, 3) <= im_size(2) & ... 40 | boxes(:, 4) >= 1 & boxes(:, 4) <= im_size(1); 41 | 42 | boxes = boxes(valid_ind, :); 43 | end -------------------------------------------------------------------------------- /functions/fast_rcnn/fast_rcnn_get_minibatch.m: -------------------------------------------------------------------------------- 1 | function [im_blob, rois_blob, labels_blob, bbox_targets_blob, bbox_loss_blob] = fast_rcnn_get_minibatch(conf, image_roidb) 2 | % [im_blob, rois_blob, labels_blob, bbox_targets_blob, bbox_loss_blob] ... 
3 | % = fast_rcnn_get_minibatch(conf, image_roidb) 4 | % -------------------------------------------------------- 5 | % Fast R-CNN 6 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn) 7 | % Copyright (c) 2015, Shaoqing Ren 8 | % Licensed under The MIT License [see LICENSE for details] 9 | % -------------------------------------------------------- 10 | 11 | num_images = length(image_roidb); 12 | % Infer number of classes from the number of columns in gt_overlaps 13 | num_classes = size(image_roidb(1).overlap, 2); 14 | % Sample random scales to use for each image in this batch 15 | random_scale_inds = randi(length(conf.scales), num_images, 1); 16 | 17 | assert(mod(conf.batch_size, num_images) == 0, ... 18 | sprintf('num_images %d must divide BATCH_SIZE %d', num_images, conf.batch_size)); 19 | 20 | rois_per_image = conf.batch_size / num_images; 21 | fg_rois_per_image = round(rois_per_image * conf.fg_fraction); 22 | 23 | % Get the input image blob 24 | [im_blob, im_scales] = get_image_blob(conf, image_roidb, random_scale_inds); 25 | 26 | % build the region of interest and label blobs 27 | rois_blob = zeros(0, 5, 'single'); 28 | labels_blob = zeros(0, 1, 'single'); 29 | bbox_targets_blob = zeros(0, 4 * (num_classes+1), 'single'); 30 | bbox_loss_blob = zeros(size(bbox_targets_blob), 'single'); 31 | 32 | for i = 1:num_images 33 | [labels, ~, im_rois, bbox_targets, bbox_loss] = ... 
34 | sample_rois(conf, image_roidb(i), fg_rois_per_image, rois_per_image); 35 | 36 | % Add to ROIs blob 37 | feat_rois = fast_rcnn_map_im_rois_to_feat_rois(conf, im_rois, im_scales(i)); 38 | batch_ind = i * ones(size(feat_rois, 1), 1); 39 | rois_blob_this_image = [batch_ind, feat_rois]; 40 | rois_blob = [rois_blob; rois_blob_this_image]; 41 | 42 | % Add to labels, bbox targets, and bbox loss blobs 43 | labels_blob = [labels_blob; labels]; 44 | bbox_targets_blob = [bbox_targets_blob; bbox_targets]; 45 | bbox_loss_blob = [bbox_loss_blob; bbox_loss]; 46 | end 47 | 48 | % permute data into caffe c++ memory, thus [num, channels, height, width] 49 | im_blob = im_blob(:, :, [3, 2, 1], :); % from rgb to bgr 50 | im_blob = single(permute(im_blob, [2, 1, 3, 4])); 51 | rois_blob = rois_blob - 1; % to c's index (start from 0) 52 | rois_blob = single(permute(rois_blob, [3, 4, 2, 1])); 53 | labels_blob = single(permute(labels_blob, [3, 4, 2, 1])); 54 | bbox_targets_blob = single(permute(bbox_targets_blob, [3, 4, 2, 1])); 55 | bbox_loss_blob = single(permute(bbox_loss_blob, [3, 4, 2, 1])); 56 | 57 | assert(~isempty(im_blob)); 58 | assert(~isempty(rois_blob)); 59 | assert(~isempty(labels_blob)); 60 | assert(~isempty(bbox_targets_blob)); 61 | assert(~isempty(bbox_loss_blob)); 62 | end 63 | 64 | %% Build an input blob from the images in the roidb at the specified scales. 
65 | function [im_blob, im_scales] = get_image_blob(conf, images, random_scale_inds) 66 | 67 | num_images = length(images); 68 | processed_ims = cell(num_images, 1); 69 | im_scales = nan(num_images, 1); 70 | for i = 1:num_images 71 | im = imread(images(i).image_path); 72 | target_size = conf.scales(random_scale_inds(i)); 73 | 74 | [im, im_scale] = prep_im_for_blob(im, conf.image_means, target_size, conf.max_size); 75 | 76 | im_scales(i) = im_scale; 77 | processed_ims{i} = im; 78 | end 79 | 80 | im_blob = im_list_to_blob(processed_ims); 81 | end 82 | 83 | %% Generate a random sample of ROIs comprising foreground and background examples. 84 | function [labels, overlaps, rois, bbox_targets, bbox_loss_weights] = ... 85 | sample_rois(conf, image_roidb, fg_rois_per_image, rois_per_image) 86 | 87 | [overlaps, labels] = max(image_roidb(1).overlap, [], 2); 88 | % labels = image_roidb(1).max_classes; 89 | % overlaps = image_roidb(1).max_overlaps; 90 | rois = image_roidb(1).boxes; 91 | 92 | % Select foreground ROIs as those with >= FG_THRESH overlap 93 | fg_inds = find(overlaps >= conf.fg_thresh); 94 | % Guard against the case when an image has fewer than fg_rois_per_image 95 | % foreground ROIs 96 | fg_rois_per_this_image = min(fg_rois_per_image, length(fg_inds)); 97 | % Sample foreground regions without replacement 98 | if ~isempty(fg_inds) 99 | fg_inds = fg_inds(randperm(length(fg_inds), fg_rois_per_this_image)); 100 | end 101 | 102 | % Select background ROIs as those within [BG_THRESH_LO, BG_THRESH_HI) 103 | bg_inds = find(overlaps < conf.bg_thresh_hi & overlaps >= conf.bg_thresh_lo); 104 | % Compute number of background ROIs to take from this image (guarding 105 | % against there being fewer than desired) 106 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image; 107 | bg_rois_per_this_image = min(bg_rois_per_this_image, length(bg_inds)); 108 | % Sample background regions without replacement 109 | if ~isempty(bg_inds) 110 | bg_inds = 
bg_inds(randperm(length(bg_inds), bg_rois_per_this_image)); 111 | end 112 | % The indices that we're selecting (both fg and bg) 113 | keep_inds = [fg_inds; bg_inds]; 114 | % Select sampled values from various arrays 115 | labels = labels(keep_inds); 116 | % Clamp labels for the background ROIs to 0 117 | labels((fg_rois_per_this_image+1):end) = 0; 118 | overlaps = overlaps(keep_inds); 119 | rois = rois(keep_inds, :); 120 | 121 | assert(all(labels == image_roidb.bbox_targets(keep_inds, 1))); 122 | 123 | % Infer number of classes from the number of columns in gt_overlaps 124 | num_classes = size(image_roidb(1).overlap, 2); 125 | 126 | [bbox_targets, bbox_loss_weights] = get_bbox_regression_labels(conf, ... 127 | image_roidb.bbox_targets(keep_inds, :), num_classes); 128 | 129 | end 130 | 131 | function [bbox_targets, bbox_loss_weights] = get_bbox_regression_labels(conf, bbox_target_data, num_classes) 132 | %% Bounding-box regression targets are stored in a compact form in the roidb. 133 | % This function expands those targets into the 4-of-4*(num_classes+1) representation used 134 | % by the network (i.e. only one class has non-zero targets). 135 | % The loss weights are similarly expanded. 
136 | % Return (N, (num_classes+1) * 4, 1, 1) blob of regression targets 137 | % Return (N, (num_classes+1) * 4, 1, 1) blob of loss weights 138 | clss = bbox_target_data(:, 1); 139 | bbox_targets = zeros(length(clss), 4 * (num_classes+1), 'single'); 140 | bbox_loss_weights = zeros(size(bbox_targets), 'single'); 141 | inds = find(clss > 0); 142 | for i = 1:length(inds) 143 | ind = inds(i); 144 | cls = clss(ind); 145 | targets_inds = (1+cls*4):((cls+1)*4); 146 | bbox_targets(ind, targets_inds) = bbox_target_data(ind, 2:end); 147 | bbox_loss_weights(ind, targets_inds) = 1; 148 | end 149 | end 150 | 151 | 152 | -------------------------------------------------------------------------------- /functions/fast_rcnn/fast_rcnn_im_detect.m: -------------------------------------------------------------------------------- 1 | function [pred_boxes, scores] = fast_rcnn_im_detect(conf, caffe_net, im, boxes, max_rois_num_in_gpu) 2 | % [pred_boxes, scores] = fast_rcnn_im_detect(conf, caffe_net, im, boxes, max_rois_num_in_gpu) 3 | % -------------------------------------------------------- 4 | % Fast R-CNN 5 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn) 6 | % Copyright (c) 2015, Shaoqing Ren 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | [im_blob, rois_blob, ~] = get_blobs(conf, im, boxes); 11 | 12 | % When mapping from image ROIs to feature map ROIs, there's some aliasing 13 | % (some distinct image ROIs get mapped to the same feature ROI). 14 | % Here, we identify duplicate feature ROIs, so we only compute features 15 | % on the unique subset. 
16 | [~, index, inv_index] = unique(rois_blob, 'rows'); 17 | rois_blob = rois_blob(index, :); 18 | boxes = boxes(index, :); 19 | 20 | % permute data into caffe c++ memory, thus [num, channels, height, width] 21 | im_blob = im_blob(:, :, [3, 2, 1], :); % from rgb to bgr 22 | im_blob = permute(im_blob, [2, 1, 3, 4]); 23 | im_blob = single(im_blob); 24 | rois_blob = rois_blob - 1; % to c's index (start from 0) 25 | rois_blob = permute(rois_blob, [3, 4, 2, 1]); 26 | rois_blob = single(rois_blob); 27 | 28 | total_rois = size(rois_blob, 4); 29 | total_scores = cell(ceil(total_rois / max_rois_num_in_gpu), 1); 30 | total_box_deltas = cell(ceil(total_rois / max_rois_num_in_gpu), 1); 31 | for i = 1:ceil(total_rois / max_rois_num_in_gpu) 32 | 33 | sub_ind_start = 1 + (i-1) * max_rois_num_in_gpu; 34 | sub_ind_end = min(total_rois, i * max_rois_num_in_gpu); 35 | sub_rois_blob = rois_blob(:, :, :, sub_ind_start:sub_ind_end); 36 | 37 | net_inputs = {im_blob, sub_rois_blob}; 38 | 39 | % Reshape net's input blobs 40 | caffe_net.reshape_as_input(net_inputs); 41 | output_blobs = caffe_net.forward(net_inputs); 42 | 43 | if conf.test_binary 44 | % simulate binary logistic regression 45 | scores = caffe_net.blobs('cls_score').get_data(); 46 | scores = squeeze(scores)'; 47 | % Return scores as fg - bg 48 | scores = bsxfun(@minus, scores, scores(:, 1)); 49 | else 50 | % use softmax estimated probabilities 51 | scores = output_blobs{2}; 52 | scores = squeeze(scores)'; 53 | end 54 | 55 | % Apply bounding-box regression deltas 56 | box_deltas = output_blobs{1}; 57 | box_deltas = squeeze(box_deltas)'; 58 | 59 | total_scores{i} = scores; 60 | total_box_deltas{i} = box_deltas; 61 | end 62 | 63 | scores = cell2mat(total_scores); 64 | box_deltas = cell2mat(total_box_deltas); 65 | 66 | pred_boxes = fast_rcnn_bbox_transform_inv(boxes, box_deltas); 67 | pred_boxes = clip_boxes(pred_boxes, size(im, 2), size(im, 1)); 68 | 69 | % Map scores and predictions back to the original set of boxes 70 | scores 
= scores(inv_index, :); 71 | pred_boxes = pred_boxes(inv_index, :); 72 | 73 | % remove scores and boxes for back-ground 74 | pred_boxes = pred_boxes(:, 5:end); 75 | scores = scores(:, 2:end); 76 | end 77 | 78 | function [data_blob, rois_blob, im_scale_factors] = get_blobs(conf, im, rois) 79 | [data_blob, im_scale_factors] = get_image_blob(conf, im); 80 | rois_blob = get_rois_blob(conf, rois, im_scale_factors); 81 | end 82 | 83 | function [blob, im_scales] = get_image_blob(conf, im) 84 | [ims, im_scales] = arrayfun(@(x) prep_im_for_blob(im, conf.image_means, x, conf.test_max_size), conf.test_scales, 'UniformOutput', false); 85 | im_scales = cell2mat(im_scales); 86 | blob = im_list_to_blob(ims); 87 | end 88 | 89 | function [rois_blob] = get_rois_blob(conf, im_rois, im_scale_factors) 90 | [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, im_scale_factors); 91 | rois_blob = single([levels, feat_rois]); 92 | end 93 | 94 | function [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, scales) 95 | im_rois = single(im_rois); 96 | 97 | if length(scales) > 1 98 | widths = im_rois(:, 3) - im_rois(:, 1) + 1; 99 | heights = im_rois(:, 4) - im_rois(:, 2) + 1; 100 | 101 | areas = widths .* heights; 102 | scaled_areas = bsxfun(@times, areas(:), scales(:)'.^2); 103 | [~, levels] = min(abs(scaled_areas - 224.^2), [], 2); 104 | else 105 | levels = ones(size(im_rois, 1), 1); 106 | end 107 | 108 | feat_rois = round(bsxfun(@times, im_rois-1, scales(levels))) + 1; 109 | end 110 | 111 | function boxes = clip_boxes(boxes, im_width, im_height) 112 | % x1 >= 1 & <= im_width 113 | boxes(:, 1:4:end) = max(min(boxes(:, 1:4:end), im_width), 1); 114 | % y1 >= 1 & <= im_height 115 | boxes(:, 2:4:end) = max(min(boxes(:, 2:4:end), im_height), 1); 116 | % x2 >= 1 & <= im_width 117 | boxes(:, 3:4:end) = max(min(boxes(:, 3:4:end), im_width), 1); 118 | % y2 >= 1 & <= im_height 119 | boxes(:, 4:4:end) = max(min(boxes(:, 4:4:end), im_height), 1); 120 | end 121 | 
-------------------------------------------------------------------------------- /functions/fast_rcnn/fast_rcnn_map_im_rois_to_feat_rois.m: -------------------------------------------------------------------------------- 1 | function [feat_rois] = fast_rcnn_map_im_rois_to_feat_rois(conf, im_rois, im_scale_factor) 2 | % [feat_rois] = fast_rcnn_map_im_rois_to_feat_rois(conf, im_rois, im_scale_factor) 3 | % -------------------------------------------------------- 4 | % Fast R-CNN 5 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn) 6 | % Copyright (c) 2015, Shaoqing Ren 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | %% Map a ROI in image-pixel coordinates to a ROI in feature coordinates. 11 | % in matlab's index (start from 1) 12 | 13 | feat_rois = round((im_rois-1) * im_scale_factor) + 1; 14 | 15 | %feat_rois = round((im_rois-1) * im_scale_factor / single(conf.feat_stride)) + 1; 16 | 17 | end -------------------------------------------------------------------------------- /functions/fast_rcnn/fast_rcnn_prepare_image_roidb.m: -------------------------------------------------------------------------------- 1 | function [image_roidb, bbox_means, bbox_stds] = fast_rcnn_prepare_image_roidb(conf, imdbs, roidbs, bbox_means, bbox_stds) 2 | % [image_roidb, bbox_means, bbox_stds] = fast_rcnn_prepare_image_roidb(conf, imdbs, roidbs, cache_img, bbox_means, bbox_stds) 3 | % Gather useful information from imdb and roidb 4 | % pre-calculate mean (bbox_means) and std (bbox_stds) of the regression 5 | % term for normalization 6 | % -------------------------------------------------------- 7 | % Fast R-CNN 8 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn) 9 | % Copyright (c) 2015, Shaoqing Ren 10 | % Licensed under The MIT License [see LICENSE for details] 11 | % -------------------------------------------------------- 
12 | 13 | if ~exist('bbox_means', 'var') 14 | bbox_means = []; 15 | bbox_stds = []; 16 | end 17 | 18 | if ~iscell(imdbs) 19 | imdbs = {imdbs}; 20 | roidbs = {roidbs}; 21 | end 22 | 23 | imdbs = imdbs(:); 24 | roidbs = roidbs(:); 25 | 26 | image_roidb = ... 27 | cellfun(@(x, y) ... // @(imdbs, roidbs) 28 | arrayfun(@(z) ... //@([1:length(x.image_ids)]) 29 | struct('image_path', x.image_at(z), 'image_id', x.image_ids{z}, 'im_size', x.sizes(z, :), 'imdb_name', x.name, ... 30 | 'overlap', y.rois(z).overlap, 'boxes', y.rois(z).boxes, 'class', y.rois(z).class, 'image', [], 'bbox_targets', []), ... 31 | [1:length(x.image_ids)]', 'UniformOutput', true),... 32 | imdbs, roidbs, 'UniformOutput', false); 33 | 34 | image_roidb = cat(1, image_roidb{:}); 35 | 36 | % enhance roidb to contain bounding-box regression targets 37 | [image_roidb, bbox_means, bbox_stds] = append_bbox_regression_targets(conf, image_roidb, bbox_means, bbox_stds); 38 | end 39 | 40 | function [image_roidb, means, stds] = append_bbox_regression_targets(conf, image_roidb, means, stds) 41 | % means and stds -- (k+1) * 4, include background class 42 | 43 | num_images = length(image_roidb); 44 | % Infer number of classes from the number of columns in gt_overlaps 45 | num_classes = size(image_roidb(1).overlap, 2); 46 | valid_imgs = true(num_images, 1); 47 | for i = 1:num_images 48 | rois = image_roidb(i).boxes; 49 | [image_roidb(i).bbox_targets, valid_imgs(i)] = ... 
50 | compute_targets(conf, rois, image_roidb(i).overlap); 51 | end 52 | if ~all(valid_imgs) 53 | image_roidb = image_roidb(valid_imgs); 54 | num_images = length(image_roidb); 55 | fprintf('Warning: fast_rcnn_prepare_image_roidb: filter out %d images, which contains zero valid samples\n', sum(~valid_imgs)); 56 | end 57 | 58 | if ~(exist('means', 'var') && ~isempty(means) && exist('stds', 'var') && ~isempty(stds)) 59 | % Compute values needed for means and stds 60 | % var(x) = E(x^2) - E(x)^2 61 | class_counts = zeros(num_classes, 1) + eps; 62 | sums = zeros(num_classes, 4); 63 | squared_sums = zeros(num_classes, 4); 64 | for i = 1:num_images 65 | targets = image_roidb(i).bbox_targets; 66 | for cls = 1:num_classes 67 | cls_inds = find(targets(:, 1) == cls); 68 | if ~isempty(cls_inds) 69 | class_counts(cls) = class_counts(cls) + length(cls_inds); 70 | sums(cls, :) = sums(cls, :) + sum(targets(cls_inds, 2:end), 1); 71 | squared_sums(cls, :) = squared_sums(cls, :) + sum(targets(cls_inds, 2:end).^2, 1); 72 | end 73 | end 74 | end 75 | 76 | means = bsxfun(@rdivide, sums, class_counts); 77 | stds = (bsxfun(@minus, bsxfun(@rdivide, squared_sums, class_counts), means.^2)).^0.5; 78 | 79 | % add background class 80 | means = [0, 0, 0, 0; means]; 81 | stds = [0, 0, 0, 0; stds]; 82 | end 83 | 84 | % Normalize targets 85 | for i = 1:num_images 86 | targets = image_roidb(i).bbox_targets; 87 | for cls = 1:num_classes 88 | cls_inds = find(targets(:, 1) == cls); 89 | if ~isempty(cls_inds) 90 | image_roidb(i).bbox_targets(cls_inds, 2:end) = ... 91 | bsxfun(@minus, image_roidb(i).bbox_targets(cls_inds, 2:end), means(cls+1, :)); 92 | image_roidb(i).bbox_targets(cls_inds, 2:end) = ... 
93 | bsxfun(@rdivide, image_roidb(i).bbox_targets(cls_inds, 2:end), stds(cls+1, :)); 94 | end 95 | end 96 | end 97 | end 98 | 99 | 100 | function [bbox_targets, is_valid] = compute_targets(conf, rois, overlap) 101 | 102 | overlap = full(overlap); 103 | 104 | [max_overlaps, max_labels] = max(overlap, [], 2); 105 | 106 | % ensure ROIs are floats 107 | rois = single(rois); 108 | 109 | bbox_targets = zeros(size(rois, 1), 5, 'single'); 110 | 111 | % Indices of ground-truth ROIs 112 | gt_inds = find(max_overlaps == 1); 113 | 114 | if ~isempty(gt_inds) 115 | % Indices of examples for which we try to make predictions 116 | ex_inds = find(max_overlaps >= conf.bbox_thresh); 117 | 118 | % Get IoU overlap between each ex ROI and gt ROI 119 | ex_gt_overlaps = boxoverlap(rois(ex_inds, :), rois(gt_inds, :)); 120 | 121 | assert(all(abs(max(ex_gt_overlaps, [], 2) - max_overlaps(ex_inds)) < 10^-4)); 122 | 123 | % Find which gt ROI each ex ROI has max overlap with: 124 | % this will be the ex ROI's gt target 125 | [~, gt_assignment] = max(ex_gt_overlaps, [], 2); 126 | gt_rois = rois(gt_inds(gt_assignment), :); 127 | ex_rois = rois(ex_inds, :); 128 | 129 | [regression_label] = fast_rcnn_bbox_transform(ex_rois, gt_rois); 130 | 131 | bbox_targets(ex_inds, :) = [max_labels(ex_inds), regression_label]; 132 | end 133 | 134 | % Select foreground ROIs as those with >= fg_thresh overlap 135 | is_fg = max_overlaps >= conf.fg_thresh; 136 | % Select background ROIs as those within [bg_thresh_lo, bg_thresh_hi) 137 | is_bg = max_overlaps < conf.bg_thresh_hi & max_overlaps >= conf.bg_thresh_lo; 138 | 139 | % check if there is any fg or bg sample. 
If no, filter out this image 140 | is_valid = true; 141 | if ~any(is_fg | is_bg) 142 | is_valid = false; 143 | end 144 | end -------------------------------------------------------------------------------- /functions/nms/nms.m: -------------------------------------------------------------------------------- 1 | function pick = nms(boxes, overlap, use_gpu) 2 | % top = nms(boxes, overlap) 3 | % Non-maximum suppression. (FAST VERSION) 4 | % Greedily select high-scoring detections and skip detections 5 | % that are significantly covered by a previously selected 6 | % detection. 7 | % 8 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m), 9 | % but an inner loop has been eliminated to significantly speed it 10 | % up in the case of a large number of boxes 11 | 12 | % Copyright (C) 2011-12 by Tomasz Malisiewicz 13 | % All rights reserved. 14 | % 15 | % This file is part of the Exemplar-SVM library and is made 16 | % available under the terms of the MIT license (see COPYING file). 
17 | % Project homepage: https://github.com/quantombone/exemplarsvm 18 | 19 | 20 | if isempty(boxes) 21 | pick = []; 22 | return; 23 | end 24 | 25 | if ~exist('use_gpu', 'var') 26 | use_gpu = false; 27 | end 28 | 29 | if use_gpu 30 | s = boxes(:, end); 31 | if ~issorted(s(end:-1:1)) 32 | [~, I] = sort(s, 'descend'); 33 | boxes = boxes(I, :); 34 | pick = nms_gpu_mex(single(boxes)', double(overlap)); 35 | pick = I(pick); 36 | else 37 | pick = nms_gpu_mex(single(boxes)', double(overlap)); 38 | end 39 | return; 40 | end 41 | 42 | if size(boxes, 1) < 1000000 43 | pick = nms_mex(double(boxes), double(overlap)); 44 | return; 45 | end 46 | 47 | x1 = boxes(:,1); 48 | y1 = boxes(:,2); 49 | x2 = boxes(:,3); 50 | y2 = boxes(:,4); 51 | s = boxes(:,end); 52 | 53 | area = (x2-x1+1) .* (y2-y1+1); 54 | [vals, I] = sort(s); 55 | 56 | pick = s*0; 57 | counter = 1; 58 | while ~isempty(I) 59 | last = length(I); 60 | i = I(last); 61 | pick(counter) = i; 62 | counter = counter + 1; 63 | 64 | xx1 = max(x1(i), x1(I(1:last-1))); 65 | yy1 = max(y1(i), y1(I(1:last-1))); 66 | xx2 = min(x2(i), x2(I(1:last-1))); 67 | yy2 = min(y2(i), y2(I(1:last-1))); 68 | 69 | w = max(0.0, xx2-xx1+1); 70 | h = max(0.0, yy2-yy1+1); 71 | 72 | inter = w.*h; 73 | o = inter ./ (area(i) + area(I(1:last-1)) - inter); 74 | 75 | I = I(find(o<=overlap)); 76 | end 77 | 78 | pick = pick(1:(counter-1)); 79 | -------------------------------------------------------------------------------- /functions/nms/nms_gpu_mex.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Example of how to use the mxGPUArray API in a MEX file. This example shows 3 | * how to write a MEX function that takes a gpuArray input and returns a 4 | * gpuArray output, e.g. B=mexFunction(A). 5 | * 6 | * Copyright 2012 The MathWorks, Inc. 
7 | */

#include "mex.h"
// NOTE(review): the names of the two system headers that follow "mex.h" were
// stripped from this copy of the file (only "#include" survived). <vector> and
// <cstring> are what the code below actually needs (std::vector, memcpy) --
// confirm against the upstream file.
#include <vector>
#include <cstring>

#define DIVUP(m,n) ((m)/(n)+((m)%(n)>0))
// One mask word carries one bit per box of a column block, so a block is as
// wide as an unsigned long long has bits.
int const threadsPerBlock = (sizeof(unsigned long long) * 8);

/*
 * Device code
 */

// Intersection-over-union of two boxes whose first four floats are
// [x1, y1, x2, y2]. Widths and heights use the inclusive "+1" convention.
__device__ inline float devIoU(float const * const a, float const * const b)
{
    float left = max(a[0], b[0]), right = min(a[2], b[2]);
    float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
    float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
    float interS = width * height;
    float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
    float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
    return interS / (Sa + Sb - interS);
}

// Fills the pairwise suppression mask.
//
// The grid is 2-d: blockIdx.y selects a block of "row" boxes and blockIdx.x a
// block of "column" boxes, each threadsPerBlock wide. Thread t of a block owns
// row box (row_start * threadsPerBlock + t) and writes ONE mask word:
// bit i is set when that box overlaps column box i of the block by more than
// nms_overlap_thres.
//
//   n_boxes            number of boxes
//   nms_overlap_thres  IoU threshold
//   dev_boxes          n_boxes * 5 floats, 5 consecutive floats per box
//   dev_mask           n_boxes * DIVUP(n_boxes, threadsPerBlock) words (output)
//
// The launch must use exactly threadsPerBlock threads per block.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thres, const float *dev_boxes, unsigned long long *dev_mask)
{
    const int row_start = blockIdx.y, col_start = blockIdx.x;
    // The last block in each direction may be only partially filled.
    const int row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock),
              col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

    //if (row_start > col_start) return;

    // Stage the column block's boxes in shared memory: every thread of the
    // block compares its own row box against all of them.
    __shared__ float block_boxes[threadsPerBlock * 5];
    if (threadIdx.x < col_size)
    {
        block_boxes[threadIdx.x * 5 + 0] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
        block_boxes[threadIdx.x * 5 + 1] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
        block_boxes[threadIdx.x * 5 + 2] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
        block_boxes[threadIdx.x * 5 + 3] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
        block_boxes[threadIdx.x * 5 + 4] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
    }
    __syncthreads();

    if (threadIdx.x < row_size)
    {
        const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
        const float *cur_box = dev_boxes + cur_box_idx * 5;
        int i = 0;
        unsigned long long t = 0;
        int start = 0;
        // On the diagonal block only compare against boxes that come AFTER
        // this one, so a box never suppresses itself or an earlier box.
        if (row_start == col_start) start = threadIdx.x + 1;
        for (i = start; i < col_size; i++)
        {
            if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thres)
            {
                t |= 1ULL << i;
            }
        }
        const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
        // size_t index: n_boxes * col_blocks can exceed INT_MAX for large inputs.
        dev_mask[(size_t)cur_box_idx * col_blocks + col_start] = t;
    }
}

/*
 * Host code
 */

// Runs the mask kernel for boxes_num (> 0) boxes and performs the greedy
// selection on the host, appending the 1-based indices of the kept boxes to
// `keep`.
//
// All device memory is released before returning, on success and on failure.
// Returns the first CUDA error encountered, or cudaSuccess. Nothing is appended
// to `keep` when an error is returned, so the caller may raise a MATLAB error
// (which does not unwind C++ objects) without leaking the buffers used here.
static cudaError_t nms_on_device(const float *boxes_host, const int boxes_num, const float nms_overlap_thres, std::vector<int> &keep)
{
    const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
    const size_t boxes_bytes = (size_t)boxes_num * 5 * sizeof(float);
    const size_t mask_words = (size_t)boxes_num * col_blocks;

    float *boxes_dev = NULL;
    unsigned long long *mask_dev = NULL;
    std::vector<unsigned long long> mask_host(mask_words);

    cudaError_t err = cudaMalloc(&boxes_dev, boxes_bytes);
    if (err == cudaSuccess)
        err = cudaMemcpy(boxes_dev, boxes_host, boxes_bytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess)
        err = cudaMalloc(&mask_dev, mask_words * sizeof(unsigned long long));
    if (err == cudaSuccess)
    {
        /*
         * 2-d grid: one block per (row block, column block) pair. The number
         * of blocks per dimension is not checked against the device's grid
         * limits; an oversized launch is reported through cudaGetLastError().
         */
        dim3 blocks(col_blocks, col_blocks);
        dim3 threads(threadsPerBlock);
        nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thres, boxes_dev, mask_dev);
        err = cudaGetLastError();
    }
    if (err == cudaSuccess)
        err = cudaMemcpy(&mask_host[0], mask_dev, mask_words * sizeof(unsigned long long), cudaMemcpyDeviceToHost);

    // cudaFree(NULL) is a no-op, so this is safe on every path.
    cudaFree(boxes_dev);
    cudaFree(mask_dev);
    if (err != cudaSuccess)
        return err;

    // Greedy pass in input order: a box is kept unless an earlier kept box
    // already flagged it; a kept box then flags everything it overlaps.
    std::vector<unsigned long long> remv(col_blocks, 0ULL);
    keep.reserve(boxes_num);
    for (int i = 0; i < boxes_num; i++)
    {
        const int nblock = i / threadsPerBlock;
        const int inblock = i % threadsPerBlock;

        if (!(remv[nblock] & (1ULL << inblock)))
        {
            keep.push_back(i + 1); // to matlab's index

            const unsigned long long *p = &mask_host[0] + (size_t)i * col_blocks;
            for (int j = nblock; j < col_blocks; j++)
            {
                remv[j] |= p[j];
            }
        }
    }
    return cudaSuccess;
}

/*
 * pick = nms_gpu_mex(boxes, overlap)
 *
 *   boxes    5-by-N single matrix, one box per column. The first four rows
 *            are x1, y1, x2, y2; the fifth row is carried along but not used
 *            here. Boxes are processed in column order, so the caller is
 *            expected to pass them sorted by descending score.
 *   overlap  double scalar, IoU threshold.
 *   pick     int32 column vector with the 1-based indices of the kept boxes.
 */
void mexFunction(int nlhs, mxArray *plhs[],
                 int nrhs, const mxArray *prhs[])
{
    /* Exactly two inputs are required. */
    if (nrhs != 2) {
        mexErrMsgTxt("nms_gpu_mex::need 2 inputs");
    }

    mxArray const *boxes = prhs[0];
    if (mxGetClassID(boxes) != mxSINGLE_CLASS) {
        mexErrMsgTxt("nms_gpu_mex::input boxes must be single");
    }

    mxArray const *ov_thres = prhs[1];
    if (mxGetClassID(ov_thres) != mxDOUBLE_CLASS) {
        /* The original message blamed "boxes" for a bad threshold argument. */
        mexErrMsgTxt("nms_gpu_mex::input overlap threshold must be double");
    }

    const float nms_overlap_thres = (float)mxGetScalar(ov_thres);

    const int boxes_dim = (int)mxGetM(boxes);
    const int boxes_num = (int)mxGetN(boxes);
    if (boxes_dim != 5)
    {
        mexErrMsgTxt("nms_gpu_mex::input boxes's row must be 5");
    }

    std::vector<int> keep;
    if (boxes_num > 0)
    {
        /* mxGetData, not mxGetPr: the array holds single, not double, values. */
        const float *boxes_host = (const float *)mxGetData(boxes);
        const cudaError_t err = nms_on_device(boxes_host, boxes_num, nms_overlap_thres, keep);
        if (err != cudaSuccess)
        {
            mexErrMsgIdAndTxt("nms_gpu_mex:cuda", "nms_gpu_mex::CUDA error: %s", cudaGetErrorString(err));
        }
    }
    /* boxes_num == 0 (a 5-by-0 input) falls through and returns an empty result. */

    /* Wrap the result up as a MATLAB int32 array for return. */
    mwSize dims[4] = { (mwSize)keep.size(), 1, 1, 1 };
    plhs[0] = mxCreateNumericArray(4, dims, mxINT32_CLASS, mxREAL);

    if (!keep.empty())
    {
        memcpy(mxGetData(plhs[0]), &keep[0], keep.size() * sizeof(int));
    }
}

-------------------------------------------------------------------------------- /functions/nms/nms_mex.cpp: -------------------------------------------------------------------------------- 1 | #include "mex.h" 2 | #ifdef _MSC_VER 3 | #include 4 | #include 5 | #endif 6 | #include 7 | #include 8 | using namespace std; 9 | 10 | struct score { 11 | double s; 12 | int idx; 13 | bool operator() (score i, score j) { return (i.idx < j.idx);} 14 | } score; 15 | 16 | template 17 | void nms(const mxArray *input_boxes, double overlap, vector &vPick, int &nPick) 18 | { 19 | int nSample = (int)mxGetM(input_boxes); 20 | int nDim_boxes = (int)mxGetN(input_boxes); 21 | 22 | T *pBoxes = (T*)mxGetData(input_boxes); 23 | 24 | vector vArea(nSample); 25 | for (int i = 0; i < nSample; ++i) 26 | { 27 | vArea[i] = double(pBoxes[2*nSample + i] - pBoxes[0*nSample + i] + 1) 28 | * (pBoxes[3*nSample + i] - pBoxes[1*nSample + i] + 1); 29 | if (vArea[i] < 0) 30 | mexErrMsgTxt("Boxes area must >= 0"); 31 | } 32 | 33 | std::multimap scores; 34 | for (int i = 0; i < nSample; ++i) 35 | scores.insert(std::pair(pBoxes[4*nSample + i], i)); 36 | 37 | nPick = 0; 38 | 39 | do 40 | { 41 | int last = scores.rbegin()->second; 42 | vPick[nPick] = last; 43 | nPick += 1; 44 | 45 | for (typename std::multimap::iterator it = scores.begin(); it != scores.end();) 46 | { 47 | int it_idx = it->second; 48 | T xx1 = max(pBoxes[0*nSample + last], pBoxes[0*nSample + it_idx]); 49 | T yy1 = max(pBoxes[1*nSample + last], pBoxes[1*nSample + it_idx]); 50 | T xx2 = min(pBoxes[2*nSample + last], pBoxes[2*nSample + it_idx]); 51 | T yy2 = min(pBoxes[3*nSample + last], pBoxes[3*nSample + it_idx]); 52 | 53 | 
double w = max(T(0.0), xx2-xx1+1), h = max(T(0.0), yy2-yy1+1); 54 | 55 | double ov = w*h / (vArea[last] + vArea[it_idx] - w*h); 56 | 57 | if (ov > overlap) 58 | { 59 | it = scores.erase(it); 60 | } 61 | else 62 | { 63 | it++; 64 | } 65 | } 66 | 67 | } while (scores.size() != 0); 68 | } 69 | 70 | 71 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 72 | { 73 | if (nrhs != 2) 74 | mexErrMsgTxt("Wrong number of inputs"); 75 | if (nlhs != 1) 76 | mexErrMsgTxt("One output"); 77 | 78 | const mxArray *input_boxes = prhs[0]; 79 | if (mxGetClassID(input_boxes) != mxDOUBLE_CLASS && mxGetClassID(input_boxes) != mxSINGLE_CLASS) 80 | mexErrMsgTxt("Input boxes must be Double or Single"); 81 | 82 | const mxArray *input_overlap = prhs[1]; 83 | if (mxGetClassID(input_overlap) != mxDOUBLE_CLASS ) 84 | mexErrMsgTxt("Input overlap must be Double"); 85 | 86 | double overlap = mxGetScalar(input_overlap); 87 | 88 | int nSample = (int)mxGetM(input_boxes); 89 | int nDim_boxes = (int)mxGetN(input_boxes); 90 | 91 | if (nSample * nDim_boxes == 0) 92 | { 93 | plhs[0] = mxCreateNumericMatrix(0, 0, mxDOUBLE_CLASS, mxREAL); 94 | return; 95 | } 96 | 97 | if (nDim_boxes != 5) 98 | mexErrMsgTxt("nms_mex boxes must has 5 columns"); 99 | 100 | 101 | int nPick = 0; 102 | vector vPick(nSample); 103 | if(mxGetClassID(input_boxes) == mxDOUBLE_CLASS) 104 | nms(input_boxes, overlap, vPick, nPick); 105 | else 106 | nms(input_boxes, overlap, vPick, nPick); 107 | 108 | plhs[0] = mxCreateNumericMatrix(nPick, 1, mxDOUBLE_CLASS, mxREAL); 109 | double *pRst = mxGetPr(plhs[0]); 110 | for (int i = 0; i < nPick; ++i) 111 | pRst[i] = vPick[i] + 1; 112 | } 113 | -------------------------------------------------------------------------------- /functions/nms/nms_multiclass.m: -------------------------------------------------------------------------------- 1 | function picks = nms_multiclass(boxes, overlap) 2 | % top = nms(boxes, overlap) 3 | % Non-maximum suppression. 
(FAST VERSION) 4 | % Greedily select high-scoring detections and skip detections 5 | % that are significantly covered by a previously selected 6 | % detection. 7 | % 8 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m), 9 | % but an inner loop has been eliminated to significantly speed it 10 | % up in the case of a large number of boxes 11 | 12 | % Copyright (C) 2011-12 by Tomasz Malisiewicz 13 | % All rights reserved. 14 | % 15 | % This file is part of the Exemplar-SVM library and is made 16 | % available under the terms of the MIT license (see COPYING file). 17 | % Project homepage: https://github.com/quantombone/exemplarsvm 18 | 19 | 20 | if isempty(boxes) 21 | picks = {}; 22 | return; 23 | end 24 | 25 | if size(boxes, 1) < 10000 26 | picks = nms_multiclass_mex(double(boxes), double(overlap)); 27 | return; 28 | end 29 | 30 | x1 = boxes(:,1); 31 | y1 = boxes(:,2); 32 | x2 = boxes(:,3); 33 | y2 = boxes(:,4); 34 | 35 | area = (x2-x1+1) .* (y2-y1+1); 36 | 37 | picks = cell(size(boxes, 2)-4, 1); 38 | for iS = 5:size(boxes, 2) 39 | s = boxes(:,iS); 40 | [~, I] = sort(s); 41 | 42 | pick = s*0; 43 | counter = 1; 44 | while ~isempty(I) 45 | last = length(I); 46 | i = I(last); 47 | pick(counter) = i; 48 | counter = counter + 1; 49 | 50 | xx1 = max(x1(i), x1(I(1:last-1))); 51 | yy1 = max(y1(i), y1(I(1:last-1))); 52 | xx2 = min(x2(i), x2(I(1:last-1))); 53 | yy2 = min(y2(i), y2(I(1:last-1))); 54 | 55 | w = max(0.0, xx2-xx1+1); 56 | h = max(0.0, yy2-yy1+1); 57 | 58 | inter = w.*h; 59 | o = inter ./ (area(i) + area(I(1:last-1)) - inter); 60 | 61 | I = I(o<=overlap); 62 | end 63 | 64 | pick = pick(1:(counter-1)); 65 | picks{iS-4} = pick; 66 | end 67 | -------------------------------------------------------------------------------- /functions/nms/nms_multiclass_mex.cpp: -------------------------------------------------------------------------------- 1 | #include "mex.h" 2 | #ifdef WIN32 3 | #include 4 | #include 5 | #else 6 | #include 7 | #endif 8 | #include 9 | 
#include 10 | #include 11 |
using namespace std;

// Score/index pair with an index-ordering functor. Nothing in the code shown
// in this file refers to it; kept so the translation unit stays unchanged for
// anything that might.
struct score {
    double s;
    int idx;
    bool operator() (score i, score j) { return (i.idx < j.idx);}
} score;

// Greedy non-maximum suppression for ONE score column.
//
//   input_boxes  nSample-by-nDim matrix of element type T (column-major):
//                columns 0..3 are x1, y1, x2, y2.
//   iScoreIdx    zero-based column holding the scores to rank by.
//   overlap      IoU threshold; a box is dropped when its IoU with an already
//                picked box is strictly greater than this.
//   vArea        precomputed area of every box (same row order as the boxes).
//   vPick        output, must already hold at least nSample elements; the first
//                nPick entries receive the zero-based indices of the kept
//                boxes, highest score first.
//   nPick        output, number of kept boxes.
template <typename T>
void nms(const mxArray *input_boxes, int iScoreIdx, double overlap, const vector<double> &vArea, vector<int> &vPick, int &nPick)
{
    const int nSample = (int)mxGetM(input_boxes);
    const T *pBoxes = (const T*)mxGetData(input_boxes);

    // Candidates ordered by ascending score; the best one is always the last.
    std::multimap<T, int> scores;
    for (int i = 0; i < nSample; ++i)
        scores.insert(std::pair<T, int>(pBoxes[iScoreIdx*nSample + i], i));

    nPick = 0;

    while (!scores.empty())
    {
        // Take the best remaining candidate and remove it unconditionally.
        // The original only removed it through the IoU test against itself,
        // which never fires when overlap >= 1 or when a zero-area box makes
        // the IoU NaN -- the loop then ran forever.
        typename std::multimap<T, int>::iterator itBest = scores.end();
        --itBest;
        const int last = itBest->second;
        scores.erase(itBest);

        vPick[nPick] = last;
        nPick += 1;

        for (typename std::multimap<T, int>::iterator it = scores.begin(); it != scores.end();)
        {
            const int it_idx = it->second;
            const T xx1 = std::max(pBoxes[0*nSample + last], pBoxes[0*nSample + it_idx]);
            const T yy1 = std::max(pBoxes[1*nSample + last], pBoxes[1*nSample + it_idx]);
            const T xx2 = std::min(pBoxes[2*nSample + last], pBoxes[2*nSample + it_idx]);
            const T yy2 = std::min(pBoxes[3*nSample + last], pBoxes[3*nSample + it_idx]);

            // Clamp by hand: max(0.0, <float expression>) mixes argument types
            // and only compiles where `max` happens to be a macro.
            double w = double(xx2 - xx1 + 1);
            double h = double(yy2 - yy1 + 1);
            if (w < 0.0) w = 0.0;
            if (h < 0.0) h = 0.0;

            const double inter = w * h;
            const double ov = inter / (vArea[last] + vArea[it_idx] - inter);

            if (ov > overlap)
            {
                // Post-increment erase is valid for associative containers in
                // every C++ standard, so no platform #ifdef is needed.
                scores.erase(it++);
            }
            else
            {
                ++it;
            }
        }
    }
}

// Computes the inclusive-pixel area of every box into vArea.
// Returns false as soon as a negative area is found.
template <typename T>
static bool box_areas(const mxArray *input_boxes, vector<double> &vArea)
{
    const int nSample = (int)mxGetM(input_boxes);
    const T *pBoxes = (const T*)mxGetData(input_boxes);
    for (int i = 0; i < nSample; ++i)
    {
        vArea[i] = double(pBoxes[2*nSample + i] - pBoxes[0*nSample + i] + 1)
                 * (pBoxes[3*nSample + i] - pBoxes[1*nSample + i] + 1);
        if (vArea[i] < 0)
            return false;
    }
    return true;
}

// picks = nms_multiclass_mex(boxes, overlap)
//
//   boxes    N-by-(4+K) double or single matrix: x1, y1, x2, y2 followed by
//            K score columns.
//   overlap  double scalar IoU threshold.
//   picks    K-by-1 cell array; cell k holds the 1-based indices (double
//            column vector) of the boxes kept for score column k. An input
//            with no elements yields an empty 0-by-0 double instead.
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    if (nrhs != 2)
        mexErrMsgTxt("Wrong number of inputs");
    if (nlhs != 1)
        mexErrMsgTxt("One output");

    const mxArray *input_boxes = prhs[0];
    if (mxGetClassID(input_boxes) != mxDOUBLE_CLASS && mxGetClassID(input_boxes) != mxSINGLE_CLASS)
        mexErrMsgTxt("Input boxes must be Double or Single");

    const mxArray *input_overlap = prhs[1];
    if (mxGetClassID(input_overlap) != mxDOUBLE_CLASS )
        mexErrMsgTxt("Input overlap must be Double");

    const double overlap = mxGetScalar(input_overlap);

    const int nSample = (int)mxGetM(input_boxes);
    const int nDim_boxes = (int)mxGetN(input_boxes);

    if (nSample * nDim_boxes == 0)
    {
        plhs[0] = mxCreateNumericMatrix(0, 0, mxDOUBLE_CLASS, mxREAL);
        return;
    }

    if (nDim_boxes < 5)
        mexErrMsgTxt("nms_mex boxes must has least 5 columns");

    const bool is_double = (mxGetClassID(input_boxes) == mxDOUBLE_CLASS);

    // The original tested mxDOUBLE_CLASS again in the single-precision branch,
    // so for single input the areas were never computed (all zeros), every IoU
    // came out negative and no box was ever suppressed.
    vector<double> vArea(nSample);
    const bool areas_ok = is_double ? box_areas<double>(input_boxes, vArea)
                                    : box_areas<float>(input_boxes, vArea);
    if (!areas_ok)
        mexErrMsgTxt("Boxes area must >= 0");

    const int nClass = nDim_boxes - 4;
    vector<int> nPick(nClass, 0);
    // Allocate every result buffer up front so the parallel loop does no
    // allocation of its own.
    vector<vector<int> > vPicks(nClass, vector<int>(nSample));

    // Only the pure C++ suppression runs in parallel; each iteration writes
    // to its own vPicks[i] / nPick[i].
    // NOTE(review): nms() still calls the read-only accessors mxGetM and
    // mxGetData from worker threads -- confirm this is acceptable for the
    // MATLAB releases in use.
    #pragma omp parallel for schedule(dynamic)
    for (int i = 0; i < nClass; ++i)
    {
        if (is_double)
            nms<double>(input_boxes, i+4, overlap, vArea, vPicks[i], nPick[i]);
        else
            nms<float>(input_boxes, i+4, overlap, vArea, vPicks[i], nPick[i]);
    }

    // Build the MATLAB result on the calling thread: the array-creating MEX
    // calls must not run concurrently.
    plhs[0] = mxCreateCellMatrix(nClass, 1);
    for (int i = 0; i < nClass; ++i)
    {
        mxArray *mxPick = mxCreateNumericMatrix(nPick[i], 1, mxDOUBLE_CLASS, mxREAL);
        double *pRst = mxGetPr(mxPick);
        for (int j = 0; j < nPick[i]; ++j)
            pRst[j] = vPicks[i][j] + 1;   // to MATLAB's 1-based index

        mxSetCell(plhs[0], i, mxPick);
    }
}

-------------------------------------------------------------------------------- /functions/nms/nvmex.m: -------------------------------------------------------------------------------- 1 | function nvmex(cuFileName, outDir) 2 | %NVMEX Compiles and links a CUDA file for MATLAB usage 3 | % NVMEX(FILENAME) will create a MEX-File (also with the name FILENAME) by 4 | % invoking the CUDA compiler, nvcc, and then linking with the MEX 5 | % function in MATLAB. 6 | 7 | if ispc % Windows 8 | Host_Compiler_Location = '-ccbin "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\x86_amd64"'; 9 | CUDA_INC_Location = ['"' getenv('CUDA_PATH') '\include"']; 10 | CUDA_SAMPLES_Location =['"' getenv('NVCUDASAMPLES6_5_ROOT') '\common\inc"']; 11 | PIC_Option = ''; 12 | if ( strcmp(computer('arch'),'win32') ==1) 13 | machine_str = ' --machine 32 '; 14 | CUDA_LIB_Location = ['"' getenv('CUDA_PATH') '\lib\Win32"']; 15 | elseif ( strcmp(computer('arch'),'win64') ==1) 16 | machine_str = ' --machine 64 '; 17 | CUDA_LIB_Location = ['"' getenv('CUDA_PATH') '\lib\x64"']; 18 | end 19 | NVCC = 'nvcc'; 20 | else % Mac and Linux (assuming gcc is on the path) 21 | CUDA_INC_Location = '/usr/local/cuda/include'; 22 | CUDA_SAMPLES_Location = '/usr/local/cuda/samples/common/inc'; 23 | Host_Compiler_Location = ' '; 24 | PIC_Option = ' --compiler-options -fPIC '; 25 | machine_str = []; 26 | CUDA_LIB_Location = '/usr/local/cuda/lib64'; 27 | NVCC = '/usr/local/cuda/bin/nvcc'; 28 | end 29 | % !!! End of things to modify !!! 
% ---- remainder of nvmex: assemble the nvcc and mex command lines, run them ----
% The platform-specific variables used here (NVCC, Host_Compiler_Location,
% machine_str, PIC_Option, CUDA_*_Location) are set by the if/else above.

% The object file is named after the .cu file and written to the current folder.
[~, filename] = fileparts(cuFileName);

% The MATLAB include directory is quoted as a whole. The separating blank now
% follows the closing quote; it used to sit inside it, which made the path end
% in a space.
nvccCommandLine = [ ...
    NVCC ' --compile ' Host_Compiler_Location ' ' ...
    '-o ' filename '.o ' ...
    machine_str PIC_Option ...
    ' -I' '"' matlabroot '/extern/include" ' ...
    ' -I' CUDA_INC_Location ' -I' CUDA_SAMPLES_Location ...
    ' "' cuFileName '" '
    ];
mexCommandLine = ['mex ' '-outdir ' outDir ' ' filename '.o' ' -L' CUDA_LIB_Location ' -lcudart'];

disp(nvccCommandLine);

% Silence warnings only around the nvcc call, then restore whatever warning
% state the caller had (a plain "warning on" would switch every warning on).
warning_state = warning('off', 'all');
status = system(nvccCommandLine);
warning(warning_state);

% system() returns the command's exit status: any non-zero value is a failure.
% The old "status < 0" test let ordinary compile errors through, after which
% mex linked a stale or missing object file.
if status ~= 0
    error('Error invoking nvcc');
end

disp(mexCommandLine);
eval(mexCommandLine);
end

-------------------------------------------------------------------------------- /functions/rpn/proposal_calc_output_size.m: -------------------------------------------------------------------------------- 1 | function [output_width_map, output_height_map] = proposal_calc_output_size(conf, test_net_def_file) 2 | % [output_width_map, output_height_map] = proposal_calc_output_size(conf, test_net_def_file) 3 | % -------------------------------------------------------- 4 | % Faster R-CNN 5 | % Copyright (c) 2015, Shaoqing Ren 6 | % Licensed under The MIT License [see LICENSE for details] 7 | % -------------------------------------------------------- 8 | 9 | % caffe.init_log(fullfile(pwd, 'caffe_log')); 10 | caffe_net = caffe.Net(test_net_def_file, 'test'); 11 | 12 | % set gpu/cpu 13 | if conf.use_gpu 14 | caffe.set_mode_gpu(); 15 | else 16 | caffe.set_mode_cpu(); 17 | end 18 | 19 | input = 100:conf.max_size; 20 | output_w = nan(size(input)); 21 | output_h = nan(size(input)); 22 | for i = 1:length(input) 23 | s = input(i); 24 | im_blob = single(zeros(s, s, 3, 1)); 25 | net_inputs = {im_blob}; 26 | 27 | % Reshape net's input blobs 28 | caffe_net.reshape_as_input(net_inputs); 29 | caffe_net.forward(net_inputs); 30 | 31 | cls_score = caffe_net.blobs('proposal_cls_score').get_data(); 32 | output_w(i) = 
size(cls_score, 1); 33 | output_h(i) = size(cls_score, 2); 34 | end 35 | 36 | output_width_map = containers.Map(input, output_w); 37 | output_height_map = containers.Map(input, output_h); 38 | 39 | caffe.reset_all(); 40 | end -------------------------------------------------------------------------------- /functions/rpn/proposal_config.m: -------------------------------------------------------------------------------- 1 | function conf = proposal_config(varargin) 2 | % conf = proposal_config(varargin) 3 | % -------------------------------------------------------- 4 | % Faster R-CNN 5 | % Copyright (c) 2015, Shaoqing Ren 6 | % Licensed under The MIT License [see LICENSE for details] 7 | % -------------------------------------------------------- 8 | 9 | ip = inputParser; 10 | 11 | %% training 12 | ip.addParamValue('use_gpu', gpuDeviceCount > 0, ... 13 | @islogical); 14 | 15 | % whether drop the anchors that has edges outside of the image boundary 16 | ip.addParamValue('drop_boxes_runoff_image', ... 
17 | true, @islogical); 18 | 19 | % Image scales -- the short edge of input image 20 | ip.addParamValue('scales', 600, @ismatrix); 21 | % Max pixel size of a scaled input image 22 | ip.addParamValue('max_size', 1000, @isscalar); 23 | % Images per batch, only supports ims_per_batch = 1 currently 24 | ip.addParamValue('ims_per_batch', 1, @isscalar); 25 | % Minibatch size 26 | ip.addParamValue('batch_size', 256, @isscalar); 27 | % Fraction of minibatch that is foreground labeled (class > 0) 28 | ip.addParamValue('fg_fraction', 0.5, @isscalar); 29 | % weight of background samples, when weight of foreground samples is 30 | % 1.0 31 | ip.addParamValue('bg_weight', 1.0, @isscalar); 32 | % Overlap threshold for a ROI to be considered foreground (if >= fg_thresh) 33 | ip.addParamValue('fg_thresh', 0.7, @isscalar); 34 | % Overlap threshold for a ROI to be considered background (class = 0 if 35 | % overlap in [bg_thresh_lo, bg_thresh_hi)) 36 | ip.addParamValue('bg_thresh_hi', 0.3, @isscalar); 37 | ip.addParamValue('bg_thresh_lo', 0, @isscalar); 38 | % mean image, in RGB order 39 | ip.addParamValue('image_means', 128, @ismatrix); 40 | % Use horizontally-flipped images during training? 41 | ip.addParamValue('use_flipped', true, @islogical); 42 | % Stride in input image pixels at ROI pooling level (network specific) 43 | % 16 is true for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16 44 | ip.addParamValue('feat_stride', 16, @isscalar); 45 | % train proposal target only to labled ground-truths or also include 46 | % other proposal results (selective search, etc.) 
47 | ip.addParamValue('target_only_gt', true, @islogical); 48 | 49 | % random seed 50 | ip.addParamValue('rng_seed', 6, @isscalar); 51 | 52 | 53 | %% testing 54 | ip.addParamValue('test_scales', 600, @isscalar); 55 | ip.addParamValue('test_max_size', 1000, @isscalar); 56 | ip.addParamValue('test_nms', 0.3, @isscalar); 57 | ip.addParamValue('test_binary', false, @islogical); 58 | ip.addParamValue('test_min_box_size',16, @isscalar); 59 | ip.addParamValue('test_drop_boxes_runoff_image', ... 60 | false, @islogical); 61 | 62 | ip.parse(varargin{:}); 63 | conf = ip.Results; 64 | 65 | assert(conf.ims_per_batch == 1, 'currently rpn only supports ims_per_batch == 1'); 66 | 67 | % if image_means is a file, load it 68 | if ischar(conf.image_means) 69 | s = load(conf.image_means); 70 | s_fieldnames = fieldnames(s); 71 | assert(length(s_fieldnames) == 1); 72 | conf.image_means = s.(s_fieldnames{1}); 73 | end 74 | end -------------------------------------------------------------------------------- /functions/rpn/proposal_generate_anchors.m: --------------------------------------------------------------------------------
function anchors = proposal_generate_anchors(cache_name, varargin)
% anchors = proposal_generate_anchors(cache_name, varargin)
% --------------------------------------------------------
% Faster R-CNN
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------
%
% Builds the reference anchor boxes: the base box [1 1 base_size base_size]
% is reshaped to every aspect ratio, and each of those is enlarged by every
% scale, always around the same centre.
%
% Inputs
%   cache_name     char, sub-folder of <pwd>/output/rpn_cachedir in which the
%                  anchors are cached (file 'anchors').
%   'base_size'    side length of the base anchor            (default 16)
%   'ratios'       list of aspect ratios                     (default [0.5 1 2])
%   'scales'       list of scale factors                     (default 2.^[3:5])
%   'ignore_cache' true: neither read nor write the cache    (default false)
%
% Output
%   anchors        (numel(ratios)*numel(scales))-by-4 matrix, one box
%                  [x1 y1 x2 y2] per row, grouped by ratio and, within a
%                  ratio, ordered by scale.
%
% Note: a cached result is returned as is; the cache is keyed by cache_name
% only, not by base_size / ratios / scales.

%% inputs
    ip = inputParser;
    ip.addRequired('cache_name',                        @ischar);

    % the size of the base anchor
    ip.addParamValue('base_size',       16,             @isscalar);
    % ratio list of anchors
    ip.addParamValue('ratios',          [0.5, 1, 2],    @ismatrix);
    % scale list of anchors
    ip.addParamValue('scales',          2.^[3:5],       @ismatrix);
    ip.addParamValue('ignore_cache',    false,          @islogical);
    ip.parse(cache_name, varargin{:});
    opts = ip.Results;

%% try the cache (only when it is enabled)
    use_cache = ~opts.ignore_cache;
    have_anchors = false;
    if use_cache
        anchor_cache_dir  = fullfile(pwd, 'output', 'rpn_cachedir', cache_name);
        mkdir_if_missing(anchor_cache_dir);
        anchor_cache_file = fullfile(anchor_cache_dir, 'anchors');
        try
            ld = load(anchor_cache_file);
            anchors = ld.anchors;
            have_anchors = true;
        catch
            % No usable cache file yet: fall through and generate the anchors.
        end
    end

%% generate (and store) the anchors
    if ~have_anchors
        base_anchor   = [1, 1, opts.base_size, opts.base_size];
        ratio_anchors = ratio_jitter(base_anchor, opts.ratios);
        anchors = cellfun(@(x) scale_jitter(x, opts.scales), num2cell(ratio_anchors, 2), 'UniformOutput', false);
        anchors = cat(1, anchors{:});
        if use_cache
            save(anchor_cache_file, 'anchors');
        end
    end

end

function anchors = ratio_jitter(anchor, ratios)
% One box per aspect ratio (height/width), each with approximately the area
% of ANCHOR and the same centre. Widths and heights are rounded to integers.
    ratios = ratios(:);

    w = anchor(3) - anchor(1) + 1;
    h = anchor(4) - anchor(2) + 1;
    x_ctr = anchor(1) + (w - 1) / 2;
    y_ctr = anchor(2) + (h - 1) / 2;
    box_area = w * h;   % was called "size", which shadowed the builtin

    area_ratios = box_area ./ ratios;
    ws = round(sqrt(area_ratios));
    hs = round(ws .* ratios);

    anchors = [x_ctr - (ws - 1) / 2, y_ctr - (hs - 1) / 2, x_ctr + (ws - 1) / 2, y_ctr + (hs - 1) / 2];
end

function anchors = scale_jitter(anchor, scales)
% One box per scale: width and height of ANCHOR multiplied by the scale,
% centre unchanged.
    scales = scales(:);

    w = anchor(3) - anchor(1) + 1;
    h = anchor(4) - anchor(2) + 1;
    x_ctr = anchor(1) + (w - 1) / 2;
    y_ctr = anchor(2) + (h - 1) / 2;

    ws = w * scales;
    hs = h * scales;

    anchors = [x_ctr - (ws - 1) / 2, y_ctr - (hs - 1) / 2, x_ctr + (ws - 1) / 2, y_ctr + (hs - 1) / 2];
end

-------------------------------------------------------------------------------- /functions/rpn/proposal_generate_minibatch.m: -------------------------------------------------------------------------------- 1 | function [input_blobs, random_scale_inds] = proposal_generate_minibatch(conf, image_roidb) 2 | % [input_blobs, random_scale_inds] = proposal_generate_minibatch(conf, image_roidb) 3 | % 
-------------------------------------------------------- 4 | % Faster R-CNN 5 | % Copyright (c) 2015, Shaoqing Ren 6 | % Licensed under The MIT License [see LICENSE for details] 7 | % -------------------------------------------------------- 8 | 9 | num_images = length(image_roidb); 10 | assert(num_images == 1, 'proposal_generate_minibatch_fcn only support num_images == 1'); 11 | 12 | % Sample random scales to use for each image in this batch 13 | random_scale_inds = randi(length(conf.scales), num_images, 1); 14 | 15 | assert(mod(conf.batch_size, num_images) == 0, ... 16 | sprintf('num_images %d must divide BATCH_SIZE %d', num_images, conf.batch_size)); 17 | 18 | rois_per_image = conf.batch_size / num_images; 19 | fg_rois_per_image = round(rois_per_image * conf.fg_fraction); 20 | 21 | % Get the input image blob 22 | [im_blob, im_scales] = get_image_blob(conf, image_roidb, random_scale_inds); 23 | 24 | for i = 1:num_images 25 | [labels, label_weights, bbox_targets, bbox_loss] = ... 26 | sample_rois(conf, image_roidb(i), fg_rois_per_image, rois_per_image, im_scales(i), random_scale_inds(i)); 27 | 28 | % get fcn output size 29 | img_size = round(image_roidb(i).im_size * im_scales(i)); 30 | output_size = cell2mat([conf.output_height_map.values({img_size(1)}), conf.output_width_map.values({img_size(2)})]); 31 | 32 | assert(img_size(1) == size(im_blob, 1) && img_size(2) == size(im_blob, 2)); 33 | 34 | labels_blob = reshape(labels, size(conf.anchors, 1), output_size(1), output_size(2)); 35 | label_weights_blob = reshape(label_weights, size(conf.anchors, 1), output_size(1), output_size(2)); 36 | bbox_targets_blob = reshape(bbox_targets', size(conf.anchors, 1)*4, output_size(1), output_size(2)); 37 | bbox_loss_blob = reshape(bbox_loss', size(conf.anchors, 1)*4, output_size(1), output_size(2)); 38 | 39 | % permute from [channel, height, width], where channel is the 40 | % fastest dimension to [width, height, channel] 41 | labels_blob = permute(labels_blob, [3, 2, 1]); 42 | 
label_weights_blob = permute(label_weights_blob, [3, 2, 1]); 43 | bbox_targets_blob = permute(bbox_targets_blob, [3, 2, 1]); 44 | bbox_loss_blob = permute(bbox_loss_blob, [3, 2, 1]); 45 | end 46 | 47 | % permute data into caffe c++ memory, thus [num, channels, height, width] 48 | im_blob = im_blob(:, :, [3, 2, 1], :); % from rgb to brg 49 | im_blob = single(permute(im_blob, [2, 1, 3, 4])); 50 | labels_blob = single(labels_blob); 51 | labels_blob(labels_blob > 0) = 1; %to binary lable (fg and bg) 52 | label_weights_blob = single(label_weights_blob); 53 | bbox_targets_blob = single(bbox_targets_blob); 54 | bbox_loss_blob = single(bbox_loss_blob); 55 | 56 | assert(~isempty(im_blob)); 57 | assert(~isempty(labels_blob)); 58 | assert(~isempty(label_weights_blob)); 59 | assert(~isempty(bbox_targets_blob)); 60 | assert(~isempty(bbox_loss_blob)); 61 | 62 | input_blobs = {im_blob, labels_blob, label_weights_blob, bbox_targets_blob, bbox_loss_blob}; 63 | end 64 | 65 | 66 | %% Build an input blob from the images in the roidb at the specified scales. 67 | function [im_blob, im_scales] = get_image_blob(conf, images, random_scale_inds) 68 | 69 | num_images = length(images); 70 | processed_ims = cell(num_images, 1); 71 | im_scales = nan(num_images, 1); 72 | for i = 1:num_images 73 | im = imread(images(i).image_path); 74 | target_size = conf.scales(random_scale_inds(i)); 75 | 76 | [im, im_scale] = prep_im_for_blob(im, conf.image_means, target_size, conf.max_size); 77 | 78 | im_scales(i) = im_scale; 79 | processed_ims{i} = im; 80 | end 81 | 82 | im_blob = im_list_to_blob(processed_ims); 83 | end 84 | 85 | %% Generate a random sample of ROIs comprising foreground and background examples. 86 | function [labels, label_weights, bbox_targets, bbox_loss_weights] = ... 
87 | sample_rois(conf, image_roidb, fg_rois_per_image, rois_per_image, im_scale, im_scale_ind) 88 | 89 | bbox_targets = image_roidb.bbox_targets{im_scale_ind}; 90 | ex_asign_labels = bbox_targets(:, 1); 91 | 92 | % Select foreground ROIs as those with >= FG_THRESH overlap 93 | fg_inds = find(bbox_targets(:, 1) > 0); 94 | 95 | % Select background ROIs as those within [BG_THRESH_LO, BG_THRESH_HI) 96 | bg_inds = find(bbox_targets(:, 1) < 0); 97 | 98 | % select foreground 99 | fg_num = min(fg_rois_per_image, length(fg_inds)); 100 | fg_inds = fg_inds(randperm(length(fg_inds), fg_num)); 101 | 102 | bg_num = min(rois_per_image - fg_num, length(bg_inds)); 103 | bg_inds = bg_inds(randperm(length(bg_inds), bg_num)); 104 | 105 | labels = zeros(size(bbox_targets, 1), 1); 106 | % set foreground labels 107 | labels(fg_inds) = ex_asign_labels(fg_inds); 108 | assert(all(ex_asign_labels(fg_inds) > 0)); 109 | 110 | label_weights = zeros(size(bbox_targets, 1), 1); 111 | % set foreground labels weights 112 | label_weights(fg_inds) = 1; 113 | % set background labels weights 114 | label_weights(bg_inds) = conf.bg_weight; 115 | 116 | bbox_targets = single(full(bbox_targets(:, 2:end))); 117 | 118 | bbox_loss_weights = bbox_targets * 0; 119 | bbox_loss_weights(fg_inds, :) = 1; 120 | end 121 | 122 | function visual_anchors(image_roidb, anchors, im_scale) 123 | imshow(imresize(imread(image_roidb.image_path), im_scale)); 124 | hold on; 125 | cellfun(@(x) rectangle('Position', RectLTRB2LTWH(x), 'EdgeColor', 'r'), num2cell(anchors, 2)); 126 | hold off; 127 | end 128 | 129 | -------------------------------------------------------------------------------- /functions/rpn/proposal_im_detect.m: -------------------------------------------------------------------------------- 1 | function [pred_boxes, scores, box_deltas_, anchors_, scores_] = proposal_im_detect(conf, caffe_net, im) 2 | % [pred_boxes, scores, box_deltas_, anchors_, scores_] = proposal_im_detect(conf, im, net_idx) 3 | % 
function [pred_boxes, scores, box_deltas_, anchors_, scores_] = proposal_im_detect(conf, caffe_net, im)
% [pred_boxes, scores, box_deltas_, anchors_, scores_] = proposal_im_detect(conf, caffe_net, im)
% --------------------------------------------------------
% Faster R-CNN
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------
%
% Runs the proposal network on one image and returns box proposals.
%
%   conf       struct; reads test_scales, test_max_size, image_means,
%              test_binary (must be false), test_drop_boxes_runoff_image,
%              test_min_box_size (plus whatever proposal_locate_anchors reads)
%   caffe_net  net object providing reshape_as_input() and forward()
%   im         input image (converted to single here)
%
%   pred_boxes   regressed boxes in original-image coordinates, clipped to the
%                image, filtered, and sorted by descending score
%   scores       scores matching the rows of pred_boxes
%   box_deltas_, anchors_, scores_
%                raw deltas, anchors and scores captured BEFORE the optional
%                run-off drop, the min-size filter and the sort
%
% NOTE(review): scaled_im_size = round(im_size * im_scales) only works when
% im_scales is a scalar, i.e. a single test scale - confirm callers never pass
% several conf.test_scales.

    im = single(im);
    [im_blob, im_scales] = get_image_blob(conf, im);
    im_size = size(im);
    scaled_im_size = round(im_size * im_scales);
    
    % permute data into caffe c++ memory, thus [num, channels, height, width]
    im_blob = im_blob(:, :, [3, 2, 1], :); % reverse the channel order (RGB -> BGR)
    im_blob = permute(im_blob, [2, 1, 3, 4]);
    im_blob = single(im_blob);

    net_inputs = {im_blob};

    % Reshape net's input blobs
    caffe_net.reshape_as_input(net_inputs);
    output_blobs = caffe_net.forward(net_inputs);

    % Apply bounding-box regression deltas
    % output_blobs{1} holds the deltas; its first two dims give the feature map size
    box_deltas = output_blobs{1};
    featuremap_size = [size(box_deltas, 2), size(box_deltas, 1)];
    % permute from [width, height, channel] to [channel, height, width], where channel is the
    % fastest dimension
    box_deltas = permute(box_deltas, [3, 2, 1]);
    box_deltas = reshape(box_deltas, 4, [])';
    
    anchors = proposal_locate_anchors(conf, size(im), conf.test_scales, featuremap_size);
    pred_boxes = fast_rcnn_bbox_transform_inv(anchors, box_deltas);
    % scale back from the resized image to the original image (1-based coordinates)
    pred_boxes = bsxfun(@times, pred_boxes - 1, ...
        ([im_size(2), im_size(1), im_size(2), im_size(1)] - 1) ./ ([scaled_im_size(2), scaled_im_size(1), scaled_im_size(2), scaled_im_size(1)] - 1)) + 1;
    pred_boxes = clip_boxes(pred_boxes, size(im, 2), size(im, 1));
    
    assert(conf.test_binary == false);
    % use softmax estimated probabilities
    % only the last slice along dim 3 of output_blobs{2} is kept
    scores = output_blobs{2}(:, :, end);
    scores = reshape(scores, size(output_blobs{1}, 1), size(output_blobs{1}, 2), []);
    % permute from [width, height, channel] to [channel, height, width], where channel is the
        % fastest dimension
    scores = permute(scores, [3, 2, 1]);
    scores = scores(:);
    
    % snapshot the unfiltered values for callers that want the raw outputs
    box_deltas_ = box_deltas;
    anchors_ = anchors;
    scores_ = scores;
    
    if conf.test_drop_boxes_runoff_image
        % the test is made on the ANCHORS (in scaled-image coordinates), not on pred_boxes
        contained_in_image = is_contain_in_image(anchors, round(size(im) * im_scales));
        pred_boxes = pred_boxes(contained_in_image, :);
        scores = scores(contained_in_image, :);
    end
    
    % drop too small boxes
    [pred_boxes, scores] = filter_boxes(conf.test_min_box_size, pred_boxes, scores);
    
    % sort
    [scores, scores_ind] = sort(scores, 'descend');
    pred_boxes = pred_boxes(scores_ind, :);
end

% Build the image blob and the ROI blob for a given image and set of ROIs.
function [data_blob, rois_blob, im_scale_factors] = get_blobs(conf, im, rois)
    [data_blob, im_scale_factors] = get_image_blob(conf, im);
    rois_blob = get_rois_blob(conf, rois, im_scale_factors);
end

% Rescale the image for every entry of conf.test_scales.
% With one scale the output of prep_im_for_blob is returned directly; with
% several, the per-scale images are packed with im_list_to_blob and im_scales
% becomes a vector.
function [blob, im_scales] = get_image_blob(conf, im)
    if length(conf.test_scales) == 1
        [blob, im_scales] = prep_im_for_blob(im, conf.image_means, conf.test_scales, conf.test_max_size);
    else
        [ims, im_scales] = arrayfun(@(x) prep_im_for_blob(im, conf.image_means, x, conf.test_max_size), conf.test_scales, 'UniformOutput', false);
        im_scales = cell2mat(im_scales);
        blob = im_list_to_blob(ims);
    end
end
function [rois_blob] = get_rois_blob(conf, im_rois, im_scale_factors)
% GET_ROIS_BLOB  Build the N x 5 single blob [level, x1, y1, x2, y2] of
%   feature-map ROIs for the given image ROIs and scale factors.
    [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, im_scale_factors);
    rois_blob = single([levels, feat_rois]);
end

function [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, scales)
% MAP_IM_ROIS_TO_FEAT_ROIS  Project image ROIs onto the feature map.
%
%   conf     struct; reads conf.feat_stride
%   im_rois  N x 4 boxes [x1 y1 x2 y2] in 1-based image coordinates
%   scales   scalar or vector of image scale factors
%
%   feat_rois  N x 4 single, round((im_rois - 1) * scale / feat_stride) + 1
%   levels     N x 1 index into scales chosen for each ROI
%
%   With several scales, each ROI is assigned the scale whose scaled area is
%   closest to 224^2. The previous code used max(x, 2), which is an
%   element-wise maximum against the scalar 2 rather than a per-row index,
%   so the multi-scale path produced invalid indices.

    im_rois = single(im_rois);
    % force a column so scales(levels) below is always N x 1
    scales = scales(:);

    if length(scales) > 1
        widths = im_rois(:, 3) - im_rois(:, 1) + 1;
        heights = im_rois(:, 4) - im_rois(:, 2) + 1;

        areas = widths .* heights;
        scaled_areas = bsxfun(@times, areas(:), scales' .^ 2);
        % per-row index of the scale whose scaled area is nearest to 224^2
        [~, levels] = min(abs(scaled_areas - 224.^2), [], 2);
    else
        levels = ones(size(im_rois, 1), 1);
    end

    feat_rois = round(bsxfun(@times, im_rois-1, scales(levels)) / conf.feat_stride) + 1;
end

function [boxes, scores] = filter_boxes(min_box_size, boxes, scores)
% FILTER_BOXES  Keep only boxes whose width and height (inclusive pixel
%   counts) are both at least min_box_size; scores are filtered in step.
    widths = boxes(:, 3) - boxes(:, 1) + 1;
    heights = boxes(:, 4) - boxes(:, 2) + 1;

    valid_ind = widths >= min_box_size & heights >= min_box_size;
    boxes = boxes(valid_ind, :);
    scores = scores(valid_ind, :);
end

function boxes = clip_boxes(boxes, im_width, im_height)
% CLIP_BOXES  Clamp box coordinates into [1, im_width] x [1, im_height].
%   Columns are treated as repeating groups of [x1 y1 x2 y2].
    % x1 >= 1 & <= im_width
    boxes(:, 1:4:end) = max(min(boxes(:, 1:4:end), im_width), 1);
    % y1 >= 1 & <= im_height
    boxes(:, 2:4:end) = max(min(boxes(:, 2:4:end), im_height), 1);
    % x2 >= 1 & <= im_width
    boxes(:, 3:4:end) = max(min(boxes(:, 3:4:end), im_width), 1);
    % y2 >= 1 & <= im_height
    boxes(:, 4:4:end) = max(min(boxes(:, 4:4:end), im_height), 1);
end

function contained = is_contain_in_image(boxes, im_size)
% IS_CONTAIN_IN_IMAGE  Row-wise test that all four coordinates of a box lie
%   within [1, width] / [1, height], where im_size = [height, width, ...].
    contained = boxes >= 1 & bsxfun(@le, boxes, [im_size(2), im_size(1), im_size(2), im_size(1)]);

    contained = all(contained, 2);
end
function [anchors, im_scales] = proposal_locate_anchors(conf, im_size, target_scale, feature_map_size)
% [anchors, im_scales] = proposal_locate_anchors(conf, im_size, target_scale, feature_map_size)
% --------------------------------------------------------
% Faster R-CNN
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------
% Place the base anchors at every feature-map position.
%
%   With target_scale given, anchors/im_scales are the direct outputs for that
%   scale. Without it, one result per entry of conf.scales is returned, each
%   wrapped in a cell array.
%   feature_map_size ([height, width]) is optional; when omitted the output
%   size is looked up in conf.output_height_map / conf.output_width_map.

    if nargin < 4
        feature_map_size = [];
    end

    if nargin >= 3
        [anchors, im_scales] = proposal_locate_anchors_single_scale( ...
            im_size, conf, target_scale, feature_map_size);
        return;
    end

    per_scale = @(s) proposal_locate_anchors_single_scale(im_size, conf, s, feature_map_size);
    [anchors, im_scales] = arrayfun(per_scale, conf.scales, 'UniformOutput', false);
end

function [anchors, im_scale] = proposal_locate_anchors_single_scale(im_size, conf, target_scale, feature_map_size)
% Anchors for one target scale, as an (num_anchors * num_positions) x 4 matrix.

    im_scale = prep_im_for_blob_size(im_size, target_scale, conf.max_size);

    if isempty(feature_map_size)
        % derive the feature-map size from the rescaled image size via the lookup maps
        scaled_size = round(im_size * im_scale);
        out_h = conf.output_height_map.values({scaled_size(1)});
        out_w = conf.output_width_map.values({scaled_size(2)});
        output_size = cell2mat([out_h, out_w]);
    else
        output_size = feature_map_size;
    end

    xs = (0:(output_size(2)-1)) * conf.feat_stride;
    ys = (0:(output_size(1)-1)) * conf.feat_stride;
    [grid_x, grid_y] = meshgrid(xs, ys);
    shifts = [grid_x(:), grid_y(:), grid_x(:), grid_y(:)];

    % concat anchors as [channel, height, width], where channel is the fastest dimension:
    % conf.anchors goes on dim 1, the shifts on dim 2, the 4 coordinates on dim 3
    shifted = bsxfun(@plus, permute(conf.anchors, [1, 3, 2]), permute(shifts, [3, 1, 2]));
    anchors = reshape(shifted, [], 4);
end
function aboxes = proposal_test(conf, imdb, varargin)
% aboxes = proposal_test(conf, imdb, varargin)
% --------------------------------------------------------
% Faster R-CNN
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------
%
% Run the proposal network over every image of imdb and return the proposals.
%
%   conf   struct; reads rng_seed and use_gpu here, and is passed on to
%          proposal_im_detect
%   imdb   struct; reads name, image_ids and image_at(i)
%
%   Name/value options:
%     'net_def_file'  path of the test prototxt
%     'net_file'      path of the trained caffemodel
%     'cache_name'    sub-folder name under output/rpn_cachedir
%     'suffix'        appended to the cached result file name
%
%   aboxes  num_images x 1 cell; aboxes{i} is [boxes, scores] as returned by
%           proposal_im_detect for image i
%
% Results are cached in
%   <pwd>/output/rpn_cachedir/<cache_name>/<imdb.name>/proposal_boxes_<imdb.name><suffix>
% If that file loads, it is returned without running the net.
% NOTE(review): the catch clause treats ANY load failure (missing file, corrupt
% file, missing 'aboxes' variable) as "no cache" and recomputes.

%% inputs
    ip = inputParser;
    ip.addRequired('conf',                              @isstruct);
    ip.addRequired('imdb',                              @isstruct);
    ip.addParamValue('net_def_file',    fullfile(pwd, 'proposal_models', 'Zeiler_conv5', 'test.prototxt'), ...
                                                        @isstr);
    ip.addParamValue('net_file',        fullfile(pwd, 'proposal_models', 'Zeiler_conv5', 'Zeiler_conv5.caffemodel'), ...
                                                        @isstr);
    ip.addParamValue('cache_name',      'Zeiler_conv5', ...
                                                        @isstr);
                                                    
    ip.addParamValue('suffix',          '',             @isstr);
    
    ip.parse(conf, imdb, varargin{:});
    opts = ip.Results;
    

    cache_dir = fullfile(pwd, 'output', 'rpn_cachedir', opts.cache_name, imdb.name);
    try
        % try to load cache
        ld = load(fullfile(cache_dir, ['proposal_boxes_' imdb.name opts.suffix]));
        aboxes = ld.aboxes;
        clear ld;
    catch    
%% init net
        % init caffe net
        mkdir_if_missing(cache_dir);
        caffe_log_file_base = fullfile(cache_dir, 'caffe_log');
        caffe.init_log(caffe_log_file_base);
        caffe_net = caffe.Net(opts.net_def_file, 'test');
        caffe_net.copy_from(opts.net_file);

        % init log
        % console output is mirrored into a timestamped file via diary
        timestamp = datestr(datevec(now()), 'yyyymmdd_HHMMSS');
        mkdir_if_missing(fullfile(cache_dir, 'log'));
        log_file = fullfile(cache_dir, 'log', ['test_', timestamp, '.txt']);
        diary(log_file);

        % set random seed
        % the previous generator state is kept so it can be restored at the end
        prev_rng = seed_rand(conf.rng_seed);
        caffe.set_random_seed(conf.rng_seed);

        % set gpu/cpu
        if conf.use_gpu
            caffe.set_mode_gpu();
        else
            caffe.set_mode_cpu();
        end             

        disp('opts:');
        disp(opts);
        disp('conf:');
        disp(conf);
    
%% testing
        num_images = length(imdb.image_ids);
        % all detections are collected into:
        %    all_boxes[image] = N x 5 array of detections in
        %    (x1, y1, x2, y2, score)
        aboxes = cell(num_images, 1);
        % the three arrays below are filled per image but are neither saved nor returned
        abox_deltas = cell(num_images, 1);
        aanchors = cell(num_images, 1);
        ascores = cell(num_images, 1);
        
        count = 0;
        for i = 1:num_images
            count = count + 1;
            fprintf('%s: test (%s) %d/%d ', procid(), imdb.name, count, num_images);
            th = tic;
            im = imread(imdb.image_at(i));

            [boxes, scores, abox_deltas{i}, aanchors{i}, ascores{i}] = proposal_im_detect(conf, caffe_net, im);
            
            fprintf(' time: %.3fs\n', toc(th));  
            
            aboxes{i} = [boxes, scores];
        end    
        save(fullfile(cache_dir, ['proposal_boxes_' imdb.name opts.suffix]), 'aboxes', '-v7.3');
        
        % NOTE(review): these three cleanup steps are skipped if anything above throws
        diary off;
        caffe.reset_all(); 
        rng(prev_rng);
    end
end
function proposal_visual_debug(conf, image_roidb, input_blobs, bbox_means, bbox_stds, classes, scale_inds)
% proposal_visual_debug(conf, image_roidb, input_blobs, bbox_means, bbox_stds, classes, scale_inds)
% --------------------------------------------------------
% Faster R-CNN
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------
%
% Interactive viewer for a training minibatch. For every image in the blob and
% every base anchor it shows the image and draws:
%   black - sampled background anchors (label 0, weight > 0)
%   blue  - the middle entry of that background list
%   red   - foreground anchors (label > 0)
%   green - foreground anchors after applying their de-normalized targets
% and then waits for a key press (pause).
%
% input_blobs is {im, labels, label_weights, bbox_targets, bbox_loss_weights};
% the fifth entry and the 'classes' argument are read/accepted but not used.

    im_blob = input_blobs{1};
    raw_labels = input_blobs{2};
    raw_weights = input_blobs{3};
    raw_targets = input_blobs{4};
    bbox_loss_weights_blob = input_blobs{5}; %#ok<NASGU>

    % concat anchors as [channel, height, width], where channel is the fastest dimension.
    anchors = proposal_locate_anchors(conf, image_roidb.im_size, conf.scales(scale_inds));

    % undo the blob layout, then undo the mean/std normalization of the targets
    targets = reshape(double(permute(raw_targets, [3, 2, 1])), 4, [])';
    targets = bsxfun(@plus, bsxfun(@times, targets, bbox_stds), bbox_means);

    labels = reshape(double(permute(raw_labels, [3, 2, 1])), [], 1);
    weights = reshape(double(permute(raw_weights, [3, 2, 1])), [], 1);
    regressed = fast_rcnn_bbox_transform_inv(anchors, targets);

    num_anchors = size(conf.anchors, 1);
    for img_idx = 1:size(im_blob, 4)
        for a = 1:num_anchors
            shown = permute(im_blob(:, :, [3, 2, 1], img_idx), [2, 1, 3]);
            imshow(mat2gray(shown));
            hold on;

            % every num_anchors-th row belongs to base anchor a
            a_rois = anchors(a:num_anchors:end, :);
            a_labels = labels(a:num_anchors:end);
            a_weights = weights(a:num_anchors:end);
            a_regressed = regressed(a:num_anchors:end, :);

            % bg
            bg_rows = find(a_labels == 0 & a_weights > 0);
            if ~isempty(bg_rows)
                draw_boxes(a_rois(bg_rows, :), 'k');
                draw_boxes(a_rois(bg_rows(round(length(bg_rows)/2)), :), 'b');
            end

            % fg
            is_fg = a_labels > 0;
            draw_boxes(a_rois(is_fg, :), 'r');
            draw_boxes(a_regressed(is_fg, :), 'g');

            hold off;
            pause;
        end
    end
end

function draw_boxes(boxes_ltrb, color)
% Draw each [l t r b] row as a rectangle outline in the given colour.
    for r = 1:size(boxes_ltrb, 1)
        rectangle('Position', RectLTRB2LTWH(boxes_ltrb(r, :)), 'EdgeColor', color);
    end
end
function VOCopts = get_voc_opts(path)
% VOCopts = get_voc_opts(path)
%   Run the devkit's VOCinit script from inside the devkit folder 'path' and
%   return the VOCopts struct that the script creates.
%
%   path     folder expected to contain a 'VOCcode' sub-folder
%   VOCopts  variable created in this workspace by the VOCinit script
%
%   Raises 'VOCcode directory not found under <path>' when the sub-folder is
%   missing. Errors raised by VOCinit itself now propagate unchanged instead of
%   being reported as a missing directory.
%
%   The working directory and the MATLAB path are restored on every exit,
%   including errors, through onCleanup guards.

prev_dir = pwd;
cd(path);
restore_dir = onCleanup(@() cd(prev_dir)); %#ok<NASGU>

% absolute folder, so the path cleanup does not depend on the current directory
voc_code_dir = fullfile(pwd, 'VOCcode');
if exist(voc_code_dir, 'dir') ~= 7
  error('VOCcode directory not found under %s', path);
end

addpath(voc_code_dir);
restore_path = onCleanup(@() rmpath(voc_code_dir)); %#ok<NASGU>

% VOCinit is a script: it defines VOCopts directly in this function workspace
VOCinit;
function res = imdb_eval_voc(cls, boxes, imdb, cache_name, suffix)
% res = imdb_eval_voc(cls, boxes, imdb, cache_name, suffix)
%   Use the VOCdevkit to evaluate detections specified in boxes
%   for class cls against the ground-truth boxes in the image
%   database imdb. Results files are saved with an optional
%   suffix.
%
%   cls         class name, used in result/plot file names
%   boxes       cell array, boxes{i} = detections for image i with the score
%               in the last column and [x1 y1 x2 y2] in columns 1:4
%   imdb        struct; reads name, image_ids and details.VOCopts
%   cache_name  sub-folder of output/fast_rcnn_cachedir for plots and results
%   suffix      optional; a leading '_' is added when missing
%
%   res         struct with fields recall, prec, ap, ap_auc. When evaluation
%               is skipped (year after 2007 on the 'test' set) these stay
%               [], [], 0, 0.

% AUTORIGHTS
% ---------------------------------------------------------
% Copyright (c) 2014, Ross Girshick
% 
% This file is part of the R-CNN code and is available 
% under the terms of the Simplified BSD License provided in 
% LICENSE. Please retain this notice and LICENSE if you use 
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

% Add a random string ("salt") to the end of the results file name
% to prevent concurrent evaluations from clobbering each other
use_res_salt = true;
% Delete results files after computing APs
rm_res = true;
% comp4 because we use outside data (ILSVRC2012)
comp_id = 'comp4';
% draw each class curve
draw_curve = true;

% save results
if ~exist('suffix', 'var') || isempty(suffix) || strcmp(suffix, '')
  suffix = '';
else
  if suffix(1) ~= '_'
    suffix = ['_' suffix];
  end
end

conf.cache_dir = fullfile('output', 'fast_rcnn_cachedir', cache_name, imdb.name);
VOCopts  = imdb.details.VOCopts;
image_ids = imdb.image_ids;
test_set = VOCopts.testset;
year = VOCopts.dataset(4:end);

addpath(fullfile(VOCopts.datadir, 'VOCcode'));

if use_res_salt
  % draw the salt from a shuffled generator, then restore the caller's rng state
  prev_rng = rng;
  rng shuffle;
  salt = sprintf('%d', randi(100000));
  res_id = [comp_id '-' salt];
  rng(prev_rng);
else
  res_id = comp_id;
end
res_fn = sprintf(VOCopts.detrespath, res_id, cls);

% write out detections in PASCAL format and score
[fid, open_msg] = fopen(res_fn, 'w');
if fid < 0
  % fail with the file name instead of an obscure fprintf error later on
  error('imdb_eval_voc:cannotOpenResults', ...
      'cannot open %s for writing: %s', res_fn, open_msg);
end
try
  for i = 1:length(image_ids)
    bbox = boxes{i};
    keep = nms(bbox, 0.3);
    bbox = bbox(keep,:);
    for j = 1:size(bbox,1)
      fprintf(fid, '%s %f %.3f %.3f %.3f %.3f\n', image_ids{i}, bbox(j,end), bbox(j,1:4));
    end
  end
catch write_err
  % do not leak the file handle when nms or the write fails
  fclose(fid);
  rethrow(write_err);
end
fclose(fid);

recall = [];
prec = [];
ap = 0;
ap_auc = 0;

% str2double avoids the eval performed by str2num; || short-circuits on scalars
do_eval = (str2double(year) <= 2007) || ~strcmp(test_set, 'test');
if do_eval
  % Bug in VOCevaldet requires that tic has been called first
  tic;
  [recall, prec, ap] = VOCevaldet(VOCopts, res_id, cls, draw_curve);
  ap_auc = xVOCap(recall, prec);

  % force plot limits
  ylim([0 1]);
  xlim([0 1]);

  print(gcf, '-djpeg', '-r0', ...
      fullfile(conf.cache_dir, [cls '_pr_' imdb.name suffix '.jpg']));
end
fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);

save(fullfile(conf.cache_dir, [cls '_pr_' imdb.name suffix]), ...
    'recall', 'prec', 'ap', 'ap_auc');

res.recall = recall;
res.prec = prec;
res.ap = ap;
res.ap_auc = ap_auc;
if rm_res
  delete(res_fn);
end

rmpath(fullfile(VOCopts.datadir, 'VOCcode'));
function imdb = imdb_from_voc(root_dir, image_set, year, flip)
% imdb = imdb_from_voc(root_dir, image_set, year, flip)
%   Builds an image database for the PASCAL VOC devkit located
%   at root_dir using the image_set and year.
%
%   flip (optional, default false): when true, a horizontally mirrored copy
%   '<id>_flip.<ext>' is written next to every image that lacks one, and the
%   image list is doubled so that original and mirrored ids alternate.
%
%   The result is cached in ./imdb/cache/imdb_voc_<year>_<image_set>[_flip];
%   if that file loads, nothing is rebuilt.
%
%   Inspired by Andrea Vedaldi's MKL imdb and roidb code.

% AUTORIGHTS
% ---------------------------------------------------------
% Copyright (c) 2014, Ross Girshick
% 
% This file is part of the R-CNN code and is available 
% under the terms of the Simplified BSD License provided in 
% LICENSE. Please retain this notice and LICENSE if you use 
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

%imdb.name = 'voc_2007_train'
%imdb.image_dir = '/work4/rbg/VOC2007/VOCdevkit/VOC2007/JPEGImages/'
%imdb.extension = 'jpg'
%imdb.flip = whether mirrored copies are included
%imdb.flip_from = (flip only) for a mirrored entry, index of its source image; 0 otherwise
%imdb.image_ids = {'000001', ... }
%imdb.sizes = [numimages x 2], rows are [height width]
%imdb.classes = {'aeroplane', ... }
%imdb.num_classes
%imdb.class_to_id
%imdb.class_ids
%imdb.eval_func = pointer to the function that evaluates detections
%imdb.roidb_func = pointer to the function that returns regions of interest
%imdb.image_at = function handle mapping an image index to its file path

if nargin < 4
    flip = false;
end

cache_file = ['./imdb/cache/imdb_voc_' year '_' image_set];
if flip
    cache_file = [cache_file, '_flip'];
end
try
  % a successful load defines the output variable 'imdb' directly in this workspace
  load(cache_file);
catch
  VOCopts = get_voc_opts(root_dir);
  VOCopts.testset = image_set;

  imdb.name = ['voc_' year '_' image_set];
  imdb.image_dir = fileparts(VOCopts.imgpath);
  imdb.image_ids = textread(sprintf(VOCopts.imgsetpath, image_set), '%s');
  imdb.extension = 'jpg';
  imdb.flip = flip;
  if flip
      % these handles capture imdb as it is now, i.e. with the un-doubled id list
      image_at = @(i) sprintf('%s/%s.%s', imdb.image_dir, imdb.image_ids{i}, imdb.extension);
      flip_image_at = @(i) sprintf('%s/%s_flip.%s', imdb.image_dir, imdb.image_ids{i}, imdb.extension);
      for i = 1:length(imdb.image_ids)
          % write the mirrored image only when it does not exist yet
          if ~exist(flip_image_at(i), 'file')
             im = imread(image_at(i));
             imwrite(fliplr(im), flip_image_at(i));
          end
      end
      % interleave: odd positions keep the original ids, even positions get '<id>_flip'
      img_num = length(imdb.image_ids)*2;
      image_ids = imdb.image_ids;
      imdb.image_ids(1:2:img_num) = image_ids;
      imdb.image_ids(2:2:img_num) = cellfun(@(x) [x, '_flip'], image_ids, 'UniformOutput', false);
      imdb.flip_from = zeros(img_num, 1);
      imdb.flip_from(2:2:img_num) = 1:2:img_num;
  end
  imdb.classes = VOCopts.classes;
  imdb.num_classes = length(imdb.classes);
  imdb.class_to_id = ...
    containers.Map(imdb.classes, 1:imdb.num_classes);
  imdb.class_ids = 1:imdb.num_classes;

  % private VOC details
  imdb.details.VOCopts = VOCopts;

  % VOC specific functions for evaluation and region of interest DB
  imdb.eval_func = @imdb_eval_voc;
  imdb.roidb_func = @roidb_from_voc;
  % NOTE(review): this handle captures a copy of imdb as of this line; fields
  % assigned afterwards (sizes) are not visible through it, which is harmless
  % because only image_dir, image_ids and extension are used.
  imdb.image_at = @(i) ...
      sprintf('%s/%s.%s', imdb.image_dir, imdb.image_ids{i}, imdb.extension);

  for i = 1:length(imdb.image_ids)
    tic_toc_print('imdb (%s): %d/%d\n', imdb.name, i, length(imdb.image_ids));
    info = imfinfo(sprintf(VOCopts.imgpath, imdb.image_ids{i}));
    imdb.sizes(i, :) = [info.Height info.Width];
  end

  fprintf('Saving imdb to cache...');
  save(cache_file, 'imdb', '-v7.3');
  fprintf('done\n');
end
function roidb = roidb_from_proposal(imdb, roidb, regions, varargin)
% roidb = roidb_from_proposal(imdb, roidb, regions, varargin)
% --------------------------------------------------------
% Faster R-CNN
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------
%
% Append the proposal boxes in 'regions' to every entry of roidb.rois and
% refresh the per-class overlaps against that entry's ground-truth boxes.
%
%   imdb     struct; reads name and image_ids
%   roidb    struct; reads name and rois(i).{gt, overlap, boxes, class}
%   regions  struct; reads images{i} and boxes{i} (columns 1:4 are used)
%   'keep_raw_proposal' (default true): when false, the non-ground-truth rows
%            already present in roidb are removed before the new boxes are added.

    parser = inputParser;
    parser.addRequired('imdb', @isstruct);
    parser.addRequired('roidb', @isstruct);
    parser.addRequired('regions', @isstruct);
    parser.addParamValue('keep_raw_proposal', true, @islogical);
    parser.parse(imdb, roidb, regions, varargin{:});
    opts = parser.Results;

    assert(strcmp(opts.roidb.name, opts.imdb.name));
    rois = opts.roidb.rois;

    for i = 1:length(rois)
        rec = rois(i);

        if ~opts.keep_raw_proposal
            % remove proposal boxes in roidb, keeping only ground-truth rows
            keep = rec.gt;
            rec.gt = rec.gt(keep, :);
            rec.overlap = rec.overlap(keep, :);
            rec.boxes = rec.boxes(keep, :);
            rec.class = rec.class(keep, :);
        end

        % the proposals must be listed in the same image order as the imdb
        [~, imdb_name] = fileparts(imdb.image_ids{i});
        [~, region_name] = fileparts(opts.regions.images{i});
        assert(strcmp(imdb_name, region_name));

        new_boxes = opts.regions.boxes{i}(:, 1:4);
        num_new = size(new_boxes, 1);

        gt_mask = rec.gt;
        gt_boxes = rec.boxes(gt_mask, :);
        gt_classes = rec.class(gt_mask, :);
        all_boxes = cat(1, rec.boxes, new_boxes);

        % add new proposal boxes
        rec.gt = cat(1, rec.gt, false(num_new, 1));
        rec.overlap = cat(1, rec.overlap, zeros(num_new, size(rec.overlap, 2)));
        rec.boxes = all_boxes;
        rec.class = cat(1, rec.class, zeros(num_new, 1));

        % per class, keep the best overlap with any ground-truth box of that class
        for j = 1:size(gt_boxes, 1)
            cls_col = gt_classes(j);
            rec.overlap(:, cls_col) = ...
                max(full(rec.overlap(:, cls_col)), boxoverlap(all_boxes, gt_boxes(j, :)));
        end

        rois(i) = rec;
    end

    roidb.rois = rois;

end
function roidb = roidb_from_voc(imdb, varargin)
% roidb = roidb_from_voc(imdb, varargin)
%   Builds a regions of interest database from imdb image
%   database. Uses precomputed proposal boxes (selective search,
%   edge boxes and/or self proposals) stored under <rootDir>/data.
%
%   imdb  struct; reads name, flip, flip_from, image_ids, class_to_id and
%         details.VOCopts
%
%   Name/value options:
%     'exclude_difficult_samples' (true)  drop objects flagged difficult
%     'with_selective_search'     (false) load data/selective_search_data/<name>.mat
%     'with_edge_box'             (false) load data/edge_box_data/<name>.mat
%     'with_self_proposal'        (false) load data/self_proposal_data/<name>.mat
%     'rootDir'                   ('.')   root of the data and cache folders
%     'extension'                 ('')    extra tag appended as '_<extension>'
%
%   roidb.name  copy of imdb.name
%   roidb.rois  one record per image, as built by attach_proposals
%
%   The result is cached under <rootDir>/imdb/cache; if the cache file loads
%   and contains 'roidb', it is returned as is.
%
%   Inspired by Andrea Vedaldi's MKL imdb and roidb code.

% AUTORIGHTS
% ---------------------------------------------------------
% Copyright (c) 2014, Ross Girshick
% 
% This file is part of the R-CNN code and is available 
% under the terms of the Simplified BSD License provided in 
% LICENSE. Please retain this notice and LICENSE if you use 
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

ip = inputParser;
ip.addRequired('imdb', @isstruct);
ip.addParamValue('exclude_difficult_samples',       true,   @islogical);
ip.addParamValue('with_selective_search',           false,  @islogical);
ip.addParamValue('with_edge_box',                   false,  @islogical);
ip.addParamValue('with_self_proposal',              false,  @islogical);
ip.addParamValue('rootDir',                         '.',    @ischar);
ip.addParamValue('extension',                       '',     @ischar);
ip.parse(imdb, varargin{:});
opts = ip.Results;

roidb.name = imdb.name;
if ~isempty(opts.extension)
    opts.extension = ['_', opts.extension];
end
regions_file_ss = fullfile(opts.rootDir, sprintf('/data/selective_search_data/%s%s.mat', roidb.name, opts.extension));
regions_file_eb = fullfile(opts.rootDir, sprintf('/data/edge_box_data/%s%s.mat', roidb.name, opts.extension));
regions_file_sp = fullfile(opts.rootDir, sprintf('/data/self_proposal_data/%s%s.mat', roidb.name, opts.extension));

% each enabled proposal source contributes a tag to the cache file name
cache_file_ss = [];
cache_file_eb = [];
cache_file_sp = [];
if opts.with_selective_search 
    cache_file_ss = 'ss_';
    if ~exist(regions_file_ss, 'file')
        error('roidb_from_voc:: cannot find %s', regions_file_ss);
    end
end

if opts.with_edge_box 
    cache_file_eb = 'eb_';
    if ~exist(regions_file_eb, 'file')
        error('roidb_from_voc:: cannot find %s', regions_file_eb);
    end
end

if opts.with_self_proposal 
    cache_file_sp = 'sp_';
    if ~exist(regions_file_sp, 'file')
        error('roidb_from_voc:: cannot find %s', regions_file_sp);
    end
end

cache_file = fullfile(opts.rootDir, ['/imdb/cache/roidb_' cache_file_ss cache_file_eb cache_file_sp imdb.name opts.extension]);
if imdb.flip
    cache_file = [cache_file '_flip'];
end
if opts.exclude_difficult_samples
    cache_file = [cache_file '_easy'];
end
cache_file = [cache_file, '.mat'];
try
  % load into a struct so that only 'roidb' is taken from the file; a file
  % without that variable falls through to the rebuild below
  cached = load(cache_file);
  roidb = cached.roidb;
catch
  VOCopts = imdb.details.VOCopts;

  addpath(fullfile(VOCopts.datadir, 'VOCcode')); 

  roidb.name = imdb.name;

  fprintf('Loading region proposals...');
  regions = [];
  if opts.with_selective_search
        regions = load_proposals(regions_file_ss, regions);
  end
  if opts.with_edge_box
        regions = load_proposals(regions_file_eb, regions);
  end
  if opts.with_self_proposal
        regions = load_proposals(regions_file_sp, regions);
  end
  fprintf('done\n');
  if isempty(regions)
      % no proposal source enabled: use empty box lists, one per source image
      fprintf('Warning: no windows proposal is loaded !\n');
      regions.boxes = cell(length(imdb.image_ids), 1);
      if imdb.flip
        regions.images = imdb.image_ids(1:2:end);
      else
        regions.images = imdb.image_ids;
      end
  end

  if ~imdb.flip
      for i = 1:length(imdb.image_ids)
        tic_toc_print('roidb (%s): %d/%d\n', roidb.name, i, length(imdb.image_ids));
        try
          voc_rec = PASreadrecord(sprintf(VOCopts.annopath, imdb.image_ids{i}));
        catch
          % no readable annotation for this image: continue with proposals only
          voc_rec = [];
        end
        if ~isempty(regions)
            [~, image_name1] = fileparts(imdb.image_ids{i});
            [~, image_name2] = fileparts(regions.images{i});
            assert(strcmp(image_name1, image_name2));
        end
        roidb.rois(i) = attach_proposals(voc_rec, regions.boxes{i}, imdb.class_to_id, opts.exclude_difficult_samples, false);
      end
  else
      % image_ids alternate original / mirrored; both share one annotation
      % record and one proposal list
      for i = 1:length(imdb.image_ids)/2
        tic_toc_print('roidb (%s): %d/%d\n', roidb.name, i, length(imdb.image_ids)/2);
        try
          voc_rec = PASreadrecord(sprintf(VOCopts.annopath, imdb.image_ids{i*2-1}));
        catch
          voc_rec = [];
        end
        if ~isempty(regions)
            [~, image_name1] = fileparts(imdb.image_ids{i*2-1});
            [~, image_name2] = fileparts(regions.images{i});
            assert(strcmp(image_name1, image_name2));
            assert(imdb.flip_from(i*2) == i*2-1);
        end
        roidb.rois(i*2-1) = attach_proposals(voc_rec, regions.boxes{i}, imdb.class_to_id, opts.exclude_difficult_samples, false);
        roidb.rois(i*2) = attach_proposals(voc_rec, regions.boxes{i}, imdb.class_to_id, opts.exclude_difficult_samples, true);
      end
  end

  rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 

  fprintf('Saving roidb to cache...');
  save(cache_file, 'roidb', '-v7.3');
  fprintf('done\n');
end

% ------------------------------------------------------------------------
function rec = attach_proposals(voc_rec, boxes, class_to_id, exclude_difficult_samples, flip)
% ------------------------------------------------------------------------
% Build one roidb record from an annotation record and a list of proposals.
%
%   voc_rec      annotation record (reads objects(:).bbox/.class/.difficult and
%                imgsize(1)), or [] when no annotation is available
%   boxes        proposals, columns ordered [y1 x1 y2 x2]; may be empty
%   class_to_id  containers.Map from class name to class index
%   exclude_difficult_samples  drop objects flagged difficult
%   flip         mirror x coordinates using voc_rec.imgsize(1) + 1 - x
%
%   rec fields (ground-truth rows first, then proposals):
%       gt:      logical, true for ground-truth rows
%       overlap: single, one column per class, best overlap with a gt box
%       boxes:   single, [x1 y1 x2 y2]
%       feat:    []
%       class:   uint8, class index for gt rows, 0 for proposals

% change selective search order from [y1 x1 y2 x2] to [x1 y1 x2 y2]
if ~isempty(boxes)
    boxes = boxes(:, [2 1 4 3]);
    if flip
        boxes(:, [1, 3]) = voc_rec.imgsize(1) + 1 - boxes(:, [3, 1]);
    end
end
num_boxes = size(boxes, 1);

if ~isfield(voc_rec, 'objects')
    % no annotation: the record contains proposals only
    gt_boxes = [];
    gt_classes = [];
    num_gt_boxes = 0;
    all_boxes = boxes;
else
    if exclude_difficult_samples
        valid_objects = ~cat(1, voc_rec.objects(:).difficult);
    else
        valid_objects = 1:length(voc_rec.objects(:));
    end
    kept_objects = voc_rec.objects(valid_objects);

    gt_boxes = cat(1, kept_objects.bbox);
    if flip
        gt_boxes(:, [1, 3]) = voc_rec.imgsize(1) + 1 - gt_boxes(:, [3, 1]);
    end
    num_gt_boxes = size(gt_boxes, 1);
    all_boxes = cat(1, gt_boxes, boxes);

    class_cells = class_to_id.values({kept_objects.class});
    gt_classes = cat(1, class_cells{:});
end

% per class, keep the best overlap of every row with any gt box of that class
overlap = zeros(num_gt_boxes+num_boxes, class_to_id.Count, 'single');
for g = 1:num_gt_boxes
    cls_col = gt_classes(g);
    overlap(:, cls_col) = max(overlap(:, cls_col), boxoverlap(all_boxes, gt_boxes(g, :)));
end

rec.gt = cat(1, true(num_gt_boxes, 1), false(num_boxes, 1));
rec.overlap = overlap;
rec.boxes = single(all_boxes);
rec.feat = [];
rec.class = uint8(cat(1, gt_classes, zeros(num_boxes, 1)));

% ------------------------------------------------------------------------
function regions = load_proposals(proposal_file, regions)
% ------------------------------------------------------------------------
% Load a proposal file and merge it into the proposals loaded so far.
%
%   proposal_file  MAT-file providing 'images' and 'boxes'
%   regions        [] on the first call, otherwise the struct returned by a
%                  previous call
%
%   On the first call the loaded struct is returned unchanged. On later calls
%   the image lists must match element by element; the boxes of each image
%   are then stacked (converted to double) below the existing ones.

if isempty(regions)
    regions = load(proposal_file);
    return;
end

extra = load(proposal_file);

same_name = cellfun(@strcmp, regions.images(:), extra.images(:), 'UniformOutput', true);
if ~all(same_name)
    error('roidb_from_ilsvrc: %s is has different images list with other proposals.\n', proposal_file);
end

stack_boxes = @(old_boxes, new_boxes) [double(old_boxes); double(new_boxes)];
regions.boxes = cellfun(stack_boxes, regions.boxes(:), extra.boxes(:), 'UniformOutput', false);

exist(caffe_path, 'dir') == 0 22 | error('matcaffe is missing from external/caffe/matlab; See README.md'); 23 | end 24 | addpath(genpath(caffe_path)); 25 | 26 | mkdir_if_missing(fullfile(curdir, 'imdb', 'cache')); 27 | 28 | mkdir_if_missing(fullfile(curdir, 'output')); 29 | 30 | mkdir_if_missing(fullfile(curdir, 'models')); 31 | 32 | fprintf('fast_rcnn startup done\n'); 33 | end 34 | -------------------------------------------------------------------------------- /utils/RectLTRB2LTWH.m: -------------------------------------------------------------------------------- 1 | function [ rectsLTWH ] = RectLTRB2LTWH( rectsLTRB ) 2 | %rects (l, t, r, b) to (l, t, w, h) 3 | 4 | rectsLTWH = [rectsLTRB(:, 1), rectsLTRB(:, 2), rectsLTRB(:, 3)-rectsLTRB(:,1)+1, rectsLTRB(:,4)-rectsLTRB(:,2)+1]; 5 | end 6 | 7 | -------------------------------------------------------------------------------- /utils/RectLTWH2LTRB.m: -------------------------------------------------------------------------------- 1 | function [ rectsLTRB ] = RectLTWH2LTRB(rectsLTWH) 2 | %rects (l, t, w, h) to (l, t, r, b) 3 | 4 | rectsLTRB = [rectsLTWH(:, 1), rectsLTWH(:, 2), rectsLTWH(:, 1)+rectsLTWH(:,3)-1, rectsLTWH(:,2)+rectsLTWH(:,4)-1]; 5 | end 6 | 7 | -------------------------------------------------------------------------------- /utils/active_caffe_mex.m: -------------------------------------------------------------------------------- 1 | function active_caffe_mex(gpu_id, caffe_version) 2 | % active_caffe_mex(gpu_id, caffe_version) 3 | % -------------------------------------------------------- 4 | % Faster R-CNN 5 | % Copyright (c) 2015, Shaoqing Ren 6 | % Licensed under The MIT License [see LICENSE for details] 7 | % -------------------------------------------------------- 8 | 9 | % set gpu in matlab 10 | gpuDevice(gpu_id); 11 | 12 | if ~exist('caffe_version', 'var') || isempty(caffe_version) 13 | caffe_version = 'caffe'; 14 | end 15 | cur_dir = pwd; 16 | caffe_dir = fullfile(pwd, 'external', 'caffe',
'matlab', caffe_version); 17 | 18 | if ~exist(caffe_dir, 'dir') 19 | warning('Specified caffe folder (%s) is not exist, change to default one (%s)', ... 20 | caffe_dir, fullfile(pwd, 'external', 'caffe', 'matlab')); 21 | caffe_dir = fullfile(pwd, 'external', 'caffe', 'matlab'); 22 | end 23 | 24 | addpath(genpath(caffe_dir)); 25 | cd(caffe_dir); 26 | caffe.set_device(gpu_id-1); 27 | cd(cur_dir); 28 | end 29 | -------------------------------------------------------------------------------- /utils/auto_select_gpu.m: -------------------------------------------------------------------------------- 1 | function gpu_id = auto_select_gpu() 2 | % gpu_id = auto_select_gpu() 3 | % Select the gpu which has the maximum free memory 4 | % -------------------------------------------------------- 5 | % Faster R-CNN 6 | % Copyright (c) 2015, Shaoqing Ren 7 | % Licensed under The MIT License [see LICENSE for details] 8 | % -------------------------------------------------------- 9 | 10 | % deselects all GPU devices 11 | gpuDevice([]); 12 | 13 | maxFreeMemory = 0; 14 | for i = 1:gpuDeviceCount 15 | g = gpuDevice(i); 16 | freeMemory = g.FreeMemory(); 17 | fprintf('GPU %d: free memory %d\n', i, freeMemory); 18 | if freeMemory > maxFreeMemory 19 | maxFreeMemory = freeMemory; 20 | gpu_id = i; 21 | end 22 | end 23 | fprintf('Use GPU %d\n', gpu_id); 24 | 25 | % deselects all GPU devices 26 | gpuDevice([]); 27 | end 28 | -------------------------------------------------------------------------------- /utils/boxoverlap.m: -------------------------------------------------------------------------------- 1 | function o = boxoverlap(a, b) 2 | % Compute the symmetric intersection over union overlap between a set of 3 | % bounding boxes in a and a single bounding box in b. 
4 | % 5 | % a a matrix where each row specifies a bounding box 6 | % b a matrix where each row specifies a bounding box 7 | 8 | % AUTORIGHTS 9 | % ------------------------------------------------------- 10 | % Copyright (C) 2011-2012 Ross Girshick 11 | % Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick 12 | % 13 | % This file is part of the voc-releaseX code 14 | % (http://people.cs.uchicago.edu/~rbg/latent/) 15 | % and is available under the terms of an MIT-like license 16 | % provided in COPYING. Please retain this notice and 17 | % COPYING if you use this file (or a portion of it) in 18 | % your project. 19 | % ------------------------------------------------------- 20 | 21 | o = cell(1, size(b, 1)); 22 | for i = 1:size(b, 1) 23 | x1 = max(a(:,1), b(i,1)); 24 | y1 = max(a(:,2), b(i,2)); 25 | x2 = min(a(:,3), b(i,3)); 26 | y2 = min(a(:,4), b(i,4)); 27 | 28 | w = x2-x1+1; 29 | h = y2-y1+1; 30 | inter = w.*h; 31 | aarea = (a(:,3)-a(:,1)+1) .* (a(:,4)-a(:,2)+1); 32 | barea = (b(i,3)-b(i,1)+1) * (b(i,4)-b(i,2)+1); 33 | % intersection over union overlap 34 | o{i} = inter ./ (aarea+barea-inter); 35 | % set invalid entries to 0 overlap 36 | o{i}(w <= 0) = 0; 37 | o{i}(h <= 0) = 0; 38 | end 39 | 40 | o = cell2mat(o); 41 | -------------------------------------------------------------------------------- /utils/im_list_to_blob.m: -------------------------------------------------------------------------------- 1 | function blob = im_list_to_blob(ims) 2 | max_shape = max(cell2mat(cellfun(@size, ims(:), 'UniformOutput', false)), [], 1); 3 | assert(all(cellfun(@(x) size(x, 3), ims, 'UniformOutput', true) == 3)); 4 | num_images = length(ims); 5 | blob = zeros(max_shape(1), max_shape(2), 3, num_images, 'single'); 6 | 7 | for i = 1:length(ims) 8 | im = ims{i}; 9 | blob(1:size(im, 1), 1:size(im, 2), :, i) = im; 10 | end 11 | end -------------------------------------------------------------------------------- /utils/mkdir_if_missing.m: 
-------------------------------------------------------------------------------- 1 | function made = mkdir_if_missing(path) 2 | made = false; 3 | if exist(path, 'dir') == 0 4 | mkdir(path); 5 | made = true; 6 | end 7 | -------------------------------------------------------------------------------- /utils/parse_rst.m: -------------------------------------------------------------------------------- 1 | function results = parse_rst(results, rst) 2 | % results = parse_rst(results, rst) 3 | % -------------------------------------------------------- 4 | % Faster R-CNN 5 | % Copyright (c) 2015, Shaoqing Ren 6 | % Licensed under The MIT License [see LICENSE for details] 7 | % -------------------------------------------------------- 8 | 9 | if isempty(results) 10 | for i = 1:length(rst) 11 | results.(rst(i).blob_name).data = []; 12 | end 13 | end 14 | 15 | for i = 1:length(rst) 16 | results.(rst(i).blob_name).data = [results.(rst(i).blob_name).data; rst(i).data(:)]; 17 | end 18 | end -------------------------------------------------------------------------------- /utils/prep_im_for_blob.m: -------------------------------------------------------------------------------- 1 | function [im, im_scale] = prep_im_for_blob(im, im_means, target_size, max_size) 2 | im = single(im); 3 | 4 | if ~isa(im, 'gpuArray') 5 | try 6 | im = bsxfun(@minus, im, im_means); 7 | catch 8 | im_means = imresize(im_means, [size(im, 1), size(im, 2)], 'bilinear', 'antialiasing', false); 9 | im = bsxfun(@minus, im, im_means); 10 | end 11 | im_scale = prep_im_for_blob_size(size(im), target_size, max_size); 12 | 13 | target_size = round([size(im, 1), size(im, 2)] * im_scale); 14 | im = imresize(im, target_size, 'bilinear', 'antialiasing', false); 15 | else 16 | % for im as gpuArray 17 | try 18 | im = bsxfun(@minus, im, im_means); 19 | catch 20 | im_means_scale = max(double(size(im, 1)) / size(im_means, 1), double(size(im, 2)) / size(im_means, 2)); 21 | im_means = imresize(im_means, im_means_scale); 22 | 
y_start = floor((size(im_means, 1) - size(im, 1)) / 2) + 1; 23 | x_start = floor((size(im_means, 2) - size(im, 2)) / 2) + 1; 24 | im_means = im_means(y_start:(y_start+size(im, 1)-1), x_start:(x_start+size(im, 2)-1), :); % center crop; trailing ':' keeps every channel of the mean image 25 | im = bsxfun(@minus, im, im_means); 26 | end 27 | 28 | im_scale = prep_im_for_blob_size(size(im), target_size, max_size); 29 | im = imresize(im, im_scale); 30 | end 31 | end -------------------------------------------------------------------------------- /utils/prep_im_for_blob_size.m: -------------------------------------------------------------------------------- 1 | function im_scale = prep_im_for_blob_size(im_size, target_size, max_size) 2 | 3 | im_size_min = min(im_size(1:2)); 4 | im_size_max = max(im_size(1:2)); 5 | im_scale = double(target_size) / im_size_min; 6 | 7 | % Prevent the biggest axis from being more than MAX_SIZE 8 | if round(im_scale * im_size_max) > max_size 9 | im_scale = double(max_size) / double(im_size_max); 10 | end 11 | end -------------------------------------------------------------------------------- /utils/procid.m: -------------------------------------------------------------------------------- 1 | function s = procid() 2 | % Returns a string identifying the process. 3 | 4 | % AUTORIGHTS 5 | % ------------------------------------------------------- 6 | % Copyright (C) 2009-2012 Ross Girshick 7 | % 8 | % This file is part of the voc-releaseX code 9 | % (http://people.cs.uchicago.edu/~rbg/latent/) 10 | % and is available under the terms of an MIT-like license 11 | % provided in COPYING. Please retain this notice and 12 | % COPYING if you use this file (or a portion of it) in 13 | % your project.
14 | % ------------------------------------------------------- 15 | 16 | d = pwd(); 17 | i = strfind(d, filesep); 18 | d = d(i(end)+1:end); 19 | s = d; 20 | -------------------------------------------------------------------------------- /utils/seed_rand.m: -------------------------------------------------------------------------------- 1 | function prev_rng = seed_rand(seed) 2 | % seed_rand - Set random number generator to a fixed seed. 3 | % prev_rng = seed_rand(seed) 4 | % 5 | % Strategic use ensures that results are reproducible. 6 | % 7 | % To restore the previous rng after calling this do: 8 | % rng(prev_rng); 9 | 10 | % AUTORIGHTS 11 | % --------------------------------------------------------- 12 | % Copyright (c) 2014, Ross Girshick 13 | % 14 | % This file is part of the R-CNN code and is available 15 | % under the terms of the Simplified BSD License provided in 16 | % LICENSE. Please retain this notice and LICENSE if you use 17 | % this file (or any portion of it) in your project. 18 | % --------------------------------------------------------- 19 | 20 | if nargin < 1 21 | % This value works best for me. 22 | seed = 3; 23 | % Just kidding, of course ;-). 24 | end 25 | 26 | prev_rng = rng; 27 | rng(seed, 'twister') 28 | -------------------------------------------------------------------------------- /utils/showboxes.m: -------------------------------------------------------------------------------- 1 | function showboxes(im, boxes, legends, color_conf) 2 | % Draw bounding boxes on top of an image. 
3 | % showboxes(im, boxes) 4 | % 5 | % ------------------------------------------------------- 6 | 7 | fix_width = 800; 8 | if isa(im, 'gpuArray') 9 | im = gather(im); 10 | end 11 | imsz = size(im); 12 | scale = fix_width / imsz(2); 13 | im = imresize(im, scale); 14 | 15 | if size(boxes{1}, 2) >= 5 16 | boxes = cellfun(@(x) [x(:, 1:4) * scale, x(:, 5)], boxes, 'UniformOutput', false); 17 | else 18 | boxes = cellfun(@(x) x(:, 1:4) * scale, boxes, 'UniformOutput', false); 19 | end 20 | 21 | if ~exist('color_conf', 'var') 22 | color_conf = 'default'; 23 | end 24 | 25 | image(im); 26 | axis image; 27 | axis off; 28 | set(gcf, 'Color', 'white'); 29 | 30 | valid_boxes = cellfun(@(x) ~isempty(x), boxes, 'UniformOutput', true); 31 | valid_boxes_num = sum(valid_boxes); 32 | 33 | if valid_boxes_num > 0 34 | switch color_conf 35 | case 'default' 36 | colors_candidate = colormap('hsv'); 37 | colors_candidate = colors_candidate(1:(floor(size(colors_candidate, 1)/valid_boxes_num)):end, :); 38 | colors_candidate = mat2cell(colors_candidate, ones(size(colors_candidate, 1), 1))'; 39 | colors = cell(size(valid_boxes)); 40 | colors(valid_boxes) = colors_candidate(1:sum(valid_boxes)); 41 | case 'voc' 42 | colors_candidate = colormap('hsv'); 43 | colors_candidate = colors_candidate(1:(floor(size(colors_candidate, 1)/20)):end, :); 44 | colors_candidate = mat2cell(colors_candidate, ones(size(colors_candidate, 1), 1))'; 45 | colors = colors_candidate; 46 | end 47 | 48 | 49 | for i = 1:length(boxes) 50 | if isempty(boxes{i}) 51 | continue; 52 | end 53 | 54 | for j = 1:size(boxes{i}) 55 | box = boxes{i}(j, 1:4); 56 | if size(boxes{i}, 2) >= 5 57 | score = boxes{i}(j, end); 58 | linewidth = 2 + min(max(score, 0), 1) * 2; 59 | rectangle('Position', RectLTRB2LTWH(box), 'LineWidth', linewidth, 'EdgeColor', colors{i}); 60 | label = sprintf('%s : %.3f', legends{i}, score); 61 | text(double(box(1))+2, double(box(2)), label, 'BackgroundColor', 'w'); 62 | else 63 | linewidth = 2; 64 | 
rectangle('Position', RectLTRB2LTWH(box), 'LineWidth', linewidth, 'EdgeColor', colors{i}); 65 | label = sprintf('%s(%d)', legends{i}, i); 66 | text(double(box(1))+2, double(box(2)), label, 'BackgroundColor', 'w'); 67 | end 68 | end 69 | 70 | end 71 | end 72 | end 73 | 74 | function [ rectsLTWH ] = RectLTRB2LTWH( rectsLTRB ) 75 | %rects (l, t, r, b) to (l, t, w, h), one rectangle per row 76 | 77 | rectsLTWH = [rectsLTRB(:, 1), rectsLTRB(:, 2), rectsLTRB(:, 3)-rectsLTRB(:,1)+1, rectsLTRB(:,4)-rectsLTRB(:,2)+1]; 78 | end 79 | 80 | -------------------------------------------------------------------------------- /utils/subsample_images.m: -------------------------------------------------------------------------------- 1 | function [imdbs, roidbs] = subsample_images(imdbs, roidbs, max_num_neg_images, seed) 2 | 3 | if ~exist('seed', 'var') 4 | seed = 6; 5 | end 6 | 7 | % class_num = cellfun(@(x) length(x.class_ids), imdbs, 'UniformOutput', true); 8 | % assert(length(unique(class_num)) == 1); 9 | % class_num = unique(class_num); 10 | 11 | rois = cellfun(@(x) x.rois(:), roidbs, 'UniformOutput', false); 12 | rois_combine = cell2mat(rois(:)); 13 | 14 | % fix the random seed for repeatability 15 | prev_rng = seed_rand(seed); 16 | inds = randperm(length(rois_combine), max_num_neg_images); 17 | inds = sort(inds); 18 | 19 | img_idx_start = 1; 20 | for i = 1:length(imdbs) 21 | imdb_img_num = length(imdbs{i}.image_ids); 22 | img_idx_end = img_idx_start + imdb_img_num - 1; 23 | inds_start = find(inds >= img_idx_start, 1, 'first'); 24 | inds_end = find(inds <= img_idx_end, 1, 'last'); 25 | 26 | inds_sub = inds(inds_start:inds_end); 27 | inds_sub = inds_sub - img_idx_start + 1; 28 | 29 | imdbs{i}.image_ids = imdbs{i}.image_ids(inds_sub); 30 | imdbs{i}.sizes = imdbs{i}.sizes(inds_sub, :); 31 | if isfield(imdbs{i}, 'image_dir') 32 | imdbs{i}.image_at = @(x) ... 33 | sprintf('%s/%s.%s', imdbs{i}.image_dir, imdbs{i}.image_ids{x}, imdbs{i}.extension); 34 | else 35 | imdbs{i}.image_at = @(x) ...
36 | sprintf('%s/%s.%s', imdbs{i}.imagedir, imdbs{i}.image_ids{x}, imdbs{i}.extension); 37 | end 38 | roidbs{i}.rois = roidbs{i}.rois(inds_sub); 39 | 40 | img_idx_start = img_idx_start + imdb_img_num; 41 | end 42 | 43 | % restore previous rng 44 | rng(prev_rng); 45 | 46 | end -------------------------------------------------------------------------------- /utils/subsample_images_per_class.m: -------------------------------------------------------------------------------- 1 | function [imdbs, roidbs] = subsample_images_per_class(imdbs, roidbs, max_per_class_image_num, seed) 2 | 3 | if ~exist('seed', 'var') 4 | seed = 6; 5 | end 6 | 7 | class_num = cellfun(@(x) length(x.class_ids), imdbs, 'UniformOutput', true); 8 | assert(length(unique(class_num)) == 1); 9 | class_num = unique(class_num); 10 | 11 | rois = cellfun(@(x) x.rois, roidbs, 'UniformOutput', false); 12 | rois_combine = cell2mat(rois(:)); 13 | rois_combine_class = arrayfun(@(x) x.class, rois_combine, 'UniformOutput', false); 14 | 15 | %% select images with max_image_num 16 | 17 | % fix the random seed for repeatability 18 | prev_rng = seed_rand(seed); 19 | inds = cell(class_num, 1); 20 | rois_combine_length = length(rois_combine); 21 | valid_idxs = cell(class_num, 1); 22 | parfor i = 1:class_num 23 | valid_idxs{i} = cellfun(@(x) any(x == i), rois_combine_class, 'UniformOutput', false); 24 | valid_idxs{i} = cell2mat(valid_idxs{i}); 25 | end 26 | 27 | for i = 1:class_num 28 | valid_num = sum(valid_idxs{i}); 29 | 30 | num = min(valid_num, max_per_class_image_num); 31 | inds{i} = 1:rois_combine_length; 32 | inds{i} = inds{i}(valid_idxs{i}); 33 | inds{i} = inds{i}(randperm(length(inds{i}), num)); 34 | end 35 | 36 | inds = cell2mat(inds')'; 37 | inds = unique(inds); 38 | 39 | % restore previous rng 40 | rng(prev_rng); 41 | 42 | img_idx_start = 1; 43 | for i = 1:length(imdbs) 44 | imdb_img_num = length(imdbs{i}.image_ids); 45 | img_idx_end = img_idx_start + imdb_img_num - 1; 46 | inds_start = find(inds >= 
img_idx_start, 1, 'first'); 47 | inds_end = find(inds <= img_idx_end, 1, 'last'); 48 | 49 | inds_sub = inds(inds_start:inds_end); 50 | inds_sub = inds_sub - img_idx_start + 1; 51 | 52 | imdbs{i}.image_ids = imdbs{i}.image_ids(inds_sub); 53 | imdbs{i}.sizes = imdbs{i}.sizes(inds_sub, :); 54 | if isfield(imdbs{i}, 'image_dir') 55 | imdbs{i}.image_at = @(x) ... 56 | sprintf('%s/%s.%s', imdbs{i}.image_dir, imdbs{i}.image_ids{x}, imdbs{i}.extension); 57 | else 58 | imdbs{i}.image_at = @(x) ... 59 | sprintf('%s/%s.%s', imdbs{i}.imagedir, imdbs{i}.image_ids{x}, imdbs{i}.extension); 60 | end 61 | roidbs{i}.rois = roidbs{i}.rois(inds_sub); 62 | 63 | img_idx_start = img_idx_start + imdb_img_num; 64 | end 65 | 66 | 67 | -------------------------------------------------------------------------------- /utils/symbolic_link.m: -------------------------------------------------------------------------------- 1 | function symbolic_link(link, target) 2 | % symbolic_link(link, target) creates LINK as a link to the existing TARGET 3 | % -------------------------------------------------------- 4 | % Faster R-CNN 5 | % Copyright (c) 2015, Shaoqing Ren 6 | % Licensed under The MIT License [see LICENSE for details] 7 | % -------------------------------------------------------- 8 | 9 | if ispc() 10 | system(sprintf('mklink /J %s %s', link, target)); 11 | else 12 | system(sprintf('ln -s %s %s', target, link)); 13 | end 14 | 15 | end 16 | -------------------------------------------------------------------------------- /utils/tic_toc_print.m: -------------------------------------------------------------------------------- 1 | function tic_toc_print(fmt, varargin) 2 | % Print only after 1 second has passed since the last print. 3 | % Arguments are the same as for fprintf.
4 | 5 | % AUTORIGHTS 6 | % ------------------------------------------------------- 7 | % Copyright (C) 2009-2012 Ross Girshick 8 | % 9 | % This file is part of the voc-releaseX code 10 | % (http://people.cs.uchicago.edu/~rbg/latent/) 11 | % and is available under the terms of an MIT-like license 12 | % provided in COPYING. Please retain this notice and 13 | % COPYING if you use this file (or a portion of it) in 14 | % your project. 15 | % ------------------------------------------------------- 16 | 17 | persistent th; 18 | 19 | if isempty(th) 20 | th = tic(); 21 | end 22 | 23 | if toc(th) > 1 24 | fprintf(fmt, varargin{:}); 25 | drawnow; 26 | th = tic(); 27 | end 28 | -------------------------------------------------------------------------------- /utils/vis_label.m: -------------------------------------------------------------------------------- 1 | function vis_label(imdb, roidb) 2 | 3 | rois = roidb.rois; 4 | for iIM = 1:length(rois) 5 | im = imread(imdb.image_at(iIM)); 6 | boxes = arrayfun(@(x) rois(iIM).boxes(rois(iIM).class == x, :), 1:length(imdb.classes), 'UniformOutput', false); 7 | legends = imdb.classes; 8 | showboxes(im, boxes, legends); 9 | pause; 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /utils/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /utils/xVOChash_init.m: -------------------------------------------------------------------------------- 1 | function hash = xVOChash_init(strs) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | hsize=4999; 5 | hash.key=cell(hsize,1); 6 | 
hash.val=cell(hsize,1); 7 | 8 | for i=1:numel(strs) 9 | s=strs{i}; 10 | h=mod(str2double(s([4 6:end])),hsize)+1; 11 | j=numel(hash.key{h})+1; 12 | hash.key{h}{j}=strs{i}; 13 | hash.val{h}(j)=i; 14 | end 15 | 16 | -------------------------------------------------------------------------------- /utils/xVOChash_lookup.m: -------------------------------------------------------------------------------- 1 | function ind = xVOChash_lookup(hash,s) 2 | % From the PASCAL VOC 2011 devkit; returns the value(s) stored for key s, empty if absent 3 | 4 | hsize=numel(hash.key); 5 | h=mod(str2double(s([4 6:end])),hsize)+1; 6 | ind=hash.val{h}(strcmp(s,hash.key{h})); 7 | --------------------------------------------------------------------------------