├── .gitattributes
├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── experiments
    ├── +Dataset
    │   ├── private
    │   │   ├── voc0712_devkit.m
    │   │   ├── voc2007_devkit.m
    │   │   └── voc2012_devkit.m
    │   ├── voc0712_trainval.m
    │   ├── voc0712_trainval_ss.m
    │   ├── voc0712plus_trainval.m
    │   ├── voc0712plus_trainval_ss.m
    │   ├── voc2007_test.m
    │   ├── voc2007_test_ss.m
    │   ├── voc2007_trainval.m
    │   ├── voc2007_trainval_ss.m
    │   ├── voc2012_test.m
    │   └── voc2012_trainval.m
    ├── +Faster_RCNN_Train
    │   ├── do_fast_rcnn_test.m
    │   ├── do_fast_rcnn_train.m
    │   ├── do_proposal_test.m
    │   ├── do_proposal_train.m
    │   ├── gather_rpn_fast_rcnn_models.m
    │   └── set_cache_folder.m
    ├── +Model
    │   ├── VGG16_for_Fast_RCNN_VOC0712.m
    │   ├── VGG16_for_Fast_RCNN_VOC2007.m
    │   ├── VGG16_for_Faster_RCNN_VOC0712.m
    │   ├── VGG16_for_Faster_RCNN_VOC0712plus.m
    │   ├── VGG16_for_Faster_RCNN_VOC2007.m
    │   ├── VGG16_for_Faster_RCNN_VOC2012.m
    │   ├── ZF_for_Fast_RCNN_VOC0712.m
    │   ├── ZF_for_Fast_RCNN_VOC2007.m
    │   ├── ZF_for_Faster_RCNN_VOC0712.m
    │   └── ZF_for_Faster_RCNN_VOC2007.m
    ├── script_fast_rcnn_VOC0712_VGG16.m
    ├── script_fast_rcnn_VOC0712_ZF.m
    ├── script_fast_rcnn_VOC2007_VGG16.m
    ├── script_fast_rcnn_VOC2007_ZF.m
    ├── script_faster_rcnn_VOC0712_VGG16.m
    ├── script_faster_rcnn_VOC0712_ZF.m
    ├── script_faster_rcnn_VOC0712plus_VGG16.m
    ├── script_faster_rcnn_VOC2007_VGG16.m
    ├── script_faster_rcnn_VOC2007_ZF.m
    ├── script_faster_rcnn_VOC2012_VGG16.m
    └── script_faster_rcnn_demo.m
├── faster_rcnn_build.m
├── fetch_data
    ├── fetch_caffe_mex_windows_vs2013_cuda65.m
    ├── fetch_faster_rcnn_final_model.m
    ├── fetch_model_VGG16.m
    └── fetch_model_ZF.m
├── functions
    ├── fast_rcnn
    │   ├── fast_rcnn_bbox_transform.m
    │   ├── fast_rcnn_bbox_transform_inv.m
    │   ├── fast_rcnn_config.m
    │   ├── fast_rcnn_conv_feat_detect.m
    │   ├── fast_rcnn_generate_sliding_windows.m
    │   ├── fast_rcnn_get_minibatch.m
    │   ├── fast_rcnn_im_detect.m
    │   ├── fast_rcnn_map_im_rois_to_feat_rois.m
    │   ├── fast_rcnn_prepare_image_roidb.m
    │   ├── fast_rcnn_test.m
    │   └── fast_rcnn_train.m
    ├── nms
    │   ├── nms.m
    │   ├── nms_gpu_mex.cu
    │   ├── nms_mex.cpp
    │   ├── nms_multiclass.m
    │   ├── nms_multiclass_mex.cpp
    │   └── nvmex.m
    └── rpn
    │   ├── proposal_calc_output_size.m
    │   ├── proposal_config.m
    │   ├── proposal_generate_anchors.m
    │   ├── proposal_generate_minibatch.m
    │   ├── proposal_im_detect.m
    │   ├── proposal_locate_anchors.m
    │   ├── proposal_prepare_image_roidb.m
    │   ├── proposal_test.m
    │   ├── proposal_train.m
    │   └── proposal_visual_debug.m
├── imdb
    ├── get_voc_opts.m
    ├── imdb_eval_voc.m
    ├── imdb_from_voc.m
    ├── roidb_from_proposal.m
    └── roidb_from_voc.m
├── startup.m
└── utils
    ├── RectLTRB2LTWH.m
    ├── RectLTWH2LTRB.m
    ├── active_caffe_mex.m
    ├── auto_select_gpu.m
    ├── boxoverlap.m
    ├── im_list_to_blob.m
    ├── mkdir_if_missing.m
    ├── parse_rst.m
    ├── prep_im_for_blob.m
    ├── prep_im_for_blob_size.m
    ├── procid.m
    ├── seed_rand.m
    ├── showboxes.m
    ├── subsample_images.m
    ├── subsample_images_per_class.m
    ├── symbolic_link.m
    ├── tic_toc_print.m
    ├── vis_label.m
    ├── xVOCap.m
    ├── xVOChash_init.m
    └── xVOChash_lookup.m


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | 
 7 | # Standard to msysgit
 8 | *.doc	 diff=astextplain
 9 | *.DOC	 diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot  diff=astextplain
13 | *.DOT  diff=astextplain
14 | *.pdf  diff=astextplain
15 | *.PDF	 diff=astextplain
16 | *.rtf	 diff=astextplain
17 | *.RTF	 diff=astextplain
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Windows image file caches
 2 | Thumbs.db
 3 | ehthumbs.db
 4 | 
 5 | # Folder config file
 6 | Desktop.ini
 7 | 
 8 | # Recycle Bin used on file shares
 9 | $RECYCLE.BIN/
10 | 
11 | # User Ignore
12 | models/
13 | data/
14 | datasets/
15 | external/caffe/matlab
16 | output/
17 | cachedir/
18 | imdb/cache
19 | *.caffemodel
20 | *.mat
21 | 
22 | # Windows Installer files
23 | *.cab
24 | *.msi
25 | *.msm
26 | *.msp
27 | 
28 | # Windows shortcuts
29 | *.lnk
30 | 
31 | # =========================
32 | # Operating System Files
33 | # =========================
34 | 
35 | # OSX
36 | # =========================
37 | 
38 | .DS_Store
39 | .AppleDouble
40 | .LSOverride
41 | 
42 | # Thumbnails
43 | ._*
44 | 
45 | # Files that might appear on external disk
46 | .Spotlight-V100
47 | .Trashes
48 | 
49 | # Directories potentially created on remote AFP share
50 | .AppleDB
51 | .AppleDesktop
52 | Network Trash Folder
53 | Temporary Items
54 | .apdisk
55 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "external/caffe"]
2 | 	path = external/caffe
3 | 	url = https://github.com/ShaoqingRen/caffe.git
4 | 	branch = faster-R-CNN
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Faster R-CNN
 2 | 
 3 | The MIT License (MIT)
 4 | 
 5 | Copyright (c) 2015 Microsoft Corporation
 6 | 
 7 | Permission is hereby granted, free of charge, to any person obtaining a copy
 8 | of this software and associated documentation files (the "Software"), to deal
 9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be included in
15 | all copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | THE SOFTWARE.
24 | 
25 | ************************************************************************
26 | 
27 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION
28 | 
29 | This project, Faster R-CNN, incorporates material from the project(s) listed below (collectively, "Third Party Code").  Microsoft is not the original author of the Third Party Code.  The original copyright notice and license under which Microsoft received such Third Party Code are set out below. This Third Party Code is licensed to you under their original license terms set forth below.  Microsoft reserves all other rights not expressly granted, whether by implication, estoppel or otherwise.
30 |  
31 | 1.	Caffe, version 0.9, (https://github.com/BVLC/caffe/)
32 | 
33 | COPYRIGHT
34 | 
35 | All contributions by the University of California:
36 | Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
37 | All rights reserved.
38 | 
39 | All other contributions:
40 | Copyright (c) 2014, 2015, the respective contributors
41 | All rights reserved.
42 | 
43 | Caffe uses a shared copyright model: each contributor holds copyright over their contributions to Caffe. The project versioning records all such contribution and copyright details. If a contributor wants to further mark their specific copyright on a particular contribution, they should indicate their copyright solely in the commit message of the change when it is committed.
44 | 
45 | The BSD 2-Clause License
46 | 
47 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
48 | 
49 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
50 | 
51 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
52 | 
53 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 | 
55 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/experiments/+Dataset/private/voc0712_devkit.m:
--------------------------------------------------------------------------------
1 | function path = voc0712_devkit()
2 | % VOC0712_DEVKIT  Return the relative devkit path './datasets/VOCdevkit0712'.
3 | %   Takes no arguments; edit the literal below to point at another install.
4 |     devkit_root = './datasets/VOCdevkit0712';
5 |     path = devkit_root;
6 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/private/voc2007_devkit.m:
--------------------------------------------------------------------------------
1 | function path = voc2007_devkit()
2 | % VOC2007_DEVKIT  Return the relative devkit path './datasets/VOCdevkit2007'.
3 | %   Takes no arguments; edit the literal below to point at another install.
4 |     devkit_root = './datasets/VOCdevkit2007';
5 |     path = devkit_root;
6 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/private/voc2012_devkit.m:
--------------------------------------------------------------------------------
1 | function path = voc2012_devkit()
2 | % VOC2012_DEVKIT  Return the relative devkit path './datasets/VOCdevkit2012'.
3 | %   Takes no arguments; edit the literal below to point at another install.
4 |     devkit_root = './datasets/VOCdevkit2012';
5 |     path = devkit_root;
6 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/voc0712_trainval.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc0712_trainval(dataset, usage, use_flip)
 2 | % VOC0712_TRAINVAL  Attach the VOC 2007 + VOC 2012 trainval sets to DATASET.
 3 | %   usage = 'train' : sets dataset.imdb_train (cell array, one imdb per
 4 | %                     source, 2007 first) and dataset.roidb_train (cell array
 5 | %                     with the roidb each imdb builds via its own roidb_func).
 6 | %   usage = 'test'  : always raises an error; multi-source testing is not
 7 | %                     supported here.
 8 | %   Any other usage raises an error. use_flip is forwarded to imdb_from_voc.
 9 | 
10 | % devkit locations come from the private helpers; edit those to relocate
11 | root_2007 = voc2007_devkit();
12 | root_2012 = voc2012_devkit();
13 | 
14 | switch usage
15 |     case {'train'}
16 |         train_imdbs = { imdb_from_voc(root_2007, 'trainval', '2007', use_flip), ...
17 |                         imdb_from_voc(root_2012, 'trainval', '2012', use_flip) };
18 |         build_roidb = @(db) db.roidb_func(db);
19 |         dataset.imdb_train  = train_imdbs;
20 |         dataset.roidb_train = cellfun(build_roidb, train_imdbs, 'UniformOutput', false);
21 |     case {'test'}
22 |         error('only supports one source test currently');
23 |     otherwise
24 |         error('usage = ''train'' or ''test''');
25 | end
26 | 
27 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/voc0712_trainval_ss.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc0712_trainval_ss(dataset, usage, use_flip)
 2 | % VOC0712_TRAINVAL_SS  Attach VOC 2007 + VOC 2012 trainval, with selective search, to DATASET.
 3 | %   usage = 'train' : sets dataset.imdb_train (cell array, one imdb per
 4 | %                     source, 2007 first) and dataset.roidb_train (cell array
 5 | %                     of roidbs built by each imdb's roidb_func with
 6 | %                     'with_selective_search' set to true).
 7 | %   usage = 'test'  : always raises an error; multi-source testing is not
 8 | %                     supported here.
 9 | %   Any other usage raises an error. use_flip is forwarded to imdb_from_voc.
10 | 
11 | % devkit locations come from the private helpers; edit those to relocate
12 | root_2007 = voc2007_devkit();
13 | root_2012 = voc2012_devkit();
14 | 
15 | switch usage
16 |     case {'train'}
17 |         train_imdbs = { imdb_from_voc(root_2007, 'trainval', '2007', use_flip), ...
18 |                         imdb_from_voc(root_2012, 'trainval', '2012', use_flip) };
19 |         build_roidb = @(db) db.roidb_func(db, 'with_selective_search', true);
20 |         dataset.imdb_train  = train_imdbs;
21 |         dataset.roidb_train = cellfun(build_roidb, train_imdbs, 'UniformOutput', false);
22 |     case {'test'}
23 |         error('only supports one source test currently');
24 |     otherwise
25 |         error('usage = ''train'' or ''test''');
26 | end
27 | 
28 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/voc0712plus_trainval.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc0712plus_trainval(dataset, usage, use_flip)
 2 | % VOC0712PLUS_TRAINVAL  Attach VOC 2012 trainval + VOC 2007 trainval + VOC 2007 test to DATASET.
 3 | %   usage = 'train' : sets dataset.imdb_train (cell array of three imdbs in
 4 | %                     the order 2012 trainval, 2007 trainval, 2007 test) and
 5 | %                     dataset.roidb_train (cell array with the roidb each
 6 | %                     imdb builds via its own roidb_func).
 7 | %   usage = 'test'  : always raises an error; multi-source testing is not
 8 | %                     supported here.
 9 | %   Any other usage raises an error. use_flip is forwarded to imdb_from_voc.
10 | 
11 | % devkit locations come from the private helpers; edit those to relocate
12 | root_2007 = voc2007_devkit();
13 | root_2012 = voc2012_devkit();
14 | 
15 | switch usage
16 |     case {'train'}
17 |         train_imdbs = { imdb_from_voc(root_2012, 'trainval', '2012', use_flip), ...
18 |                         imdb_from_voc(root_2007, 'trainval', '2007', use_flip), ...
19 |                         imdb_from_voc(root_2007, 'test', '2007', use_flip) };
20 |         build_roidb = @(db) db.roidb_func(db);
21 |         dataset.imdb_train  = train_imdbs;
22 |         dataset.roidb_train = cellfun(build_roidb, train_imdbs, 'UniformOutput', false);
23 |     case {'test'}
24 |         error('only supports one source test currently');
25 |     otherwise
26 |         error('usage = ''train'' or ''test''');
27 | end
28 | 
29 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/voc0712plus_trainval_ss.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc0712plus_trainval_ss(dataset, usage, use_flip)
 2 | % VOC0712PLUS_TRAINVAL_SS  Attach VOC 2012 trainval + VOC 2007 trainval + VOC 2007 test,
 3 | % with selective search, to DATASET.
 4 | %   usage = 'train' : sets dataset.imdb_train (cell array of three imdbs in
 5 | %                     the order 2012 trainval, 2007 trainval, 2007 test) and
 6 | %                     dataset.roidb_train (cell array of roidbs built by each
 7 | %                     imdb's roidb_func with 'with_selective_search' true).
 8 | %   usage = 'test'  : always raises an error; multi-source testing is not
 9 | %                     supported here.
10 | %   Any other usage raises an error. use_flip is forwarded to imdb_from_voc.
11 | 
12 | % devkit locations come from the private helpers; edit those to relocate
13 | root_2007 = voc2007_devkit();
14 | root_2012 = voc2012_devkit();
15 | 
16 | switch usage
17 |     case {'train'}
18 |         train_imdbs = { imdb_from_voc(root_2012, 'trainval', '2012', use_flip), ...
19 |                         imdb_from_voc(root_2007, 'trainval', '2007', use_flip), ...
20 |                         imdb_from_voc(root_2007, 'test', '2007', use_flip) };
21 |         build_roidb = @(db) db.roidb_func(db, 'with_selective_search', true);
22 |         dataset.imdb_train  = train_imdbs;
23 |         dataset.roidb_train = cellfun(build_roidb, train_imdbs, 'UniformOutput', false);
24 |     case {'test'}
25 |         error('only supports one source test currently');
26 |     otherwise
27 |         error('usage = ''train'' or ''test''');
28 | end
29 | 
30 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/voc2007_test.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc2007_test(dataset, usage, use_flip)
 2 | % VOC2007_TEST  Attach the Pascal VOC 2007 test set to DATASET.
 3 | %   usage = 'train' : sets dataset.imdb_train (one-element cell holding the
 4 | %                     imdb) and dataset.roidb_train (cell with its roidb).
 5 | %   usage = 'test'  : sets dataset.imdb_test and dataset.roidb_test, assigned
 6 | %                     directly without a cell wrapper.
 7 | %   Any other usage raises an error. use_flip is forwarded to imdb_from_voc.
 8 | 
 9 | % devkit location comes from the private helper; edit it to relocate
10 | devkit_root = voc2007_devkit();
11 | 
12 | switch usage
13 |     case {'train'}
14 |         train_imdbs = { imdb_from_voc(devkit_root, 'test', '2007', use_flip) };
15 |         build_roidb = @(db) db.roidb_func(db);
16 |         dataset.imdb_train  = train_imdbs;
17 |         dataset.roidb_train = cellfun(build_roidb, train_imdbs, 'UniformOutput', false);
18 |     case {'test'}
19 |         test_imdb = imdb_from_voc(devkit_root, 'test', '2007', use_flip);
20 |         dataset.imdb_test  = test_imdb;
21 |         dataset.roidb_test = test_imdb.roidb_func(test_imdb);
22 |     otherwise
23 |         error('usage = ''train'' or ''test''');
24 | end
25 | 
26 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/voc2007_test_ss.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc2007_test_ss(dataset, usage, use_flip)
 2 | % VOC2007_TEST_SS  Attach the Pascal VOC 2007 test set, with selective search, to DATASET.
 3 | %   usage = 'train' : sets dataset.imdb_train (one-element cell holding the
 4 | %                     imdb) and dataset.roidb_train (cell with its roidb).
 5 | %   usage = 'test'  : sets dataset.imdb_test and dataset.roidb_test, assigned
 6 | %                     directly without a cell wrapper.
 7 | %   In both cases the roidb is built by the imdb's roidb_func with
 8 | %   'with_selective_search' set to true.
 9 | %   Any other usage raises an error. use_flip is forwarded to imdb_from_voc.
10 | 
11 | % devkit location comes from the private helper; edit it to relocate
12 | devkit_root = voc2007_devkit();
13 | 
14 | switch usage
15 |     case {'train'}
16 |         train_imdbs = { imdb_from_voc(devkit_root, 'test', '2007', use_flip) };
17 |         build_roidb = @(db) db.roidb_func(db, 'with_selective_search', true);
18 |         dataset.imdb_train  = train_imdbs;
19 |         dataset.roidb_train = cellfun(build_roidb, train_imdbs, 'UniformOutput', false);
20 |     case {'test'}
21 |         test_imdb = imdb_from_voc(devkit_root, 'test', '2007', use_flip);
22 |         dataset.imdb_test  = test_imdb;
23 |         dataset.roidb_test = test_imdb.roidb_func(test_imdb, 'with_selective_search', true);
24 |     otherwise
25 |         error('usage = ''train'' or ''test''');
26 | end
27 | 
28 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/voc2007_trainval.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc2007_trainval(dataset, usage, use_flip)
 2 | % VOC2007_TRAINVAL  Attach the Pascal VOC 2007 trainval set to DATASET.
 3 | %   usage = 'train' : sets dataset.imdb_train (one-element cell holding the
 4 | %                     imdb) and dataset.roidb_train (cell with its roidb).
 5 | %   usage = 'test'  : sets dataset.imdb_test and dataset.roidb_test, assigned
 6 | %                     directly without a cell wrapper.
 7 | %   Any other usage raises an error. use_flip is forwarded to imdb_from_voc.
 8 | 
 9 | % devkit location comes from the private helper; edit it to relocate
10 | devkit_root = voc2007_devkit();
11 | 
12 | switch usage
13 |     case {'train'}
14 |         train_imdbs = { imdb_from_voc(devkit_root, 'trainval', '2007', use_flip) };
15 |         build_roidb = @(db) db.roidb_func(db);
16 |         dataset.imdb_train  = train_imdbs;
17 |         dataset.roidb_train = cellfun(build_roidb, train_imdbs, 'UniformOutput', false);
18 |     case {'test'}
19 |         test_imdb = imdb_from_voc(devkit_root, 'trainval', '2007', use_flip);
20 |         dataset.imdb_test  = test_imdb;
21 |         dataset.roidb_test = test_imdb.roidb_func(test_imdb);
22 |     otherwise
23 |         error('usage = ''train'' or ''test''');
24 | end
25 | 
26 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/voc2007_trainval_ss.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc2007_trainval_ss(dataset, usage, use_flip)
 2 | % VOC2007_TRAINVAL_SS  Attach the Pascal VOC 2007 trainval set, with selective search, to DATASET.
 3 | %   usage = 'train' : sets dataset.imdb_train (one-element cell holding the
 4 | %                     imdb) and dataset.roidb_train (cell with its roidb).
 5 | %   usage = 'test'  : sets dataset.imdb_test and dataset.roidb_test, assigned
 6 | %                     directly without a cell wrapper.
 7 | %   In both cases the roidb is built by the imdb's roidb_func with
 8 | %   'with_selective_search' set to true.
 9 | %   Any other usage raises an error. use_flip is forwarded to imdb_from_voc.
10 | 
11 | % devkit location comes from the private helper; edit it to relocate
12 | devkit_root = voc2007_devkit();
13 | 
14 | switch usage
15 |     case {'train'}
16 |         train_imdbs = { imdb_from_voc(devkit_root, 'trainval', '2007', use_flip) };
17 |         build_roidb = @(db) db.roidb_func(db, 'with_selective_search', true);
18 |         dataset.imdb_train  = train_imdbs;
19 |         dataset.roidb_train = cellfun(build_roidb, train_imdbs, 'UniformOutput', false);
20 |     case {'test'}
21 |         test_imdb = imdb_from_voc(devkit_root, 'trainval', '2007', use_flip);
22 |         dataset.imdb_test  = test_imdb;
23 |         dataset.roidb_test = test_imdb.roidb_func(test_imdb, 'with_selective_search', true);
24 |     otherwise
25 |         error('usage = ''train'' or ''test''');
26 | end
27 | 
28 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/voc2012_test.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc2012_test(dataset, usage, use_flip)
 2 | % VOC2012_TEST  Attach the Pascal VOC 2012 test set to DATASET.
 3 | %   usage = 'train' : sets dataset.imdb_train (one-element cell holding the
 4 | %                     imdb) and dataset.roidb_train (cell with its roidb).
 5 | %   usage = 'test'  : sets dataset.imdb_test and dataset.roidb_test, assigned
 6 | %                     directly without a cell wrapper.
 7 | %   Any other usage raises an error. use_flip is forwarded to imdb_from_voc.
 8 | 
 9 | % devkit location comes from the private helper; edit it to relocate
10 | devkit_root = voc2012_devkit();
11 | 
12 | switch usage
13 |     case {'train'}
14 |         train_imdbs = { imdb_from_voc(devkit_root, 'test', '2012', use_flip) };
15 |         build_roidb = @(db) db.roidb_func(db);
16 |         dataset.imdb_train  = train_imdbs;
17 |         dataset.roidb_train = cellfun(build_roidb, train_imdbs, 'UniformOutput', false);
18 |     case {'test'}
19 |         test_imdb = imdb_from_voc(devkit_root, 'test', '2012', use_flip);
20 |         dataset.imdb_test  = test_imdb;
21 |         dataset.roidb_test = test_imdb.roidb_func(test_imdb);
22 |     otherwise
23 |         error('usage = ''train'' or ''test''');
24 | end
25 | 
26 | end


--------------------------------------------------------------------------------
/experiments/+Dataset/voc2012_trainval.m:
--------------------------------------------------------------------------------
 1 | function dataset = voc2012_trainval(dataset, usage, use_flip)
 2 | % VOC2012_TRAINVAL  Attach the Pascal VOC 2012 trainval set to DATASET.
 3 | %   usage = 'train' : sets dataset.imdb_train (one-element cell holding the
 4 | %                     imdb) and dataset.roidb_train (cell with its roidb).
 5 | %   usage = 'test'  : sets dataset.imdb_test and dataset.roidb_test, assigned
 6 | %                     directly without a cell wrapper.
 7 | %   Any other usage raises an error. use_flip is forwarded to imdb_from_voc.
 8 | 
 9 | % devkit location comes from the private helper; edit it to relocate
10 | devkit_root = voc2012_devkit();
11 | 
12 | switch usage
13 |     case {'train'}
14 |         train_imdbs = { imdb_from_voc(devkit_root, 'trainval', '2012', use_flip) };
15 |         build_roidb = @(db) db.roidb_func(db);
16 |         dataset.imdb_train  = train_imdbs;
17 |         dataset.roidb_train = cellfun(build_roidb, train_imdbs, 'UniformOutput', false);
18 |     case {'test'}
19 |         test_imdb = imdb_from_voc(devkit_root, 'trainval', '2012', use_flip);
20 |         dataset.imdb_test  = test_imdb;
21 |         dataset.roidb_test = test_imdb.roidb_func(test_imdb);
22 |     otherwise
23 |         error('usage = ''train'' or ''test''');
24 | end
25 | 
26 | end


--------------------------------------------------------------------------------
/experiments/+Faster_RCNN_Train/do_fast_rcnn_test.m:
--------------------------------------------------------------------------------
 1 | function mAP = do_fast_rcnn_test(conf, model_stage, imdb, roidb, ignore_cache)
 2 | % DO_FAST_RCNN_TEST  Run fast_rcnn_test for one model stage.
 3 | %   Forwards the stage's test_net_def_file, output_model_file and cache_name
 4 | %   (plus ignore_cache) to fast_rcnn_test and returns its result.
 5 | %   ignore_cache is optional and defaults to false when omitted.
 6 | 
 7 |     % the fifth argument is the only optional one
 8 |     if nargin < 5
 9 |         ignore_cache = false;
10 |     end
11 | 
12 |     test_opts = { ...
13 |         'net_def_file', model_stage.test_net_def_file, ...
14 |         'net_file',     model_stage.output_model_file, ...
15 |         'cache_name',   model_stage.cache_name, ...
16 |         'ignore_cache', ignore_cache };
17 | 
18 |     mAP = fast_rcnn_test(conf, imdb, roidb, test_opts{:});
19 | end
20 | 


--------------------------------------------------------------------------------
/experiments/+Faster_RCNN_Train/do_fast_rcnn_train.m:
--------------------------------------------------------------------------------
 1 | function model_stage = do_fast_rcnn_train(conf, dataset, model_stage, do_val)
 2 | % DO_FAST_RCNN_TRAIN  Train one Fast R-CNN stage and record the resulting model.
 3 | %   Calls fast_rcnn_train on dataset.imdb_train / dataset.roidb_train with the
 4 | %   stage's solver_def_file, init_net_file and cache_name, and stores the
 5 | %   returned value in model_stage.output_model_file.
 6 | %   When do_val is false, field-less structs are passed as the validation
 7 | %   imdb/roidb instead of dataset.imdb_test / dataset.roidb_test.
 8 | 
 9 |     if ~do_val
10 |         val_imdb  = struct();
11 |         val_roidb = struct();
12 |     else
13 |         val_imdb  = dataset.imdb_test;
14 |         val_roidb = dataset.roidb_test;
15 |     end
16 | 
17 |     train_opts = { ...
18 |         'do_val',          do_val, ...
19 |         'imdb_val',        val_imdb, ...
20 |         'roidb_val',       val_roidb, ...
21 |         'solver_def_file', model_stage.solver_def_file, ...
22 |         'net_file',        model_stage.init_net_file, ...
23 |         'cache_name',      model_stage.cache_name };
24 | 
25 |     model_stage.output_model_file = fast_rcnn_train(conf, dataset.imdb_train, dataset.roidb_train, train_opts{:});
26 | end


--------------------------------------------------------------------------------
/experiments/+Faster_RCNN_Train/do_proposal_test.m:
--------------------------------------------------------------------------------
 1 | function roidb_new = do_proposal_test(conf, model_stage, imdb, roidb)
 2 | % DO_PROPOSAL_TEST  Generate proposals with a trained proposal stage and build a roidb from them.
 3 | %   1. proposal_test is run with the stage's test_net_def_file,
 4 | %      output_model_file and cache_name.
 5 | %   2. The returned per-image boxes are filtered by boxes_filter using
 6 | %      model_stage.nms (per_nms_topN, nms_overlap_thres, after_nms_topN) and
 7 | %      conf.use_gpu.
 8 | %   3. roidb_from_proposal builds the returned roidb from imdb, roidb and the
 9 | %      filtered regions, with 'keep_raw_proposal' set to false.
10 | 
11 |     aboxes = proposal_test(conf, imdb, ...
12 |                  'net_def_file', model_stage.test_net_def_file, ...
13 |                  'net_file',     model_stage.output_model_file, ...
14 |                  'cache_name',   model_stage.cache_name);
15 | 
16 |     nms_opts = model_stage.nms;
17 |     aboxes   = boxes_filter(aboxes, nms_opts.per_nms_topN, nms_opts.nms_overlap_thres, ...
18 |                             nms_opts.after_nms_topN, conf.use_gpu);
19 | 
20 |     roidb_regions = make_roidb_regions(aboxes, imdb.image_ids);
21 | 
22 |     roidb_new = roidb_from_proposal(imdb, roidb, roidb_regions, ...
23 |                     'keep_raw_proposal', false);
24 | end
25 | 
26 | function aboxes = boxes_filter(aboxes, per_nms_topN, nms_overlap_thres, after_nms_topN, use_gpu)
27 | % Filter per-image boxes: truncate, run NMS, truncate again.
28 | %   aboxes            : cell array, one matrix per image with one box per row.
29 | %                       Truncation keeps the leading rows, so rows are
30 | %                       presumably sorted best-first - confirm in proposal_test.
31 | %   per_nms_topN      : rows kept per image before NMS; <= 0 disables.
32 | %   nms_overlap_thres : NMS threshold; NMS only runs for values in (0, 1).
33 | %   after_nms_topN    : rows kept per image after NMS; <= 0 disables.
34 | %   use_gpu           : true -> serial loop passing use_gpu to nms,
35 | %                       false -> parfor loop calling nms without it.
36 | %
37 | %   Row counts are taken with size(x, 1). length(x) is the largest dimension,
38 | %   so for a matrix with fewer rows than columns it exceeds the row count and
39 | %   the truncation would index past the last row.
40 | 
41 |     % to speed up nms
42 |     if per_nms_topN > 0
43 |         aboxes = cellfun(@(x) x(1:min(size(x, 1), per_nms_topN), :), aboxes, 'UniformOutput', false);
44 |     end
45 |     % do nms
46 |     if nms_overlap_thres > 0 && nms_overlap_thres < 1
47 |         if use_gpu
48 |             for i = 1:length(aboxes)
49 |                 aboxes{i} = aboxes{i}(nms(aboxes{i}, nms_overlap_thres, use_gpu), :);
50 |             end
51 |         else
52 |             parfor i = 1:length(aboxes)
53 |                 aboxes{i} = aboxes{i}(nms(aboxes{i}, nms_overlap_thres), :);
54 |             end
55 |         end
56 |     end
57 |     % report the mean number of boxes per image that survived NMS
58 |     aver_boxes_num = mean(cellfun(@(x) size(x, 1), aboxes, 'UniformOutput', true));
59 |     fprintf('aver_boxes_num = %d, select top %d\n', round(aver_boxes_num), after_nms_topN);
60 |     if after_nms_topN > 0
61 |         aboxes = cellfun(@(x) x(1:min(size(x, 1), after_nms_topN), :), aboxes, 'UniformOutput', false);
62 |     end
63 | end
64 | 
65 | function regions = make_roidb_regions(aboxes, images)
66 | % Bundle the per-image boxes and the image ids into the struct passed to
67 | % roidb_from_proposal (fields: boxes, images).
68 |     regions.boxes = aboxes;
69 |     regions.images = images;
70 | end
71 | 


--------------------------------------------------------------------------------
/experiments/+Faster_RCNN_Train/do_proposal_train.m:
--------------------------------------------------------------------------------
 1 | function model_stage = do_proposal_train(conf, dataset, model_stage, do_val)
 2 | % DO_PROPOSAL_TRAIN  Train one proposal stage and record the resulting model.
 3 | %   Calls proposal_train on dataset.imdb_train / dataset.roidb_train with the
 4 | %   stage's solver_def_file, init_net_file and cache_name, and stores the
 5 | %   returned value in model_stage.output_model_file.
 6 | %   When do_val is false, field-less structs are passed as the validation
 7 | %   imdb/roidb instead of dataset.imdb_test / dataset.roidb_test.
 8 | 
 9 |     if ~do_val
10 |         val_imdb  = struct();
11 |         val_roidb = struct();
12 |     else
13 |         val_imdb  = dataset.imdb_test;
14 |         val_roidb = dataset.roidb_test;
15 |     end
16 | 
17 |     train_opts = { ...
18 |         'do_val',          do_val, ...
19 |         'imdb_val',        val_imdb, ...
20 |         'roidb_val',       val_roidb, ...
21 |         'solver_def_file', model_stage.solver_def_file, ...
22 |         'net_file',        model_stage.init_net_file, ...
23 |         'cache_name',      model_stage.cache_name };
24 | 
25 |     model_stage.output_model_file = proposal_train(conf, dataset.imdb_train, dataset.roidb_train, train_opts{:});
26 | end
27 | 


--------------------------------------------------------------------------------
/experiments/+Faster_RCNN_Train/gather_rpn_fast_rcnn_models.m:
--------------------------------------------------------------------------------
  1 | function gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset)
  2 |     cachedir = fullfile(pwd, 'output', 'faster_rcnn_final', model.final_model.cache_name);
  3 |     mkdir_if_missing(cachedir);
  4 |     
  5 |     % find latest model for rpn and fast rcnn
  6 |     [rpn_test_net_def_file, rpn_output_model_file] = find_last_output_model_file(model.stage1_rpn, model.stage2_rpn);
  7 |     [fast_rcnn_test_net_def_file, fast_rcnn_output_model_file] = find_last_output_model_file(model.stage1_fast_rcnn, model.stage2_fast_rcnn);
  8 |     
  9 |     % check whether feature shared and find the indexs of shared layers
 10 |     [is_share_feature, last_shared_output_blob_name, shared_layer_names, shared_layer_idx] = ...
 11 |         check_proposal_fast_rcnn_model(rpn_test_net_def_file, rpn_output_model_file, ...
 12 |          fast_rcnn_test_net_def_file, fast_rcnn_output_model_file);
 13 |      
 14 |     proposal_detection_model.classes = dataset.imdb_test.classes;
 15 |     proposal_detection_model.image_means = conf_proposal.image_means;
 16 |     proposal_detection_model.conf_proposal = conf_proposal;
 17 |     proposal_detection_model.conf_detection = conf_fast_rcnn;
 18 |     
 19 |     % copy rpn and fast rcnn models into cachedir
 20 |     [~, test_net_proposal_name, test_net_proposal_ext] = fileparts(rpn_test_net_def_file);
 21 |     proposal_detection_model.proposal_net_def = ['proposal_', test_net_proposal_name, test_net_proposal_ext];
 22 |     [~, proposal_model_name, proposal_model_ext] = fileparts(rpn_output_model_file);
 23 |     proposal_detection_model.proposal_net = ['proposal_', proposal_model_name, proposal_model_ext];
 24 |     [~, test_net_fast_rcnn_name, test_net_fast_rcnn_ext] = fileparts(fast_rcnn_test_net_def_file);
 25 |     proposal_detection_model.detection_net_def = ['detection_', test_net_fast_rcnn_name, test_net_fast_rcnn_ext];
 26 |     [~, fast_rcnn_model_name, fast_rcnn_model_ext] = fileparts(fast_rcnn_output_model_file);
 27 |     proposal_detection_model.detection_net = ['detection_', fast_rcnn_model_name, fast_rcnn_model_ext];
 28 |      
 29 |     copyfile(rpn_test_net_def_file, fullfile(cachedir, proposal_detection_model.proposal_net_def));
 30 |     copyfile(rpn_output_model_file, fullfile(cachedir, proposal_detection_model.proposal_net));
 31 |     copyfile(fast_rcnn_test_net_def_file, fullfile(cachedir, proposal_detection_model.detection_net_def));
 32 |     copyfile(fast_rcnn_output_model_file, fullfile(cachedir, proposal_detection_model.detection_net));
 33 |     
 34 |     proposal_detection_model.is_share_feature = is_share_feature;
 35 |     if is_share_feature
 36 |         proposal_detection_model.last_shared_layer_idx = max(shared_layer_idx);
 37 |         proposal_detection_model.last_shared_layer_detection = ...
 38 |             shared_layer_names{proposal_detection_model.last_shared_layer_idx};
 39 |         proposal_detection_model.last_shared_output_blob_name = ...
 40 |             last_shared_output_blob_name;
 41 |         fprintf('please modify %s file for sharing conv layers with proposal model (delete layers until %s)\n', ...
 42 |             proposal_detection_model.detection_net_def, proposal_detection_model.last_shared_layer_detection);
 43 |     end
 44 |     
 45 |     save(fullfile(cachedir, 'model'), 'proposal_detection_model');
 46 | end
 47 | 
 48 | function [is_share_feature, last_shared_output_blob_name, shared_layer_names, shared_layer_idx] = check_proposal_fast_rcnn_model(proposal_model_net, proposal_model_bin, ...
 49 |         fast_rcnn_model_net, fast_rcnn_model_bin)
 50 | % Finds the leading run of layers that have the same name and identical
 51 | % parameter data in the proposal (RPN) net and the Fast R-CNN net.
 52 | % last_shared_output_blob_name is '' when no layer is shared.
 53 | 
 54 |     rpn_net = caffe.Net(proposal_model_net, 'test');
 55 |     rpn_net.copy_from(proposal_model_bin);
 56 |     fast_rcnn_net = caffe.Net(fast_rcnn_model_net, 'test');
 57 |     fast_rcnn_net.copy_from(fast_rcnn_model_bin);
 58 | 
 59 |     shared_layer_idx = [];
 60 |     shared_layer_names = {};
 61 |     % Assign this output up front: it used to stay undefined (making the call
 62 |     % fail with an unassigned-output error) when the very first layer differed.
 63 |     last_shared_output_blob_name = '';
 64 |     for i = 1:min(length(rpn_net.layer_names), length(fast_rcnn_net.layer_names))
 65 |        layer_name = rpn_net.layer_names{i};
 66 |        if ~strcmp(layer_name, fast_rcnn_net.layer_names{i})
 67 |            break;
 68 |        end
 69 | 
 70 |        rpn_layer = rpn_net.layers(layer_name);
 71 |        fast_rcnn_layer = fast_rcnn_net.layers(layer_name);
 72 | 
 73 |        % the layer is shared only if every compared parameter blob is equal
 74 |        share_layer = true;
 75 |        for j = 1:min(length(rpn_layer.params), length(fast_rcnn_layer.params))
 76 |            if ~isequal(rpn_net.params(layer_name, j).get_data(), fast_rcnn_net.params(layer_name, j).get_data())
 77 |                share_layer = false;
 78 |                break;
 79 |            end
 80 |        end
 81 |        if ~share_layer
 82 |            break;
 83 |        end
 84 | 
 85 |        shared_layer_idx(end+1) = i;
 86 |        shared_layer_names{end+1} = layer_name;
 87 |        last_shared_output_blob_name = rpn_net.blob_names{rpn_net.top_id_vecs{i}};
 88 |     end
 89 | 
 90 |     is_share_feature = ~isempty(shared_layer_idx);
 91 |     caffe.reset_all();
 92 | end
 93 | 
 94 | function [test_net_def_file, output_model_file] = find_last_output_model_file(stage1, stage2)
 95 | % Returns the trained model file and its test net definition from the
 96 | % latest stage that has one: stage2 is checked first, stage1 second.
 97 | % A stage qualifies when it has an 'output_model_file' field and
 98 | % exist(..., 'file') is nonzero for it. Errors when neither stage qualifies.
 99 |     candidates = {stage2, stage1};
100 |     for k = 1:numel(candidates)
101 |         stage = candidates{k};
102 |         has_model = isfield(stage, 'output_model_file') && exist(stage.output_model_file, 'file');
103 |         if has_model
104 |             output_model_file = stage.output_model_file;
105 |             test_net_def_file = stage.test_net_def_file;
106 |             return;
107 |         end
108 |     end
109 |     error('find_last_output_model_file:: no trained models');
110 | end


--------------------------------------------------------------------------------
/experiments/+Faster_RCNN_Train/set_cache_folder.m:
--------------------------------------------------------------------------------
 1 | function model = set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model)
 2 | % model = set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model)
 3 | % --------------------------------------------------------
 4 | % Faster R-CNN
 5 | % Copyright (c) 2015, Shaoqing Ren
 6 | % Licensed under The MIT License [see LICENSE for details]
 7 | % --------------------------------------------------------
 8 | % Sets the cache_name field of each training stage and of final_model.
 9 | % Names are built by concatenating the two cache bases, a tag derived from
10 | % an RPN stage's nms settings, and a per-stage suffix.
11 | 
12 |     stage1_tag = nms_tag(model.stage1_rpn.nms);
13 |     stage2_tag = nms_tag(model.stage2_rpn.nms);
14 | 
15 |     model.stage1_rpn.cache_name       = [cache_base_proposal, '_stage1_rpn'];
16 |     model.stage1_fast_rcnn.cache_name = [cache_base_proposal, stage1_tag, cache_base_fast_rcnn, '_stage1_fast_rcnn'];
17 |     model.stage2_rpn.cache_name       = [cache_base_proposal, stage2_tag, cache_base_fast_rcnn, '_stage2_rpn'];
18 |     model.stage2_fast_rcnn.cache_name = [cache_base_proposal, stage2_tag, cache_base_fast_rcnn, '_stage2_fast_rcnn'];
19 |     model.final_model.cache_name      = [cache_base_proposal, cache_base_fast_rcnn];
20 | end
21 | 
22 | function tag = nms_tag(nms)
23 | % Formats nms settings as '_top<per_nms_topN>_nms<thres>_top<after_nms_topN>',
24 | % with every '.' replaced by '_'.
25 |     tag = sprintf('_top%d_nms%g_top%d', nms.per_nms_topN, nms.nms_overlap_thres, nms.after_nms_topN);
26 |     tag = strrep(tag, '.', '_');
27 | end


--------------------------------------------------------------------------------
/experiments/+Model/VGG16_for_Fast_RCNN_VOC0712.m:
--------------------------------------------------------------------------------
 1 | function model = VGG16_for_Fast_RCNN_VOC0712(model)
 2 | % VGG 16layers (only finetuned from conv3_1)
 3 | % Sets the solver prototxt, test prototxt, pre-trained net file and mean
 4 | % image paths on MODEL; every path is rooted at fullfile(pwd, 'models').
 5 | 
 6 | models_root    = fullfile(pwd, 'models');
 7 | prototxt_dir   = fullfile(models_root, 'fast_rcnn_prototxts', 'vgg_16layers_conv3_1');
 8 | pretrained_dir = fullfile(models_root, 'pre_trained_models', 'vgg_16layers');
 9 | 
10 | model.solver_def_file   = fullfile(prototxt_dir, 'solver_30k60k.prototxt');
11 | model.test_net_def_file = fullfile(prototxt_dir, 'test.prototxt');
12 | 
13 | model.net_file          = fullfile(pretrained_dir, 'vgg16.caffemodel');
14 | model.mean_image        = fullfile(pretrained_dir, 'mean_image');
15 | 
16 | end


--------------------------------------------------------------------------------
/experiments/+Model/VGG16_for_Fast_RCNN_VOC2007.m:
--------------------------------------------------------------------------------
 1 | function model = VGG16_for_Fast_RCNN_VOC2007(model)
 2 | % VGG 16layers (only finetuned from conv3_1)
 3 | % Sets the solver prototxt, test prototxt, pre-trained net file and mean
 4 | % image paths on MODEL; every path is rooted at fullfile(pwd, 'models').
 5 | 
 6 | models_root    = fullfile(pwd, 'models');
 7 | prototxt_dir   = fullfile(models_root, 'fast_rcnn_prototxts', 'vgg_16layers_conv3_1');
 8 | pretrained_dir = fullfile(models_root, 'pre_trained_models', 'vgg_16layers');
 9 | 
10 | model.solver_def_file   = fullfile(prototxt_dir, 'solver_30k40k.prototxt');
11 | model.test_net_def_file = fullfile(prototxt_dir, 'test.prototxt');
12 | 
13 | model.net_file          = fullfile(pretrained_dir, 'vgg16.caffemodel');
14 | model.mean_image        = fullfile(pretrained_dir, 'mean_image');
15 | 
16 | end


--------------------------------------------------------------------------------
/experiments/+Model/VGG16_for_Faster_RCNN_VOC0712.m:
--------------------------------------------------------------------------------
 1 | function model = VGG16_for_Faster_RCNN_VOC0712(model)
 2 | % VGG 16layers (only finetuned from conv3_1)
 3 | % Fills MODEL with the prototxt / weight file paths and the NMS settings of
 4 | % the two RPN and two Fast R-CNN stages, plus the final-test NMS settings.
 5 | % Every path is rooted at fullfile(pwd, 'models').
 6 | 
 7 | models_root    = fullfile(pwd, 'models');
 8 | pretrained_dir = fullfile(models_root, 'pre_trained_models', 'vgg_16layers');
 9 | rpn_root       = fullfile(models_root, 'rpn_prototxts');
10 | fast_rcnn_root = fullfile(models_root, 'fast_rcnn_prototxts');
11 | 
12 | model.mean_image           = fullfile(pretrained_dir, 'mean_image');
13 | model.pre_trained_net_file = fullfile(pretrained_dir, 'vgg16.caffemodel');
14 | % Stride in input image pixels at the last conv layer
15 | model.feat_stride          = 16;
16 | 
17 | %% stage 1 rpn, inited from pre-trained network
18 | model.stage1_rpn.solver_def_file   = fullfile(rpn_root, 'vgg_16layers_conv3_1', 'solver_60k80k.prototxt');
19 | model.stage1_rpn.test_net_def_file = fullfile(rpn_root, 'vgg_16layers_conv3_1', 'test.prototxt');
20 | model.stage1_rpn.init_net_file     = model.pre_trained_net_file;
21 | % rpn test setting
22 | model.stage1_rpn.nms.per_nms_topN      = -1;
23 | model.stage1_rpn.nms.nms_overlap_thres = 0.7;
24 | model.stage1_rpn.nms.after_nms_topN    = 2000;
25 | 
26 | %% stage 1 fast rcnn, inited from pre-trained network
27 | model.stage1_fast_rcnn.solver_def_file   = fullfile(fast_rcnn_root, 'vgg_16layers_conv3_1', 'solver_30k60k.prototxt');
28 | model.stage1_fast_rcnn.test_net_def_file = fullfile(fast_rcnn_root, 'vgg_16layers_conv3_1', 'test.prototxt');
29 | model.stage1_fast_rcnn.init_net_file     = model.pre_trained_net_file;
30 | 
31 | %% stage 2 rpn, only finetune fc layers
32 | model.stage2_rpn.solver_def_file   = fullfile(rpn_root, 'vgg_16layers_fc6', 'solver_60k80k.prototxt');
33 | model.stage2_rpn.test_net_def_file = fullfile(rpn_root, 'vgg_16layers_fc6', 'test.prototxt');
34 | % rpn test setting
35 | model.stage2_rpn.nms.per_nms_topN      = -1;
36 | model.stage2_rpn.nms.nms_overlap_thres = 0.7;
37 | model.stage2_rpn.nms.after_nms_topN    = 2000;
38 | 
39 | %% stage 2 fast rcnn, only finetune fc layers
40 | model.stage2_fast_rcnn.solver_def_file   = fullfile(fast_rcnn_root, 'vgg_16layers_fc6', 'solver_30k60k.prototxt');
41 | model.stage2_fast_rcnn.test_net_def_file = fullfile(fast_rcnn_root, 'vgg_16layers_fc6', 'test.prototxt');
42 | 
43 | %% final test
44 | model.final_test.nms.per_nms_topN      = 6000; % to speed up nms
45 | model.final_test.nms.nms_overlap_thres = 0.7;
46 | model.final_test.nms.after_nms_topN    = 300;
47 | end


--------------------------------------------------------------------------------
/experiments/+Model/VGG16_for_Faster_RCNN_VOC0712plus.m:
--------------------------------------------------------------------------------
 1 | function model = VGG16_for_Faster_RCNN_VOC0712plus(model)
 2 | % VGG 16layers (only finetuned from conv3_1)
 3 | % Fills MODEL with the prototxt / weight file paths and the NMS settings of
 4 | % the two RPN and two Fast R-CNN stages, plus the final-test NMS settings.
 5 | % Every path is rooted at fullfile(pwd, 'models').
 6 | 
 7 | models_root    = fullfile(pwd, 'models');
 8 | pretrained_dir = fullfile(models_root, 'pre_trained_models', 'vgg_16layers');
 9 | rpn_root       = fullfile(models_root, 'rpn_prototxts');
10 | fast_rcnn_root = fullfile(models_root, 'fast_rcnn_prototxts');
11 | 
12 | model.mean_image           = fullfile(pretrained_dir, 'mean_image');
13 | model.pre_trained_net_file = fullfile(pretrained_dir, 'vgg16.caffemodel');
14 | % Stride in input image pixels at the last conv layer
15 | model.feat_stride          = 16;
16 | 
17 | %% stage 1 rpn, inited from pre-trained network
18 | model.stage1_rpn.solver_def_file   = fullfile(rpn_root, 'vgg_16layers_conv3_1', 'solver_60k80k.prototxt');
19 | model.stage1_rpn.test_net_def_file = fullfile(rpn_root, 'vgg_16layers_conv3_1', 'test.prototxt');
20 | model.stage1_rpn.init_net_file     = model.pre_trained_net_file;
21 | % rpn test setting
22 | model.stage1_rpn.nms.per_nms_topN      = -1;
23 | model.stage1_rpn.nms.nms_overlap_thres = 0.7;
24 | model.stage1_rpn.nms.after_nms_topN    = 2000;
25 | 
26 | %% stage 1 fast rcnn, inited from pre-trained network
27 | model.stage1_fast_rcnn.solver_def_file   = fullfile(fast_rcnn_root, 'vgg_16layers_conv3_1', 'solver_40k100k.prototxt');
28 | model.stage1_fast_rcnn.test_net_def_file = fullfile(fast_rcnn_root, 'vgg_16layers_conv3_1', 'test.prototxt');
29 | model.stage1_fast_rcnn.init_net_file     = model.pre_trained_net_file;
30 | 
31 | %% stage 2 rpn, only finetune fc layers
32 | model.stage2_rpn.solver_def_file   = fullfile(rpn_root, 'vgg_16layers_fc6', 'solver_60k80k.prototxt');
33 | model.stage2_rpn.test_net_def_file = fullfile(rpn_root, 'vgg_16layers_fc6', 'test.prototxt');
34 | % rpn test setting
35 | model.stage2_rpn.nms.per_nms_topN      = -1;
36 | model.stage2_rpn.nms.nms_overlap_thres = 0.7;
37 | model.stage2_rpn.nms.after_nms_topN    = 2000;
38 | 
39 | %% stage 2 fast rcnn, only finetune fc layers
40 | model.stage2_fast_rcnn.solver_def_file   = fullfile(fast_rcnn_root, 'vgg_16layers_fc6', 'solver_40k100k.prototxt');
41 | model.stage2_fast_rcnn.test_net_def_file = fullfile(fast_rcnn_root, 'vgg_16layers_fc6', 'test.prototxt');
42 | 
43 | %% final test
44 | model.final_test.nms.per_nms_topN      = 6000; % to speed up nms
45 | model.final_test.nms.nms_overlap_thres = 0.7;
46 | model.final_test.nms.after_nms_topN    = 300;
47 | end


--------------------------------------------------------------------------------
/experiments/+Model/VGG16_for_Faster_RCNN_VOC2007.m:
--------------------------------------------------------------------------------
 1 | function model = VGG16_for_Faster_RCNN_VOC2007(model)
 2 | % VGG 16layers (only finetuned from conv3_1)
 3 | % Fills MODEL with the prototxt / weight file paths and the NMS settings of
 4 | % the two RPN and two Fast R-CNN stages, plus the final-test NMS settings.
 5 | % Every path is rooted at fullfile(pwd, 'models').
 6 | 
 7 | models_root    = fullfile(pwd, 'models');
 8 | pretrained_dir = fullfile(models_root, 'pre_trained_models', 'vgg_16layers');
 9 | rpn_root       = fullfile(models_root, 'rpn_prototxts');
10 | fast_rcnn_root = fullfile(models_root, 'fast_rcnn_prototxts');
11 | 
12 | model.mean_image           = fullfile(pretrained_dir, 'mean_image');
13 | model.pre_trained_net_file = fullfile(pretrained_dir, 'vgg16.caffemodel');
14 | % Stride in input image pixels at the last conv layer
15 | model.feat_stride          = 16;
16 | 
17 | %% stage 1 rpn, inited from pre-trained network
18 | model.stage1_rpn.solver_def_file   = fullfile(rpn_root, 'vgg_16layers_conv3_1', 'solver_60k80k.prototxt');
19 | model.stage1_rpn.test_net_def_file = fullfile(rpn_root, 'vgg_16layers_conv3_1', 'test.prototxt');
20 | model.stage1_rpn.init_net_file     = model.pre_trained_net_file;
21 | % rpn test setting
22 | model.stage1_rpn.nms.per_nms_topN      = -1;
23 | model.stage1_rpn.nms.nms_overlap_thres = 0.7;
24 | model.stage1_rpn.nms.after_nms_topN    = 2000;
25 | 
26 | %% stage 1 fast rcnn, inited from pre-trained network
27 | model.stage1_fast_rcnn.solver_def_file   = fullfile(fast_rcnn_root, 'vgg_16layers_conv3_1', 'solver_30k40k.prototxt');
28 | model.stage1_fast_rcnn.test_net_def_file = fullfile(fast_rcnn_root, 'vgg_16layers_conv3_1', 'test.prototxt');
29 | model.stage1_fast_rcnn.init_net_file     = model.pre_trained_net_file;
30 | 
31 | %% stage 2 rpn, only finetune fc layers
32 | model.stage2_rpn.solver_def_file   = fullfile(rpn_root, 'vgg_16layers_fc6', 'solver_60k80k.prototxt');
33 | model.stage2_rpn.test_net_def_file = fullfile(rpn_root, 'vgg_16layers_fc6', 'test.prototxt');
34 | % rpn test setting
35 | model.stage2_rpn.nms.per_nms_topN      = -1;
36 | model.stage2_rpn.nms.nms_overlap_thres = 0.7;
37 | model.stage2_rpn.nms.after_nms_topN    = 2000;
38 | 
39 | %% stage 2 fast rcnn, only finetune fc layers
40 | model.stage2_fast_rcnn.solver_def_file   = fullfile(fast_rcnn_root, 'vgg_16layers_fc6', 'solver_30k40k.prototxt');
41 | model.stage2_fast_rcnn.test_net_def_file = fullfile(fast_rcnn_root, 'vgg_16layers_fc6', 'test.prototxt');
42 | 
43 | %% final test
44 | model.final_test.nms.per_nms_topN      = 6000; % to speed up nms
45 | model.final_test.nms.nms_overlap_thres = 0.7;
46 | model.final_test.nms.after_nms_topN    = 300;
47 | end


--------------------------------------------------------------------------------
/experiments/+Model/VGG16_for_Faster_RCNN_VOC2012.m:
--------------------------------------------------------------------------------
 1 | function model = VGG16_for_Faster_RCNN_VOC2012(model)
 2 | % VGG 16layers (only finetuned from conv3_1)
 3 | % Fills MODEL with the prototxt / weight file paths and the NMS settings of
 4 | % the two RPN and two Fast R-CNN stages, plus the final-test NMS settings.
 5 | % Every path is rooted at fullfile(pwd, 'models').
 6 | 
 7 | models_root    = fullfile(pwd, 'models');
 8 | pretrained_dir = fullfile(models_root, 'pre_trained_models', 'vgg_16layers');
 9 | rpn_root       = fullfile(models_root, 'rpn_prototxts');
10 | fast_rcnn_root = fullfile(models_root, 'fast_rcnn_prototxts');
11 | 
12 | model.mean_image           = fullfile(pretrained_dir, 'mean_image');
13 | model.pre_trained_net_file = fullfile(pretrained_dir, 'vgg16.caffemodel');
14 | % Stride in input image pixels at the last conv layer
15 | model.feat_stride          = 16;
16 | 
17 | %% stage 1 rpn, inited from pre-trained network
18 | model.stage1_rpn.solver_def_file   = fullfile(rpn_root, 'vgg_16layers_conv3_1', 'solver_60k80k.prototxt');
19 | model.stage1_rpn.test_net_def_file = fullfile(rpn_root, 'vgg_16layers_conv3_1', 'test.prototxt');
20 | model.stage1_rpn.init_net_file     = model.pre_trained_net_file;
21 | % rpn test setting
22 | model.stage1_rpn.nms.per_nms_topN      = -1;
23 | model.stage1_rpn.nms.nms_overlap_thres = 0.7;
24 | model.stage1_rpn.nms.after_nms_topN    = 2000;
25 | 
26 | %% stage 1 fast rcnn, inited from pre-trained network
27 | model.stage1_fast_rcnn.solver_def_file   = fullfile(fast_rcnn_root, 'vgg_16layers_conv3_1', 'solver_30k40k.prototxt');
28 | model.stage1_fast_rcnn.test_net_def_file = fullfile(fast_rcnn_root, 'vgg_16layers_conv3_1', 'test.prototxt');
29 | model.stage1_fast_rcnn.init_net_file     = model.pre_trained_net_file;
30 | 
31 | %% stage 2 rpn, only finetune fc layers
32 | model.stage2_rpn.solver_def_file   = fullfile(rpn_root, 'vgg_16layers_fc6', 'solver_60k80k.prototxt');
33 | model.stage2_rpn.test_net_def_file = fullfile(rpn_root, 'vgg_16layers_fc6', 'test.prototxt');
34 | % rpn test setting
35 | model.stage2_rpn.nms.per_nms_topN      = -1;
36 | model.stage2_rpn.nms.nms_overlap_thres = 0.7;
37 | model.stage2_rpn.nms.after_nms_topN    = 2000;
38 | 
39 | %% stage 2 fast rcnn, only finetune fc layers
40 | model.stage2_fast_rcnn.solver_def_file   = fullfile(fast_rcnn_root, 'vgg_16layers_fc6', 'solver_30k40k.prototxt');
41 | model.stage2_fast_rcnn.test_net_def_file = fullfile(fast_rcnn_root, 'vgg_16layers_fc6', 'test.prototxt');
42 | 
43 | %% final test
44 | model.final_test.nms.per_nms_topN      = 6000; % to speed up nms
45 | model.final_test.nms.nms_overlap_thres = 0.7;
46 | model.final_test.nms.after_nms_topN    = 300;
47 | end


--------------------------------------------------------------------------------
/experiments/+Model/ZF_for_Fast_RCNN_VOC0712.m:
--------------------------------------------------------------------------------
1 | function model = ZF_for_Fast_RCNN_VOC0712(model)
2 | 
3 | model.solver_def_file        = fullfile(pwd, 'models', 'fast_rcnn_prototxts', 'ZF', 'solver_30k60k.prototxt');
4 | model.test_net_def_file      = fullfile(pwd, 'models', 'fast_rcnn_prototxts', 'ZF', 'test.prototxt');
5 | 
6 | model.net_file               = fullfile(pwd, 'models', 'pre_trained_models', 'ZF', 'ZF.caffemodel');
7 | model.mean_image             = fullfile(pwd, 'models', 'pre_trained_models', 'ZF', 'mean_image');
8 | 
9 | end


--------------------------------------------------------------------------------
/experiments/+Model/ZF_for_Fast_RCNN_VOC2007.m:
--------------------------------------------------------------------------------
1 | function model = ZF_for_Fast_RCNN_VOC2007(model)
2 | 
3 | model.solver_def_file        = fullfile(pwd, 'models', 'fast_rcnn_prototxts', 'ZF', 'solver_30k40k.prototxt');
4 | model.test_net_def_file      = fullfile(pwd, 'models', 'fast_rcnn_prototxts', 'ZF', 'test.prototxt');
5 | 
6 | model.net_file               = fullfile(pwd, 'models', 'pre_trained_models', 'ZF', 'ZF.caffemodel');
7 | model.mean_image             = fullfile(pwd, 'models', 'pre_trained_models', 'ZF', 'mean_image');
8 | 
9 | end


--------------------------------------------------------------------------------
/experiments/+Model/ZF_for_Faster_RCNN_VOC0712.m:
--------------------------------------------------------------------------------
 1 | function model = ZF_for_Faster_RCNN_VOC0712(model)
 2 | % Fills MODEL with the ZF prototxt / weight file paths and the NMS settings
 3 | % of the two RPN and two Fast R-CNN stages, plus the final-test NMS
 4 | % settings. Every path is rooted at fullfile(pwd, 'models').
 5 | 
 6 | models_root    = fullfile(pwd, 'models');
 7 | pretrained_dir = fullfile(models_root, 'pre_trained_models', 'ZF');
 8 | rpn_root       = fullfile(models_root, 'rpn_prototxts');
 9 | fast_rcnn_root = fullfile(models_root, 'fast_rcnn_prototxts');
10 | 
11 | model.mean_image           = fullfile(pretrained_dir, 'mean_image');
12 | model.pre_trained_net_file = fullfile(pretrained_dir, 'ZF.caffemodel');
13 | % Stride in input image pixels at the last conv layer
14 | model.feat_stride          = 16;
15 | 
16 | %% stage 1 rpn, inited from pre-trained network
17 | model.stage1_rpn.solver_def_file   = fullfile(rpn_root, 'ZF', 'solver_60k80k.prototxt');
18 | model.stage1_rpn.test_net_def_file = fullfile(rpn_root, 'ZF', 'test.prototxt');
19 | model.stage1_rpn.init_net_file     = model.pre_trained_net_file;
20 | % rpn test setting
21 | model.stage1_rpn.nms.per_nms_topN      = -1;
22 | model.stage1_rpn.nms.nms_overlap_thres = 0.7;
23 | model.stage1_rpn.nms.after_nms_topN    = 2000;
24 | 
25 | %% stage 1 fast rcnn, inited from pre-trained network
26 | model.stage1_fast_rcnn.solver_def_file   = fullfile(fast_rcnn_root, 'ZF', 'solver_30k60k.prototxt');
27 | model.stage1_fast_rcnn.test_net_def_file = fullfile(fast_rcnn_root, 'ZF', 'test.prototxt');
28 | model.stage1_fast_rcnn.init_net_file     = model.pre_trained_net_file;
29 | 
30 | %% stage 2 rpn, only finetune fc layers
31 | model.stage2_rpn.solver_def_file   = fullfile(rpn_root, 'ZF_fc6', 'solver_60k80k.prototxt');
32 | model.stage2_rpn.test_net_def_file = fullfile(rpn_root, 'ZF_fc6', 'test.prototxt');
33 | % rpn test setting
34 | model.stage2_rpn.nms.per_nms_topN      = -1;
35 | model.stage2_rpn.nms.nms_overlap_thres = 0.7;
36 | model.stage2_rpn.nms.after_nms_topN    = 2000;
37 | 
38 | %% stage 2 fast rcnn, only finetune fc layers
39 | model.stage2_fast_rcnn.solver_def_file   = fullfile(fast_rcnn_root, 'ZF_fc6', 'solver_30k60k.prototxt');
40 | model.stage2_fast_rcnn.test_net_def_file = fullfile(fast_rcnn_root, 'ZF_fc6', 'test.prototxt');
41 | 
42 | %% final test
43 | model.final_test.nms.per_nms_topN      = 6000; % to speed up nms
44 | model.final_test.nms.nms_overlap_thres = 0.7;
45 | model.final_test.nms.after_nms_topN    = 300;
46 | end


--------------------------------------------------------------------------------
/experiments/+Model/ZF_for_Faster_RCNN_VOC2007.m:
--------------------------------------------------------------------------------
 1 | function model = ZF_for_Faster_RCNN_VOC2007(model)
 2 | % Fills MODEL with the ZF prototxt / weight file paths and the NMS settings
 3 | % of the two RPN and two Fast R-CNN stages, plus the final-test NMS
 4 | % settings. Every path is rooted at fullfile(pwd, 'models').
 5 | 
 6 | models_root    = fullfile(pwd, 'models');
 7 | pretrained_dir = fullfile(models_root, 'pre_trained_models', 'ZF');
 8 | rpn_root       = fullfile(models_root, 'rpn_prototxts');
 9 | fast_rcnn_root = fullfile(models_root, 'fast_rcnn_prototxts');
10 | 
11 | model.mean_image           = fullfile(pretrained_dir, 'mean_image');
12 | model.pre_trained_net_file = fullfile(pretrained_dir, 'ZF.caffemodel');
13 | % Stride in input image pixels at the last conv layer
14 | model.feat_stride          = 16;
15 | 
16 | %% stage 1 rpn, inited from pre-trained network
17 | model.stage1_rpn.solver_def_file   = fullfile(rpn_root, 'ZF', 'solver_60k80k.prototxt');
18 | model.stage1_rpn.test_net_def_file = fullfile(rpn_root, 'ZF', 'test.prototxt');
19 | model.stage1_rpn.init_net_file     = model.pre_trained_net_file;
20 | % rpn test setting
21 | model.stage1_rpn.nms.per_nms_topN      = -1;
22 | model.stage1_rpn.nms.nms_overlap_thres = 0.7;
23 | model.stage1_rpn.nms.after_nms_topN    = 2000;
24 | 
25 | %% stage 1 fast rcnn, inited from pre-trained network
26 | model.stage1_fast_rcnn.solver_def_file   = fullfile(fast_rcnn_root, 'ZF', 'solver_30k40k.prototxt');
27 | model.stage1_fast_rcnn.test_net_def_file = fullfile(fast_rcnn_root, 'ZF', 'test.prototxt');
28 | model.stage1_fast_rcnn.init_net_file     = model.pre_trained_net_file;
29 | 
30 | %% stage 2 rpn, only finetune fc layers
31 | model.stage2_rpn.solver_def_file   = fullfile(rpn_root, 'ZF_fc6', 'solver_60k80k.prototxt');
32 | model.stage2_rpn.test_net_def_file = fullfile(rpn_root, 'ZF_fc6', 'test.prototxt');
33 | % rpn test setting
34 | model.stage2_rpn.nms.per_nms_topN      = -1;
35 | model.stage2_rpn.nms.nms_overlap_thres = 0.7;
36 | model.stage2_rpn.nms.after_nms_topN    = 2000;
37 | 
38 | %% stage 2 fast rcnn, only finetune fc layers
39 | model.stage2_fast_rcnn.solver_def_file   = fullfile(fast_rcnn_root, 'ZF_fc6', 'solver_30k40k.prototxt');
40 | model.stage2_fast_rcnn.test_net_def_file = fullfile(fast_rcnn_root, 'ZF_fc6', 'test.prototxt');
41 | 
42 | %% final test
43 | model.final_test.nms.per_nms_topN      = 6000; % to speed up nms
44 | model.final_test.nms.nms_overlap_thres = 0.7;
45 | model.final_test.nms.after_nms_topN    = 300;
46 | end


--------------------------------------------------------------------------------
/experiments/script_fast_rcnn_VOC0712_VGG16.m:
--------------------------------------------------------------------------------
 1 | function script_fast_rcnn_VOC0712_VGG16()
 2 | % script_fast_rcnn_VOC0712_VGG16()
 3 | % Fast rcnn training and testing with VGG16 model
 4 | % --------------------------------------------------------
 5 | % Fast R-CNN
 6 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
 7 | % Copyright (c) 2015, Shaoqing Ren
 8 | % Licensed under The MIT License [see LICENSE for details]
 9 | % --------------------------------------------------------
10 | 
11 | clc;
12 | clear mex;
13 | clear is_valid_handle; % to clear init_key
14 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));
15 | %% -------------------- CONFIG --------------------
16 | opts.caffe_version = 'caffe_faster_rcnn';
17 | opts.gpu_id        = auto_select_gpu;
18 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
19 | 
20 | % model files, cache name and config (image means are taken from the model)
21 | model           = Model.VGG16_for_Fast_RCNN_VOC0712();
22 | opts.cache_name = 'fast_rcnn_VOC0712_VGG16';
23 | conf            = fast_rcnn_config('image_means', model.mean_image);
24 | 
25 | % train/test data
26 | dataset = Dataset.voc0712_trainval_ss([], 'train', conf.use_flipped);
27 | dataset = Dataset.voc2007_test_ss(dataset, 'test', false);
28 | 
29 | % do validation, or not
30 | opts.do_val = true;
31 | 
32 | %% -------------------- TRAINING --------------------
33 | % the test split is passed as the validation set
34 | train_options = { ...
35 |     'do_val',           opts.do_val, ...
36 |     'imdb_val',         dataset.imdb_test, ...
37 |     'roidb_val',        dataset.roidb_test, ...
38 |     'solver_def_file',  model.solver_def_file, ...
39 |     'net_file',         model.net_file, ...
40 |     'cache_name',       opts.cache_name};
41 | opts.fast_rcnn_model = fast_rcnn_train(conf, dataset.imdb_train, dataset.roidb_train, train_options{:});
42 | assert(exist(opts.fast_rcnn_model, 'file') ~= 0, 'not found trained model');
43 | 
44 | %% -------------------- TESTING --------------------
45 | test_options = { ...
46 |     'net_def_file',     model.test_net_def_file, ...
47 |     'net_file',         opts.fast_rcnn_model, ...
48 |     'cache_name',       opts.cache_name};
49 | fast_rcnn_test(conf, dataset.imdb_test, dataset.roidb_test, test_options{:});
50 | 
51 | end
52 | 


--------------------------------------------------------------------------------
/experiments/script_fast_rcnn_VOC0712_ZF.m:
--------------------------------------------------------------------------------
 1 | function script_fast_rcnn_VOC0712_ZF()
 2 | % script_fast_rcnn_VOC0712_ZF()
 3 | % Fast rcnn training and testing with Zeiler & Fergus model
 4 | % --------------------------------------------------------
 5 | % Fast R-CNN
 6 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
 7 | % Copyright (c) 2015, Shaoqing Ren
 8 | % Licensed under The MIT License [see LICENSE for details]
 9 | % --------------------------------------------------------
10 | 
11 | clc;
12 | clear mex;
13 | clear is_valid_handle; % to clear init_key
14 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));
15 | %% -------------------- CONFIG --------------------
16 | opts.caffe_version = 'caffe_faster_rcnn';
17 | opts.gpu_id        = auto_select_gpu;
18 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
19 | 
20 | % model files, cache name and config (image means are taken from the model)
21 | model           = Model.ZF_for_Fast_RCNN_VOC0712();
22 | opts.cache_name = 'fast_rcnn_VOC0712_ZF';
23 | conf            = fast_rcnn_config('image_means', model.mean_image);
24 | 
25 | % train/test data
26 | dataset = Dataset.voc0712_trainval_ss([], 'train', conf.use_flipped);
27 | dataset = Dataset.voc2007_test_ss(dataset, 'test', false);
28 | 
29 | % do validation, or not
30 | opts.do_val = true;
31 | 
32 | %% -------------------- TRAINING --------------------
33 | % the test split is passed as the validation set
34 | train_options = { ...
35 |     'do_val',           opts.do_val, ...
36 |     'imdb_val',         dataset.imdb_test, ...
37 |     'roidb_val',        dataset.roidb_test, ...
38 |     'solver_def_file',  model.solver_def_file, ...
39 |     'net_file',         model.net_file, ...
40 |     'cache_name',       opts.cache_name};
41 | opts.fast_rcnn_model = fast_rcnn_train(conf, dataset.imdb_train, dataset.roidb_train, train_options{:});
42 | assert(exist(opts.fast_rcnn_model, 'file') ~= 0, 'not found trained model');
43 | 
44 | %% -------------------- TESTING --------------------
45 | test_options = { ...
46 |     'net_def_file',     model.test_net_def_file, ...
47 |     'net_file',         opts.fast_rcnn_model, ...
48 |     'cache_name',       opts.cache_name};
49 | fast_rcnn_test(conf, dataset.imdb_test, dataset.roidb_test, test_options{:});
50 | 
51 | end
52 | 


--------------------------------------------------------------------------------
/experiments/script_fast_rcnn_VOC2007_VGG16.m:
--------------------------------------------------------------------------------
 1 | function script_fast_rcnn_VOC2007_VGG16()
 2 | % script_fast_rcnn_VOC2007_VGG16()
 3 | % Fast rcnn training and testing with VGG16 model
 4 | % --------------------------------------------------------
 5 | % Fast R-CNN
 6 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
 7 | % Copyright (c) 2015, Shaoqing Ren
 8 | % Licensed under The MIT License [see LICENSE for details]
 9 | % --------------------------------------------------------
10 | 
11 | clc;
12 | clear mex;
13 | clear is_valid_handle; % to clear init_key
14 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));
15 | %% -------------------- CONFIG --------------------
16 | opts.caffe_version = 'caffe_faster_rcnn';
17 | opts.gpu_id        = auto_select_gpu;
18 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
19 | 
20 | % model files, cache name and config (image means are taken from the model)
21 | model           = Model.VGG16_for_Fast_RCNN_VOC2007();
22 | opts.cache_name = 'fast_rcnn_VOC2007_VGG16';
23 | conf            = fast_rcnn_config('image_means', model.mean_image);
24 | 
25 | % train/test data
26 | dataset = Dataset.voc2007_trainval_ss([], 'train', conf.use_flipped);
27 | dataset = Dataset.voc2007_test_ss(dataset, 'test', false);
28 | 
29 | % do validation, or not
30 | opts.do_val = true;
31 | 
32 | %% -------------------- TRAINING --------------------
33 | % the test split is passed as the validation set
34 | train_options = { ...
35 |     'do_val',           opts.do_val, ...
36 |     'imdb_val',         dataset.imdb_test, ...
37 |     'roidb_val',        dataset.roidb_test, ...
38 |     'solver_def_file',  model.solver_def_file, ...
39 |     'net_file',         model.net_file, ...
40 |     'cache_name',       opts.cache_name};
41 | opts.fast_rcnn_model = fast_rcnn_train(conf, dataset.imdb_train, dataset.roidb_train, train_options{:});
42 | assert(exist(opts.fast_rcnn_model, 'file') ~= 0, 'not found trained model');
43 | 
44 | %% -------------------- TESTING --------------------
45 | test_options = { ...
46 |     'net_def_file',     model.test_net_def_file, ...
47 |     'net_file',         opts.fast_rcnn_model, ...
48 |     'cache_name',       opts.cache_name};
49 | fast_rcnn_test(conf, dataset.imdb_test, dataset.roidb_test, test_options{:});
50 | 
51 | end
52 | 


--------------------------------------------------------------------------------
/experiments/script_fast_rcnn_VOC2007_ZF.m:
--------------------------------------------------------------------------------
 1 | function script_fast_rcnn_VOC2007_ZF()
 2 | % script_fast_rcnn_VOC2007_ZF()
 3 | % Fast rcnn training (fast_rcnn_train) and testing (fast_rcnn_test) with Zeiler & Fergus model
 4 | % --------------------------------------------------------
 5 | % Fast R-CNN
 6 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
 7 | % Copyright (c) 2015, Shaoqing Ren
 8 | % Licensed under The MIT License [see LICENSE for details]
 9 | % --------------------------------------------------------
10 | 
11 | clc;
12 | clear mex;
13 | clear is_valid_handle; % to clear init_key
14 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); % 'startup' lives in the parent of this file's folder
15 | %% -------------------- CONFIG --------------------
16 | opts.caffe_version          = 'caffe_faster_rcnn';
17 | opts.gpu_id                 = auto_select_gpu;
18 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
19 | 
20 | % model
21 | model                       = Model.ZF_for_Fast_RCNN_VOC2007();
22 | % cache name (shared by the training and testing calls below)
23 | opts.cache_name             = 'fast_rcnn_VOC2007_ZF';
24 | % config, built with the model's mean image
25 | conf                        = fast_rcnn_config('image_means', model.mean_image);
26 | % train/test data: train set follows conf.use_flipped, test set is loaded with the flag set to false
27 | dataset                     = [];
28 | dataset                     = Dataset.voc2007_trainval_ss(dataset, 'train', conf.use_flipped);
29 | dataset                     = Dataset.voc2007_test_ss(dataset, 'test', false);
30 | 
31 | % do validation, or not (the test imdb/roidb are what gets passed as the validation set below)
32 | opts.do_val                 = true; 
33 | 
34 | %% -------------------- TRAINING --------------------
35 | % the value returned by fast_rcnn_train is treated as a file path: the assert below checks it exists
36 | opts.fast_rcnn_model        = fast_rcnn_train(conf, dataset.imdb_train, dataset.roidb_train, ...
37 |                                 'do_val',           opts.do_val, ...
38 |                                 'imdb_val',         dataset.imdb_test, ...
39 |                                 'roidb_val',        dataset.roidb_test, ...
40 |                                 'solver_def_file',  model.solver_def_file, ...
41 |                                 'net_file',         model.net_file, ...
42 |                                 'cache_name',       opts.cache_name);
43 | assert(exist(opts.fast_rcnn_model, 'file') ~= 0, 'not found trained model');
44 | 
45 |                                 
46 | %% -------------------- TESTING --------------------
47 |                               fast_rcnn_test(conf, dataset.imdb_test, dataset.roidb_test, ...
48 |                                     'net_def_file',     model.test_net_def_file, ...
49 |                                     'net_file',         opts.fast_rcnn_model, ...
50 |                                     'cache_name',       opts.cache_name);
51 | 
52 |                                 
53 | end
54 | 


--------------------------------------------------------------------------------
/experiments/script_faster_rcnn_VOC0712_VGG16.m:
--------------------------------------------------------------------------------
 1 | function script_faster_rcnn_VOC0712_VGG16()
 2 | % script_faster_rcnn_VOC0712_VGG16()
 3 | % Faster rcnn training and testing with VGG16 model: two rounds of (proposal train -> fast rcnn train), then a final test
 4 | % --------------------------------------------------------
 5 | % Faster R-CNN
 6 | % Copyright (c) 2015, Shaoqing Ren
 7 | % Licensed under The MIT License [see LICENSE for details]
 8 | % --------------------------------------------------------
 9 | 
10 | clc;
11 | clear mex;
12 | clear is_valid_handle; % to clear init_key
13 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); % 'startup' lives in the parent of this file's folder
14 | %% -------------------- CONFIG --------------------
15 | opts.caffe_version          = 'caffe_faster_rcnn';
16 | opts.gpu_id                 = auto_select_gpu;
17 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
18 | 
19 | % do validation, or not (this flag is forwarded to every *_train call below)
20 | opts.do_val                 = true; 
21 | % model
22 | model                       = Model.VGG16_for_Faster_RCNN_VOC0712;
23 | % cache base names consumed by Faster_RCNN_Train.set_cache_folder (the fast rcnn one is left empty here)
24 | cache_base_proposal         = 'faster_rcnn_VOC0712_vgg_16layers';
25 | cache_base_fast_rcnn        = '';
26 | % train/test data: use_flipped is passed for the train set, false for the test set
27 | dataset                     = [];
28 | use_flipped                 = true;
29 | dataset                     = Dataset.voc0712_trainval(dataset, 'train', use_flipped);
30 | dataset                     = Dataset.voc2007_test(dataset, 'test', false);
31 | 
32 | %% -------------------- TRAIN --------------------
33 | % conf: one config for the proposal stage and one for the fast rcnn stage, both built with the model's mean image
34 | conf_proposal               = proposal_config('image_means', model.mean_image, 'feat_stride', model.feat_stride);
35 | conf_fast_rcnn              = fast_rcnn_config('image_means', model.mean_image);
36 | % set cache folder for each stage
37 | model                       = Faster_RCNN_Train.set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model);
38 | % generate anchors and pre-calculate output size of rpn network (local helper at the end of this file)
39 | [conf_proposal.anchors, conf_proposal.output_width_map, conf_proposal.output_height_map] ...
40 |                             = proposal_prepare_anchors(conf_proposal, model.stage1_rpn.cache_name, model.stage1_rpn.test_net_def_file);
41 | 
42 | %%  stage one proposal
43 | fprintf('\n***************\nstage one proposal \n***************\n');
44 | % train
45 | model.stage1_rpn            = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage1_rpn, opts.do_val);
46 | % test: overwrite the train/test roidbs with what do_proposal_test returns (imdb_train/roidb_train are cell arrays, hence cellfun)
47 | dataset.roidb_train         = cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
48 | dataset.roidb_test       	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, dataset.imdb_test, dataset.roidb_test);
49 | 
50 | %%  stage one fast rcnn
51 | fprintf('\n***************\nstage one fast rcnn\n***************\n');
52 | % train
53 | model.stage1_fast_rcnn      = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage1_fast_rcnn, opts.do_val);
54 | % test: run the stage-one fast rcnn model on the test set; the result is stored in opts.mAP
55 | opts.mAP                    = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage1_fast_rcnn, dataset.imdb_test, dataset.roidb_test);
56 | 
57 | %%  stage two proposal
58 | % net proposal
59 | fprintf('\n***************\nstage two proposal\n***************\n');
60 | % train: initialised from the stage-one fast rcnn output model
61 | model.stage2_rpn.init_net_file = model.stage1_fast_rcnn.output_model_file;
62 | model.stage2_rpn            = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage2_rpn, opts.do_val);
63 | % test: refresh the train/test roidbs using the stage-two proposal model
64 | dataset.roidb_train        	= cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
65 | dataset.roidb_test         	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
66 | 
67 | %%  stage two fast rcnn
68 | fprintf('\n***************\nstage two fast rcnn\n***************\n');
69 | % train: also initialised from the stage-one fast rcnn output model (not from the stage-two rpn)
70 | model.stage2_fast_rcnn.init_net_file = model.stage1_fast_rcnn.output_model_file;
71 | model.stage2_fast_rcnn      = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage2_fast_rcnn, opts.do_val);
72 | 
73 | %% final test
74 | fprintf('\n***************\nfinal test\n***************\n');
75 | % switch the stage-two rpn to the final-test nms settings before regenerating the test roidb
76 | model.stage2_rpn.nms        = model.final_test.nms;
77 | dataset.roidb_test       	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
78 | opts.final_mAP              = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage2_fast_rcnn, dataset.imdb_test, dataset.roidb_test);
79 | 
80 | % save final models, for outside tester
81 | Faster_RCNN_Train.gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset);
82 | end
83 | 
84 | function [anchors, output_width_map, output_height_map] = proposal_prepare_anchors(conf, cache_name, test_net_def_file)
85 |     % Pre-compute the output map sizes for the test net, then generate anchors at scales 2^3, 2^4 and 2^5.
86 |     [output_width_map, output_height_map] = proposal_calc_output_size(conf, test_net_def_file);
87 |     anchor_scales = 2.^(3:5);
88 |     anchors = proposal_generate_anchors(cache_name, 'scales', anchor_scales);
89 | end


--------------------------------------------------------------------------------
/experiments/script_faster_rcnn_VOC0712_ZF.m:
--------------------------------------------------------------------------------
 1 | function script_faster_rcnn_VOC0712_ZF()
 2 | % script_faster_rcnn_VOC0712_ZF()
 3 | % Faster rcnn training and testing with Zeiler & Fergus model: two rounds of (proposal train -> fast rcnn train), then a final test
 4 | % --------------------------------------------------------
 5 | % Faster R-CNN
 6 | % Copyright (c) 2015, Shaoqing Ren
 7 | % Licensed under The MIT License [see LICENSE for details]
 8 | % --------------------------------------------------------
 9 | 
10 | clc;
11 | clear mex;
12 | clear is_valid_handle; % to clear init_key
13 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); % 'startup' lives in the parent of this file's folder
14 | %% -------------------- CONFIG --------------------
15 | opts.caffe_version          = 'caffe_faster_rcnn';
16 | opts.gpu_id                 = auto_select_gpu;
17 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
18 | 
19 | % do validation, or not (this flag is forwarded to every *_train call below)
20 | opts.do_val                 = true; 
21 | % model
22 | model                       = Model.ZF_for_Faster_RCNN_VOC0712;
23 | % cache base names consumed by Faster_RCNN_Train.set_cache_folder (the fast rcnn one is left empty here)
24 | cache_base_proposal         = 'faster_rcnn_VOC0712_ZF';
25 | cache_base_fast_rcnn        = '';
26 | % train/test data: use_flipped is passed for the train set, false for the test set
27 | dataset                     = [];
28 | use_flipped                 = true;
29 | dataset                     = Dataset.voc0712_trainval(dataset, 'train', use_flipped);
30 | dataset                     = Dataset.voc2007_test(dataset, 'test', false);
31 | 
32 | %% -------------------- TRAIN --------------------
33 | % conf: one config for the proposal stage and one for the fast rcnn stage, both built with the model's mean image
34 | conf_proposal               = proposal_config('image_means', model.mean_image, 'feat_stride', model.feat_stride);
35 | conf_fast_rcnn              = fast_rcnn_config('image_means', model.mean_image);
36 | % set cache folder for each stage
37 | model                       = Faster_RCNN_Train.set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model);
38 | % generate anchors and pre-calculate output size of rpn network (local helper at the end of this file)
39 | [conf_proposal.anchors, conf_proposal.output_width_map, conf_proposal.output_height_map] ...
40 |                             = proposal_prepare_anchors(conf_proposal, model.stage1_rpn.cache_name, model.stage1_rpn.test_net_def_file);
41 | 
42 | %%  stage one proposal
43 | fprintf('\n***************\nstage one proposal \n***************\n');
44 | % train
45 | model.stage1_rpn            = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage1_rpn, opts.do_val);
46 | % test: overwrite the train/test roidbs with what do_proposal_test returns (imdb_train/roidb_train are cell arrays, hence cellfun)
47 | dataset.roidb_train        	= cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
48 | dataset.roidb_test        	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, dataset.imdb_test, dataset.roidb_test);
49 | 
50 | %%  stage one fast rcnn
51 | fprintf('\n***************\nstage one fast rcnn\n***************\n');
52 | % train
53 | model.stage1_fast_rcnn      = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage1_fast_rcnn, opts.do_val);
54 | % test: run the stage-one fast rcnn model on the test set; the result is stored in opts.mAP
55 | opts.mAP                    = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage1_fast_rcnn, dataset.imdb_test, dataset.roidb_test);
56 | 
57 | %%  stage two proposal
58 | % net proposal
59 | fprintf('\n***************\nstage two proposal\n***************\n');
60 | % train: initialised from the stage-one fast rcnn output model
61 | model.stage2_rpn.init_net_file = model.stage1_fast_rcnn.output_model_file;
62 | model.stage2_rpn            = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage2_rpn, opts.do_val);
63 | % test: refresh the train/test roidbs using the stage-two proposal model
64 | dataset.roidb_train       	= cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
65 | dataset.roidb_test       	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
66 | 
67 | %%  stage two fast rcnn
68 | fprintf('\n***************\nstage two fast rcnn\n***************\n');
69 | % train: also initialised from the stage-one fast rcnn output model (not from the stage-two rpn)
70 | model.stage2_fast_rcnn.init_net_file = model.stage1_fast_rcnn.output_model_file;
71 | model.stage2_fast_rcnn      = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage2_fast_rcnn, opts.do_val);
72 | 
73 | %% final test
74 | fprintf('\n***************\nfinal test\n***************\n');
75 | % switch the stage-two rpn to the final-test nms settings before regenerating the test roidb
76 | model.stage2_rpn.nms        = model.final_test.nms;
77 | dataset.roidb_test       	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
78 | opts.final_mAP              = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage2_fast_rcnn, dataset.imdb_test, dataset.roidb_test);
79 | 
80 | % save final models, for outside tester
81 | Faster_RCNN_Train.gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset);
82 | end
83 | 
84 | function [anchors, output_width_map, output_height_map] = proposal_prepare_anchors(conf, cache_name, test_net_def_file)
85 |     % Pre-compute the output map sizes for the test net, then generate anchors at scales 2^3, 2^4 and 2^5.
86 |     [output_width_map, output_height_map] = proposal_calc_output_size(conf, test_net_def_file);
87 |     anchor_scales = 2.^(3:5);
88 |     anchors = proposal_generate_anchors(cache_name, 'scales', anchor_scales);
89 | end


--------------------------------------------------------------------------------
/experiments/script_faster_rcnn_VOC0712plus_VGG16.m:
--------------------------------------------------------------------------------
 1 | function script_faster_rcnn_VOC0712plus_VGG16()
 2 | % script_faster_rcnn_VOC0712plus_VGG16()
 3 | % Faster rcnn training and testing with VGG16 model: two rounds of (proposal train -> fast rcnn train), then a final test
 4 | % --------------------------------------------------------
 5 | % Faster R-CNN
 6 | % Copyright (c) 2015, Shaoqing Ren
 7 | % Licensed under The MIT License [see LICENSE for details]
 8 | % --------------------------------------------------------
 9 | 
10 | clc;
11 | clear mex;
12 | clear is_valid_handle; % to clear init_key
13 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); % 'startup' lives in the parent of this file's folder
14 | %% -------------------- CONFIG --------------------
15 | opts.caffe_version          = 'caffe_faster_rcnn';
16 | opts.gpu_id                 = auto_select_gpu;
17 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
18 | 
19 | % do validation, or not (this flag is forwarded to every *_train call below; disabled in this script)
20 | opts.do_val                 = false; 
21 | % model
22 | model                       = Model.VGG16_for_Faster_RCNN_VOC0712plus;
23 | % cache base names consumed by Faster_RCNN_Train.set_cache_folder (the fast rcnn one is left empty here)
24 | cache_base_proposal         = 'faster_rcnn_VOC0712plus_vgg_16layers';
25 | cache_base_fast_rcnn        = '';
26 | % train/test data: use_flipped is passed for the train set, false for the test set
27 | dataset                     = [];
28 | use_flipped                 = true;
29 | dataset                     = Dataset.voc0712plus_trainval(dataset, 'train', use_flipped);
30 | dataset                     = Dataset.voc2012_test(dataset, 'test', false);
31 | 
32 | %% -------------------- TRAIN --------------------
33 | % conf: one config for the proposal stage and one for the fast rcnn stage, both built with the model's mean image
34 | conf_proposal               = proposal_config('image_means', model.mean_image, 'feat_stride', model.feat_stride);
35 | conf_fast_rcnn              = fast_rcnn_config('image_means', model.mean_image);
36 | % set cache folder for each stage
37 | model                       = Faster_RCNN_Train.set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model);
38 | % generate anchors and pre-calculate output size of rpn network (local helper at the end of this file)
39 | [conf_proposal.anchors, conf_proposal.output_width_map, conf_proposal.output_height_map] ...
40 |                             = proposal_prepare_anchors(conf_proposal, model.stage1_rpn.cache_name, model.stage1_rpn.test_net_def_file);
41 | 
42 | %%  stage one proposal
43 | fprintf('\n***************\nstage one proposal \n***************\n');
44 | % train
45 | model.stage1_rpn            = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage1_rpn, opts.do_val);
46 | % test: overwrite the train/test roidbs with what do_proposal_test returns (imdb_train/roidb_train are cell arrays, hence cellfun)
47 | dataset.roidb_train         = cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
48 | dataset.roidb_test       	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, dataset.imdb_test, dataset.roidb_test);
49 | 
50 | %%  stage one fast rcnn
51 | fprintf('\n***************\nstage one fast rcnn\n***************\n');
52 | % train
53 | model.stage1_fast_rcnn      = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage1_fast_rcnn, opts.do_val);
54 | % test (the stage-one evaluation is disabled in this script; the call is kept commented out)
55 | % opts.mAP                    = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage1_fast_rcnn, dataset.imdb_test, dataset.roidb_test);
56 | 
57 | %%  stage two proposal
58 | % net proposal
59 | fprintf('\n***************\nstage two proposal\n***************\n');
60 | % train: initialised from the stage-one fast rcnn output model
61 | model.stage2_rpn.init_net_file = model.stage1_fast_rcnn.output_model_file;
62 | model.stage2_rpn            = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage2_rpn, opts.do_val);
63 | % test: refresh the train/test roidbs using the stage-two proposal model
64 | dataset.roidb_train        	= cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
65 | dataset.roidb_test         	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
66 | 
67 | %%  stage two fast rcnn
68 | fprintf('\n***************\nstage two fast rcnn\n***************\n');
69 | % train: also initialised from the stage-one fast rcnn output model (not from the stage-two rpn)
70 | model.stage2_fast_rcnn.init_net_file = model.stage1_fast_rcnn.output_model_file;
71 | model.stage2_fast_rcnn      = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage2_fast_rcnn, opts.do_val);
72 | 
73 | %% final test
74 | fprintf('\n***************\nfinal test\n***************\n');
75 | % switch the stage-two rpn to the final-test nms settings before regenerating the test roidb
76 | model.stage2_rpn.nms        = model.final_test.nms;
77 | dataset.roidb_test       	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
78 | opts.final_mAP              = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage2_fast_rcnn, dataset.imdb_test, dataset.roidb_test);
79 | 
80 | % save final models, for outside tester
81 | Faster_RCNN_Train.gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset);
82 | end
83 | 
84 | function [anchors, output_width_map, output_height_map] = proposal_prepare_anchors(conf, cache_name, test_net_def_file)
85 |     % Pre-compute the output map sizes for the test net, then generate anchors at scales 2^3, 2^4 and 2^5.
86 |     [output_width_map, output_height_map] = proposal_calc_output_size(conf, test_net_def_file);
87 |     anchor_scales = 2.^(3:5);
88 |     anchors = proposal_generate_anchors(cache_name, 'scales', anchor_scales);
89 | end


--------------------------------------------------------------------------------
/experiments/script_faster_rcnn_VOC2007_VGG16.m:
--------------------------------------------------------------------------------
 1 | function script_faster_rcnn_VOC2007_VGG16()
 2 | % script_faster_rcnn_VOC2007_VGG16()
 3 | % Faster rcnn training and testing with VGG16 model: two rounds of (proposal train -> fast rcnn train), then a final test
 4 | % --------------------------------------------------------
 5 | % Faster R-CNN
 6 | % Copyright (c) 2015, Shaoqing Ren
 7 | % Licensed under The MIT License [see LICENSE for details]
 8 | % --------------------------------------------------------
 9 | 
10 | clc;
11 | clear mex;
12 | clear is_valid_handle; % to clear init_key
13 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); % 'startup' lives in the parent of this file's folder
14 | %% -------------------- CONFIG --------------------
15 | opts.caffe_version          = 'caffe_faster_rcnn';
16 | opts.gpu_id                 = auto_select_gpu;
17 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
18 | 
19 | % do validation, or not (this flag is forwarded to every *_train call below)
20 | opts.do_val                 = true; 
21 | % model
22 | model                       = Model.VGG16_for_Faster_RCNN_VOC2007;
23 | % cache base names consumed by Faster_RCNN_Train.set_cache_folder (the fast rcnn one is left empty here)
24 | cache_base_proposal         = 'faster_rcnn_VOC2007_vgg_16layers';
25 | cache_base_fast_rcnn        = '';
26 | % train/test data: use_flipped is passed for the train set, false for the test set
27 | dataset                     = [];
28 | use_flipped                 = true;
29 | dataset                     = Dataset.voc2007_trainval(dataset, 'train', use_flipped);
30 | dataset                     = Dataset.voc2007_test(dataset, 'test', false);
31 | 
32 | %% -------------------- TRAIN --------------------
33 | % conf: one config for the proposal stage and one for the fast rcnn stage, both built with the model's mean image
34 | conf_proposal               = proposal_config('image_means', model.mean_image, 'feat_stride', model.feat_stride);
35 | conf_fast_rcnn              = fast_rcnn_config('image_means', model.mean_image);
36 | % set cache folder for each stage
37 | model                       = Faster_RCNN_Train.set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model);
38 | % generate anchors and pre-calculate output size of rpn network (local helper at the end of this file)
39 | [conf_proposal.anchors, conf_proposal.output_width_map, conf_proposal.output_height_map] ...
40 |                             = proposal_prepare_anchors(conf_proposal, model.stage1_rpn.cache_name, model.stage1_rpn.test_net_def_file);
41 | 
42 | %%  stage one proposal
43 | fprintf('\n***************\nstage one proposal \n***************\n');
44 | % train
45 | model.stage1_rpn            = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage1_rpn, opts.do_val);
46 | % test: overwrite the train/test roidbs with what do_proposal_test returns (imdb_train/roidb_train are cell arrays, hence cellfun)
47 | dataset.roidb_train         = cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
48 | dataset.roidb_test       	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, dataset.imdb_test, dataset.roidb_test);
49 | 
50 | %%  stage one fast rcnn
51 | fprintf('\n***************\nstage one fast rcnn\n***************\n');
52 | % train
53 | model.stage1_fast_rcnn      = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage1_fast_rcnn, opts.do_val);
54 | % test: run the stage-one fast rcnn model on the test set; the result is stored in opts.mAP
55 | opts.mAP                    = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage1_fast_rcnn, dataset.imdb_test, dataset.roidb_test);
56 | 
57 | %%  stage two proposal
58 | % net proposal
59 | fprintf('\n***************\nstage two proposal\n***************\n');
60 | % train: initialised from the stage-one fast rcnn output model
61 | model.stage2_rpn.init_net_file = model.stage1_fast_rcnn.output_model_file;
62 | model.stage2_rpn            = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage2_rpn, opts.do_val);
63 | % test: refresh the train/test roidbs using the stage-two proposal model
64 | dataset.roidb_train        	= cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
65 | dataset.roidb_test         	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
66 | 
67 | %%  stage two fast rcnn
68 | fprintf('\n***************\nstage two fast rcnn\n***************\n');
69 | % train: also initialised from the stage-one fast rcnn output model (not from the stage-two rpn)
70 | model.stage2_fast_rcnn.init_net_file = model.stage1_fast_rcnn.output_model_file;
71 | model.stage2_fast_rcnn      = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage2_fast_rcnn, opts.do_val);
72 | 
73 | %% final test
74 | fprintf('\n***************\nfinal test\n***************\n');
75 | % switch the stage-two rpn to the final-test nms settings before regenerating the test roidb
76 | model.stage2_rpn.nms        = model.final_test.nms;
77 | dataset.roidb_test       	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
78 | opts.final_mAP              = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage2_fast_rcnn, dataset.imdb_test, dataset.roidb_test);
79 | 
80 | % save final models, for outside tester
81 | Faster_RCNN_Train.gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset);
82 | end
83 | 
84 | function [anchors, output_width_map, output_height_map] = proposal_prepare_anchors(conf, cache_name, test_net_def_file)
85 |     % Pre-compute the output map sizes for the test net, then generate anchors at scales 2^3, 2^4 and 2^5.
86 |     [output_width_map, output_height_map] = proposal_calc_output_size(conf, test_net_def_file);
87 |     anchor_scales = 2.^(3:5);
88 |     anchors = proposal_generate_anchors(cache_name, 'scales', anchor_scales);
89 | end


--------------------------------------------------------------------------------
/experiments/script_faster_rcnn_VOC2007_ZF.m:
--------------------------------------------------------------------------------
 1 | function script_faster_rcnn_VOC2007_ZF()
 2 | % script_faster_rcnn_VOC2007_ZF()
 3 | % Faster rcnn training and testing with Zeiler & Fergus model: two rounds of (proposal train -> fast rcnn train), then a final test
 4 | % --------------------------------------------------------
 5 | % Faster R-CNN
 6 | % Copyright (c) 2015, Shaoqing Ren
 7 | % Licensed under The MIT License [see LICENSE for details]
 8 | % --------------------------------------------------------
 9 | 
10 | clc;
11 | clear mex;
12 | clear is_valid_handle; % to clear init_key
13 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); % 'startup' lives in the parent of this file's folder
14 | %% -------------------- CONFIG --------------------
15 | opts.caffe_version          = 'caffe_faster_rcnn';
16 | opts.gpu_id                 = auto_select_gpu;
17 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
18 | 
19 | % do validation, or not (this flag is forwarded to every *_train call below)
20 | opts.do_val                 = true; 
21 | % model
22 | model                       = Model.ZF_for_Faster_RCNN_VOC2007;
23 | % cache base names consumed by Faster_RCNN_Train.set_cache_folder (the fast rcnn one is left empty here)
24 | cache_base_proposal         = 'faster_rcnn_VOC2007_ZF';
25 | cache_base_fast_rcnn        = '';
26 | % train/test data: use_flipped is passed for the train set, false for the test set
27 | dataset                     = [];
28 | use_flipped                 = true;
29 | dataset                     = Dataset.voc2007_trainval(dataset, 'train', use_flipped);
30 | dataset                     = Dataset.voc2007_test(dataset, 'test', false);
31 | 
32 | %% -------------------- TRAIN --------------------
33 | % conf: one config for the proposal stage and one for the fast rcnn stage, both built with the model's mean image
34 | conf_proposal               = proposal_config('image_means', model.mean_image, 'feat_stride', model.feat_stride);
35 | conf_fast_rcnn              = fast_rcnn_config('image_means', model.mean_image);
36 | % set cache folder for each stage
37 | model                       = Faster_RCNN_Train.set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model);
38 | % generate anchors and pre-calculate output size of rpn network (local helper at the end of this file)
39 | [conf_proposal.anchors, conf_proposal.output_width_map, conf_proposal.output_height_map] ...
40 |                             = proposal_prepare_anchors(conf_proposal, model.stage1_rpn.cache_name, model.stage1_rpn.test_net_def_file);
41 | 
42 | %%  stage one proposal
43 | fprintf('\n***************\nstage one proposal \n***************\n');
44 | % train
45 | model.stage1_rpn            = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage1_rpn, opts.do_val);
46 | % test: overwrite the train/test roidbs with what do_proposal_test returns (imdb_train/roidb_train are cell arrays, hence cellfun)
47 | dataset.roidb_train        	= cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
48 | dataset.roidb_test        	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, dataset.imdb_test, dataset.roidb_test);
49 | 
50 | %%  stage one fast rcnn
51 | fprintf('\n***************\nstage one fast rcnn\n***************\n');
52 | % train
53 | model.stage1_fast_rcnn      = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage1_fast_rcnn, opts.do_val);
54 | % test: run the stage-one fast rcnn model on the test set; the result is stored in opts.mAP
55 | opts.mAP                    = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage1_fast_rcnn, dataset.imdb_test, dataset.roidb_test);
56 | 
57 | %%  stage two proposal
58 | % net proposal
59 | fprintf('\n***************\nstage two proposal\n***************\n');
60 | % train: initialised from the stage-one fast rcnn output model
61 | model.stage2_rpn.init_net_file = model.stage1_fast_rcnn.output_model_file;
62 | model.stage2_rpn            = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage2_rpn, opts.do_val);
63 | % test: refresh the train/test roidbs using the stage-two proposal model
64 | dataset.roidb_train       	= cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
65 | dataset.roidb_test       	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
66 | 
67 | %%  stage two fast rcnn
68 | fprintf('\n***************\nstage two fast rcnn\n***************\n');
69 | % train: also initialised from the stage-one fast rcnn output model (not from the stage-two rpn)
70 | model.stage2_fast_rcnn.init_net_file = model.stage1_fast_rcnn.output_model_file;
71 | model.stage2_fast_rcnn      = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage2_fast_rcnn, opts.do_val);
72 | 
73 | %% final test
74 | fprintf('\n***************\nfinal test\n***************\n');
75 | % switch the stage-two rpn to the final-test nms settings before regenerating the test roidb
76 | model.stage2_rpn.nms        = model.final_test.nms;
77 | dataset.roidb_test       	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
78 | opts.final_mAP              = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage2_fast_rcnn, dataset.imdb_test, dataset.roidb_test);
79 | 
80 | % save final models, for outside tester
81 | Faster_RCNN_Train.gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset);
82 | end
83 | 
84 | function [anchors, output_width_map, output_height_map] = proposal_prepare_anchors(conf, cache_name, test_net_def_file)
85 |     % Pre-compute the output map sizes for the test net, then generate anchors at scales 2^3, 2^4 and 2^5.
86 |     [output_width_map, output_height_map] = proposal_calc_output_size(conf, test_net_def_file);
87 |     anchor_scales = 2.^(3:5);
88 |     anchors = proposal_generate_anchors(cache_name, 'scales', anchor_scales);
89 | end


--------------------------------------------------------------------------------
/experiments/script_faster_rcnn_VOC2012_VGG16.m:
--------------------------------------------------------------------------------
 1 | function script_faster_rcnn_VOC2012_VGG16()
 2 | % script_faster_rcnn_VOC2012_VGG16()
 3 | % Faster R-CNN training and testing with the VGG16 model on the VOC2012 datasets
 4 | % --------------------------------------------------------
 5 | % Faster R-CNN
 6 | % Copyright (c) 2015, Shaoqing Ren
 7 | % Licensed under The MIT License [see LICENSE for details]
 8 | % --------------------------------------------------------
 9 | % Sequence: stage-1 RPN, stage-1 Fast R-CNN, stage-2 RPN, stage-2 Fast R-CNN, final test, model export.
10 | clc;
11 | clear mex;
12 | clear is_valid_handle; % to clear init_key
13 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup')); % 'startup' sits one folder above this file
14 | %% -------------------- CONFIG --------------------
15 | opts.caffe_version          = 'caffe_faster_rcnn';
16 | opts.gpu_id                 = auto_select_gpu;
17 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
18 | 
19 | % do validation, or not (the flag is forwarded to every do_*_train call below)
20 | opts.do_val                 = false; 
21 | % model
22 | model                       = Model.VGG16_for_Faster_RCNN_VOC2012;
23 | % cache base
24 | cache_base_proposal         = 'faster_rcnn_VOC2012_vgg_16layers';
25 | cache_base_fast_rcnn        = '';
26 | % train/test data
27 | dataset                     = [];
28 | use_flipped                 = true;
29 | dataset                     = Dataset.voc2012_trainval(dataset, 'train', use_flipped);
30 | dataset                     = Dataset.voc2012_test(dataset, 'test', false);
31 | 
32 | %% -------------------- TRAIN --------------------
33 | % conf
34 | conf_proposal               = proposal_config('image_means', model.mean_image, 'feat_stride', model.feat_stride);
35 | conf_fast_rcnn              = fast_rcnn_config('image_means', model.mean_image);
36 | % set cache folder for each stage
37 | model                       = Faster_RCNN_Train.set_cache_folder(cache_base_proposal, cache_base_fast_rcnn, model);
38 | % generate anchors and pre-calculate output size of rpn network 
39 | [conf_proposal.anchors, conf_proposal.output_width_map, conf_proposal.output_height_map] ...
40 |                             = proposal_prepare_anchors(conf_proposal, model.stage1_rpn.cache_name, model.stage1_rpn.test_net_def_file);
41 | 
42 | %%  stage one proposal
43 | fprintf('\n***************\nstage one proposal \n***************\n');
44 | % train
45 | model.stage1_rpn            = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage1_rpn, opts.do_val);
46 | % test: every train roidb (imdb_train/roidb_train are cell arrays) and the test roidb are replaced by what do_proposal_test returns
47 | dataset.roidb_train         = cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
48 | dataset.roidb_test       	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage1_rpn, dataset.imdb_test, dataset.roidb_test);
49 | 
50 | %%  stage one fast rcnn
51 | fprintf('\n***************\nstage one fast rcnn\n***************\n');
52 | % train
53 | model.stage1_fast_rcnn      = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage1_fast_rcnn, opts.do_val);
54 | % test (disabled here; detection is only evaluated in the "final test" section)
55 | % opts.mAP                    = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage1_fast_rcnn, dataset.imdb_test, dataset.roidb_test);
56 | 
57 | %%  stage two proposal
58 | % net proposal
59 | fprintf('\n***************\nstage two proposal\n***************\n');
60 | % train, initialised from the stage one fast rcnn output model
61 | model.stage2_rpn.init_net_file = model.stage1_fast_rcnn.output_model_file;
62 | model.stage2_rpn            = Faster_RCNN_Train.do_proposal_train(conf_proposal, dataset, model.stage2_rpn, opts.do_val);
63 | % test: the roidbs are replaced again, this time with the stage two rpn
64 | dataset.roidb_train        	= cellfun(@(x, y) Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, x, y), dataset.imdb_train, dataset.roidb_train, 'UniformOutput', false);
65 | dataset.roidb_test         	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
66 | 
67 | %%  stage two fast rcnn
68 | fprintf('\n***************\nstage two fast rcnn\n***************\n');
69 | % train, also initialised from the stage one fast rcnn output model
70 | model.stage2_fast_rcnn.init_net_file = model.stage1_fast_rcnn.output_model_file;
71 | model.stage2_fast_rcnn      = Faster_RCNN_Train.do_fast_rcnn_train(conf_fast_rcnn, dataset, model.stage2_fast_rcnn, opts.do_val);
72 | 
73 | %% final test
74 | fprintf('\n***************\nfinal test\n***************\n');
75 | % the stage two rpn takes over the nms settings stored in model.final_test for this last proposal pass
76 | model.stage2_rpn.nms        = model.final_test.nms;
77 | dataset.roidb_test       	= Faster_RCNN_Train.do_proposal_test(conf_proposal, model.stage2_rpn, dataset.imdb_test, dataset.roidb_test);
78 | opts.final_mAP              = Faster_RCNN_Train.do_fast_rcnn_test(conf_fast_rcnn, model.stage2_fast_rcnn, dataset.imdb_test, dataset.roidb_test);
79 | 
80 | % save final models, for outside tester
81 | Faster_RCNN_Train.gather_rpn_fast_rcnn_models(conf_proposal, conf_fast_rcnn, model, dataset);
82 | end
83 | 
84 | function [anchors, output_width_map, output_height_map] = proposal_prepare_anchors(conf, cache_name, test_net_def_file)
85 | % Output-size maps for the test net first, then the anchors for scales 2^3, 2^4 and 2^5.
86 |     [output_width_map, output_height_map] = proposal_calc_output_size(conf, test_net_def_file);
87 |     anchor_scales = 2.^(3:5);
88 |     anchors = proposal_generate_anchors(cache_name, 'scales', anchor_scales);
89 | end


--------------------------------------------------------------------------------
/experiments/script_faster_rcnn_demo.m:
--------------------------------------------------------------------------------
  1 | function script_faster_rcnn_demo() % demo: run a saved proposal + detection model pair on five images, time it, show the boxes
  2 | close all;
  3 | clc;
  4 | clear mex;
  5 | clear is_valid_handle; % to clear init_key
  6 | run(fullfile(fileparts(fileparts(mfilename('fullpath'))), 'startup'));
  7 | %% -------------------- CONFIG --------------------
  8 | opts.caffe_version          = 'caffe_faster_rcnn';
  9 | opts.gpu_id                 = auto_select_gpu;
 10 | active_caffe_mex(opts.gpu_id, opts.caffe_version);
 11 | % arguments of boxes_filter: rows kept before nms, nms overlap threshold, rows kept after nms
 12 | opts.per_nms_topN           = 6000;
 13 | opts.nms_overlap_thres      = 0.7;
 14 | opts.after_nms_topN         = 300;
 15 | opts.use_gpu                = true;
 16 | 
 17 | opts.test_scales            = 600; % written into both confs of the loaded model below
 18 | 
 19 | %% -------------------- INIT_MODEL --------------------
 20 | model_dir                   = fullfile(pwd, 'output', 'faster_rcnn_final', 'faster_rcnn_VOC0712_vgg_16layers'); %% VGG-16
 21 | %model_dir                   = fullfile(pwd, 'output', 'faster_rcnn_final', 'faster_rcnn_VOC0712_ZF'); %% ZF
 22 | proposal_detection_model    = load_proposal_detection_model(model_dir);
 23 | 
 24 | proposal_detection_model.conf_proposal.test_scales = opts.test_scales;
 25 | proposal_detection_model.conf_detection.test_scales = opts.test_scales;
 26 | if opts.use_gpu
 27 |     proposal_detection_model.conf_proposal.image_means = gpuArray(proposal_detection_model.conf_proposal.image_means);
 28 |     proposal_detection_model.conf_detection.image_means = gpuArray(proposal_detection_model.conf_detection.image_means);
 29 | end
 30 | 
 31 | % caffe.init_log(fullfile(pwd, 'caffe_log'));
 32 | % proposal net
 33 | rpn_net = caffe.Net(proposal_detection_model.proposal_net_def, 'test');
 34 | rpn_net.copy_from(proposal_detection_model.proposal_net);
 35 | % fast rcnn net
 36 | fast_rcnn_net = caffe.Net(proposal_detection_model.detection_net_def, 'test');
 37 | fast_rcnn_net.copy_from(proposal_detection_model.detection_net);
 38 | 
 39 | % set gpu/cpu
 40 | if opts.use_gpu
 41 |     caffe.set_mode_gpu();
 42 | else
 43 |     caffe.set_mode_cpu();
 44 | end       
 45 | 
 46 | %% -------------------- WARM UP --------------------
 47 | % the first run will be slower; use an empty image to warm up
 48 | % (the boxes and scores of these passes are overwritten by the real runs below)
 49 | for j = 1:2 % we warm up 2 times
 50 |     im = uint8(ones(375, 500, 3)*128); % constant 375 x 500 x 3 image, every value 128
 51 |     if opts.use_gpu
 52 |         im = gpuArray(im);
 53 |     end
 54 |     [boxes, scores]             = proposal_im_detect(proposal_detection_model.conf_proposal, rpn_net, im);
 55 |     aboxes                      = boxes_filter([boxes, scores], opts.per_nms_topN, opts.nms_overlap_thres, opts.after_nms_topN, opts.use_gpu);
 56 |     if proposal_detection_model.is_share_feature
 57 |         [boxes, scores]             = fast_rcnn_conv_feat_detect(proposal_detection_model.conf_detection, fast_rcnn_net, im, ...
 58 |             rpn_net.blobs(proposal_detection_model.last_shared_output_blob_name), ...
 59 |             aboxes(:, 1:4), opts.after_nms_topN);
 60 |     else
 61 |         [boxes, scores]             = fast_rcnn_im_detect(proposal_detection_model.conf_detection, fast_rcnn_net, im, ...
 62 |             aboxes(:, 1:4), opts.after_nms_topN);
 63 |     end
 64 | end
 65 | 
 66 | %% -------------------- TESTING --------------------
 67 | im_names = {'001763.jpg', '004545.jpg', '000542.jpg', '000456.jpg', '001150.jpg'};
 68 | % these images can be downloaded with fetch_faster_rcnn_final_model.m
 69 | % they are read from the current folder (pwd)
 70 | running_time = [];
 71 | for j = 1:length(im_names)
 72 |     
 73 |     im = imread(fullfile(pwd, im_names{j}));
 74 |     
 75 |     if opts.use_gpu
 76 |         im = gpuArray(im);
 77 |     end
 78 |     
 79 |     % test proposal (timed separately from the nms filtering that follows)
 80 |     th = tic();
 81 |     [boxes, scores]             = proposal_im_detect(proposal_detection_model.conf_proposal, rpn_net, im);
 82 |     t_proposal = toc(th);
 83 |     th = tic();
 84 |     aboxes                      = boxes_filter([boxes, scores], opts.per_nms_topN, opts.nms_overlap_thres, opts.after_nms_topN, opts.use_gpu);
 85 |     t_nms = toc(th);
 86 |     
 87 |     % test detection: reuse the rpn net's shared feature blob when the model shares features, else run on the image
 88 |     th = tic();
 89 |     if proposal_detection_model.is_share_feature
 90 |         [boxes, scores]             = fast_rcnn_conv_feat_detect(proposal_detection_model.conf_detection, fast_rcnn_net, im, ...
 91 |             rpn_net.blobs(proposal_detection_model.last_shared_output_blob_name), ...
 92 |             aboxes(:, 1:4), opts.after_nms_topN);
 93 |     else
 94 |         [boxes, scores]             = fast_rcnn_im_detect(proposal_detection_model.conf_detection, fast_rcnn_net, im, ...
 95 |             aboxes(:, 1:4), opts.after_nms_topN);
 96 |     end
 97 |     t_detection = toc(th);
 98 |     
 99 |     fprintf('%s (%dx%d): time %.3fs (resize+conv+proposal: %.3fs, nms+regionwise: %.3fs)\n', im_names{j}, ...
100 |         size(im, 2), size(im, 1), t_proposal + t_nms + t_detection, t_proposal, t_nms+t_detection);
101 |     running_time(end+1) = t_proposal + t_nms + t_detection;
102 |     
103 |     % visualize: per class, nms at 0.3 overlap, then keep the boxes scoring at least thres
104 |     classes = proposal_detection_model.classes;
105 |     boxes_cell = cell(length(classes), 1);
106 |     thres = 0.6;
107 |     for i = 1:length(boxes_cell)
108 |         boxes_cell{i} = [boxes(:, (1+(i-1)*4):(i*4)), scores(:, i)]; % class i: its four box columns plus its score column
109 |         boxes_cell{i} = boxes_cell{i}(nms(boxes_cell{i}, 0.3), :);
110 |         
111 |         I = boxes_cell{i}(:, 5) >= thres;
112 |         boxes_cell{i} = boxes_cell{i}(I, :);
113 |     end
114 |     figure(j);
115 |     showboxes(im, boxes_cell, classes, 'voc');
116 |     pause(0.1);
117 | end
118 | fprintf('mean time: %.3fs\n', mean(running_time));
119 | 
120 | caffe.reset_all(); 
121 | clear mex;
122 | 
123 | end
124 | 
125 | function proposal_detection_model = load_proposal_detection_model(model_dir)
126 | % Load the proposal_detection_model variable from the 'model' MAT-file in model_dir
127 | % and prefix model_dir to the four net file names stored in it.
128 |     stored = load(fullfile(model_dir, 'model'));
129 |     proposal_detection_model = stored.proposal_detection_model;
130 |     clear stored;
131 |     
132 |     % net definitions and weights are stored relative to model_dir
133 |     path_fields = {'proposal_net_def', 'proposal_net', ...
134 |                    'detection_net_def', 'detection_net'};
135 |     for k = 1:numel(path_fields)
136 |         name = path_fields{k};
137 |         proposal_detection_model.(name) = fullfile(model_dir, proposal_detection_model.(name));
138 |     end
139 | end
140 | 
141 | function aboxes = boxes_filter(aboxes, per_nms_topN, nms_overlap_thres, after_nms_topN, use_gpu)
142 | % aboxes = boxes_filter(aboxes, per_nms_topN, nms_overlap_thres, after_nms_topN, use_gpu)
143 | % Truncate, NMS-filter and truncate again a list of boxes stored one per row.
144 | %   per_nms_topN      - rows kept before NMS (<= 0 keeps all rows)
145 | %   nms_overlap_thres - passed to nms(); NMS only runs for values strictly between 0 and 1
146 | %   after_nms_topN    - rows kept after NMS (<= 0 keeps all rows)
147 | %   use_gpu           - forwarded to nms()
148 | % Truncation keeps the leading rows, so rows are assumed to arrive ordered
149 | % best-first (TODO confirm against proposal_im_detect).
150 | 
151 |     % to speed up nms
152 |     % Rows are counted with size(.., 1): length() is the largest dimension, so with
153 |     % fewer rows than columns the old 1:min(length(aboxes), N) range ran past the last row.
154 |     if per_nms_topN > 0
155 |         aboxes = aboxes(1:min(size(aboxes, 1), per_nms_topN), :);
156 |     end
157 |     % do nms
158 |     if nms_overlap_thres > 0 && nms_overlap_thres < 1
159 |         aboxes = aboxes(nms(aboxes, nms_overlap_thres, use_gpu), :);
160 |     end
161 |     if after_nms_topN > 0
162 |         aboxes = aboxes(1:min(size(aboxes, 1), after_nms_topN), :);
163 |     end
164 | end
165 | 


--------------------------------------------------------------------------------
/faster_rcnn_build.m:
--------------------------------------------------------------------------------
 1 | function faster_rcnn_build()
 2 | % faster_rcnn_build()
 3 | % Compiles nms_mex (C++) and nms_gpu_mex (CUDA) into ./bin unless a file of that
 4 | % name is already visible to MATLAB. All paths are relative to the current folder.
 5 | % --------------------------------------------------------
 6 | % Faster R-CNN
 7 | % Copyright (c) 2015, Shaoqing Ren
 8 | % Licensed under The MIT License [see LICENSE for details]
 9 | % --------------------------------------------------------
10 | 
11 | % Both compile steps write into bin; create it so a checkout without that folder still builds
12 | if ~exist(fullfile(pwd, 'bin'), 'dir')
13 |   mkdir(fullfile(pwd, 'bin'));
14 | end
15 | 
16 | % Compile nms_mex
17 | if ~exist('nms_mex', 'file')
18 |   fprintf('Compiling nms_mex\n');
19 | 
20 |   mex -O -outdir bin ...
21 |       CXXFLAGS="\$CXXFLAGS -std=c++11"  ...
22 |       -largeArrayDims ...
23 |       functions/nms/nms_mex.cpp ...
24 |       -output nms_mex;
25 | end
26 | 
27 | if ~exist('nms_gpu_mex', 'file')
28 |    fprintf('Compiling nms_gpu_mex\n');
29 |    addpath(fullfile(pwd, 'functions', 'nms'));   % presumably where nvmex lives - confirm
30 |    nvmex('functions/nms/nms_gpu_mex.cu', 'bin');
31 |    % nvmex is expected to leave this object file in the current folder; skip the
32 |    % delete (and its file-not-found warning) when it is not there
33 |    if exist(fullfile(pwd, 'nms_gpu_mex.o'), 'file')
34 |       delete('nms_gpu_mex.o');
35 |    end
36 | end


--------------------------------------------------------------------------------
/fetch_data/fetch_caffe_mex_windows_vs2013_cuda65.m:
--------------------------------------------------------------------------------
 1 | % Downloads caffe_mex.zip into this script's folder, unpacks it into the parent
 2 | % folder, and returns to the caller's working directory afterwards.
 3 | cur_dir = pwd;
 4 | cd(fileparts(mfilename('fullpath')));
 5 | 
 6 | try
 7 |     fprintf('Downloading caffe_mex...\n');
 8 |     urlwrite('https://onedrive.live.com/download?resid=36FEC490FBC32F1A!111&authkey=!AFVWFGTbViiX5tg&ithint=file%2czip', ...
 9 |         'caffe_mex.zip');
10 | 
11 |     fprintf('Unzipping...\n');
12 |     unzip('caffe_mex.zip', '..');
13 | 
14 |     fprintf('Done.\n');
15 |     delete('caffe_mex.zip');
16 | catch err
17 |     % Download, unzip and delete failures all land here: end the message with a
18 |     % newline and report the underlying cause instead of discarding it.
19 |     fprintf('Error in downloading, please try links in README.md https://github.com/ShaoqingRen/faster_rcnn\n');
20 |     fprintf('Cause: %s\n', err.message);
21 | end
22 | 
23 | cd(cur_dir);
24 | 


--------------------------------------------------------------------------------
/fetch_data/fetch_faster_rcnn_final_model.m:
--------------------------------------------------------------------------------
 1 | % Downloads faster_rcnn_final_model.zip into this script's folder, unpacks it into
 2 | % the parent folder, and returns to the caller's working directory afterwards.
 3 | cur_dir = pwd;
 4 | cd(fileparts(mfilename('fullpath')));
 5 | 
 6 | try
 7 |     fprintf('Downloading faster_rcnn_final_model...\n');
 8 |     urlwrite('https://onedrive.live.com/download?resid=D7AF52BADBA8A4BC!114&authkey=!AERHoxZ-iAx_j34&ithint=file%2czip', ...
 9 |         'faster_rcnn_final_model.zip');
10 | 
11 |     fprintf('Unzipping...\n');
12 |     unzip('faster_rcnn_final_model.zip', '..');
13 | 
14 |     fprintf('Done.\n');
15 |     delete('faster_rcnn_final_model.zip');
16 | catch err
17 |     % Download, unzip and delete failures all land here: end the message with a
18 |     % newline and report the underlying cause instead of discarding it.
19 |     fprintf('Error in downloading, please try links in README.md https://github.com/ShaoqingRen/faster_rcnn\n');
20 |     fprintf('Cause: %s\n', err.message);
21 | end
22 | 
23 | cd(cur_dir);
24 | 


--------------------------------------------------------------------------------
/fetch_data/fetch_model_VGG16.m:
--------------------------------------------------------------------------------
 1 | % Downloads model_VGG16.zip into this script's folder, unpacks it into the parent
 2 | % folder, and returns to the caller's working directory afterwards.
 3 | cur_dir = pwd;
 4 | cd(fileparts(mfilename('fullpath')));
 5 | 
 6 | try
 7 |     fprintf('Downloading model_VGG16...\n');
 8 |     urlwrite('https://onedrive.live.com/download?resid=36FEC490FBC32F1A!114&authkey=!AE8uV9B07dREbhM&ithint=file%2czip', ...
 9 |         'model_VGG16.zip');
10 | 
11 |     fprintf('Unzipping...\n');
12 |     unzip('model_VGG16.zip', '..');
13 | 
14 |     fprintf('Done.\n');
15 |     delete('model_VGG16.zip');
16 | catch err
17 |     % Download, unzip and delete failures all land here: end the message with a
18 |     % newline and report the underlying cause instead of discarding it.
19 |     fprintf('Error in downloading, please try links in README.md https://github.com/ShaoqingRen/faster_rcnn\n');
20 |     fprintf('Cause: %s\n', err.message);
21 | end
22 | 
23 | cd(cur_dir);
24 | 


--------------------------------------------------------------------------------
/fetch_data/fetch_model_ZF.m:
--------------------------------------------------------------------------------
 1 | % Downloads model_ZF.zip into this script's folder, unpacks it into the parent
 2 | % folder, and returns to the caller's working directory afterwards.
 3 | cur_dir = pwd;
 4 | cd(fileparts(mfilename('fullpath')));
 5 | 
 6 | try
 7 |     fprintf('Downloading model_ZF...\n');
 8 |     urlwrite('https://onedrive.live.com/download?resid=36FEC490FBC32F1A!113&authkey=!AIzdm0sD_SmhUQ4&ithint=file%2czip', ...
 9 |         'model_ZF.zip');
10 | 
11 |     fprintf('Unzipping...\n');
12 |     unzip('model_ZF.zip', '..');
13 | 
14 |     fprintf('Done.\n');
15 |     delete('model_ZF.zip');
16 | catch err
17 |     % Download, unzip and delete failures all land here: end the message with a
18 |     % newline and report the underlying cause instead of discarding it.
19 |     fprintf('Error in downloading, please try links in README.md https://github.com/ShaoqingRen/faster_rcnn\n');
20 |     fprintf('Cause: %s\n', err.message);
21 | end
22 | 
23 | cd(cur_dir);
24 | 


--------------------------------------------------------------------------------
/functions/fast_rcnn/fast_rcnn_bbox_transform.m:
--------------------------------------------------------------------------------
 1 | function [regression_label] = fast_rcnn_bbox_transform(ex_boxes, gt_boxes)
 2 | % [regression_label] = fast_rcnn_bbox_transform(ex_boxes, gt_boxes)
 3 | % Regression targets taking each row of ex_boxes to the same row of gt_boxes.
 4 | % Both inputs hold one [x1 y1 x2 y2] box per row; the result holds one
 5 | % [dx, dy, dw, dh] row per box: centre offsets divided by the ex box size,
 6 | % and log ratios of the widths and heights.
 7 | % --------------------------------------------------------
 8 | % Fast R-CNN
 9 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
10 | % Copyright (c) 2015, Shaoqing Ren
11 | % Licensed under The MIT License [see LICENSE for details]
12 | % --------------------------------------------------------
13 | 
14 |     [w_ex, h_ex, cx_ex, cy_ex] = box_geometry(ex_boxes);
15 |     [w_gt, h_gt, cx_gt, cy_gt] = box_geometry(gt_boxes);
16 | 
17 |     % eps is added only in the centre-offset divisions; the log ratios divide directly
18 |     dx = (cx_gt - cx_ex) ./ (w_ex + eps);
19 |     dy = (cy_gt - cy_ex) ./ (h_ex + eps);
20 |     dw = log(w_gt ./ w_ex);
21 |     dh = log(h_gt ./ h_ex);
22 | 
23 |     regression_label = [dx, dy, dw, dh];
24 | end
25 | 
26 | function [w, h, ctr_x, ctr_y] = box_geometry(boxes)
27 | % Width, height and centre of every row of boxes (end coordinates are inclusive).
28 |     w = boxes(:, 3) - boxes(:, 1) + 1;
29 |     h = boxes(:, 4) - boxes(:, 2) + 1;
30 |     ctr_x = boxes(:, 1) + 0.5 * (w - 1);
31 |     ctr_y = boxes(:, 2) + 0.5 * (h - 1);
32 | end


--------------------------------------------------------------------------------
/functions/fast_rcnn/fast_rcnn_bbox_transform_inv.m:
--------------------------------------------------------------------------------
 1 | function [pred_boxes] = fast_rcnn_bbox_transform_inv(boxes, box_deltas)
 2 | % [pred_boxes] = fast_rcnn_bbox_transform_inv(boxes, box_deltas)
 3 | % Applies regression deltas to boxes. boxes holds one [x1 y1 x2 y2] row per box;
 4 | % box_deltas holds groups of four columns [dx dy dw dh] per row, and every group
 5 | % is applied to the same source box. The result is single, sized like box_deltas.
 6 | % --------------------------------------------------------
 7 | % Fast R-CNN
 8 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
 9 | % Copyright (c) 2015, Shaoqing Ren
10 | % Licensed under The MIT License [see LICENSE for details]
11 | % --------------------------------------------------------
12 | 
13 |     % size and centre of the source boxes
14 |     w0 = double(boxes(:, 3) - boxes(:, 1) + 1);
15 |     h0 = double(boxes(:, 4) - boxes(:, 2) + 1);
16 |     cx0 = double(boxes(:, 1) + 0.5*(w0-1));
17 |     cy0 = double(boxes(:, 2) + 0.5*(h0-1));
18 | 
19 |     % k-th delta of every group: columns k, k+4, k+8, ...
20 |     delta = @(k) double(box_deltas(:, k:4:end));
21 | 
22 |     % centres move by delta * size, sizes scale by exp(delta)
23 |     cx = bsxfun(@plus, bsxfun(@times, delta(1), w0), cx0);
24 |     cy = bsxfun(@plus, bsxfun(@times, delta(2), h0), cy0);
25 |     w = bsxfun(@times, exp(delta(3)), w0);
26 |     h = bsxfun(@times, exp(delta(4)), h0);
27 | 
28 |     half_w = 0.5*(w-1);
29 |     half_h = 0.5*(h-1);
30 |     pred_boxes = zeros(size(box_deltas), 'single');
31 |     pred_boxes(:, 1:4:end) = cx - half_w;
32 |     pred_boxes(:, 2:4:end) = cy - half_h;
33 |     pred_boxes(:, 3:4:end) = cx + half_w;
34 |     pred_boxes(:, 4:4:end) = cy + half_h;
35 | end


--------------------------------------------------------------------------------
/functions/fast_rcnn/fast_rcnn_config.m:
--------------------------------------------------------------------------------
 1 | function conf = fast_rcnn_config(varargin)
 2 | % conf = fast_rcnn_config('name', value, ...)
 3 | % Fast R-CNN configuration: each option in the table below can be overridden
 4 | % with a name/value pair; the result is a struct with one field per option.
 5 | % --------------------------------------------------------
 6 | % Fast R-CNN
 7 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
 8 | % Copyright (c) 2015, Shaoqing Ren
 9 | % Licensed under The MIT License [see LICENSE for details]
10 | % --------------------------------------------------------
11 | %
12 |     % one row per option: name, default value, validator
13 |     options = { ...
14 |         'use_gpu',       (gpuDeviceCount > 0), @islogical; ... % training: whether to use the gpu
15 |         'scales',        600,                  @ismatrix;  ... % image scales -- the short edge of the input image
16 |         'max_size',      1000,                 @isscalar;  ... % max pixel size of a scaled input image
17 |         'ims_per_batch', 2,                    @isscalar;  ... % images per batch
18 |         'batch_size',    128,                  @isscalar;  ... % minibatch size
19 |         'fg_fraction',   0.25,                 @isscalar;  ... % fraction of the minibatch that is foreground labeled (class > 0)
20 |         'fg_thresh',     0.5,                  @isscalar;  ... % ROI is foreground if overlap >= fg_thresh
21 |         'bg_thresh_hi',  0.5,                  @isscalar;  ... % ROI is background (class = 0) if overlap is in
22 |         'bg_thresh_lo',  0.1,                  @isscalar;  ... %   [bg_thresh_lo, bg_thresh_hi)
23 |         'image_means',   128,                  @ismatrix;  ... % mean image, in RGB order
24 |         'use_flipped',   true,                 @islogical; ... % use horizontally-flipped images during training?
25 |         'bbox_thresh',   0.5,                  @isscalar;  ... % valid training sample (IoU > bbox_thresh) for bounding box regression
26 |         'rng_seed',      6,                    @isscalar;  ... % random seed
27 |         'test_scales',   600,                  @isscalar;  ... % testing
28 |         'test_max_size', 1000,                 @isscalar;  ...
29 |         'test_nms',      0.3,                  @isscalar;  ...
30 |         'test_binary',   false,                @islogical  ...
31 |     };
32 | 
33 |     ip = inputParser;
34 |     for k = 1:size(options, 1)
35 |         ip.addParamValue(options{k, 1}, options{k, 2}, options{k, 3});
36 |     end
37 |     ip.parse(varargin{:});
38 |     conf = ip.Results;
39 | 
40 |     % image_means given as a file name: load it; the file must hold exactly one variable
41 |     if ischar(conf.image_means)
42 |         loaded = load(conf.image_means);
43 |         var_names = fieldnames(loaded);
44 |         assert(length(var_names) == 1);
45 |         conf.image_means = loaded.(var_names{1});
46 |     end
47 | end


--------------------------------------------------------------------------------
/functions/fast_rcnn/fast_rcnn_conv_feat_detect.m:
--------------------------------------------------------------------------------
  1 | function [pred_boxes, scores] = fast_rcnn_conv_feat_detect(conf, caffe_net, im, conv_feat_blob, boxes, max_rois_num_in_gpu)
  2 | % [pred_boxes, scores] = fast_rcnn_conv_feat_detect(conf, caffe_net, im, conv_feat_blob, boxes, max_rois_num_in_gpu)
  3 | % --------------------------------------------------------
  4 | % Fast R-CNN
  5 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
  6 | % Copyright (c) 2015, Shaoqing Ren
  7 | % Licensed under The MIT License [see LICENSE for details]
  8 | % --------------------------------------------------------
  9 | % Scores the rois in boxes with caffe_net on an already computed feature blob.
 10 | %   conf                - reads test_scales, test_max_size and test_binary
 11 | %   caffe_net           - net with a 'data' blob (and 'cls_score' when conf.test_binary is set)
 12 | %   im                  - only its size is used (roi scaling and clipping)
 13 | %   conv_feat_blob      - copied into the 'data' blob once, before the forward passes
 14 | %   boxes               - one [x1 y1 x2 y2] roi per row
 15 | %   max_rois_num_in_gpu - upper bound on the rois sent through one forward pass
 16 | % Returns
 17 | %   pred_boxes - regressed boxes clipped to the image, 4 columns per class, first 4 dropped
 18 | %   scores     - one column per class, first column dropped
 19 | 
 20 |     % roi rows -> 0-based (C indexing) single blob laid out as 1 x 1 x 5 x N
 21 |     roi_rows = get_blobs(conf, im, boxes);
 22 |     roi_blob = single(permute(roi_rows - 1, [3, 4, 2, 1]));
 23 |     
 24 |     % set conv feature map as 'data'
 25 |     caffe_net.blobs('data').copy_data_from(conv_feat_blob);
 26 |     
 27 |     num_rois = size(roi_blob, 4);
 28 |     num_passes = ceil(num_rois / max_rois_num_in_gpu);
 29 |     score_parts = cell(num_passes, 1);
 30 |     delta_parts = cell(num_passes, 1);
 31 |     for pass = 1:num_passes
 32 |         % first and last roi handled by this pass
 33 |         first = 1 + (pass-1) * max_rois_num_in_gpu;
 34 |         last = min(num_rois, pass * max_rois_num_in_gpu);
 35 |         
 36 |         % only set rois blob here
 37 |         net_inputs = {[], roi_blob(:, :, :, first:last)};
 38 | 
 39 |         % Reshape net's input blobs
 40 |         caffe_net.reshape_as_input(net_inputs);
 41 |         output_blobs = caffe_net.forward(net_inputs);
 42 | 
 43 |         if conf.test_binary
 44 |             % simulate binary logistic regression: cls_score minus its first column (fg - bg)
 45 |             raw_scores = squeeze(caffe_net.blobs('cls_score').get_data())';
 46 |             score_parts{pass} = bsxfun(@minus, raw_scores, raw_scores(:, 1));
 47 |         else
 48 |             % use softmax estimated probabilities
 49 |             score_parts{pass} = squeeze(output_blobs{2})';
 50 |         end
 51 | 
 52 |         % bounding-box regression deltas
 53 |         delta_parts{pass} = squeeze(output_blobs{1})';
 54 |     end 
 55 |     
 56 |     scores = cell2mat(score_parts);
 57 |     box_deltas = cell2mat(delta_parts);
 58 |     
 59 |     pred_boxes = fast_rcnn_bbox_transform_inv(boxes, box_deltas);
 60 |     pred_boxes = clip_boxes(pred_boxes, size(im, 2), size(im, 1));
 61 |     
 62 |     % remove scores and boxes for back-ground
 63 |     pred_boxes = pred_boxes(:, 5:end);
 64 |     scores = scores(:, 2:end);
 65 | end
 66 | 
 67 | function [rois_blob, im_scale_factors] = get_blobs(conf, im, rois)
 68 |     im_scale_factors = get_image_blob_scales(conf, im); % one factor per entry of conf.test_scales
 69 |     rois_blob = get_rois_blob(conf, rois, im_scale_factors); % single rows of [level, scaled roi]
 70 | end
 71 | 
 72 | function im_scales = get_image_blob_scales(conf, im)
 73 |     scale_for = @(target) prep_im_for_blob_size(size(im), target, conf.test_max_size); % factor for one test scale
 74 |     im_scales = cell2mat(arrayfun(scale_for, conf.test_scales, 'UniformOutput', false));
 75 | end
 76 | 
 77 | function [rois_blob] = get_rois_blob(conf, im_rois, im_scale_factors)
 78 |     [scaled_rois, roi_levels] = map_im_rois_to_feat_rois(conf, im_rois, im_scale_factors);
 79 |     rois_blob = single(horzcat(roi_levels, scaled_rois)); % level index first, then the roi coordinates
 80 | end
 81 | 
 82 | function [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, scales)
 83 | % Pick one scale per roi (levels: N x 1 index into scales) and rescale the roi with it.
 84 | % conf is not used here. im_rois holds one [x1 y1 x2 y2] row per roi; feat_rois is single.
 85 |     im_rois = single(im_rois);
 86 |     scales = scales(:);   % column, so scales(levels) is N x 1 and lines up with the roi rows
 87 |     if length(scales) > 1
 88 |         areas = (im_rois(:, 3) - im_rois(:, 1) + 1) .* (im_rois(:, 4) - im_rois(:, 2) + 1);
 89 |         scaled_areas = bsxfun(@times, areas(:), scales'.^2);
 90 |         % index of the scale whose scaled roi area is closest to 224^2; the former
 91 |         % max(..., 2) was an element-wise maximum with the constant 2, not an index
 92 |         [~, levels] = min(abs(scaled_areas - 224.^2), [], 2);
 93 |     else
 94 |         levels = ones(size(im_rois, 1), 1);
 95 |     end
 96 |     feat_rois = round(bsxfun(@times, im_rois-1, scales(levels))) + 1; % scale around the 1-based origin
 97 | end
 98 | 
 99 | function boxes = clip_boxes(boxes, im_width, im_height)
100 | % Clamp every coordinate into the image: x into [1, im_width], y into [1, im_height].
101 | % Columns repeat as [x1 y1 x2 y2] groups, so columns c, c+4, c+8, ... share one limit.
102 |     upper = {im_width, im_height, im_width, im_height};
103 |     for c = 1:4
104 |         cols = c:4:size(boxes, 2);
105 |         % cap at the image extent first, then lift anything below 1
106 |         boxes(:, cols) = max(min(boxes(:, cols), upper{c}), 1);
107 |     end
108 | end
109 |     


--------------------------------------------------------------------------------
/functions/fast_rcnn/fast_rcnn_generate_sliding_windows.m:
--------------------------------------------------------------------------------
 1 | function roidb = fast_rcnn_generate_sliding_windows(conf, imdb, roidb, roipool_in_size)
 2 | % roidb = fast_rcnn_generate_sliding_windows(conf, imdb, roidb, roipool_in_size)
 3 | % --------------------------------------------------------
 4 | % Fast R-CNN
 5 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
 6 | % Copyright (c) 2015, Shaoqing Ren
 7 | % Licensed under The MIT License [see LICENSE for details]
 8 | % --------------------------------------------------------
 9 | % Builds one sliding-window box list per row of imdb.sizes and passes them to roidb_from_proposal.
10 |     window_fn = @(sz) generate_sliding_windows_one_image(conf, sz, roipool_in_size);
11 |     size_rows = num2cell(imdb.sizes, 2);    % one cell per image holding that image's row of imdb.sizes
12 | 
13 |     regions.images = imdb.image_ids;
14 |     regions.boxes = cellfun(window_fn, size_rows, 'UniformOutput', false);
15 |     roidb = roidb_from_proposal(imdb, roidb, regions);
16 | end
17 | 
18 | function boxes = generate_sliding_windows_one_image(conf, im_size, roipool_in_size)
19 | % Square windows of side roipool_in_size * conf.feat_stride, placed every conf.feat_stride
20 | % pixels on the rescaled image, kept only when they lie fully inside it, and returned in
21 | % the coordinates of the original image.
22 |     im_scale = prep_im_for_blob_size(im_size, conf.scales, conf.max_size);
23 |     scaled_size = round(im_size * im_scale);
24 |     window_side = roipool_in_size * conf.feat_stride;
25 | 
26 |     % top-left corners on a regular grid; scaled_size(2) bounds x and scaled_size(1) bounds y
27 |     [left, top] = meshgrid(1:conf.feat_stride:scaled_size(2), 1:conf.feat_stride:scaled_size(1));
28 |     top_left = [left(:), top(:)];
29 | 
30 |     boxes = filter_boxes(scaled_size, [top_left, top_left + window_side - 1]);
31 |     
32 |     % undo the image rescaling around the 1-based origin
33 |     boxes = bsxfun(@times, boxes-1, 1/im_scale) + 1;
34 | end
35 | 
36 | function boxes = filter_boxes(im_size, boxes)
37 | % Keep the rows of boxes whose four coordinates all lie inside the image:
38 | % columns 1 and 3 within [1, im_size(2)], columns 2 and 4 within [1, im_size(1)].
39 |     xs = boxes(:, [1 3]);
40 |     ys = boxes(:, [2 4]);
41 |     inside = all(xs >= 1 & xs <= im_size(2), 2) & all(ys >= 1 & ys <= im_size(1), 2);
42 |     boxes = boxes(inside, :);
43 | end


--------------------------------------------------------------------------------
/functions/fast_rcnn/fast_rcnn_get_minibatch.m:
--------------------------------------------------------------------------------
  1 | function [im_blob, rois_blob, labels_blob, bbox_targets_blob, bbox_loss_blob] = fast_rcnn_get_minibatch(conf, image_roidb)
  2 | % [im_blob, rois_blob, labels_blob, bbox_targets_blob, bbox_loss_blob] ...
  3 | %    = fast_rcnn_get_minibatch(conf, image_roidb)
  4 | % --------------------------------------------------------
  5 | % Fast R-CNN
  6 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
  7 | % Copyright (c) 2015, Shaoqing Ren
  8 | % Licensed under The MIT License [see LICENSE for details]
  9 | % --------------------------------------------------------
 10 | % Assembles one training minibatch (image, roi, label, bbox-target and bbox-loss blobs) from the entries of image_roidb.
 11 |     num_images = length(image_roidb);
 12 |     % Infer number of classes from the number of columns in gt_overlaps
 13 |     num_classes = size(image_roidb(1).overlap, 2);
 14 |     % Sample random scales to use for each image in this batch
 15 |     random_scale_inds = randi(length(conf.scales), num_images, 1);
 16 |     
 17 |     assert(mod(conf.batch_size, num_images) == 0, ...
 18 |         sprintf('num_images %d must divide BATCH_SIZE %d', num_images, conf.batch_size));
 19 |     
 20 |     rois_per_image = conf.batch_size / num_images;
 21 |     fg_rois_per_image = round(rois_per_image * conf.fg_fraction);
 22 |     
 23 |     % Get the input image blob
 24 |     [im_blob, im_scales] = get_image_blob(conf, image_roidb, random_scale_inds);
 25 |     
 26 |     % build the region of interest and label blobs (start empty, one block of rows is appended per image)
 27 |     rois_blob = zeros(0, 5, 'single');
 28 |     labels_blob = zeros(0, 1, 'single');
 29 |     bbox_targets_blob = zeros(0, 4 * (num_classes+1), 'single'); % 4 columns per class plus one extra group (presumably background - confirm in sample_rois)
 30 |     bbox_loss_blob = zeros(size(bbox_targets_blob), 'single');
 31 |     
 32 |     for i = 1:num_images
 33 |         [labels, ~, im_rois, bbox_targets, bbox_loss] = ...
 34 |             sample_rois(conf, image_roidb(i), fg_rois_per_image, rois_per_image);
 35 |         
 36 |         % Add to ROIs blob
 37 |         feat_rois = fast_rcnn_map_im_rois_to_feat_rois(conf, im_rois, im_scales(i));
 38 |         batch_ind = i * ones(size(feat_rois, 1), 1); % 1-based image index; becomes 0-based with the rest of rois_blob below
 39 |         rois_blob_this_image = [batch_ind, feat_rois];
 40 |         rois_blob = [rois_blob; rois_blob_this_image];
 41 |         
 42 |         % Add to labels, bbox targets, and bbox loss blobs
 43 |         labels_blob = [labels_blob; labels];
 44 |         bbox_targets_blob = [bbox_targets_blob; bbox_targets];
 45 |         bbox_loss_blob = [bbox_loss_blob; bbox_loss];
 46 |     end
 47 |     
 48 |     % permute data into caffe c++ memory, thus [num, channels, height, width]
 49 |     im_blob = im_blob(:, :, [3, 2, 1], :); % from rgb to bgr (channel order reversed)
 50 |     im_blob = single(permute(im_blob, [2, 1, 3, 4])); % swap the first two dimensions
 51 |     rois_blob = rois_blob - 1; % to c's index (start from 0)
 52 |     rois_blob = single(permute(rois_blob, [3, 4, 2, 1])); % rows x cols -> 1 x 1 x cols x rows, same for the three blobs below
 53 |     labels_blob = single(permute(labels_blob, [3, 4, 2, 1]));
 54 |     bbox_targets_blob = single(permute(bbox_targets_blob, [3, 4, 2, 1])); 
 55 |     bbox_loss_blob = single(permute(bbox_loss_blob, [3, 4, 2, 1]));
 56 |     
 57 |     assert(~isempty(im_blob));
 58 |     assert(~isempty(rois_blob));
 59 |     assert(~isempty(labels_blob));
 60 |     assert(~isempty(bbox_targets_blob));
 61 |     assert(~isempty(bbox_loss_blob));
 62 | end
 63 | 
 64 | %% Build an input blob from the images in the roidb at the specified scales.
 65 | function [im_blob, im_scales] = get_image_blob(conf, images, random_scale_inds)
 66 |     
 67 |     num_images = length(images);
 68 |     processed_ims = cell(num_images, 1);
 69 |     im_scales = nan(num_images, 1);
 70 |     for i = 1:num_images
 71 |         im = imread(images(i).image_path);
 72 |         target_size = conf.scales(random_scale_inds(i));
 73 |         
 74 |         [im, im_scale] = prep_im_for_blob(im, conf.image_means, target_size, conf.max_size);
 75 |         
 76 |         im_scales(i) = im_scale;
 77 |         processed_ims{i} = im; 
 78 |     end
 79 |     
 80 |     im_blob = im_list_to_blob(processed_ims);
 81 | end
 82 | 
 83 | %% Generate a random sample of ROIs comprising foreground and background examples.
 84 | function [labels, overlaps, rois, bbox_targets, bbox_loss_weights] = ...
 85 |     sample_rois(conf, image_roidb, fg_rois_per_image, rois_per_image)
 86 | 
 87 |     [overlaps, labels] = max(image_roidb(1).overlap, [], 2);
 88 | %     labels = image_roidb(1).max_classes;
 89 | %     overlaps = image_roidb(1).max_overlaps;
 90 |     rois = image_roidb(1).boxes;
 91 |     
 92 |     % Select foreground ROIs as those with >= FG_THRESH overlap
 93 |     fg_inds = find(overlaps >= conf.fg_thresh);
 94 |     % Guard against the case when an image has fewer than fg_rois_per_image
 95 |     % foreground ROIs
 96 |     fg_rois_per_this_image = min(fg_rois_per_image, length(fg_inds));
 97 |     % Sample foreground regions without replacement
 98 |     if ~isempty(fg_inds)
 99 |        fg_inds = fg_inds(randperm(length(fg_inds), fg_rois_per_this_image));
100 |     end
101 |     
102 |     % Select background ROIs as those within [BG_THRESH_LO, BG_THRESH_HI)
103 |     bg_inds = find(overlaps < conf.bg_thresh_hi & overlaps >= conf.bg_thresh_lo);
104 |     % Compute number of background ROIs to take from this image (guarding
105 |     % against there being fewer than desired)
106 |     bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image;
107 |     bg_rois_per_this_image = min(bg_rois_per_this_image, length(bg_inds));
108 |     % Sample foreground regions without replacement
109 |     if ~isempty(bg_inds)
110 |        bg_inds = bg_inds(randperm(length(bg_inds), bg_rois_per_this_image));
111 |     end
112 |     % The indices that we're selecting (both fg and bg)
113 |     keep_inds = [fg_inds; bg_inds];
114 |     % Select sampled values from various arrays
115 |     labels = labels(keep_inds);
116 |     % Clamp labels for the background ROIs to 0
117 |     labels((fg_rois_per_this_image+1):end) = 0;
118 |     overlaps = overlaps(keep_inds);
119 |     rois = rois(keep_inds, :);
120 |     
121 |     assert(all(labels == image_roidb.bbox_targets(keep_inds, 1)));
122 |     
123 |     % Infer number of classes from the number of columns in gt_overlaps
124 |     num_classes = size(image_roidb(1).overlap, 2);
125 |     
126 |     [bbox_targets, bbox_loss_weights] = get_bbox_regression_labels(conf, ...
127 |         image_roidb.bbox_targets(keep_inds, :), num_classes);
128 |     
129 | end
130 | 
function [bbox_targets, bbox_loss_weights] = get_bbox_regression_labels(conf, bbox_target_data, num_classes)
%% Expand compact regression targets into the per-class layout used by the network.
%  bbox_target_data holds one row per ROI: column 1 is the class label,
%  the remaining columns are the regression targets for that ROI.
%  Outputs (both single, N x 4*(num_classes+1)):
%    bbox_targets      - the targets of ROI i copied into the 4 columns that
%                        belong to its class (columns 4*cls+1 .. 4*cls+4);
%                        all other entries are zero
%    bbox_loss_weights - 1 in exactly those 4 columns, zero elsewhere
%  Rows whose label is not positive stay all-zero in both outputs.
%  conf is accepted for signature compatibility and is not read here.
    class_ids = bbox_target_data(:, 1);
    num_rois = length(class_ids);
    num_cols = 4 * (num_classes + 1);

    bbox_targets = zeros(num_rois, num_cols, 'single');
    bbox_loss_weights = zeros(num_rois, num_cols, 'single');

    for row = find(class_ids > 0)'
        c = class_ids(row);
        cols = (4 * c + 1):(4 * c + 4);
        bbox_targets(row, cols) = bbox_target_data(row, 2:end);
        bbox_loss_weights(row, cols) = 1;
    end
end
150 | 
151 | 
152 | 


--------------------------------------------------------------------------------
/functions/fast_rcnn/fast_rcnn_im_detect.m:
--------------------------------------------------------------------------------
  1 | function [pred_boxes, scores] = fast_rcnn_im_detect(conf, caffe_net, im, boxes, max_rois_num_in_gpu)
  2 | % [pred_boxes, scores] = fast_rcnn_im_detect(conf, caffe_net, im, boxes, max_rois_num_in_gpu)
  3 | % --------------------------------------------------------
  4 | % Fast R-CNN
  5 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
  6 | % Copyright (c) 2015, Shaoqing Ren
  7 | % Licensed under The MIT License [see LICENSE for details]
  8 | % --------------------------------------------------------
  9 | 
 10 |     [im_blob, rois_blob, ~] = get_blobs(conf, im, boxes);
 11 |     
 12 |     % When mapping from image ROIs to feature map ROIs, there's some aliasing
 13 |     % (some distinct image ROIs get mapped to the same feature ROI).
 14 |     % Here, we identify duplicate feature ROIs, so we only compute features
 15 |     % on the unique subset.
 16 |     [~, index, inv_index] = unique(rois_blob, 'rows');
 17 |     rois_blob = rois_blob(index, :);
 18 |     boxes = boxes(index, :);
 19 |     
 20 |     % permute data into caffe c++ memory, thus [num, channels, height, width]
 21 |     im_blob = im_blob(:, :, [3, 2, 1], :); % from rgb to brg
 22 |     im_blob = permute(im_blob, [2, 1, 3, 4]);
 23 |     im_blob = single(im_blob);
 24 |     rois_blob = rois_blob - 1; % to c's index (start from 0)
 25 |     rois_blob = permute(rois_blob, [3, 4, 2, 1]);
 26 |     rois_blob = single(rois_blob);
 27 |     
 28 |     total_rois = size(rois_blob, 4);
 29 |     total_scores = cell(ceil(total_rois / max_rois_num_in_gpu), 1);
 30 |     total_box_deltas = cell(ceil(total_rois / max_rois_num_in_gpu), 1);
 31 |     for i = 1:ceil(total_rois / max_rois_num_in_gpu)
 32 |         
 33 |         sub_ind_start = 1 + (i-1) * max_rois_num_in_gpu;
 34 |         sub_ind_end = min(total_rois, i * max_rois_num_in_gpu);
 35 |         sub_rois_blob = rois_blob(:, :, :, sub_ind_start:sub_ind_end);
 36 |         
 37 |         net_inputs = {im_blob, sub_rois_blob};
 38 | 
 39 |         % Reshape net's input blobs
 40 |         caffe_net.reshape_as_input(net_inputs);
 41 |         output_blobs = caffe_net.forward(net_inputs);
 42 | 
 43 |         if conf.test_binary
 44 |             % simulate binary logistic regression
 45 |             scores = caffe_net.blobs('cls_score').get_data();
 46 |             scores = squeeze(scores)';
 47 |             % Return scores as fg - bg
 48 |             scores = bsxfun(@minus, scores, scores(:, 1));
 49 |         else
 50 |             % use softmax estimated probabilities
 51 |             scores = output_blobs{2};
 52 |             scores = squeeze(scores)';
 53 |         end
 54 | 
 55 |         % Apply bounding-box regression deltas
 56 |         box_deltas = output_blobs{1};
 57 |         box_deltas = squeeze(box_deltas)';
 58 |         
 59 |         total_scores{i} = scores;
 60 |         total_box_deltas{i} = box_deltas;
 61 |     end 
 62 |     
 63 |     scores = cell2mat(total_scores);
 64 |     box_deltas = cell2mat(total_box_deltas);
 65 |     
 66 |     pred_boxes = fast_rcnn_bbox_transform_inv(boxes, box_deltas);
 67 |     pred_boxes = clip_boxes(pred_boxes, size(im, 2), size(im, 1));
 68 | 
 69 |     % Map scores and predictions back to the original set of boxes
 70 |     scores = scores(inv_index, :);
 71 |     pred_boxes = pred_boxes(inv_index, :);
 72 |     
 73 |     % remove scores and boxes for back-ground
 74 |     pred_boxes = pred_boxes(:, 5:end);
 75 |     scores = scores(:, 2:end);
 76 | end
 77 | 
 78 | function [data_blob, rois_blob, im_scale_factors] = get_blobs(conf, im, rois)
 79 |     [data_blob, im_scale_factors] = get_image_blob(conf, im);
 80 |     rois_blob = get_rois_blob(conf, rois, im_scale_factors);
 81 | end
 82 | 
 83 | function [blob, im_scales] = get_image_blob(conf, im)
 84 |     [ims, im_scales] = arrayfun(@(x) prep_im_for_blob(im, conf.image_means, x, conf.test_max_size), conf.test_scales, 'UniformOutput', false);
 85 |     im_scales = cell2mat(im_scales);
 86 |     blob = im_list_to_blob(ims);    
 87 | end
 88 | 
 89 | function [rois_blob] = get_rois_blob(conf, im_rois, im_scale_factors)
 90 |     [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, im_scale_factors);
 91 |     rois_blob = single([levels, feat_rois]);
 92 | end
 93 | 
 94 | function [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, scales)
 95 |     im_rois = single(im_rois);
 96 |     
 97 |     if length(scales) > 1
 98 |         widths = im_rois(:, 3) - im_rois(:, 1) + 1;
 99 |         heights = im_rois(:, 4) - im_rois(:, 2) + 1;
100 |         
101 |         areas = widths .* heights;
102 |         scaled_areas = bsxfun(@times, areas(:), scales(:)'.^2);
103 |         [~, levels] = min(abs(scaled_areas - 224.^2), [], 2); 
104 |     else
105 |         levels = ones(size(im_rois, 1), 1);
106 |     end
107 |     
108 |     feat_rois = round(bsxfun(@times, im_rois-1, scales(levels))) + 1;
109 | end
110 | 
function boxes = clip_boxes(boxes, im_width, im_height)
% Clamp every box coordinate into the image.
% Columns repeat in groups of four as [x1 y1 x2 y2]; x values are limited
% to [1, im_width] and y values to [1, im_height].
    upper_limit = {im_width, im_height, im_width, im_height};
    for k = 1:4
        cols = k:4:size(boxes, 2);
        boxes(:, cols) = max(min(boxes(:, cols), upper_limit{k}), 1);
    end
end
121 |     


--------------------------------------------------------------------------------
/functions/fast_rcnn/fast_rcnn_map_im_rois_to_feat_rois.m:
--------------------------------------------------------------------------------
 1 | function [feat_rois] = fast_rcnn_map_im_rois_to_feat_rois(conf, im_rois, im_scale_factor)
 2 | % [feat_rois] = fast_rcnn_map_im_rois_to_feat_rois(conf, im_rois, im_scale_factor)
 3 | % --------------------------------------------------------
 4 | % Fast R-CNN
 5 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
 6 | % Copyright (c) 2015, Shaoqing Ren
 7 | % Licensed under The MIT License [see LICENSE for details]
 8 | % --------------------------------------------------------
 9 | 
10 | %% Map a ROI in image-pixel coordinates to a ROI in feature coordinates.
11 | % in matlab's index (start from 1)
12 | 
13 |     feat_rois = round((im_rois-1) * im_scale_factor) + 1;
14 |     
15 |     %feat_rois = round((im_rois-1) * im_scale_factor / single(conf.feat_stride)) + 1;
16 | 
17 | end


--------------------------------------------------------------------------------
/functions/fast_rcnn/fast_rcnn_prepare_image_roidb.m:
--------------------------------------------------------------------------------
  1 | function [image_roidb, bbox_means, bbox_stds] = fast_rcnn_prepare_image_roidb(conf, imdbs, roidbs, bbox_means, bbox_stds)
  2 | % [image_roidb, bbox_means, bbox_stds] = fast_rcnn_prepare_image_roidb(conf, imdbs, roidbs, cache_img, bbox_means, bbox_stds)
  3 | %   Gather useful information from imdb and roidb
  4 | %   pre-calculate mean (bbox_means) and std (bbox_stds) of the regression
  5 | %   term for normalization
  6 | % --------------------------------------------------------
  7 | % Fast R-CNN
  8 | % Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
  9 | % Copyright (c) 2015, Shaoqing Ren
 10 | % Licensed under The MIT License [see LICENSE for details]
 11 | % -------------------------------------------------------- 
 12 |     
 13 |     if ~exist('bbox_means', 'var')
 14 |         bbox_means = [];
 15 |         bbox_stds = [];
 16 |     end
 17 |     
 18 |     if ~iscell(imdbs)
 19 |         imdbs = {imdbs};
 20 |         roidbs = {roidbs};
 21 |     end
 22 | 
 23 |     imdbs = imdbs(:);
 24 |     roidbs = roidbs(:);
 25 |     
 26 |     image_roidb = ...
 27 |         cellfun(@(x, y) ... // @(imdbs, roidbs)
 28 |                 arrayfun(@(z) ... //@([1:length(x.image_ids)])
 29 |                         struct('image_path', x.image_at(z), 'image_id', x.image_ids{z}, 'im_size', x.sizes(z, :), 'imdb_name', x.name, ...
 30 |                         'overlap', y.rois(z).overlap, 'boxes', y.rois(z).boxes, 'class', y.rois(z).class, 'image', [], 'bbox_targets', []), ...
 31 |                 [1:length(x.image_ids)]', 'UniformOutput', true),...
 32 |         imdbs, roidbs, 'UniformOutput', false);
 33 |     
 34 |     image_roidb = cat(1, image_roidb{:});
 35 |     
 36 |     % enhance roidb to contain bounding-box regression targets
 37 |     [image_roidb, bbox_means, bbox_stds] = append_bbox_regression_targets(conf, image_roidb, bbox_means, bbox_stds);
 38 | end
 39 | 
 40 | function [image_roidb, means, stds] = append_bbox_regression_targets(conf, image_roidb, means, stds)
 41 |     % means and stds -- (k+1) * 4, include background class
 42 | 
 43 |     num_images = length(image_roidb);
 44 |     % Infer number of classes from the number of columns in gt_overlaps
 45 |     num_classes = size(image_roidb(1).overlap, 2);
 46 |     valid_imgs = true(num_images, 1);
 47 |     for i = 1:num_images
 48 |        rois = image_roidb(i).boxes; 
 49 |        [image_roidb(i).bbox_targets, valid_imgs(i)] = ...
 50 |            compute_targets(conf, rois, image_roidb(i).overlap);
 51 |     end
 52 |     if ~all(valid_imgs)
 53 |         image_roidb = image_roidb(valid_imgs);
 54 |         num_images = length(image_roidb);
 55 |         fprintf('Warning: fast_rcnn_prepare_image_roidb: filter out %d images, which contains zero valid samples\n', sum(~valid_imgs));
 56 |     end
 57 |         
 58 |     if ~(exist('means', 'var') && ~isempty(means) && exist('stds', 'var') && ~isempty(stds))
 59 |         % Compute values needed for means and stds
 60 |         % var(x) = E(x^2) - E(x)^2
 61 |         class_counts = zeros(num_classes, 1) + eps;
 62 |         sums = zeros(num_classes, 4);
 63 |         squared_sums = zeros(num_classes, 4);
 64 |         for i = 1:num_images
 65 |            targets = image_roidb(i).bbox_targets;
 66 |            for cls = 1:num_classes
 67 |               cls_inds = find(targets(:, 1) == cls);
 68 |               if ~isempty(cls_inds)
 69 |                  class_counts(cls) = class_counts(cls) + length(cls_inds); 
 70 |                  sums(cls, :) = sums(cls, :) + sum(targets(cls_inds, 2:end), 1);
 71 |                  squared_sums(cls, :) = squared_sums(cls, :) + sum(targets(cls_inds, 2:end).^2, 1);
 72 |               end
 73 |            end
 74 |         end
 75 | 
 76 |         means = bsxfun(@rdivide, sums, class_counts);
 77 |         stds = (bsxfun(@minus, bsxfun(@rdivide, squared_sums, class_counts), means.^2)).^0.5;
 78 |         
 79 |         % add background class
 80 |         means = [0, 0, 0, 0; means]; 
 81 |         stds = [0, 0, 0, 0; stds];
 82 |     end
 83 |     
 84 |     % Normalize targets
 85 |     for i = 1:num_images
 86 |         targets = image_roidb(i).bbox_targets;
 87 |         for cls = 1:num_classes
 88 |             cls_inds = find(targets(:, 1) == cls);
 89 |             if ~isempty(cls_inds)
 90 |                 image_roidb(i).bbox_targets(cls_inds, 2:end) = ...
 91 |                     bsxfun(@minus, image_roidb(i).bbox_targets(cls_inds, 2:end), means(cls+1, :));
 92 |                 image_roidb(i).bbox_targets(cls_inds, 2:end) = ...
 93 |                     bsxfun(@rdivide, image_roidb(i).bbox_targets(cls_inds, 2:end), stds(cls+1, :));
 94 |             end
 95 |         end
 96 |     end
 97 | end
 98 | 
 99 | 
function [bbox_targets, is_valid] = compute_targets(conf, rois, overlap)
% Build the compact regression targets of one image.
%   conf    - reads bbox_thresh, fg_thresh, bg_thresh_hi and bg_thresh_lo
%   rois    - N x 4 boxes
%   overlap - N x (number of classes) overlap matrix (may be sparse)
% Returns
%   bbox_targets - N x 5 single: [label, 4 regression values]; rows stay
%                  zero for ROIs below bbox_thresh or when the image has no
%                  ROI with a best overlap of exactly 1
%   is_valid     - false when no ROI qualifies as foreground or background

    % Best overlap of each ROI and the class column where it occurs
    [max_overlaps, max_labels] = max(full(overlap), [], 2);

    % work in single precision
    rois = single(rois);
    bbox_targets = zeros(size(rois, 1), 5, 'single');

    % ROIs with a best overlap of exactly 1 are treated as ground truth
    gt_inds = find(max_overlaps == 1);

    if ~isempty(gt_inds)
        % Examples that receive a regression target
        ex_inds = find(max_overlaps >= conf.bbox_thresh);
        ex_rois = rois(ex_inds, :);

        % Overlap of every example with every ground-truth ROI; the best
        % match per example becomes its regression target box
        ex_gt_overlaps = boxoverlap(ex_rois, rois(gt_inds, :));
        [best_overlap, gt_assignment] = max(ex_gt_overlaps, [], 2);

        % Recomputed overlaps must agree with the stored ones
        assert(all(abs(best_overlap - max_overlaps(ex_inds)) < 10^-4));

        gt_rois = rois(gt_inds(gt_assignment), :);
        bbox_targets(ex_inds, :) = ...
            [max_labels(ex_inds), fast_rcnn_bbox_transform(ex_rois, gt_rois)];
    end

    % Foreground: best overlap >= fg_thresh
    in_fg = max_overlaps >= conf.fg_thresh;
    % Background: best overlap inside [bg_thresh_lo, bg_thresh_hi)
    in_bg = max_overlaps >= conf.bg_thresh_lo & max_overlaps < conf.bg_thresh_hi;

    % An image without a single fg or bg sample is reported as invalid
    is_valid = true;
    if ~any(in_fg | in_bg)
        is_valid = false;
    end
end


--------------------------------------------------------------------------------
/functions/nms/nms.m:
--------------------------------------------------------------------------------
 1 | function pick = nms(boxes, overlap, use_gpu)
 2 | % top = nms(boxes, overlap)
 3 | % Non-maximum suppression. (FAST VERSION)
 4 | % Greedily select high-scoring detections and skip detections
 5 | % that are significantly covered by a previously selected
 6 | % detection.
 7 | %
 8 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m),
 9 | % but an inner loop has been eliminated to significantly speed it
10 | % up in the case of a large number of boxes
11 | 
12 | % Copyright (C) 2011-12 by Tomasz Malisiewicz
13 | % All rights reserved.
14 | % 
15 | % This file is part of the Exemplar-SVM library and is made
16 | % available under the terms of the MIT license (see COPYING file).
17 | % Project homepage: https://github.com/quantombone/exemplarsvm
18 | 
19 | 
20 | if isempty(boxes)
21 |   pick = [];
22 |   return;
23 | end
24 | 
25 | if ~exist('use_gpu', 'var')
26 |     use_gpu = false;
27 | end
28 | 
29 | if use_gpu
30 |     s = boxes(:, end);
31 |     if ~issorted(s(end:-1:1))
32 |         [~, I] = sort(s, 'descend');
33 |         boxes = boxes(I, :);
34 |         pick = nms_gpu_mex(single(boxes)', double(overlap));
35 |         pick = I(pick);
36 |     else
37 |         pick = nms_gpu_mex(single(boxes)', double(overlap));
38 |     end
39 |     return;
40 | end
41 |     
42 | if size(boxes, 1) < 1000000
43 |     pick = nms_mex(double(boxes), double(overlap));
44 |     return;
45 | end
46 | 
47 | x1 = boxes(:,1);
48 | y1 = boxes(:,2);
49 | x2 = boxes(:,3);
50 | y2 = boxes(:,4);
51 | s = boxes(:,end);
52 | 
53 | area = (x2-x1+1) .* (y2-y1+1);
54 | [vals, I] = sort(s);
55 | 
56 | pick = s*0;
57 | counter = 1;
58 | while ~isempty(I)
59 |   last = length(I);
60 |   i = I(last);  
61 |   pick(counter) = i;
62 |   counter = counter + 1;
63 |   
64 |   xx1 = max(x1(i), x1(I(1:last-1)));
65 |   yy1 = max(y1(i), y1(I(1:last-1)));
66 |   xx2 = min(x2(i), x2(I(1:last-1)));
67 |   yy2 = min(y2(i), y2(I(1:last-1)));
68 |   
69 |   w = max(0.0, xx2-xx1+1);
70 |   h = max(0.0, yy2-yy1+1);
71 |   
72 |   inter = w.*h;
73 |   o = inter ./ (area(i) + area(I(1:last-1)) - inter);
74 |   
75 |   I = I(find(o<=overlap));
76 | end
77 | 
78 | pick = pick(1:(counter-1));
79 | 


--------------------------------------------------------------------------------
/functions/nms/nms_gpu_mex.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 | * Example of how to use the mxGPUArray API in a MEX file.  This example shows
  3 | * how to write a MEX function that takes a gpuArray input and returns a
  4 | * gpuArray output, e.g. B=mexFunction(A).
  5 | *
  6 | * Copyright 2012 The MathWorks, Inc.
  7 | */
  8 | 
  9 | #include "mex.h"
 10 | #include <vector>
 11 | #include <iostream>
 12 | 
 13 | #define DIVUP(m,n)		((m)/(n)+((m)%(n)>0))
 14 | int const threadsPerBlock = (sizeof(unsigned long long) * 8);
 15 | 
 16 | /*
 17 | * Device code
 18 | */
 19 | __device__ inline float devIoU(float const * const a, float const * const b)
 20 | {
 21 | 	float left = max(a[0], b[0]), right = min(a[2], b[2]);
 22 | 	float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 23 | 	float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 24 | 	float interS = width * height;
 25 | 	float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 26 | 	float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 27 | 	return interS / (Sa + Sb - interS);
 28 | }
 29 | 
 30 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thres, const float *dev_boxes, unsigned long long *dev_mask)
 31 | {
 32 | 	const int row_start = blockIdx.y, col_start = blockIdx.x;
 33 | 	const int row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock), col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 34 | 
 35 | 	//if (row_start > col_start) return;
 36 | 
 37 | 	__shared__ float block_boxes[threadsPerBlock * 5];
 38 | 	if (threadIdx.x < col_size)
 39 | 	{
 40 | 		block_boxes[threadIdx.x * 5 + 0] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 41 | 		block_boxes[threadIdx.x * 5 + 1] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 42 | 		block_boxes[threadIdx.x * 5 + 2] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 43 | 		block_boxes[threadIdx.x * 5 + 3] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 44 | 		block_boxes[threadIdx.x * 5 + 4] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 45 | 	}
 46 | 	__syncthreads();
 47 | 
 48 | 	if (threadIdx.x < row_size)
 49 | 	{
 50 | 		const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 51 | 		const float *cur_box = dev_boxes + cur_box_idx * 5;
 52 | 		int i = 0;
 53 | 		unsigned long long t = 0;
 54 | 		int start = 0;
 55 | 		if (row_start == col_start) start = threadIdx.x + 1;
 56 | 		for (i = start; i < col_size; i++)
 57 | 		{
 58 | 			if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thres)
 59 | 			{
 60 | 				t |= 1ULL << i;
 61 | 			}
 62 | 		}
 63 | 		const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 64 | 		dev_mask[cur_box_idx * col_blocks + col_start] = t;
 65 | 	}
 66 | }
 67 | 
 68 | /*
 69 | * Host code
 70 | */
 71 | void mexFunction(int nlhs, mxArray *plhs[],
 72 | 	int nrhs, const mxArray *prhs[])
 73 | {
 74 | 	
 75 | 	/* Declare all variables.*/
 76 | 	mxArray const *boxes, *ov_thres;
 77 | 	float *boxes_host = NULL;
 78 | 	float *boxes_dev = NULL;
 79 | 	unsigned long long *mask_dev = NULL;
 80 | 
 81 | 	/* Throw an error if the input is not a array. */
 82 | 	if (nrhs != 2) {
 83 | 		mexErrMsgTxt("nms_gpu_mex::need 2 inputs");
 84 | 	}
 85 | 
 86 | 	boxes = prhs[0];
 87 | 	if (mxGetClassID(boxes) != mxSINGLE_CLASS) {
 88 | 		mexErrMsgTxt("nms_gpu_mex::input boxes must be single");
 89 | 	}
 90 | 
 91 | 	ov_thres = prhs[1];
 92 | 	if (mxGetClassID(ov_thres) != mxDOUBLE_CLASS) {
 93 | 		mexErrMsgTxt("nms_gpu_mex::input boxes must be double");
 94 | 	}
 95 | 
 96 | 	float nms_overlap_thres = (float)mxGetScalar(ov_thres);
 97 | 
 98 | 	int boxes_dim = mxGetM(boxes);
 99 | 	int boxes_num = mxGetN(boxes);
100 | 	if (boxes_dim != 5)
101 | 	{
102 | 		mexErrMsgTxt("nms_gpu_mex::input boxes's row must be 5");
103 | 	}
104 | 
105 | 	boxes_host = (float *)(mxGetPr(boxes));
106 | 	const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
107 | 
108 | 	cudaMalloc(&boxes_dev, mxGetNumberOfElements(boxes) * sizeof(float));
109 | 	cudaMemcpy(boxes_dev, boxes_host, mxGetNumberOfElements(boxes) * sizeof(float), cudaMemcpyHostToDevice);
110 | 
111 | 	/* Create a GPUArray to hold the result and get its underlying pointer. */
112 | 	cudaMalloc(&mask_dev, boxes_num * col_blocks * sizeof(unsigned long long));
113 | 	
114 | 
115 | 	/*
116 | 	* Call the kernel using the CUDA runtime API. We are using a 1-d grid here,
117 | 	* and it would be possible for the number of elements to be too large for
118 | 	* the grid. For this example we are not guarding against this possibility.
119 | 	*/
120 | 
121 | 	dim3 blocks(DIVUP(boxes_num, threadsPerBlock), DIVUP(boxes_num, threadsPerBlock));
122 | 	dim3 threads(threadsPerBlock);
123 | 	nms_kernel << <blocks, threads >> >(boxes_num, nms_overlap_thres, boxes_dev, mask_dev);
124 | 
125 | 	std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
126 | 	cudaMemcpy(&mask_host[0], mask_dev, sizeof(unsigned long long) * boxes_num * col_blocks, cudaMemcpyDeviceToHost);
127 | 
128 | 	std::vector<unsigned long long> remv(col_blocks);
129 | 	memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
130 | 
131 | 	std::vector<int> keep;
132 | 	keep.reserve(boxes_num);
133 | 	for (int i = 0; i < boxes_num; i++)
134 | 	{
135 | 		int nblock = i / threadsPerBlock;
136 | 		int inblock = i % threadsPerBlock;
137 | 
138 | 		if (!(remv[nblock] & (1ULL << inblock)))
139 | 		{
140 | 			keep.push_back(i + 1);  // to matlab's index
141 | 
142 | 			unsigned long long *p = &mask_host[0] + i * col_blocks;
143 | 			for (int j = nblock; j < col_blocks; j++)
144 | 			{
145 | 				remv[j] |= p[j];
146 | 			}
147 | 		}
148 | 	}
149 | 
150 | 	/* Wrap the result up as a MATLAB cpuArray for return. */
151 | 	mwSize dims[4] = { (int)keep.size(), 1, 1, 1 };
152 | 	plhs[0] = mxCreateNumericArray(4, dims, mxINT32_CLASS, mxREAL);
153 | 
154 | 	int *output = (int *)(mxGetPr(plhs[0]));
155 | 	memcpy(output, &keep[0], (int)keep.size() * sizeof(int));
156 | 
157 | 
158 | 	cudaFree(boxes_dev);
159 | 	cudaFree(mask_dev);
160 | }
161 | 


--------------------------------------------------------------------------------
/functions/nms/nms_mex.cpp:
--------------------------------------------------------------------------------
  1 | #include "mex.h"
  2 | #ifdef _MSC_VER
  3 | #include <windows.h>
  4 | #include <tchar.h>
  5 | #endif
  6 | #include <vector>
  7 | #include <map>
  8 | using namespace std;
  9 | 
 10 | struct score {
 11 | 	double s;
 12 | 	int idx;
 13 | 	bool operator() (score i, score j) { return (i.idx < j.idx);}
 14 | } score;
 15 | 
 16 | template <typename T>
 17 | void nms(const mxArray *input_boxes, double overlap, vector<int> &vPick, int &nPick)
 18 | {
 19 | 	int nSample = (int)mxGetM(input_boxes);
 20 | 	int nDim_boxes = (int)mxGetN(input_boxes);
 21 | 
 22 |     T *pBoxes = (T*)mxGetData(input_boxes);
 23 | 
 24 | 	vector<double> vArea(nSample);
 25 | 	for (int i = 0; i < nSample; ++i)
 26 | 	{
 27 | 		vArea[i] = double(pBoxes[2*nSample + i] - pBoxes[0*nSample + i] + 1) 
 28 | 		* (pBoxes[3*nSample + i] - pBoxes[1*nSample + i] + 1);
 29 | 		if (vArea[i] < 0)
 30 | 			mexErrMsgTxt("Boxes area must >= 0");
 31 | 	}
 32 | 
 33 | 	std::multimap<T, int> scores;
 34 | 	for (int i = 0; i < nSample; ++i)
 35 | 		scores.insert(std::pair<T,int>(pBoxes[4*nSample + i], i));
 36 | 
 37 | 	nPick = 0;
 38 | 
 39 | 	do 
 40 | 	{
 41 | 		int last = scores.rbegin()->second;
 42 | 		vPick[nPick] = last;
 43 | 		nPick += 1;
 44 | 
 45 | 		for (typename std::multimap<T, int>::iterator it = scores.begin(); it != scores.end();)
 46 | 		{
 47 | 			int it_idx = it->second;
 48 | 			T xx1 = max(pBoxes[0*nSample + last], pBoxes[0*nSample + it_idx]);
 49 | 			T yy1 = max(pBoxes[1*nSample + last], pBoxes[1*nSample + it_idx]);
 50 | 			T xx2 = min(pBoxes[2*nSample + last], pBoxes[2*nSample + it_idx]);
 51 | 			T yy2 = min(pBoxes[3*nSample + last], pBoxes[3*nSample + it_idx]);
 52 | 
 53 | 			double w = max(T(0.0), xx2-xx1+1), h = max(T(0.0), yy2-yy1+1);
 54 | 
 55 | 			double ov = w*h / (vArea[last] + vArea[it_idx] - w*h);
 56 | 
 57 | 			if (ov > overlap)
 58 | 			{
 59 | 				it = scores.erase(it);
 60 | 			}
 61 | 			else
 62 | 			{
 63 | 				it++;
 64 | 			}
 65 | 		}
 66 | 
 67 | 	} while (scores.size() != 0);
 68 | }
 69 | 
 70 | 
 71 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
 72 | {
 73 | 	if (nrhs != 2)
 74 | 		mexErrMsgTxt("Wrong number of inputs"); 
 75 | 	if (nlhs != 1)
 76 | 		mexErrMsgTxt("One output");
 77 | 
 78 | 	const mxArray *input_boxes = prhs[0];
 79 | 	if (mxGetClassID(input_boxes) != mxDOUBLE_CLASS && mxGetClassID(input_boxes) != mxSINGLE_CLASS)
 80 | 		mexErrMsgTxt("Input boxes must be Double or Single");
 81 | 
 82 | 	const mxArray *input_overlap = prhs[1];
 83 | 	if (mxGetClassID(input_overlap) != mxDOUBLE_CLASS )
 84 | 		mexErrMsgTxt("Input overlap must be Double");
 85 | 
 86 | 	double overlap = mxGetScalar(input_overlap);
 87 | 
 88 | 	int nSample = (int)mxGetM(input_boxes);
 89 | 	int nDim_boxes = (int)mxGetN(input_boxes);
 90 | 
 91 | 	if (nSample * nDim_boxes == 0)
 92 | 	{
 93 | 		plhs[0] = mxCreateNumericMatrix(0, 0, mxDOUBLE_CLASS, mxREAL);
 94 | 		return;
 95 | 	}
 96 | 
 97 | 	if (nDim_boxes != 5)
 98 | 		mexErrMsgTxt("nms_mex boxes must has 5 columns");
 99 | 
100 | 	
101 | 	int nPick = 0;
102 | 	vector<int> vPick(nSample);
103 | 	if(mxGetClassID(input_boxes) == mxDOUBLE_CLASS)
104 | 		nms<double>(input_boxes, overlap, vPick, nPick);
105 | 	else
106 | 		nms<float>(input_boxes, overlap, vPick, nPick);
107 | 
108 | 	plhs[0] = mxCreateNumericMatrix(nPick, 1, mxDOUBLE_CLASS, mxREAL);
109 | 	double *pRst = mxGetPr(plhs[0]);
110 | 	for (int i = 0; i < nPick; ++i)
111 | 		pRst[i] = vPick[i] + 1;
112 | }
113 | 


--------------------------------------------------------------------------------
/functions/nms/nms_multiclass.m:
--------------------------------------------------------------------------------
 1 | function picks = nms_multiclass(boxes, overlap)
 2 | % picks = nms_multiclass(boxes, overlap)
 3 | % Multi-class non-maximum suppression. (FAST VERSION)
 4 | % For every score column, greedily keep high-scoring boxes and skip boxes
 5 | % that are significantly covered by a previously kept one.
 6 | %
 7 | % boxes   : rows are boxes; columns 1:4 are read as x1, y1, x2, y2 and each
 8 | %           further column holds the scores of one class
 9 | % overlap : IoU above which a box is suppressed by a kept box
10 | % picks   : {} for empty input; otherwise one entry per score column listing
11 | %           the kept row indices, best score first (fewer than 10000 rows:
12 | %           whatever nms_multiclass_mex returns)
13 | %
14 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m), with an
15 | % inner loop eliminated to speed it up for a large number of boxes.
16 | %
17 | % Copyright (C) 2011-12 by Tomasz Malisiewicz. All rights reserved.
18 | % This file is part of the Exemplar-SVM library and is made
19 | % available under the terms of the MIT license (see COPYING file).
20 | % Project homepage: https://github.com/quantombone/exemplarsvm
21 | 
22 | if isempty(boxes)
23 |     picks = {};
24 |     return;
25 | end
26 | 
27 | % below 10000 rows the compiled implementation does all the work
28 | if size(boxes, 1) < 10000
29 |     picks = nms_multiclass_mex(double(boxes), double(overlap));
30 |     return;
31 | end
32 | 
33 | % box geometry is shared by every class
34 | left   = boxes(:, 1);
35 | top    = boxes(:, 2);
36 | right  = boxes(:, 3);
37 | bottom = boxes(:, 4);
38 | box_area = (right - left + 1) .* (bottom - top + 1);
39 | 
40 | num_classes = size(boxes, 2) - 4;
41 | picks = cell(num_classes, 1);
42 | for c = 1:num_classes
43 |     cls_scores = boxes(:, 4 + c);
44 |     [~, order] = sort(cls_scores);      % ascending: best candidate is last
45 | 
46 |     % preallocated to the number of boxes, trimmed after the loop
47 |     kept = cls_scores * 0;
48 |     num_kept = 0;
49 |     while ~isempty(order)
50 |         best = order(end);
51 |         rest = order(1:end-1);
52 |         num_kept = num_kept + 1;
53 |         kept(num_kept) = best;
54 | 
55 |         % intersection of the best box with every remaining candidate
56 |         iw = max(0.0, min(right(best), right(rest)) - max(left(best), left(rest)) + 1);
57 |         ih = max(0.0, min(bottom(best), bottom(rest)) - max(top(best), top(rest)) + 1);
58 |         inter = iw .* ih;
59 |         iou = inter ./ (box_area(best) + box_area(rest) - inter);
60 | 
61 |         % survivors stay in ascending score order
62 |         order = rest(iou <= overlap);
63 |     end
64 | 
65 |     picks{c} = kept(1:num_kept);
66 | end
67 | 


--------------------------------------------------------------------------------
/functions/nms/nms_multiclass_mex.cpp:
--------------------------------------------------------------------------------
  1 | #include "mex.h"
  2 | #ifdef WIN32
  3 | #include <windows.h>
  4 | #include <tchar.h>
  5 | #else
  6 | #include <algorithm>
  7 | #endif
  8 | #include <vector>
  9 | #include <map>
 10 | #include <omp.h>
 11 | using namespace std;
 12 | 
 13 | struct score { // pairs a score value with an integer index; not referenced by the code visible in this file
 14 | 	double s;   // score value
 15 | 	int idx;    // presumably the index of the box the score belongs to -- TODO confirm
 16 | 	bool operator() (score i, score j) { return (i.idx < j.idx);} // orders by idx, not by s
 17 | } score; // also declares a global object named `score`
 18 | 
 19 | // Greedy NMS over one score column of a column-major nSample x nDim box
 20 | // matrix whose columns 0..3 are read as x1, y1, x2, y2.
 21 | //   input_boxes - box matrix; its element type must be T
 22 | //   iScoreIdx   - zero-based column holding the scores to rank by
 23 | //   overlap     - a box is dropped when its IoU with the box just picked
 24 | //                 is strictly greater than this value
 25 | //   vArea       - precomputed area of every box
 26 | //   vPick       - out: zero-based indices of kept boxes, best score first;
 27 | //                 must already hold at least nSample entries
 28 | //   nPick       - out: number of valid entries in vPick
 29 | template <typename T>
 30 | void nms(const mxArray *input_boxes, int iScoreIdx, double overlap, const vector<double> &vArea, vector<int> &vPick, int &nPick)
 31 | {
 32 | 	typedef typename std::multimap<T, int>::iterator ScoreIter;
 33 | 
 34 | 	const int nSample = (int)mxGetM(input_boxes);
 35 | 	const T *pBoxes = (const T*)mxGetData(input_boxes);
 36 | 
 37 | 	// Ascending by score, so the best remaining candidate is always last.
 38 | 	std::multimap<T, int> scores;
 39 | 	for (int i = 0; i < nSample; ++i)
 40 | 		scores.insert(std::pair<T,int>(pBoxes[iScoreIdx*nSample + i], i));
 41 | 
 42 | 	nPick = 0;
 43 | 
 44 | 	while (!scores.empty())
 45 | 	{
 46 | 		// Remove the winner explicitly. Relying on its self-overlap (IoU 1)
 47 | 		// to erase it never terminates when overlap >= 1 or when that IoU
 48 | 		// is NaN (zero area), and then writes past the end of vPick.
 49 | 		ScoreIter best = scores.end();
 50 | 		--best;
 51 | 		const int last = best->second;
 52 | 		scores.erase(best);
 53 | 		vPick[nPick] = last;
 54 | 		nPick += 1;
 55 | 
 56 | 		for (ScoreIter it = scores.begin(); it != scores.end();)
 57 | 		{
 58 | 			const int it_idx = it->second;
 59 | 			T xx1 = std::max(pBoxes[0*nSample + last], pBoxes[0*nSample + it_idx]);
 60 | 			T yy1 = std::max(pBoxes[1*nSample + last], pBoxes[1*nSample + it_idx]);
 61 | 			T xx2 = std::min(pBoxes[2*nSample + last], pBoxes[2*nSample + it_idx]);
 62 | 			T yy2 = std::min(pBoxes[3*nSample + last], pBoxes[3*nSample + it_idx]);
 63 | 
 64 | 			// Widen to double explicitly: std::max cannot deduce a common
 65 | 			// type from (double, float) when T is float.
 66 | 			double w = std::max(0.0, double(xx2-xx1+1)), h = std::max(0.0, double(yy2-yy1+1));
 67 | 			double ov = w*h / (vArea[last] + vArea[it_idx] - w*h);
 68 | 
 69 | 			// erase(it++) advances before the node is invalidated and is
 70 | 			// valid in every C++ standard, so no per-platform branch.
 71 | 			if (ov > overlap)
 72 | 				scores.erase(it++);
 73 | 			else
 74 | 				++it;
 75 | 		}
 76 | 	}
 77 | }
 78 | 
 79 | 
 80 | // Fills vArea with (x2-x1+1)*(y2-y1+1) for every row of a column-major
 81 | // nSample-row box matrix; returns false as soon as an area is negative.
 82 | template <typename T>
 83 | static bool compute_box_areas(const T *pBoxes, int nSample, vector<double> &vArea)
 84 | {
 85 | 	for (int i = 0; i < nSample; ++i)
 86 | 	{
 87 | 		vArea[i] = double(pBoxes[2*nSample + i] - pBoxes[0*nSample + i] + 1)
 88 | 			* (pBoxes[3*nSample + i] - pBoxes[1*nSample + i] + 1);
 89 | 		if (vArea[i] < 0)
 90 | 			return false;
 91 | 	}
 92 | 	return true;
 93 | }
 94 | 
 95 | // picks = nms_multiclass_mex(boxes, overlap)
 96 | //   boxes   - N x (4+K) double/single matrix: x1, y1, x2, y2, then K score columns
 97 | //   overlap - double scalar IoU threshold
 98 | //   picks   - K x 1 cell of one-based kept indices (double columns), one per
 99 | //             score column; an empty boxes matrix yields a 0 x 0 double
100 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, mxArray *prhs[])
101 | {
102 | 	if (nrhs != 2)
103 | 		mexErrMsgTxt("Wrong number of inputs");
104 | 	if (nlhs != 1)
105 | 		mexErrMsgTxt("One output");
106 | 
107 | 	const mxArray *input_boxes = prhs[0];
108 | 	const bool isDouble = mxGetClassID(input_boxes) == mxDOUBLE_CLASS;
109 | 	if (!isDouble && mxGetClassID(input_boxes) != mxSINGLE_CLASS)
110 | 		mexErrMsgTxt("Input boxes must be Double or Single");
111 | 
112 | 	const mxArray *input_overlap = prhs[1];
113 | 	if (mxGetClassID(input_overlap) != mxDOUBLE_CLASS )
114 | 		mexErrMsgTxt("Input overlap must be Double");
115 | 	double overlap = mxGetScalar(input_overlap);
116 | 
117 | 	int nSample = (int)mxGetM(input_boxes);
118 | 	int nDim_boxes = (int)mxGetN(input_boxes);
119 | 	if (nSample * nDim_boxes == 0)
120 | 	{
121 | 		plhs[0] = mxCreateNumericMatrix(0, 0, mxDOUBLE_CLASS, mxREAL);
122 | 		return;
123 | 	}
124 | 	if (nDim_boxes < 5)
125 | 		mexErrMsgTxt("nms_mex boxes must has least 5 columns");
126 | 
127 | 	// The single-precision branch used to re-test for mxDOUBLE_CLASS, so it
128 | 	// never ran and single input was suppressed against all-zero areas.
129 | 	vector<double> vArea(nSample);
130 | 	bool areasOk = isDouble
131 | 		? compute_box_areas((const double*)mxGetData(input_boxes), nSample, vArea)
132 | 		: compute_box_areas((const float*)mxGetData(input_boxes), nSample, vArea);
133 | 	if (!areasOk)
134 | 		mexErrMsgTxt("Boxes area must >= 0");
135 | 
136 | 	const int nClass = nDim_boxes - 4;
137 | 	vector<int> nPick(nClass, 0);
138 | 	vector<vector<int> > vPicks(nClass, vector<int>(nSample));
139 | 
140 | 	// Only the suppression runs in parallel. Output arrays are created
141 | 	// serially afterwards because the MEX API is not thread-safe.
142 | #pragma omp parallel for schedule(dynamic)
143 | 	for (int i = 0; i < nClass; ++i)
144 | 		if (isDouble)
145 | 			nms<double>(input_boxes, i+4, overlap, vArea, vPicks[i], nPick[i]);
146 | 		else
147 | 			nms<float>(input_boxes, i+4, overlap, vArea, vPicks[i], nPick[i]);
148 | 
149 | 	// Public API name instead of the internal mxCreateCellMatrix_730 symbol.
150 | 	plhs[0] = mxCreateCellMatrix(nClass, 1);
151 | 	for (int i = 0; i < nClass; ++i)
152 | 	{
153 | 		mxArray *mxPick = mxCreateNumericMatrix(nPick[i], 1, mxDOUBLE_CLASS, mxREAL);
154 | 		double *pRst = mxGetPr(mxPick);
155 | 		for (int j = 0; j < nPick[i]; ++j)
156 | 			pRst[j] = vPicks[i][j] + 1;	// zero-based -> one-based
157 | 		mxSetCell(plhs[0], i, mxPick);
158 | 	}
159 | }


--------------------------------------------------------------------------------
/functions/nms/nvmex.m:
--------------------------------------------------------------------------------
 1 | function nvmex(cuFileName, outDir)
 2 | %NVMEX Compiles and links a CUDA file for MATLAB usage
 3 | % NVMEX(FILENAME, OUTDIR) compiles FILENAME with the CUDA compiler, nvcc,
 4 | % into an object file in the current folder, then links it with MATLAB's
 5 | % MEX function, which writes the MEX-file to OUTDIR. Raises an error when
 6 | % nvcc exits with a non-zero status.
 7 | if ispc % Windows
 8 |     Host_Compiler_Location = '-ccbin "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\x86_amd64"';
 9 |     CUDA_INC_Location = ['"' getenv('CUDA_PATH')  '\include"'];
10 |     CUDA_SAMPLES_Location =['"' getenv('NVCUDASAMPLES6_5_ROOT')  '\common\inc"'];
11 |     PIC_Option = '';
12 |     if ( strcmp(computer('arch'),'win32') ==1)
13 |         machine_str = ' --machine 32 ';
14 |         CUDA_LIB_Location = ['"' getenv('CUDA_PATH')  '\lib\Win32"'];
15 |     elseif  ( strcmp(computer('arch'),'win64') ==1)
16 |         machine_str = ' --machine 64 ';
17 |         CUDA_LIB_Location = ['"' getenv('CUDA_PATH')  '\lib\x64"'];
18 |     end
19 |     NVCC = 'nvcc';
20 | else % Mac and Linux (assuming gcc is on the path)
21 |     CUDA_INC_Location = '/usr/local/cuda/include';
22 |     CUDA_SAMPLES_Location = '/usr/local/cuda/samples/common/inc';
23 |     Host_Compiler_Location = ' ';
24 |     PIC_Option = ' --compiler-options -fPIC ';
25 |     machine_str = [];
26 |     CUDA_LIB_Location = '/usr/local/cuda/lib64';
27 |     NVCC = '/usr/local/cuda/bin/nvcc';
28 | end
29 | % !!! End of things to modify !!!
30 | [~, filename] = fileparts(cuFileName);
31 | nvccCommandLine = [ ...
32 | NVCC ' --compile ' Host_Compiler_Location ' ' ...
33 | '-o '  filename '.o ' ...
34 | machine_str PIC_Option ...
35 | ' -I' '"' matlabroot '/extern/include"' ... no blank inside the quotes: it would become part of the path
36 | ' -I' CUDA_INC_Location ' -I' CUDA_SAMPLES_Location ...
37 | ' "' cuFileName '" ' 
38 |  ];
39 | mexCommandLine = ['mex ' '-outdir ' outDir ' ' filename '.o'  ' -L' CUDA_LIB_Location  ' -lcudart'];
40 | disp(nvccCommandLine);
41 | warnState = warning('off', 'all');   % remember the caller's warning settings
42 | status = system(nvccCommandLine);
43 | warning(warnState);                  % restore them instead of forcing everything on
44 | if status ~= 0   % any non-zero exit status means nvcc failed
45 |     error('Error invoking nvcc');
46 | end
47 | disp(mexCommandLine);
48 | eval(mexCommandLine);
49 | end
50 | 


--------------------------------------------------------------------------------
/functions/rpn/proposal_calc_output_size.m:
--------------------------------------------------------------------------------
 1 | function [output_width_map, output_height_map] = proposal_calc_output_size(conf, test_net_def_file)
 2 | % [output_width_map, output_height_map] = proposal_calc_output_size(conf, test_net_def_file)
 3 | % --------------------------------------------------------
 4 | % Faster R-CNN
 5 | % Copyright (c) 2015, Shaoqing Ren
 6 | % Licensed under The MIT License [see LICENSE for details]
 7 | % --------------------------------------------------------
 8 | % Probes the test net with all-zero s x s x 3 inputs for every side length
 9 | % s in 100:conf.max_size and maps s to the size along dimension 1 (width
10 | % map) and dimension 2 (height map) of the 'proposal_cls_score' blob.
11 | 
12 | %     caffe.init_log(fullfile(pwd, 'caffe_log'));
13 |     caffe_net = caffe.Net(test_net_def_file, 'test');
14 | 
15 |     % set gpu/cpu
16 |     if conf.use_gpu
17 |         caffe.set_mode_gpu();
18 |     else
19 |         caffe.set_mode_cpu();
20 |     end
21 | 
22 |     side_lengths = 100:conf.max_size;
23 |     num_sides = length(side_lengths);
24 |     output_w = nan(1, num_sides);
25 |     output_h = nan(1, num_sides);
26 |     for k = 1:num_sides
27 |         side = side_lengths(k);
28 |         net_inputs = {zeros(side, side, 3, 1, 'single')};
29 |         % Reshape net's input blobs, then run the net on the blank image
30 |         caffe_net.reshape_as_input(net_inputs);
31 |         caffe_net.forward(net_inputs);
32 |         cls_score = caffe_net.blobs('proposal_cls_score').get_data();
33 |         output_w(k) = size(cls_score, 1);
34 |         output_h(k) = size(cls_score, 2);
35 |     end
36 | 
37 |     output_width_map = containers.Map(side_lengths, output_w);
38 |     output_height_map = containers.Map(side_lengths, output_h);
39 |     caffe.reset_all();
40 | end


--------------------------------------------------------------------------------
/functions/rpn/proposal_config.m:
--------------------------------------------------------------------------------
 1 | function conf = proposal_config(varargin)
 2 | % conf = proposal_config(varargin)
 3 | % --------------------------------------------------------
 4 | % Faster R-CNN
 5 | % Copyright (c) 2015, Shaoqing Ren
 6 | % Licensed under The MIT License [see LICENSE for details]
 7 | % --------------------------------------------------------
 8 | % Builds the proposal (RPN) configuration struct from name/value pairs.
 9 | % Options that are not given take the defaults listed in the table below;
10 | % a char 'image_means' is treated as a MAT-file holding exactly one
11 | % variable, which replaces it.
12 | 
13 |     % one row per option: name, default value, validator
14 |     option_table = { ...
15 |         ... ---- training ----
16 |         'use_gpu',                      (gpuDeviceCount > 0),   @islogical; ...
17 |         ... drop the anchors that have edges outside of the image boundary
18 |         'drop_boxes_runoff_image',      true,                   @islogical; ...
19 |         ... image scales -- the short edge of the input image
20 |         'scales',                       600,                    @ismatrix; ...
21 |         ... max pixel size of a scaled input image
22 |         'max_size',                     1000,                   @isscalar; ...
23 |         ... images per batch, only ims_per_batch = 1 is supported currently
24 |         'ims_per_batch',                1,                      @isscalar; ...
25 |         ... minibatch size
26 |         'batch_size',                   256,                    @isscalar; ...
27 |         ... fraction of the minibatch that is foreground labeled (class > 0)
28 |         'fg_fraction',                  0.5,                    @isscalar; ...
29 |         ... weight of background samples when foreground samples weigh 1.0
30 |         'bg_weight',                    1.0,                    @isscalar; ...
31 |         ... overlap threshold for a ROI to be foreground (if >= fg_thresh)
32 |         'fg_thresh',                    0.7,                    @isscalar; ...
33 |         ... a ROI is background (class = 0) if its overlap is in
34 |         ... [bg_thresh_lo, bg_thresh_hi)
35 |         'bg_thresh_hi',                 0.3,                    @isscalar; ...
36 |         'bg_thresh_lo',                 0,                      @isscalar; ...
37 |         ... mean image, in RGB order
38 |         'image_means',                  128,                    @ismatrix; ...
39 |         ... use horizontally-flipped images during training?
40 |         'use_flipped',                  true,                   @islogical; ...
41 |         ... stride in input image pixels at ROI pooling level (network
42 |         ... specific); 16 is true for {Alex,Caffe}Net, VGG_CNN_M_1024, VGG16
43 |         'feat_stride',                  16,                     @isscalar; ...
44 |         ... train proposal targets only on labeled ground truths, or also
45 |         ... on other proposal results (selective search, etc.)
46 |         'target_only_gt',               true,                   @islogical; ...
47 |         ... random seed
48 |         'rng_seed',                     6,                      @isscalar; ...
49 |         ... ---- testing ----
50 |         'test_scales',                  600,                    @isscalar; ...
51 |         'test_max_size',                1000,                   @isscalar; ...
52 |         'test_nms',                     0.3,                    @isscalar; ...
53 |         'test_binary',                  false,                  @islogical; ...
54 |         'test_min_box_size',            16,                     @isscalar; ...
55 |         'test_drop_boxes_runoff_image', false,                  @islogical; ...
56 |         };
57 | 
58 |     ip = inputParser;
59 |     for k = 1:size(option_table, 1)
60 |         ip.addParamValue(option_table{k, :});
61 |     end
62 |     ip.parse(varargin{:});
63 |     conf = ip.Results;
64 | 
65 |     assert(conf.ims_per_batch == 1, 'currently rpn only supports ims_per_batch == 1');
66 | 
67 |     % if image_means names a file, replace it by the file's single variable
68 |     if ischar(conf.image_means)
69 |         loaded = load(conf.image_means);
70 |         var_names = fieldnames(loaded);
71 |         assert(length(var_names) == 1);
72 |         conf.image_means = loaded.(var_names{1});
73 |     end
74 | end


--------------------------------------------------------------------------------
/functions/rpn/proposal_generate_anchors.m:
--------------------------------------------------------------------------------
 1 | function anchors = proposal_generate_anchors(cache_name, varargin)
 2 | % anchors = proposal_generate_anchors(cache_name, varargin)
 3 | % --------------------------------------------------------
 4 | % Faster R-CNN
 5 | % Copyright (c) 2015, Shaoqing Ren
 6 | % Licensed under The MIT License [see LICENSE for details]
 7 | % --------------------------------------------------------
 8 | % Returns the anchor boxes built from a base box, a ratio list and a scale
 9 | % list. Unless 'ignore_cache' is true they are loaded from, or else computed
10 | % and saved to, <pwd>/output/rpn_cachedir/<cache_name>/anchors.
11 | 
12 |     ip = inputParser;
13 |     ip.addRequired('cache_name',                        @isstr);
14 |     ip.addParamValue('base_size',       16,             @isscalar);  % the size of the base anchor
15 |     ip.addParamValue('ratios',          [0.5, 1, 2],    @ismatrix);  % ratio list of anchors
16 |     ip.addParamValue('scales',          2.^[3:5],       @ismatrix);  % scale list of anchors
17 |     ip.addParamValue('ignore_cache',    false,          @islogical);
18 |     ip.parse(cache_name, varargin{:});
19 |     opts = ip.Results;
20 | 
21 |     use_cache = ~opts.ignore_cache;
22 |     cached = false;
23 |     if use_cache
24 |         anchor_cache_dir = fullfile(pwd, 'output', 'rpn_cachedir', cache_name);
25 |         mkdir_if_missing(anchor_cache_dir);
26 |         anchor_cache_file = fullfile(anchor_cache_dir, 'anchors');
27 |         try
28 |             ld = load(anchor_cache_file);
29 |             anchors = ld.anchors;
30 |             cached = true;
31 |         catch   % missing or unusable cache file: fall through and regenerate
32 |         end
33 |     end
34 |     if ~cached
35 |         ratio_anchors = ratio_jitter([1, 1, opts.base_size, opts.base_size], opts.ratios);
36 |         per_ratio = cell(size(ratio_anchors, 1), 1);
37 |         for r = 1:numel(per_ratio)
38 |             per_ratio{r} = scale_jitter(ratio_anchors(r, :), opts.scales);
39 |         end
40 |         anchors = cat(1, per_ratio{:});
41 |         if use_cache, save(anchor_cache_file, 'anchors'); end
42 |     end
43 | end
44 | function anchors = ratio_jitter(anchor, ratios)
45 | % Re-shapes one [x1 y1 x2 y2] anchor to every height/width ratio in RATIOS
46 | % around the same centre; returns one row per ratio.
47 |     ratios = ratios(:);
48 | 
49 |     base_w = anchor(3) - anchor(1) + 1;
50 |     base_h = anchor(4) - anchor(2) + 1;
51 |     ctr_x = anchor(1) + (base_w - 1) / 2;
52 |     ctr_y = anchor(2) + (base_h - 1) / 2;
53 | 
54 |     % width from the base area divided by the ratio, height from the rounded width
55 |     ws = round(sqrt(base_w * base_h ./ ratios));
56 |     hs = round(ws .* ratios);
57 |     anchors = [ctr_x - (ws - 1) / 2, ctr_y - (hs - 1) / 2, ctr_x + (ws - 1) / 2, ctr_y + (hs - 1) / 2];
58 | end
59 | 
60 | function anchors = scale_jitter(anchor, scales)
61 | % Enlarges one [x1 y1 x2 y2] anchor by every factor in SCALES around the
62 | % same centre; returns one row per scale.
63 |     scales = scales(:);
64 | 
65 |     base_w = anchor(3) - anchor(1) + 1;
66 |     base_h = anchor(4) - anchor(2) + 1;
67 |     ctr_x = anchor(1) + (base_w - 1) / 2;
68 |     ctr_y = anchor(2) + (base_h - 1) / 2;
69 |     half_w = (base_w * scales - 1) / 2;
70 |     half_h = (base_h * scales - 1) / 2;
71 |     anchors = [ctr_x - half_w, ctr_y - half_h, ctr_x + half_w, ctr_y + half_h];
72 | end
73 | 
74 | 


--------------------------------------------------------------------------------
/functions/rpn/proposal_generate_minibatch.m:
--------------------------------------------------------------------------------
  1 | function [input_blobs, random_scale_inds] = proposal_generate_minibatch(conf, image_roidb)
  2 | % [input_blobs, random_scale_inds] = proposal_generate_minibatch(conf, image_roidb)
  3 | % --------------------------------------------------------
  4 | % Faster R-CNN
  5 | % Copyright (c) 2015, Shaoqing Ren
  6 | % Licensed under The MIT License [see LICENSE for details]
  7 | % --------------------------------------------------------
  8 | % Builds the five single-precision training blobs for one image:
  9 | %   {image, binary anchor labels, label weights, box regression targets,
 10 | %    box regression loss weights}
 11 | % and returns the randomly drawn index into conf.scales alongside them.
 12 | % Exactly one image_roidb entry is supported.
 13 | 
 14 |     num_images = length(image_roidb);
 15 |     assert(num_images == 1, 'proposal_generate_minibatch_fcn only support num_images == 1');
 16 | 
 17 |     % Sample random scales to use for each image in this batch
 18 |     random_scale_inds = randi(length(conf.scales), num_images, 1);
 19 | 
 20 |     assert(mod(conf.batch_size, num_images) == 0, ...
 21 |         'num_images %d must divide BATCH_SIZE %d', num_images, conf.batch_size);
 22 | 
 23 |     rois_per_image = conf.batch_size / num_images;
 24 |     fg_rois_per_image = round(rois_per_image * conf.fg_fraction);
 25 | 
 26 |     % Get the input image blob
 27 |     [im_blob, im_scales] = get_image_blob(conf, image_roidb, random_scale_inds);
 28 | 
 29 |     for i = 1:num_images
 30 |         [labels, label_weights, bbox_targets, bbox_loss] = sample_rois(conf, image_roidb(i), ...
 31 |             fg_rois_per_image, rois_per_image, im_scales(i), random_scale_inds(i));
 32 | 
 33 |         % network output size for the rescaled image
 34 |         img_size = round(image_roidb(i).im_size * im_scales(i));
 35 |         out_h = cell2mat(conf.output_height_map.values({img_size(1)}));
 36 |         out_w = cell2mat(conf.output_width_map.values({img_size(2)}));
 37 | 
 38 |         assert(img_size(1) == size(im_blob, 1) && img_size(2) == size(im_blob, 2));
 39 | 
 40 |         % lay the per-anchor vectors out as [channel, height, width], where
 41 |         % channel is the fastest dimension, then permute to
 42 |         % [width, height, channel]
 43 |         num_anchors = size(conf.anchors, 1);
 44 |         labels_blob = permute(reshape(labels, num_anchors, out_h, out_w), [3, 2, 1]);
 45 |         label_weights_blob = permute(reshape(label_weights, num_anchors, out_h, out_w), [3, 2, 1]);
 46 |         bbox_targets_blob = permute(reshape(bbox_targets', num_anchors*4, out_h, out_w), [3, 2, 1]);
 47 |         bbox_loss_blob = permute(reshape(bbox_loss', num_anchors*4, out_h, out_w), [3, 2, 1]);
 48 |     end
 49 | 
 50 |     % permute data into caffe c++ memory, thus [num, channels, height, width];
 51 |     % the channel reorder turns rgb into bgr
 52 |     im_blob = single(permute(im_blob(:, :, [3, 2, 1], :), [2, 1, 3, 4]));
 53 |     labels_blob = single(labels_blob);
 54 |     labels_blob(labels_blob > 0) = 1; % to binary label (fg and bg)
 55 |     label_weights_blob = single(label_weights_blob);
 56 |     bbox_targets_blob = single(bbox_targets_blob);
 57 |     bbox_loss_blob = single(bbox_loss_blob);
 58 | 
 59 |     input_blobs = {im_blob, labels_blob, label_weights_blob, bbox_targets_blob, bbox_loss_blob};
 60 |     for b = 1:numel(input_blobs)
 61 |         assert(~isempty(input_blobs{b}));
 62 |     end
 63 | end
 64 | 
 65 | 
 66 | %% Build an input blob from the images in the roidb at the specified scales.
 67 | function [im_blob, im_scales] = get_image_blob(conf, images, random_scale_inds)
 68 | % Reads every image, preprocesses it with prep_im_for_blob at the scale
 69 | % selected by RANDOM_SCALE_INDS and packs the results into one blob.
 70 | % IM_SCALES holds the scale factor reported for each image.
 71 |     num_images = length(images);
 72 |     processed_ims = cell(num_images, 1);
 73 |     im_scales = nan(num_images, 1);
 74 | 
 75 |     for i = 1:num_images
 76 |         raw_im = imread(images(i).image_path);
 77 |         target_size = conf.scales(random_scale_inds(i));
 78 |         [processed_ims{i}, im_scales(i)] = prep_im_for_blob(raw_im, conf.image_means, ...
 79 |             target_size, conf.max_size);
 80 |     end
 81 | 
 82 |     im_blob = im_list_to_blob(processed_ims);
 83 | end
 84 | 
 85 | %% Generate a random sample of ROIs comprising foreground and background examples.
 86 | function [labels, label_weights, bbox_targets, bbox_loss_weights] = ...
 87 |     sample_rois(conf, image_roidb, fg_rois_per_image, rois_per_image, im_scale, im_scale_ind)
 88 | % Draws a random foreground/background sample for one image.
 89 | % Column 1 of image_roidb.bbox_targets{im_scale_ind} is the assigned label
 90 | % (> 0 foreground, < 0 background); the remaining columns are the
 91 | % regression targets. Rows that are not sampled keep label 0 and weight 0.
 92 | % IM_SCALE is accepted but not used.
 93 |     targets = image_roidb.bbox_targets{im_scale_ind};
 94 |     assigned = targets(:, 1);
 95 |     num_rois = size(targets, 1);
 96 | 
 97 |     % candidate rows of either kind
 98 |     fg_all = find(assigned > 0);
 99 |     bg_all = find(assigned < 0);
100 | 
101 |     % random subsets: foreground first, background fills the remainder
102 |     fg_num = min(fg_rois_per_image, length(fg_all));
103 |     fg_inds = fg_all(randperm(length(fg_all), fg_num));
104 |     bg_num = min(rois_per_image - fg_num, length(bg_all));
105 |     bg_inds = bg_all(randperm(length(bg_all), bg_num));
106 | 
107 |     % only sampled foreground rows receive their label
108 |     labels = zeros(num_rois, 1);
109 |     labels(fg_inds) = assigned(fg_inds);
110 |     assert(all(assigned(fg_inds) > 0));
111 | 
112 |     label_weights = zeros(num_rois, 1);
113 |     label_weights(fg_inds) = 1;
114 |     label_weights(bg_inds) = conf.bg_weight;
115 | 
116 |     % regression loss is switched on for the sampled foreground rows only
117 |     bbox_targets = single(full(targets(:, 2:end)));
118 |     bbox_loss_weights = bbox_targets * 0;
119 |     bbox_loss_weights(fg_inds, :) = 1;
120 | end
121 | 
122 | function visual_anchors(image_roidb, anchors, im_scale)
123 | % Shows the image resized by IM_SCALE with each anchor row drawn in red.
124 |     imshow(imresize(imread(image_roidb.image_path), im_scale));
125 |     hold on;
126 |     for k = 1:size(anchors, 1), rectangle('Position', RectLTRB2LTWH(anchors(k, :)), 'EdgeColor', 'r'); end
127 |     hold off;
128 | end
129 | 


--------------------------------------------------------------------------------
/functions/rpn/proposal_im_detect.m:
--------------------------------------------------------------------------------
  1 | function [pred_boxes, scores, box_deltas_, anchors_, scores_] = proposal_im_detect(conf, caffe_net, im)
  2 | % [pred_boxes, scores, box_deltas_, anchors_, scores_] = proposal_im_detect(conf, caffe_net, im)
  3 | % --------------------------------------------------------
  4 | % Faster R-CNN
  5 | % Copyright (c) 2015, Shaoqing Ren
  6 | % Licensed under The MIT License [see LICENSE for details]
  7 | % --------------------------------------------------------
  8 | % Runs the proposal network on one image.
  9 | %   pred_boxes  - predicted boxes in original image coordinates, clipped to
 10 | %                 the image, filtered by size and sorted by descending score
 11 | %   scores      - score of every returned box
 12 | %   box_deltas_, anchors_, scores_ - regression deltas, anchors and scores
 13 | %                 before any filtering or sorting
 14 | 
 15 |     im = single(im);
 16 |     im_size = size(im);
 17 |     [im_blob, im_scales] = get_image_blob(conf, im);
 18 |     scaled_im_size = round(im_size * im_scales);
 19 | 
 20 |     % permute data into caffe c++ memory, thus [num, channels, height, width];
 21 |     % the channel reorder turns rgb into bgr
 22 |     net_inputs = {single(permute(im_blob(:, :, [3, 2, 1], :), [2, 1, 3, 4]))};
 23 | 
 24 |     % Reshape net's input blobs
 25 |     caffe_net.reshape_as_input(net_inputs);
 26 |     output_blobs = caffe_net.forward(net_inputs);
 27 |     raw_deltas = output_blobs{1};
 28 | 
 29 |     % Apply bounding-box regression deltas: permute from
 30 |     % [width, height, channel] to [channel, height, width] (channel
 31 |     % fastest) and take four deltas per row
 32 |     featuremap_size = [size(raw_deltas, 2), size(raw_deltas, 1)];
 33 |     box_deltas_ = reshape(permute(raw_deltas, [3, 2, 1]), 4, [])';
 34 | 
 35 |     anchors_ = proposal_locate_anchors(conf, im_size, conf.test_scales, featuremap_size);
 36 |     pred_boxes = fast_rcnn_bbox_transform_inv(anchors_, box_deltas_);
 37 | 
 38 |     % scale back
 39 |     orig_extent = [im_size(2), im_size(1), im_size(2), im_size(1)] - 1;
 40 |     scaled_extent = [scaled_im_size(2), scaled_im_size(1), scaled_im_size(2), scaled_im_size(1)] - 1;
 41 |     pred_boxes = bsxfun(@times, pred_boxes - 1, orig_extent ./ scaled_extent) + 1;
 42 |     pred_boxes = clip_boxes(pred_boxes, im_size(2), im_size(1));
 43 | 
 44 |     assert(conf.test_binary == false);
 45 |     % use softmax estimated probabilities, laid out like the deltas above
 46 |     scores_ = output_blobs{2}(:, :, end);
 47 |     scores_ = reshape(scores_, size(raw_deltas, 1), size(raw_deltas, 2), []);
 48 |     scores_ = permute(scores_, [3, 2, 1]);
 49 |     scores_ = scores_(:);
 50 |     scores = scores_;
 51 | 
 52 |     % optionally drop predictions whose anchor leaves the rescaled image
 53 |     if conf.test_drop_boxes_runoff_image
 54 |         in_image = is_contain_in_image(anchors_, scaled_im_size);
 55 |         pred_boxes = pred_boxes(in_image, :);
 56 |         scores = scores(in_image, :);
 57 |     end
 58 | 
 59 |     % drop too small boxes
 60 |     [pred_boxes, scores] = filter_boxes(conf.test_min_box_size, pred_boxes, scores);
 61 | 
 62 |     % sort
 63 |     [scores, order] = sort(scores, 'descend');
 64 |     pred_boxes = pred_boxes(order, :);
 65 | end
 66 | 
 67 | function [image_blob, rois_blob, scale_factors] = get_blobs(conf, im, rois)
 68 |     [image_blob, scale_factors] = get_image_blob(conf, im);        % image blob and its scale factor(s)
 69 |     [feat_rois, levels] = map_im_rois_to_feat_rois(conf, rois, scale_factors);
 70 |     rois_blob = single([levels, feat_rois]);                       % one [level, roi] row per input ROI
 71 | end
 72 | function [blob, im_scales] = get_image_blob(conf, im)
 73 | % Preprocesses IM once per entry of conf.test_scales; several scales are packed into one blob.
 74 |     if numel(conf.test_scales) ~= 1
 75 |         [ims, im_scales] = arrayfun(@(x) prep_im_for_blob(im, conf.image_means, x, conf.test_max_size), conf.test_scales, 'UniformOutput', false);
 76 |         im_scales = cell2mat(im_scales);
 77 |         blob = im_list_to_blob(ims);
 78 |         return;
 79 |     end
 80 |     [blob, im_scales] = prep_im_for_blob(im, conf.image_means, conf.test_scales, conf.test_max_size);
 81 | end
 82 | function rois_blob = get_rois_blob(conf, im_rois, im_scale_factors)
 83 | % One single-precision row per ROI: its level followed by its feature-map coordinates.
 84 |     [mapped_rois, roi_levels] = map_im_rois_to_feat_rois(conf, im_rois, im_scale_factors);
 85 |     rois_blob = single(horzcat(roi_levels, mapped_rois));
 86 | end
 87 | function [feat_rois, levels] = map_im_rois_to_feat_rois(conf, im_rois, scales)
 88 | % Maps N x 4 image ROIs [x1 y1 x2 y2] onto the feature map. LEVELS (N x 1)
 89 | % indexes SCALES: with several scales each ROI takes the one bringing its
 90 | % area closest to 224^2; FEAT_ROIS = round((roi-1)*scale/feat_stride) + 1.
 91 |     im_rois = single(im_rois);
 92 |     scales = scales(:);   % column, so scales(levels) is N x 1 for bsxfun
 93 |     if length(scales) > 1
 94 |         areas = (im_rois(:, 3) - im_rois(:, 1) + 1) .* (im_rois(:, 4) - im_rois(:, 2) + 1);
 95 |         scaled_areas = bsxfun(@times, areas, scales'.^2);
 96 |         % arg-min over the scales; the former max(..., 2) was an element-wise maximum with 2
 97 |         [~, levels] = min(abs(scaled_areas - 224.^2), [], 2);
 98 |     else
 99 |         levels = ones(size(im_rois, 1), 1);
100 |     end
101 |     feat_rois = round(bsxfun(@times, im_rois-1, scales(levels)) / conf.feat_stride) + 1;
102 | end
103 | 
104 | function [boxes, scores] = filter_boxes(min_box_size, boxes, scores)
105 | % Keeps only the boxes (and their score rows) whose width and height,
106 | % measured as x2-x1+1 and y2-y1+1, are both at least MIN_BOX_SIZE.
107 |     extents = [boxes(:, 3) - boxes(:, 1), boxes(:, 4) - boxes(:, 2)] + 1;
108 |     keep = all(extents >= min_box_size, 2);
109 | 
110 |     boxes = boxes(keep, :);
111 |     scores = scores(keep, :);
112 | end
113 | function boxes = clip_boxes(boxes, im_width, im_height)
114 | % Clamps every x1/x2 column to [1, im_width] and every y1/y2 column to
115 | % [1, im_height]; columns are taken in repeating groups of four.
116 |     % upper limit for x1, y1, x2, y2 in that order
117 |     limits = [im_width, im_height, im_width, im_height];
118 |     for offset = 1:4
119 |         cols = offset:4:size(boxes, 2);
120 |         boxes(:, cols) = max(min(boxes(:, cols), limits(offset)), 1);
121 |     end
122 | end
123 | 
124 | function contained = is_contain_in_image(boxes, im_size)
125 | % True for each row whose coordinates are all >= 1 and within im_size(2) (x) / im_size(1) (y).
126 |     upper = [im_size(2), im_size(1), im_size(2), im_size(1)];
127 |     inside = (boxes >= 1) & bsxfun(@le, boxes, upper);
128 |     contained = all(inside, 2);
129 | end
130 | 


--------------------------------------------------------------------------------
/functions/rpn/proposal_locate_anchors.m:
--------------------------------------------------------------------------------
 1 | function [anchors, im_scales] = proposal_locate_anchors(conf, im_size, target_scale, feature_map_size)
 2 | % [anchors, im_scales] = proposal_locate_anchors(conf, im_size, target_scale, feature_map_size)
 3 | % --------------------------------------------------------
 4 | % Faster R-CNN
 5 | % Copyright (c) 2015, Shaoqing Ren
 6 | % Licensed under The MIT License [see LICENSE for details]
 7 | % --------------------------------------------------------   
 8 | % generate anchors for each scale
 9 | %   with target_scale given: the anchors and image scale for that scale
10 | %   without it: cell arrays holding one entry per element of conf.scales
11 | %   feature_map_size (optional): used as the network output size instead
12 | %   of the value looked up in conf.output_height_map / output_width_map
13 | 
14 |     if nargin < 4
15 |         feature_map_size = [];   % only for fcn
16 |     end
17 | 
18 |     if nargin >= 3
19 |         [anchors, im_scales] = proposal_locate_anchors_single_scale(im_size, conf, target_scale, feature_map_size);
20 |     else
21 |         [anchors, im_scales] = arrayfun(@(s) proposal_locate_anchors_single_scale(im_size, conf, s, feature_map_size), ...
22 |             conf.scales, 'UniformOutput', false);
23 |     end
24 | end
25 | 
26 | function [anchors, im_scale] = proposal_locate_anchors_single_scale(im_size, conf, target_scale, feature_map_size)
27 | % Places conf.anchors at every output cell for one target scale. The output
28 | % size is FEATURE_MAP_SIZE when given, otherwise it is looked up from the
29 | % rescaled image size in conf.output_height_map / conf.output_width_map.
30 |     im_scale = prep_im_for_blob_size(im_size, target_scale, conf.max_size);
31 |     if isempty(feature_map_size)
32 |         img_size = round(im_size * im_scale);
33 |         output_size = [cell2mat(conf.output_height_map.values({img_size(1)})), ...
34 |                        cell2mat(conf.output_width_map.values({img_size(2)}))];
35 |     else
36 |         output_size = feature_map_size;
37 |     end
38 | 
39 |     % offset of every output cell in input-image pixels
40 |     shift_x = (0:(output_size(2)-1)) * conf.feat_stride;
41 |     shift_y = (0:(output_size(1)-1)) * conf.feat_stride;
42 |     [grid_x, grid_y] = meshgrid(shift_x, shift_y);
43 |     shifts = [grid_x(:), grid_y(:), grid_x(:), grid_y(:)];
44 | 
45 |     % concat anchors as [channel, height, width], where channel is the
46 |     % fastest dimension: every base anchor is repeated at every shift
47 |     anchors = reshape(bsxfun(@plus, permute(conf.anchors, [1, 3, 2]), ...
48 |         permute(shifts, [3, 1, 2])), [], 4);
49 | end


--------------------------------------------------------------------------------
/functions/rpn/proposal_test.m:
--------------------------------------------------------------------------------
 1 | function aboxes = proposal_test(conf, imdb, varargin)
 2 | % aboxes = proposal_test(conf, imdb, varargin)
 3 | % --------------------------------------------------------
 4 | % Faster R-CNN
 5 | % Copyright (c) 2015, Shaoqing Ren
 6 | % Licensed under The MIT License [see LICENSE for details]
 7 | % --------------------------------------------------------
 8 | % Run the proposal network on every image of imdb and return the detections.
 9 | % A result file found in the cache folder is returned without running the net.
10 | %
11 | %   conf, imdb       configuration and image database structs
12 | %   'net_def_file'   network definition used for testing
13 | %   'net_file'       trained weights copied into the net
14 | %   'cache_name'     sub folder of output/rpn_cachedir for cache and logs
15 | %   'suffix'         appended to the name of the cached result file
16 | %
17 | %   aboxes{i} = [boxes, scores] of image i as returned by proposal_im_detect,
18 | %   documented in the original code as N x 5 rows (x1, y1, x2, y2, score).
19 | 
20 | %% inputs
21 |     ip = inputParser;
22 |     ip.addRequired('conf',                              @isstruct);
23 |     ip.addRequired('imdb',                              @isstruct);
24 |     ip.addParamValue('net_def_file',    fullfile(pwd, 'proposal_models', 'Zeiler_conv5', 'test.prototxt'), ...
25 |                                                         @ischar);
26 |     ip.addParamValue('net_file',        fullfile(pwd, 'proposal_models', 'Zeiler_conv5', 'Zeiler_conv5.caffemodel'), ...
27 |                                                         @ischar);
28 |     ip.addParamValue('cache_name',      'Zeiler_conv5', @ischar);
29 |     ip.addParamValue('suffix',          '',             @ischar);
30 |     ip.parse(conf, imdb, varargin{:});
31 |     opts = ip.Results;
32 | 
33 |     cache_dir = fullfile(pwd, 'output', 'rpn_cachedir', opts.cache_name, imdb.name);
34 |     cache_file = fullfile(cache_dir, ['proposal_boxes_' imdb.name opts.suffix]);
35 |     try
36 |         % try to load cache
37 |         ld = load(cache_file);
38 |         aboxes = ld.aboxes;
39 |         return;
40 |     catch
41 |         % no usable cache: compute the proposals below
42 |     end
43 | 
44 | %% init net
45 |     mkdir_if_missing(cache_dir);
46 |     caffe_log_file_base = fullfile(cache_dir, 'caffe_log');
47 |     caffe.init_log(caffe_log_file_base);
48 |     caffe_net = caffe.Net(opts.net_def_file, 'test');
49 |     % the clean-up objects below undo their step when the function exits,
50 |     % including an exit caused by an error while testing
51 |     release_caffe = onCleanup(@() caffe.reset_all()); %#ok<NASGU>
52 |     caffe_net.copy_from(opts.net_file);
53 | 
54 |     % init log
55 |     timestamp = datestr(datevec(now()), 'yyyymmdd_HHMMSS');
56 |     mkdir_if_missing(fullfile(cache_dir, 'log'));
57 |     log_file = fullfile(cache_dir, 'log', ['test_', timestamp, '.txt']);
58 |     diary(log_file);
59 |     stop_diary = onCleanup(@() diary('off')); %#ok<NASGU>
60 | 
61 |     % set random seed
62 |     prev_rng = seed_rand(conf.rng_seed);
63 |     restore_rng = onCleanup(@() rng(prev_rng)); %#ok<NASGU>
64 |     caffe.set_random_seed(conf.rng_seed);
65 | 
66 |     % set gpu/cpu
67 |     if conf.use_gpu
68 |         caffe.set_mode_gpu();
69 |     else
70 |         caffe.set_mode_cpu();
71 |     end
72 | 
73 |     disp('opts:');
74 |     disp(opts);
75 |     disp('conf:');
76 |     disp(conf);
77 | 
78 | %% testing
79 |     num_images = length(imdb.image_ids);
80 |     aboxes = cell(num_images, 1);
81 |     for i = 1:num_images
82 |         fprintf('%s: test (%s) %d/%d ', procid(), imdb.name, i, num_images);
83 |         th = tic;
84 |         im = imread(imdb.image_at(i));
85 | 
86 |         % only boxes and scores are saved, so the remaining outputs of
87 |         % proposal_im_detect are not requested (and not kept in memory)
88 |         [boxes, scores] = proposal_im_detect(conf, caffe_net, im);
89 | 
90 |         fprintf(' time: %.3fs\n', toc(th));
91 |         aboxes{i} = [boxes, scores];
92 |     end
93 |     save(cache_file, 'aboxes', '-v7.3');
94 | end
95 | 


--------------------------------------------------------------------------------
/functions/rpn/proposal_visual_debug.m:
--------------------------------------------------------------------------------
 1 | function proposal_visual_debug(conf, image_roidb, input_blobs, bbox_means, bbox_stds, classes, scale_inds)
 2 | % proposal_visual_debug(conf, image_roidb, input_blobs, bbox_means, bbox_stds, classes, scale_inds)
 3 | % --------------------------------------------------------
 4 | % Faster R-CNN
 5 | % Copyright (c) 2015, Shaoqing Ren
 6 | % Licensed under The MIT License [see LICENSE for details]
 7 | % --------------------------------------------------------
 8 | % Plot the anchors of a training minibatch on top of its image, one plot
 9 | % per image and anchor type, pausing after each one:
10 | %   black  anchors with label 0 and positive label weight (one drawn in blue)
11 | %   red    anchors with a positive label
12 | %   green  boxes obtained by applying the regression targets to those anchors
13 | 
14 |     im_blob = input_blobs{1};
15 |     labels = input_blobs{2};
16 |     label_weights = input_blobs{3};
17 |     bbox_targets_blob = input_blobs{4};
18 |     bbox_loss_weights_blob = input_blobs{5}; %#ok<NASGU>
19 | 
20 |     % concat anchors as [channel, height, width], where channel is the fastest dimension.
21 |     anchors = proposal_locate_anchors(conf, image_roidb.im_size, conf.scales(scale_inds));
22 | 
23 |     % bring the targets into one row per anchor and undo their normalization
24 |     targets = reshape(double(permute(bbox_targets_blob, [3, 2, 1])), 4, [])';
25 |     targets = bsxfun(@plus, bsxfun(@times, targets, bbox_stds), bbox_means);
26 |     pred_boxes = fast_rcnn_bbox_transform_inv(anchors, targets);
27 | 
28 |     labels = reshape(double(permute(labels, [3, 2, 1])), [], 1);
29 |     label_weights = reshape(double(permute(label_weights, [3, 2, 1])), [], 1);
30 | 
31 |     num_anchors = size(conf.anchors, 1);
32 |     for i = 1:size(im_blob, 4)
33 |         % reverse the channel order and swap the first two dims for display
34 |         im = mat2gray(permute(im_blob(:, :, [3, 2, 1], i), [2, 1, 3]));
35 |         for j = 1:num_anchors
36 |             imshow(im);
37 |             hold on;
38 | 
39 |             % every num_anchors-th row belongs to anchor type j
40 |             sub_rois = anchors(j:num_anchors:end, :);
41 |             sub_labels = labels(j:num_anchors:end);
42 |             sub_label_weights = label_weights(j:num_anchors:end);
43 |             sub_pred_boxes = pred_boxes(j:num_anchors:end, :);
44 | 
45 |             % bg
46 |             bg_ind = find(sub_labels == 0 & sub_label_weights > 0);
47 |             if ~isempty(bg_ind)
48 |                 draw_boxes(sub_rois(bg_ind, :), 'k');
49 |                 draw_boxes(sub_rois(bg_ind(round(length(bg_ind)/2)), :), 'b');
50 |             end
51 | 
52 |             % fg
53 |             fg_ind = sub_labels > 0;
54 |             draw_boxes(sub_rois(fg_ind, :), 'r');
55 |             draw_boxes(sub_pred_boxes(fg_ind, :), 'g');
56 | 
57 |             hold off;
58 |             pause;
59 |         end
60 |     end
61 | end
62 | 
63 | function draw_boxes(boxes, edge_color)
64 | % draw each row of boxes (l, t, r, b) as a rectangle in the given color
65 |     for k = 1:size(boxes, 1)
66 |         rectangle('Position', RectLTRB2LTWH(boxes(k, :)), 'EdgeColor', edge_color);
67 |     end
68 | end


--------------------------------------------------------------------------------
/imdb/get_voc_opts.m:
--------------------------------------------------------------------------------
 1 | function VOCopts = get_voc_opts(path)
 2 | % VOCopts = get_voc_opts(path)
 3 | %   Runs the VOCinit script with the VOCcode folder of the PASCAL VOC
 4 | %   devkit at path on the search path, and returns the VOCopts variable
 5 | %   that the script is expected to define.
 6 | %
 7 | %   The working directory and the MATLAB search path are restored on exit,
 8 | %   whether VOCinit succeeds or not. On failure an error is raised that
 9 | %   names path and includes the message of the underlying error.
10 | 
11 | tmp = pwd;
12 | cd(path);
13 | restore_dir = onCleanup(@() cd(tmp)); %#ok<NASGU>
14 | 
15 | % use an absolute folder name so that removing it from the search path
16 | % does not depend on the current directory at clean-up time
17 | voc_code_dir = fullfile(pwd, 'VOCcode');
18 | try
19 |   addpath(voc_code_dir);
20 |   restore_path = onCleanup(@() rmpath(voc_code_dir)); %#ok<NASGU>
21 |   VOCinit;
22 | catch err
23 |   error('VOCcode directory not found under %s (%s)', path, err.message);
24 | end
25 | 


--------------------------------------------------------------------------------
/imdb/imdb_eval_voc.m:
--------------------------------------------------------------------------------
  1 | function res = imdb_eval_voc(cls, boxes, imdb, cache_name, suffix)
  2 | % res = imdb_eval_voc(cls, boxes, imdb, cache_name, suffix)
  3 | %   Use the VOCdevkit to evaluate detections specified in boxes
  4 | %   for class cls against the ground-truth boxes in the image
  5 | %   database imdb. Results files are saved with an optional
  6 | %   suffix.
  7 | %
  8 | %   cls         class name; part of the results and output file names
  9 | %   boxes       cell array with one matrix of detections per image; the
 10 | %               first four columns are written out as the box and the
 11 | %               last column as its score
 12 | %   imdb        image database (name, image_ids and details.VOCopts are used)
 13 | %   cache_name  sub folder of output/fast_rcnn_cachedir receiving the
 14 | %               precision/recall plot and .mat file
 15 | %   suffix      optional; a leading '_' is added when missing
 16 | %
 17 | %   res has the fields recall, prec, ap and ap_auc. They stay [], [], 0, 0
 18 | %   when the evaluation is skipped (set 'test' of a year after 2007).
 19 | 
 20 | % AUTORIGHTS
 21 | % ---------------------------------------------------------
 22 | % Copyright (c) 2014, Ross Girshick
 23 | % 
 24 | % This file is part of the R-CNN code and is available 
 25 | % under the terms of the Simplified BSD License provided in 
 26 | % LICENSE. Please retain this notice and LICENSE if you use 
 27 | % this file (or any portion of it) in your project.
 28 | % ---------------------------------------------------------
 29 | 
 30 | % Add a random string ("salt") to the end of the results file name
 31 | % to prevent concurrent evaluations from clobbering each other
 32 | use_res_salt = true;
 33 | % Delete results files after computing APs
 34 | rm_res = true;
 35 | % comp4 because we use outside data (ILSVRC2012)
 36 | comp_id = 'comp4';
 37 | % draw each class curve
 38 | draw_curve = true;
 39 | 
 40 | % save results
 41 | if ~exist('suffix', 'var') || isempty(suffix)
 42 |   suffix = '';
 43 | elseif suffix(1) ~= '_'
 44 |   suffix = ['_' suffix];
 45 | end
 46 | 
 47 | conf.cache_dir = fullfile('output', 'fast_rcnn_cachedir', cache_name, imdb.name);
 48 | % the plot and the .mat file are written here, so make sure it exists
 49 | if ~exist(conf.cache_dir, 'dir')
 50 |   mkdir(conf.cache_dir);
 51 | end
 52 | VOCopts  = imdb.details.VOCopts;
 53 | image_ids = imdb.image_ids;
 54 | test_set = VOCopts.testset;
 55 | year = VOCopts.dataset(4:end);
 56 | 
 57 | % make the devkit functions available; they are taken off the path again
 58 | % when this function exits, also when the evaluation fails
 59 | voc_code_dir = fullfile(VOCopts.datadir, 'VOCcode');
 60 | addpath(voc_code_dir);
 61 | restore_path = onCleanup(@() rmpath(voc_code_dir)); %#ok<NASGU>
 62 | 
 63 | if use_res_salt
 64 |   prev_rng = rng;
 65 |   rng shuffle;
 66 |   salt = sprintf('%d', randi(100000));
 67 |   res_id = [comp_id '-' salt];
 68 |   rng(prev_rng);
 69 | else
 70 |   res_id = comp_id;
 71 | end
 72 | res_fn = sprintf(VOCopts.detrespath, res_id, cls);
 73 | 
 74 | % write out detections in PASCAL format and score
 75 | fid = fopen(res_fn, 'w');
 76 | if fid < 0
 77 |   error('imdb_eval_voc: cannot open %s for writing', res_fn);
 78 | end
 79 | try
 80 |   for i = 1:length(image_ids)
 81 |     bbox = boxes{i};
 82 |     keep = nms(bbox, 0.3);
 83 |     bbox = bbox(keep,:);
 84 |     for j = 1:size(bbox,1)
 85 |       fprintf(fid, '%s %f %.3f %.3f %.3f %.3f\n', image_ids{i}, bbox(j,end), bbox(j,1:4));
 86 |     end
 87 |   end
 88 | catch err
 89 |   % do not leak the file handle when the detections cannot be written
 90 |   fclose(fid);
 91 |   rethrow(err);
 92 | end
 93 | fclose(fid);
 94 | 
 95 | recall = [];
 96 | prec = [];
 97 | ap = 0;
 98 | ap_auc = 0;
 99 | 
100 | % evaluate for years up to 2007, and for any set other than 'test'
101 | do_eval = (str2double(year) <= 2007) || ~strcmp(test_set, 'test');
102 | if do_eval
103 |   % Bug in VOCevaldet requires that tic has been called first
104 |   tic;
105 |   [recall, prec, ap] = VOCevaldet(VOCopts, res_id, cls, draw_curve);
106 |   ap_auc = xVOCap(recall, prec);
107 | 
108 |   % force plot limits
109 |   ylim([0 1]);
110 |   xlim([0 1]);
111 | 
112 |   print(gcf, '-djpeg', '-r0', ...
113 |         fullfile(conf.cache_dir, [cls '_pr_' imdb.name suffix '.jpg']));
114 | end
115 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);
116 | 
117 | save(fullfile(conf.cache_dir,  [cls '_pr_' imdb.name suffix]), ...
118 |     'recall', 'prec', 'ap', 'ap_auc');
119 | 
120 | res.recall = recall;
121 | res.prec = prec;
122 | res.ap = ap;
123 | res.ap_auc = ap_auc;
124 | if rm_res
125 |   delete(res_fn);
126 | end
127 | 


--------------------------------------------------------------------------------
/imdb/imdb_from_voc.m:
--------------------------------------------------------------------------------
 1 | function imdb = imdb_from_voc(root_dir, image_set, year, flip)
 2 | % imdb = imdb_from_voc(root_dir, image_set, year, flip)
 3 | %   Builds an image database for the PASCAL VOC devkit located
 4 | %   at root_dir using the image_set and year. With flip set (default
 5 | %   false) every image is followed by a left-right flipped copy, which is
 6 | %   written next to the original image if the file does not exist yet.
 7 | %   The database is cached under ./imdb/cache and loaded from there when
 8 | %   a usable cache file exists.
 9 | %
10 | %   Inspired by Andrea Vedaldi's MKL imdb and roidb code.
11 | 
12 | % AUTORIGHTS
13 | % ---------------------------------------------------------
14 | % Copyright (c) 2014, Ross Girshick
15 | % 
16 | % This file is part of the R-CNN code and is available 
17 | % under the terms of the Simplified BSD License provided in 
18 | % LICENSE. Please retain this notice and LICENSE if you use 
19 | % this file (or any portion of it) in your project.
20 | % ---------------------------------------------------------
21 | 
22 | %imdb.name = 'voc_train_2007'
23 | %imdb.image_dir = '/work4/rbg/VOC2007/VOCdevkit/VOC2007/JPEGImages/'
24 | %imdb.extension = '.jpg'
25 | %imdb.image_ids = {'000001', ... }
26 | %imdb.sizes = [numimages x 2]
27 | %imdb.classes = {'aeroplane', ... }
28 | %imdb.num_classes
29 | %imdb.class_to_id
30 | %imdb.class_ids
31 | %imdb.eval_func = pointer to the function that evaluates detections
32 | %imdb.roidb_func = pointer to the function that returns regions of interest
33 | %imdb.flip = value of the flip argument
34 | %imdb.flip_from = (flip only) index of the source image for flipped entries, else 0
35 | 
36 | if nargin < 4
37 |     flip = false;
38 | end
39 | 
40 | cache_file = ['./imdb/cache/imdb_voc_' year '_' image_set];
41 | if flip
42 |     cache_file = [cache_file, '_flip'];
43 | end
44 | try
45 |   % load into a struct: a cache file without an imdb variable then
46 |   % triggers a rebuild instead of leaving the output unassigned
47 |   ld = load(cache_file);
48 |   imdb = ld.imdb;
49 | catch
50 |   VOCopts = get_voc_opts(root_dir);
51 |   VOCopts.testset = image_set;
52 |   imdb.name = ['voc_' year '_' image_set];
53 |   imdb.image_dir = fileparts(VOCopts.imgpath);
54 |   imdb.image_ids = textread(sprintf(VOCopts.imgsetpath, image_set), '%s');
55 |   imdb.extension = 'jpg';
56 |   imdb.flip = flip;
57 |   if flip
58 |       image_at = @(i) sprintf('%s/%s.%s', imdb.image_dir, imdb.image_ids{i}, imdb.extension);
59 |       flip_image_at = @(i) sprintf('%s/%s_flip.%s', imdb.image_dir, imdb.image_ids{i}, imdb.extension);
60 |       for i = 1:length(imdb.image_ids)
61 |           if ~exist(flip_image_at(i), 'file')
62 |              im = imread(image_at(i));
63 |              imwrite(fliplr(im), flip_image_at(i));
64 |           end
65 |       end
66 |       % interleave: odd entries are the originals, even entries their flips
67 |       img_num = length(imdb.image_ids)*2;
68 |       image_ids = imdb.image_ids;
69 |       imdb.image_ids(1:2:img_num) = image_ids;
70 |       imdb.image_ids(2:2:img_num) = cellfun(@(x) [x, '_flip'], image_ids, 'UniformOutput', false);
71 |       imdb.flip_from = zeros(img_num, 1);
72 |       imdb.flip_from(2:2:img_num) = 1:2:img_num;
73 |   end
74 |   imdb.classes = VOCopts.classes;
75 |   imdb.num_classes = length(imdb.classes);
76 |   imdb.class_to_id = ...
77 |     containers.Map(imdb.classes, 1:imdb.num_classes);
78 |   imdb.class_ids = 1:imdb.num_classes;
79 |   % private VOC details
80 |   imdb.details.VOCopts = VOCopts;
81 |   % VOC specific functions for evaluation and region of interest DB
82 |   imdb.eval_func = @imdb_eval_voc;
83 |   imdb.roidb_func = @roidb_from_voc;
84 |   imdb.image_at = @(i) ...
85 |       sprintf('%s/%s.%s', imdb.image_dir, imdb.image_ids{i}, imdb.extension);
86 | 
87 |   % [height width] of every image, allocated once instead of grown per image
88 |   imdb.sizes = zeros(length(imdb.image_ids), 2);
89 |   for i = 1:length(imdb.image_ids)
90 |     tic_toc_print('imdb (%s): %d/%d\n', imdb.name, i, length(imdb.image_ids));
91 |     info = imfinfo(sprintf(VOCopts.imgpath, imdb.image_ids{i}));
92 |     imdb.sizes(i, :) = [info.Height info.Width];
93 |   end
94 | 
95 |   fprintf('Saving imdb to cache...');
96 |   save(cache_file, 'imdb', '-v7.3');
97 |   fprintf('done\n');
98 | end
99 | 


--------------------------------------------------------------------------------
/imdb/roidb_from_proposal.m:
--------------------------------------------------------------------------------
 1 | function roidb = roidb_from_proposal(imdb, roidb, regions, varargin)
 2 | % roidb = roidb_from_proposal(imdb, roidb, regions, varargin)
 3 | % --------------------------------------------------------
 4 | % Faster R-CNN
 5 | % Copyright (c) 2015, Shaoqing Ren
 6 | % Licensed under The MIT License [see LICENSE for details]
 7 | % --------------------------------------------------------
 8 | % Append the proposal boxes in regions to the rois of an existing roidb.
 9 | %
10 | %   imdb     image database; roidb must carry the same name
11 | %   roidb    roidb whose rois(i) have the fields gt, overlap, boxes, class
12 | %   regions  struct with images{i} (image names) and boxes{i}; the first
13 | %            four columns of boxes{i} are used, boxes{i} may be empty
14 | %   'keep_raw_proposal'  (default true) when false, only the ground-truth
15 | %            entries of roidb are kept before the new boxes are added
16 | 
17 | ip = inputParser;
18 | ip.addRequired('imdb', @isstruct);
19 | ip.addRequired('roidb', @isstruct);
20 | ip.addRequired('regions', @isstruct);
21 | ip.addParamValue('keep_raw_proposal', true, @islogical);
22 | ip.parse(imdb, roidb, regions, varargin{:});
23 | opts = ip.Results;
24 | 
25 | assert(strcmp(opts.roidb.name, opts.imdb.name));
26 | rois = opts.roidb.rois;
27 | 
28 | if ~opts.keep_raw_proposal
29 |     % remove proposal boxes in roidb
30 |     for i = 1:length(rois)
31 |         is_gt = rois(i).gt;
32 |         rois(i).gt = rois(i).gt(is_gt, :);
33 |         rois(i).overlap = rois(i).overlap(is_gt, :);
34 |         rois(i).boxes = rois(i).boxes(is_gt, :);
35 |         rois(i).class = rois(i).class(is_gt, :);
36 |     end
37 | end
38 | 
39 | % add new proposal boxes
40 | for i = 1:length(rois)
41 |     [~, image_name1] = fileparts(imdb.image_ids{i});
42 |     [~, image_name2] = fileparts(opts.regions.images{i});
43 |     assert(strcmp(image_name1, image_name2));
44 | 
45 |     boxes = opts.regions.boxes{i};
46 |     if isempty(boxes)
47 |         % image without proposals: selecting columns 1:4 of [] would fail
48 |         boxes = zeros(0, 4);
49 |     else
50 |         boxes = boxes(:, 1:4);
51 |     end
52 |     is_gt = rois(i).gt;
53 |     gt_boxes = rois(i).boxes(is_gt, :);
54 |     gt_classes = rois(i).class(is_gt, :);
55 |     all_boxes = cat(1, rois(i).boxes, boxes);
56 | 
57 |     num_gt_boxes = size(gt_boxes, 1);
58 |     num_boxes = size(boxes, 1);
59 | 
60 |     rois(i).gt = cat(1, rois(i).gt, false(num_boxes, 1));
61 |     rois(i).overlap = cat(1, rois(i).overlap, zeros(num_boxes, size(rois(i).overlap, 2)));
62 |     rois(i).boxes = cat(1, rois(i).boxes, boxes);
63 |     rois(i).class = cat(1, rois(i).class, zeros(num_boxes, 1));
64 |     % per class, keep the largest overlap with any ground-truth box of that class
65 |     for j = 1:num_gt_boxes
66 |         rois(i).overlap(:, gt_classes(j)) = ...
67 |             max(full(rois(i).overlap(:, gt_classes(j))), boxoverlap(all_boxes, gt_boxes(j, :)));
68 |     end
69 | end
70 | 
71 | roidb.rois = rois;
72 | 
73 | end


--------------------------------------------------------------------------------
/imdb/roidb_from_voc.m:
--------------------------------------------------------------------------------
  1 | function roidb = roidb_from_voc(imdb, varargin)
  2 | % roidb = roidb_from_voc(imdb, varargin)
  3 | %   Builds an regions of interest database from imdb image
  4 | %   database: the annotated boxes of every image plus, if requested,
  5 | %   precomputed proposals stored below <rootDir>/data. The result is
  6 | %   cached in <rootDir>/imdb/cache and loaded from there when possible.
  7 | %   Inspired by Andrea Vedaldi's MKL imdb and roidb code.
  8 | 
  9 | % AUTORIGHTS
 10 | % ---------------------------------------------------------
 11 | % Copyright (c) 2014, Ross Girshick
 12 | % 
 13 | % This file is part of the R-CNN code and is available 
 14 | % under the terms of the Simplified BSD License provided in 
 15 | % LICENSE. Please retain this notice and LICENSE if you use 
 16 | % this file (or any portion of it) in your project.
 17 | % ---------------------------------------------------------
 18 | 
 19 | ip = inputParser;
 20 | ip.addRequired('imdb', @isstruct);
 21 | ip.addParamValue('exclude_difficult_samples',       true,   @islogical);
 22 | ip.addParamValue('with_selective_search',           false,  @islogical);
 23 | ip.addParamValue('with_edge_box',                   false,  @islogical);
 24 | ip.addParamValue('with_self_proposal',              false,  @islogical);
 25 | ip.addParamValue('rootDir',                         '.',    @ischar);
 26 | ip.addParamValue('extension',                       '',     @ischar);
 27 | ip.parse(imdb, varargin{:});
 28 | opts = ip.Results;
 29 | 
 30 | roidb.name = imdb.name;
 31 | if ~isempty(opts.extension)
 32 |     opts.extension = ['_', opts.extension];
 33 | end
 34 | regions_file_ss = fullfile(opts.rootDir, sprintf('/data/selective_search_data/%s%s.mat', roidb.name, opts.extension));
 35 | regions_file_eb = fullfile(opts.rootDir, sprintf('/data/edge_box_data/%s%s.mat', roidb.name, opts.extension));
 36 | regions_file_sp = fullfile(opts.rootDir, sprintf('/data/self_proposal_data/%s%s.mat', roidb.name, opts.extension));
 37 | % each proposal source in use contributes a prefix to the cache file name
 38 | cache_file_ss = [];
 39 | cache_file_eb = [];
 40 | cache_file_sp = [];
 41 | if opts.with_selective_search 
 42 |     cache_file_ss = 'ss_';
 43 |     if ~exist(regions_file_ss, 'file')
 44 |         error('roidb_from_voc:: cannot find %s', regions_file_ss);
 45 |     end
 46 | end
 47 | 
 48 | if opts.with_edge_box 
 49 |     cache_file_eb = 'eb_';
 50 |     if ~exist(regions_file_eb, 'file')
 51 |         error('roidb_from_voc:: cannot find %s', regions_file_eb);
 52 |     end
 53 | end
 54 | 
 55 | if opts.with_self_proposal 
 56 |     cache_file_sp = 'sp_';
 57 |     if ~exist(regions_file_sp, 'file')
 58 |         error('roidb_from_voc:: cannot find %s', regions_file_sp);
 59 |     end
 60 | end
 61 | 
 62 | cache_file = fullfile(opts.rootDir, ['/imdb/cache/roidb_' cache_file_ss cache_file_eb cache_file_sp imdb.name opts.extension]);
 63 | if imdb.flip
 64 |     cache_file = [cache_file '_flip'];
 65 | end
 66 | if opts.exclude_difficult_samples
 67 |     cache_file = [cache_file '_easy'];
 68 | end
 69 | cache_file = [cache_file, '.mat'];
 70 | try
 71 |   ld = load(cache_file);
 72 |   roidb = ld.roidb;
 73 | catch
 74 |   VOCopts = imdb.details.VOCopts;
 75 |   addpath(fullfile(VOCopts.datadir, 'VOCcode')); 
 76 |   % take the devkit off the path again on exit, also after an error
 77 |   restore_path = onCleanup(@() rmpath(fullfile(VOCopts.datadir, 'VOCcode'))); %#ok<NASGU>
 78 |   roidb.name = imdb.name;
 79 |   fprintf('Loading region proposals...');
 80 |   regions = [];
 81 |   if opts.with_selective_search
 82 |         regions = load_proposals(regions_file_ss, regions);
 83 |   end
 84 |   if opts.with_edge_box
 85 |         regions = load_proposals(regions_file_eb, regions);
 86 |   end
 87 |   if opts.with_self_proposal
 88 |         regions = load_proposals(regions_file_sp, regions);
 89 |   end
 90 |   fprintf('done\n');
 91 |   if isempty(regions)
 92 |       fprintf('Warrning: no windows proposal is loaded !\n');
 93 |       regions.boxes = cell(length(imdb.image_ids), 1);
 94 |       if imdb.flip
 95 |             regions.images = imdb.image_ids(1:2:end);
 96 |       else
 97 |             regions.images = imdb.image_ids;
 98 |       end
 99 |   end
100 | 
101 |   if ~imdb.flip
102 |       for i = 1:length(imdb.image_ids)
103 |         tic_toc_print('roidb (%s): %d/%d\n', roidb.name, i, length(imdb.image_ids));
104 |         try
105 |           voc_rec = PASreadrecord(sprintf(VOCopts.annopath, imdb.image_ids{i}));
106 |         catch
107 |           voc_rec = [];   % no annotation record could be read for this image
108 |         end
109 |         if ~isempty(regions)
110 |             [~, image_name1] = fileparts(imdb.image_ids{i});
111 |             [~, image_name2] = fileparts(regions.images{i});
112 |             assert(strcmp(image_name1, image_name2));
113 |         end
114 |         roidb.rois(i) = attach_proposals(voc_rec, regions.boxes{i}, imdb.class_to_id, opts.exclude_difficult_samples, false);
115 |       end
116 |   else   % imdb entries 2i-1 and 2i are image i and its flipped copy
117 |       for i = 1:length(imdb.image_ids)/2
118 |         tic_toc_print('roidb (%s): %d/%d\n', roidb.name, i, length(imdb.image_ids)/2);
119 |         try
120 |           voc_rec = PASreadrecord(sprintf(VOCopts.annopath, imdb.image_ids{i*2-1}));
121 |         catch
122 |           voc_rec = [];   % no annotation record could be read for this image
123 |         end
124 |         if ~isempty(regions)
125 |             [~, image_name1] = fileparts(imdb.image_ids{i*2-1});
126 |             [~, image_name2] = fileparts(regions.images{i});
127 |             assert(strcmp(image_name1, image_name2));
128 |             assert(imdb.flip_from(i*2) == i*2-1);
129 |         end
130 |         roidb.rois(i*2-1) = attach_proposals(voc_rec, regions.boxes{i}, imdb.class_to_id, opts.exclude_difficult_samples, false);
131 |         roidb.rois(i*2) = attach_proposals(voc_rec, regions.boxes{i}, imdb.class_to_id, opts.exclude_difficult_samples, true);
132 |       end
133 |   end
134 | 
135 |   % (the devkit folder is removed from the path by restore_path on exit)
136 | 
137 |   fprintf('Saving roidb to cache...');
138 |   save(cache_file, 'roidb', '-v7.3');
139 |   fprintf('done\n');
140 | end
141 | 
142 | 
143 | % ------------------------------------------------------------------------
144 | function rec = attach_proposals(voc_rec, boxes, class_to_id, exclude_difficult_samples, flip)
145 | % ------------------------------------------------------------------------
146 | % Builds the roidb entry of one image: ground-truth boxes first, then the proposals.
147 | % change selective search order from [y1 x1 y2 x2] to [x1 y1 x2 y2]
148 | if ~isempty(boxes)
149 |     boxes = boxes(:, [2 1 4 3]);
150 |     if flip
151 |         % mirror the x coordinates; imgsize(1) is presumably the image width
152 |         boxes(:, [1, 3]) = voc_rec.imgsize(1) + 1 - boxes(:, [3, 1]);
153 |     end
154 | end
155 | % fields of rec, with N = number of ground-truth boxes + number of proposals:
156 | %           gt: N x 1 logical, true for the ground-truth boxes
157 | %      overlap: N x (number of classes) single, overlap with ground truth per class
158 | %        boxes: all boxes as single, ground truth first
159 | %         feat: always []
160 | %        class: N x 1 uint8, class id for ground-truth boxes, 0 for proposals
161 | if isfield(voc_rec, 'objects')
162 |   if exclude_difficult_samples
163 |       valid_objects = ~cat(1, voc_rec.objects(:).difficult);
164 |   else
165 |       valid_objects = 1:length(voc_rec.objects(:));
166 |   end
167 |   gt_boxes = cat(1, voc_rec.objects(valid_objects).bbox);
168 |   if flip && ~isempty(gt_boxes)   % gt_boxes is [] when every object was excluded
169 |     gt_boxes(:, [1, 3]) = voc_rec.imgsize(1) + 1 - gt_boxes(:, [3, 1]);
170 |   end
171 |   all_boxes = cat(1, gt_boxes, boxes);
172 |   gt_classes = class_to_id.values({voc_rec.objects(valid_objects).class});
173 |   gt_classes = cat(1, gt_classes{:});
174 |   num_gt_boxes = size(gt_boxes, 1);
175 | else
176 |   gt_boxes = [];
177 |   all_boxes = boxes;
178 |   gt_classes = [];
179 |   num_gt_boxes = 0;
180 | end
181 | num_boxes = size(boxes, 1);
182 | 
183 | rec.gt = cat(1, true(num_gt_boxes, 1), false(num_boxes, 1));
184 | rec.overlap = zeros(num_gt_boxes+num_boxes, class_to_id.Count, 'single');
185 | for i = 1:num_gt_boxes
186 |   rec.overlap(:, gt_classes(i)) = ...
187 |       max(rec.overlap(:, gt_classes(i)), boxoverlap(all_boxes, gt_boxes(i, :)));
188 | end
189 | rec.boxes = single(all_boxes);
190 | rec.feat = [];
191 | rec.class = uint8(cat(1, gt_classes, zeros(num_boxes, 1)));
192 | 
193 | % ------------------------------------------------------------------------
194 | function regions = load_proposals(proposal_file, regions)
195 | % ------------------------------------------------------------------------
196 | % Load proposal_file; if regions already holds proposals, append the new boxes per image.
197 | if isempty(regions)
198 |     regions = load(proposal_file);
199 | else
200 |     regions_more = load(proposal_file);
201 |     if ~isequal(regions.images(:), regions_more.images(:))   % also catches lists of different length
202 |         error('roidb_from_voc: %s has a different image list than the other proposals.', proposal_file);
203 |     end
204 |     regions.boxes = cellfun(@(x, y) [double(x); double(y)], regions.boxes(:), regions_more.boxes(:), 'UniformOutput', false);
205 | end


--------------------------------------------------------------------------------
/startup.m:
--------------------------------------------------------------------------------
 1 | function startup()
 2 | % startup()
 3 | % --------------------------------------------------------
 4 | % Faster R-CNN
 5 | % Copyright (c) 2015, Shaoqing Ren
 6 | % Licensed under The MIT License [see LICENSE for details]
 7 | % --------------------------------------------------------
 8 | % Put the project folders and matcaffe on the MATLAB path and create the
 9 | % working folders of the project, all relative to the folder of this file.
10 | % Raises an error if external/caffe/matlab does not exist.
11 | 
12 |     root = fileparts(mfilename('fullpath'));
13 |     in_root = @(varargin) fullfile(root, varargin{:});
14 | 
15 |     code_dirs = {'utils', 'functions', 'bin', 'experiments', 'imdb'};
16 |     for k = 1:numel(code_dirs)
17 |         addpath(genpath(in_root(code_dirs{k})));
18 |     end
19 | 
20 |     mkdir_if_missing(in_root('datasets'));
21 |     mkdir_if_missing(in_root('external'));
22 | 
23 |     % matcaffe is checked before the remaining folders are created
24 |     caffe_path = in_root('external', 'caffe', 'matlab');
25 |     if exist(caffe_path, 'dir') == 0
26 |         error('matcaffe is missing from external/caffe/matlab; See README.md');
27 |     end
28 |     addpath(genpath(caffe_path));
29 | 
30 |     mkdir_if_missing(in_root('imdb', 'cache'));
31 |     mkdir_if_missing(in_root('output'));
32 |     mkdir_if_missing(in_root('models'));
33 | 
34 |     fprintf('fast_rcnn startup done\n');
35 | end
36 | 


--------------------------------------------------------------------------------
/utils/RectLTRB2LTWH.m:
--------------------------------------------------------------------------------
1 | function [ rectsLTWH ] = RectLTRB2LTWH( rectsLTRB )
2 | % Convert rows (l, t, r, b) to rows (l, t, w, h), where w = r-l+1 and h = b-t+1.
3 | 
4 | rectsLTWH = [rectsLTRB(:, 1:2), rectsLTRB(:, 3:4) - rectsLTRB(:, 1:2) + 1];
5 | end
6 | 
7 | 


--------------------------------------------------------------------------------
/utils/RectLTWH2LTRB.m:
--------------------------------------------------------------------------------
1 | function [ rectsLTRB ] = RectLTWH2LTRB(rectsLTWH)
2 | % Convert rows (l, t, w, h) to rows (l, t, r, b), where r = l+w-1 and b = t+h-1.
3 | 
4 | rectsLTRB = [rectsLTWH(:, 1:2), rectsLTWH(:, 1:2) + rectsLTWH(:, 3:4) - 1];
5 | end
6 | 
7 | 


--------------------------------------------------------------------------------
/utils/active_caffe_mex.m:
--------------------------------------------------------------------------------
 1 | function active_caffe_mex(gpu_id, caffe_version)
 2 | % active_caffe_mex(gpu_id, caffe_version)
 3 | % --------------------------------------------------------
 4 | % Faster R-CNN
 5 | % Copyright (c) 2015, Shaoqing Ren
 6 | % Licensed under The MIT License [see LICENSE for details]
 7 | % --------------------------------------------------------
 8 | % Select GPU gpu_id in MATLAB and in caffe, and add the matcaffe folder
 9 | % external/caffe/matlab/<caffe_version> (below the current folder) to the
10 | % path. caffe_version defaults to 'caffe'; if its folder does not exist,
11 | % external/caffe/matlab is used instead and a warning is issued.
12 | % The current folder is restored on exit, also when caffe raises an error.
13 | 
14 |     % set gpu in matlab
15 |     gpuDevice(gpu_id);
16 | 
17 |     if ~exist('caffe_version', 'var') || isempty(caffe_version)
18 |         caffe_version = 'caffe';
19 |     end
20 |     cur_dir = pwd;
21 |     default_dir = fullfile(cur_dir, 'external', 'caffe', 'matlab');
22 |     caffe_dir = fullfile(default_dir, caffe_version);
23 | 
24 |     if ~exist(caffe_dir, 'dir')
25 |         warning('Specified caffe folder (%s) is not exist, change to default one (%s)', ...
26 |             caffe_dir, default_dir);
27 |         caffe_dir = default_dir;
28 |     end
29 | 
30 |     addpath(genpath(caffe_dir));
31 |     cd(caffe_dir);
32 |     % go back to the original folder even if caffe.set_device fails
33 |     restore_dir = onCleanup(@() cd(cur_dir)); %#ok<NASGU>
34 |     % caffe is given gpu_id-1 (presumably its device ids start at zero)
35 |     caffe.set_device(gpu_id-1);
36 | end
37 | 


--------------------------------------------------------------------------------
/utils/auto_select_gpu.m:
--------------------------------------------------------------------------------
 1 | function gpu_id = auto_select_gpu()
 2 | % gpu_id = auto_select_gpu()
 3 | % Select the gpu which has the maximum free memory 
 4 | % --------------------------------------------------------
 5 | % Faster R-CNN
 6 | % Copyright (c) 2015, Shaoqing Ren
 7 | % Licensed under The MIT License [see LICENSE for details]
 8 | % --------------------------------------------------------
 9 | % Returns the index of that device as used by gpuDevice; the first device
10 | % wins a tie. Raises an error when no GPU device is found. All devices are
11 | % deselected again before returning.
12 | 
13 |     % deselects all GPU devices
14 |     gpuDevice([]);
15 | 
16 |     num_gpus = gpuDeviceCount;
17 |     if num_gpus < 1
18 |         error('auto_select_gpu: no GPU device found');
19 |     end
20 | 
21 |     % start below any possible value so that a device is always chosen,
22 |     % even if every device reports zero free memory
23 |     maxFreeMemory = -1;
24 |     for i = 1:num_gpus
25 |         g = gpuDevice(i);
26 |         freeMemory = g.FreeMemory();
27 |         fprintf('GPU %d: free memory %d\n', i, freeMemory);
28 |         if freeMemory > maxFreeMemory
29 |             maxFreeMemory = freeMemory;
30 |             gpu_id = i;
31 |         end
32 |     end
33 |     fprintf('Use GPU %d\n', gpu_id);
34 | 
35 |     % deselects all GPU devices
36 |     gpuDevice([]);
37 | end
38 | 


--------------------------------------------------------------------------------
/utils/boxoverlap.m:
--------------------------------------------------------------------------------
 1 | function o = boxoverlap(a, b)
 2 | % Compute the symmetric intersection over union overlap between a set of
 3 | % bounding boxes in a and a single bounding box in b.
 4 | %
 5 | % a  a matrix where each row specifies a bounding box
 6 | % b  a matrix where each row specifies a bounding box
 7 | 
 8 | % AUTORIGHTS
 9 | % -------------------------------------------------------
10 | % Copyright (C) 2011-2012 Ross Girshick
11 | % Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick
12 | % 
13 | % This file is part of the voc-releaseX code
14 | % (http://people.cs.uchicago.edu/~rbg/latent/)
15 | % and is available under the terms of an MIT-like license
16 | % provided in COPYING. Please retain this notice and
17 | % COPYING if you use this file (or a portion of it) in
18 | % your project.
19 | % -------------------------------------------------------
20 | 
21 | o = cell(1, size(b, 1));
22 | for i = 1:size(b, 1)
23 |     x1 = max(a(:,1), b(i,1));
24 |     y1 = max(a(:,2), b(i,2));
25 |     x2 = min(a(:,3), b(i,3));
26 |     y2 = min(a(:,4), b(i,4));
27 | 
28 |     w = x2-x1+1;
29 |     h = y2-y1+1;
30 |     inter = w.*h;
31 |     aarea = (a(:,3)-a(:,1)+1) .* (a(:,4)-a(:,2)+1);
32 |     barea = (b(i,3)-b(i,1)+1) * (b(i,4)-b(i,2)+1);
33 |     % intersection over union overlap
34 |     o{i} = inter ./ (aarea+barea-inter);
35 |     % set invalid entries to 0 overlap
36 |     o{i}(w <= 0) = 0;
37 |     o{i}(h <= 0) = 0;
38 | end
39 | 
40 | o = cell2mat(o);
41 | 


--------------------------------------------------------------------------------
/utils/im_list_to_blob.m:
--------------------------------------------------------------------------------
 1 | function blob = im_list_to_blob(ims)
 2 |     max_shape = max(cell2mat(cellfun(@size, ims(:), 'UniformOutput', false)), [], 1);
 3 |     assert(all(cellfun(@(x) size(x, 3), ims, 'UniformOutput', true) == 3));
 4 |     num_images = length(ims);
 5 |     blob = zeros(max_shape(1), max_shape(2), 3, num_images, 'single');
 6 |     
 7 |     for i = 1:length(ims)
 8 |         im = ims{i};
 9 |         blob(1:size(im, 1), 1:size(im, 2), :, i) = im; 
10 |     end
11 | end


--------------------------------------------------------------------------------
/utils/mkdir_if_missing.m:
--------------------------------------------------------------------------------
function made = mkdir_if_missing(path)
% made = mkdir_if_missing(path)
% Create the directory `path` unless it already exists.
%
% made  true when this call created the directory, false when it was
%       already present
made = ~exist(path, 'dir');
if made
  mkdir(path);
end
7 | 


--------------------------------------------------------------------------------
/utils/parse_rst.m:
--------------------------------------------------------------------------------
 1 | function results = parse_rst(results, rst)
 2 | % results = parse_rst(results, rst)
 3 | % --------------------------------------------------------
 4 | % Faster R-CNN
 5 | % Copyright (c) 2015, Shaoqing Ren
 6 | % Licensed under The MIT License [see LICENSE for details]
 7 | % --------------------------------------------------------
 8 | 
 9 |     if isempty(results)
10 |         for i = 1:length(rst)
11 |             results.(rst(i).blob_name).data = [];
12 |         end
13 |     end
14 |         
15 |     for i = 1:length(rst)
16 |         results.(rst(i).blob_name).data = [results.(rst(i).blob_name).data; rst(i).data(:)];
17 |     end
18 | end


--------------------------------------------------------------------------------
/utils/prep_im_for_blob.m:
--------------------------------------------------------------------------------
 1 | function [im, im_scale] = prep_im_for_blob(im, im_means, target_size, max_size)
 2 |     im = single(im);
 3 |     
 4 |     if ~isa(im, 'gpuArray')
 5 |         try
 6 |             im = bsxfun(@minus, im, im_means);
 7 |         catch
 8 |             im_means = imresize(im_means, [size(im, 1), size(im, 2)], 'bilinear', 'antialiasing', false);    
 9 |             im = bsxfun(@minus, im, im_means);
10 |         end
11 |         im_scale = prep_im_for_blob_size(size(im), target_size, max_size);
12 | 
13 |         target_size = round([size(im, 1), size(im, 2)] * im_scale);
14 |         im = imresize(im, target_size, 'bilinear', 'antialiasing', false);
15 |     else
16 |         % for im as gpuArray
17 |         try
18 |             im = bsxfun(@minus, im, im_means);
19 |         catch
20 |             im_means_scale = max(double(size(im, 1)) / size(im_means, 1), double(size(im, 2)) / size(im_means, 2));
21 |             im_means = imresize(im_means, im_means_scale);    
22 |             y_start = floor((size(im_means, 1) - size(im, 1)) / 2) + 1;
23 |             x_start = floor((size(im_means, 2) - size(im, 2)) / 2) + 1;
24 |             im_means = im_means(y_start:(y_start+size(im, 1)-1), x_start:(x_start+size(im, 2)-1));
25 |             im = bsxfun(@minus, im, im_means);
26 |         end
27 |         
28 |         im_scale = prep_im_for_blob_size(size(im), target_size, max_size);
29 |         im = imresize(im, im_scale);
30 |     end
31 | end


--------------------------------------------------------------------------------
/utils/prep_im_for_blob_size.m:
--------------------------------------------------------------------------------
 1 | function im_scale = prep_im_for_blob_size(im_size, target_size, max_size)
 2 | 
 3 |     im_size_min = min(im_size(1:2));
 4 |     im_size_max = max(im_size(1:2));
 5 |     im_scale = double(target_size) / im_size_min;
 6 |     
 7 |     % Prevent the biggest axis from being more than MAX_SIZE
 8 |     if round(im_scale * im_size_max) > max_size
 9 |         im_scale = double(max_size) / double(im_size_max);
10 |     end
11 | end


--------------------------------------------------------------------------------
/utils/procid.m:
--------------------------------------------------------------------------------
 1 | function s = procid()
 2 | % Returns a string identifying the process.
 3 | 
 4 | % AUTORIGHTS
 5 | % -------------------------------------------------------
 6 | % Copyright (C) 2009-2012 Ross Girshick
 7 | % 
 8 | % This file is part of the voc-releaseX code
 9 | % (http://people.cs.uchicago.edu/~rbg/latent/)
10 | % and is available under the terms of an MIT-like license
11 | % provided in COPYING. Please retain this notice and
12 | % COPYING if you use this file (or a portion of it) in
13 | % your project.
14 | % -------------------------------------------------------
15 | 
16 | d = pwd();
17 | i = strfind(d, filesep);
18 | d = d(i(end)+1:end);
19 | s = d;
20 | 


--------------------------------------------------------------------------------
/utils/seed_rand.m:
--------------------------------------------------------------------------------
 1 | function prev_rng = seed_rand(seed)
 2 | % seed_rand - Set random number generator to a fixed seed.
 3 | %   prev_rng = seed_rand(seed)
 4 | %
 5 | %   Strategic use ensures that results are reproducible.
 6 | %
 7 | %   To restore the previous rng after calling this do:
 8 | %   rng(prev_rng);
 9 | 
10 | % AUTORIGHTS
11 | % ---------------------------------------------------------
12 | % Copyright (c) 2014, Ross Girshick
13 | % 
14 | % This file is part of the R-CNN code and is available 
15 | % under the terms of the Simplified BSD License provided in 
16 | % LICENSE. Please retain this notice and LICENSE if you use 
17 | % this file (or any portion of it) in your project.
18 | % ---------------------------------------------------------
19 | 
20 | if nargin < 1
21 |     % This value works best for me.
22 |     seed = 3;
23 |     % Just kidding, of course ;-).
24 | end
25 | 
26 | prev_rng = rng;
27 | rng(seed, 'twister')
28 | 


--------------------------------------------------------------------------------
/utils/showboxes.m:
--------------------------------------------------------------------------------
 1 | function showboxes(im, boxes, legends, color_conf)
 2 | % Draw bounding boxes on top of an image.
 3 | %   showboxes(im, boxes)
 4 | %
 5 | % -------------------------------------------------------
 6 | 
 7 | fix_width = 800;
 8 | if isa(im, 'gpuArray')
 9 |     im = gather(im);
10 | end
11 | imsz = size(im);
12 | scale = fix_width / imsz(2);
13 | im = imresize(im, scale);
14 | 
15 | if size(boxes{1}, 2) >= 5
16 |     boxes = cellfun(@(x) [x(:, 1:4) * scale, x(:, 5)], boxes, 'UniformOutput', false);
17 | else
18 |     boxes = cellfun(@(x) x(:, 1:4) * scale, boxes, 'UniformOutput', false);
19 | end
20 | 
21 | if ~exist('color_conf', 'var')
22 |     color_conf = 'default';
23 | end
24 | 
25 | image(im); 
26 | axis image;
27 | axis off;
28 | set(gcf, 'Color', 'white');
29 | 
30 | valid_boxes = cellfun(@(x) ~isempty(x), boxes, 'UniformOutput', true);
31 | valid_boxes_num = sum(valid_boxes);
32 | 
33 | if valid_boxes_num > 0
34 |     switch color_conf
35 |         case 'default'
36 |             colors_candidate = colormap('hsv');
37 |             colors_candidate = colors_candidate(1:(floor(size(colors_candidate, 1)/valid_boxes_num)):end, :);
38 |             colors_candidate = mat2cell(colors_candidate, ones(size(colors_candidate, 1), 1))';
39 |             colors = cell(size(valid_boxes));
40 |             colors(valid_boxes) = colors_candidate(1:sum(valid_boxes));
41 |         case 'voc'
42 |             colors_candidate = colormap('hsv');
43 |             colors_candidate = colors_candidate(1:(floor(size(colors_candidate, 1)/20)):end, :);
44 |             colors_candidate = mat2cell(colors_candidate, ones(size(colors_candidate, 1), 1))';
45 |             colors = colors_candidate;
46 |     end
47 |             
48 | 
49 |     for i = 1:length(boxes)
50 |         if isempty(boxes{i})
51 |             continue;
52 |         end
53 | 
54 |         for j = 1:size(boxes{i})
55 |             box = boxes{i}(j, 1:4);
56 |             if size(boxes{i}, 2) >= 5
57 |                 score = boxes{i}(j, end);
58 |                 linewidth = 2 + min(max(score, 0), 1) * 2;
59 |                 rectangle('Position', RectLTRB2LTWH(box), 'LineWidth', linewidth, 'EdgeColor', colors{i});
60 |                 label = sprintf('%s : %.3f', legends{i}, score);
61 |                 text(double(box(1))+2, double(box(2)), label, 'BackgroundColor', 'w');
62 |             else
63 |                 linewidth = 2;
64 |                 rectangle('Position', RectLTRB2LTWH(box), 'LineWidth', linewidth, 'EdgeColor', colors{i});
65 |                 label = sprintf('%s(%d)', legends{i}, i);
66 |                 text(double(box(1))+2, double(box(2)), label, 'BackgroundColor', 'w');
67 |             end
68 |         end
69 | 
70 |     end
71 | end
72 | end
73 | 
function [ rectsLTWH ] = RectLTRB2LTWH( rectsLTRB )
%rects (l, t, r, b) to (l, t, w, h)
%   rectsLTRB  one rectangle per row: [left top right bottom]
%   rectsLTWH  one rectangle per row: [left top width height], with
%              width = right - left + 1 and height = bottom - top + 1

% every term is taken column-wise so that each row uses its own top edge
rectsLTWH = [rectsLTRB(:, 1), rectsLTRB(:, 2), ...
    rectsLTRB(:, 3) - rectsLTRB(:, 1) + 1, rectsLTRB(:, 4) - rectsLTRB(:, 2) + 1];
end
79 | 
80 | 


--------------------------------------------------------------------------------
/utils/subsample_images.m:
--------------------------------------------------------------------------------
 1 | function [imdbs, roidbs] = subsample_images(imdbs, roidbs, max_num_neg_images, seed)
 2 | 
 3 | if ~exist('seed', 'var')
 4 |   seed = 6;
 5 | end
 6 | 
 7 | % class_num = cellfun(@(x) length(x.class_ids), imdbs, 'UniformOutput', true);
 8 | % assert(length(unique(class_num)) == 1);
 9 | % class_num = unique(class_num);
10 | 
11 | rois = cellfun(@(x) x.rois(:), roidbs, 'UniformOutput', false);
12 | rois_combine = cell2mat(rois(:));
13 | 
14 | % fix the random seed for repeatability
15 | prev_rng = seed_rand(seed);
16 | inds = randperm(length(rois_combine), max_num_neg_images);
17 | inds = sort(inds);
18 | 
19 | img_idx_start = 1;
20 | for i = 1:length(imdbs)
21 |     imdb_img_num = length(imdbs{i}.image_ids);
22 |     img_idx_end = img_idx_start + imdb_img_num - 1;
23 |     inds_start = find(inds >= img_idx_start, 1, 'first');
24 |     inds_end = find(inds <= img_idx_end, 1, 'last');
25 |     
26 |     inds_sub = inds(inds_start:inds_end);
27 |     inds_sub = inds_sub - img_idx_start + 1;
28 |     
29 |     imdbs{i}.image_ids = imdbs{i}.image_ids(inds_sub);
30 |     imdbs{i}.sizes = imdbs{i}.sizes(inds_sub, :);
31 |     if isfield(imdbs{i}, 'image_dir')
32 |         imdbs{i}.image_at = @(x) ...
33 |             sprintf('%s/%s.%s', imdbs{i}.image_dir, imdbs{i}.image_ids{x}, imdbs{i}.extension);
34 |     else
35 |        imdbs{i}.image_at = @(x) ...
36 |             sprintf('%s/%s.%s', imdbs{i}.imagedir, imdbs{i}.image_ids{x}, imdbs{i}.extension); 
37 |     end
38 |     roidbs{i}.rois = roidbs{i}.rois(inds_sub);
39 |     
40 |     img_idx_start = img_idx_start + imdb_img_num;
41 | end
42 | 
43 | % restore previous rng
44 | rng(prev_rng);
45 | 
46 | end


--------------------------------------------------------------------------------
/utils/subsample_images_per_class.m:
--------------------------------------------------------------------------------
 1 | function [imdbs, roidbs] = subsample_images_per_class(imdbs, roidbs, max_per_class_image_num, seed)
 2 | 
 3 | if ~exist('seed', 'var')
 4 |   seed = 6;
 5 | end
 6 | 
 7 | class_num = cellfun(@(x) length(x.class_ids), imdbs, 'UniformOutput', true);
 8 | assert(length(unique(class_num)) == 1);
 9 | class_num = unique(class_num);
10 | 
11 | rois = cellfun(@(x) x.rois, roidbs, 'UniformOutput', false);
12 | rois_combine = cell2mat(rois(:));
13 | rois_combine_class = arrayfun(@(x) x.class, rois_combine, 'UniformOutput', false);
14 | 
15 | %% select images with max_image_num
16 | 
17 | % fix the random seed for repeatability
18 | prev_rng = seed_rand(seed);
19 | inds = cell(class_num, 1);
20 | rois_combine_length = length(rois_combine);
21 | valid_idxs = cell(class_num, 1);
22 | parfor i = 1:class_num
23 |     valid_idxs{i} = cellfun(@(x) any(x == i), rois_combine_class, 'UniformOutput', false);
24 |     valid_idxs{i} = cell2mat(valid_idxs{i});
25 | end
26 | 
27 | for i = 1:class_num
28 |     valid_num = sum(valid_idxs{i});
29 | 
30 |     num = min(valid_num, max_per_class_image_num);
31 |     inds{i} = 1:rois_combine_length;
32 |     inds{i} = inds{i}(valid_idxs{i});
33 |     inds{i} = inds{i}(randperm(length(inds{i}), num));
34 | end
35 | 
36 | inds = cell2mat(inds')';
37 | inds = unique(inds);
38 | 
39 | % restore previous rng
40 | rng(prev_rng);
41 | 
42 | img_idx_start = 1;
43 | for i = 1:length(imdbs)
44 |     imdb_img_num = length(imdbs{i}.image_ids);
45 |     img_idx_end = img_idx_start + imdb_img_num - 1;
46 |     inds_start = find(inds >= img_idx_start, 1, 'first');
47 |     inds_end = find(inds <= img_idx_end, 1, 'last');
48 | 
49 |     inds_sub = inds(inds_start:inds_end);
50 |     inds_sub = inds_sub - img_idx_start + 1;
51 | 
52 |     imdbs{i}.image_ids = imdbs{i}.image_ids(inds_sub);
53 |     imdbs{i}.sizes = imdbs{i}.sizes(inds_sub, :);
54 |     if isfield(imdbs{i}, 'image_dir')
55 |         imdbs{i}.image_at = @(x) ...
56 |           sprintf('%s/%s.%s', imdbs{i}.image_dir, imdbs{i}.image_ids{x}, imdbs{i}.extension);
57 |     else
58 |         imdbs{i}.image_at = @(x) ...
59 |           sprintf('%s/%s.%s', imdbs{i}.imagedir, imdbs{i}.image_ids{x}, imdbs{i}.extension);
60 |     end
61 |     roidbs{i}.rois = roidbs{i}.rois(inds_sub);
62 | 
63 |     img_idx_start = img_idx_start + imdb_img_num;
64 | end
65 | 
66 | 
67 | 


--------------------------------------------------------------------------------
/utils/symbolic_link.m:
--------------------------------------------------------------------------------
 1 | function symbolic_link(link, target)
 2 | % symbolic_link(link, target)
 3 | % --------------------------------------------------------
 4 | % Faster R-CNN
 5 | % Copyright (c) 2015, Shaoqing Ren
 6 | % Licensed under The MIT License [see LICENSE for details]
 7 | % --------------------------------------------------------
 8 | 
 9 |     if ispc()
10 |         system(sprintf('mklink /J %s %s', link, target)); 
11 |     else 
12 |         system(sprintf('ln -s %s %s', link, target)); 
13 |     end
14 | 
15 | end
16 | 


--------------------------------------------------------------------------------
/utils/tic_toc_print.m:
--------------------------------------------------------------------------------
 1 | function tic_toc_print(fmt, varargin)
 2 | % Print only after 1 second has passed since the last print. 
 3 | % Arguments are the same as for fprintf.
 4 | 
 5 | % AUTORIGHTS
 6 | % -------------------------------------------------------
 7 | % Copyright (C) 2009-2012 Ross Girshick
 8 | % 
 9 | % This file is part of the voc-releaseX code
10 | % (http://people.cs.uchicago.edu/~rbg/latent/)
11 | % and is available under the terms of an MIT-like license
12 | % provided in COPYING. Please retain this notice and
13 | % COPYING if you use this file (or a portion of it) in
14 | % your project.
15 | % -------------------------------------------------------
16 | 
17 | persistent th;
18 | 
19 | if isempty(th)
20 |   th = tic();
21 | end
22 | 
23 | if toc(th) > 1
24 |   fprintf(fmt, varargin{:});
25 |   drawnow;
26 |   th = tic();
27 | end
28 | 


--------------------------------------------------------------------------------
/utils/vis_label.m:
--------------------------------------------------------------------------------
 1 | function vis_label(imdb, roidb)
 2 | 
 3 |     rois = roidb.rois;
 4 |     for iIM = 1:length(rois)
 5 |         im = imread(imdb.image_at(iIM));
 6 |         boxes = arrayfun(@(x) rois(iIM).boxes(rois(iIM).class == x, :), 1:length(imdb.classes), 'UniformOutput', false);
 7 |         legends = imdb.classes;
 8 |         showboxes(im, boxes, legends);
 9 |         pause;
10 |     end
11 | end
12 |   


--------------------------------------------------------------------------------
/utils/xVOCap.m:
--------------------------------------------------------------------------------
 1 | function ap = xVOCap(rec,prec)
 2 | % From the PASCAL VOC 2011 devkit
 3 | 
 4 | mrec=[0 ; rec ; 1];
 5 | mpre=[0 ; prec ; 0];
 6 | for i=numel(mpre)-1:-1:1
 7 |     mpre(i)=max(mpre(i),mpre(i+1));
 8 | end
 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
11 | 


--------------------------------------------------------------------------------
/utils/xVOChash_init.m:
--------------------------------------------------------------------------------
 1 | function hash = xVOChash_init(strs)
 2 | % From the PASCAL VOC 2011 devkit
 3 | 
 4 | hsize=4999;
 5 | hash.key=cell(hsize,1);
 6 | hash.val=cell(hsize,1);
 7 | 
 8 | for i=1:numel(strs)
 9 |     s=strs{i};
10 |     h=mod(str2double(s([4 6:end])),hsize)+1;
11 |     j=numel(hash.key{h})+1;
12 |     hash.key{h}{j}=strs{i};
13 |     hash.val{h}(j)=i;
14 | end
15 | 
16 | 


--------------------------------------------------------------------------------
/utils/xVOChash_lookup.m:
--------------------------------------------------------------------------------
function ind = xVOChash_lookup(hash,s)
% From the PASCAL VOC 2011 devkit
%
% ind = xVOChash_lookup(hash, s)
% Look up the stored value(s) for string s in a bucketed table.
%
% hash  struct with cell fields key and val of equal length; key{b} is a
%       cell array of strings and val{b} the matching values
% s     string to look up; the number formed by character 4 followed by
%       characters 6..end selects the bucket
% ind   values stored for keys that equal s exactly; empty if none

hsize=numel(hash.key);
h=mod(str2double(s([4 6:end])),hsize)+1;

bucket_keys = hash.key{h};
if isempty(bucket_keys)
    % nothing was ever stored in this bucket
    ind = [];
    return;
end

% exact, case-sensitive comparison against every key in the bucket
% (strcmp replaces the not-recommended strmatch(..., 'exact'))
ind = hash.val{h}(strcmp(s, bucket_keys));
7 | 


--------------------------------------------------------------------------------