├── .gitignore ├── LICENSE ├── README.md ├── code ├── adaptive_region_pooling │ ├── adaptive_region_pooling_mex.cpp │ ├── convFeat_to_poolFeat.m │ ├── convFeat_to_poolFeat_multi_region.m │ ├── load_pooling_params.m │ └── spm_expected_scale.m ├── bbox-recognition │ ├── extract_features.m │ ├── load_object_recognition_model_on_caffe.m │ ├── load_svm_model.m │ ├── prepare_batch_feat_input_data.m │ ├── read_svm_detection_weights.m │ ├── resolve_cache_dir.m │ ├── score_bboxes_all_imgs.m │ ├── scores_bboxes_img.m │ ├── train_detection_svm_with_hard_mining.m │ └── train_net_bbox_rec.m ├── bbox-regression │ ├── decode_reg_vals_to_bbox_targets.m │ ├── encode_bbox_targets_to_reg_vals.m │ ├── load_bbox_loc_model_on_caffe.m │ ├── regress_bboxes_all_imgs.m │ ├── regress_bboxes_img.m │ └── train_net_bbox_reg.m ├── caffe-funs │ ├── caffe_copy_weights_from_net2net.m │ ├── caffe_forward_net.m │ ├── caffe_get_blobs_size.m │ ├── caffe_get_input_diff.m │ ├── caffe_get_output.m │ ├── caffe_load_model.m │ ├── caffe_reshape_net.m │ ├── caffe_reshape_net_as_input.m │ ├── caffe_set_device.m │ ├── caffe_set_input.m │ ├── caffe_set_output_diff.m │ ├── parse_copy_finetune_prototxt.m │ └── set_net_layer_weights.m ├── conv_features │ ├── extract_conv_features.m │ ├── extract_conv_features_all_images.m │ ├── extract_image_activation_maps.m │ ├── extract_semantic_features_all_images.m │ ├── extract_semantic_seg_features_from_conv5.m │ └── read_feat_conv_data.m ├── data-providers │ ├── extract_edge_boxes_from_dataset.m │ ├── extract_selective_search_boxes_from_dataset.m │ ├── get_grount_truth_bboxes_from_voc.m │ ├── get_image_paths_from_voc.m │ ├── initVOCOpts.m │ ├── load_box_proposals.m │ ├── load_feature_paths.m │ └── load_image_dataset.m ├── examples │ ├── demo_MRCNN_detection.m │ ├── demo_MRCNN_with_Iterative_Localization.m │ ├── demo_MRCNN_with_SCNN_and_Iterative_Localization.m │ ├── demo_MRCNN_with_SCNN_detection.m │ ├── demo_object_detection.m │ ├── 
demo_object_detection_with_iterative_loc.m │ ├── display_bbox_detections.m │ └── images │ │ ├── 000084.jpg │ │ └── fish-bike.jpg ├── postprocessing │ ├── merge_detected_bboxes.m │ ├── nms.m │ ├── nms_mex.cpp │ ├── post_process_candidate_detections.m │ └── post_process_candidate_detections_all_imgs.m ├── script_create_MRCNN_SCNN_VOC2007_2012.m ├── script_create_MRCNN_VOC2007_2012.m ├── script_extract_sem_seg_aware_features.m ├── script_extract_vgg16_conv_features.m ├── script_test_object_detection_iter_loc.m ├── script_train_cnn_recognition_regression_models.m ├── script_train_linear_svms_of_model.m ├── script_train_net_bbox_rec_pascal.m ├── script_train_net_bbox_rec_sem_seg_aware_pascal.m ├── script_train_net_bbox_reg_pascal.m └── utils │ ├── boxoverlap.m │ ├── compute_ave_recall_of_bbox.m │ ├── compute_average_precision.m │ ├── createListOfImagesFromVOCOpts.m │ ├── eval_voc.m │ ├── evaluate_average_precision_pascal.m │ ├── getImageIdsFromImagePaths.m │ ├── get_image.m │ ├── mkdir_if_missing.m │ ├── printAPResults.m │ ├── procid.m │ ├── read_list_of_files.m │ ├── seed_rand.m │ ├── showboxes.m │ ├── showboxesc.m │ ├── tic_toc_print.m │ ├── writeDetectionsPascalFile.m │ └── xVOCap.m ├── external └── liblinear-1.94 │ ├── COPYRIGHT │ ├── Makefile │ ├── Makefile.win │ ├── README │ ├── blas │ ├── Makefile │ ├── blas.h │ ├── blasp.h │ ├── daxpy.c │ ├── ddot.c │ ├── dnrm2.c │ └── dscal.c │ ├── heart_scale │ ├── linear.cpp │ ├── linear.def │ ├── linear.h │ ├── matlab │ ├── Makefile │ ├── README │ ├── libsvmread.c │ ├── libsvmwrite.c │ ├── linear_model_matlab.c │ ├── linear_model_matlab.h │ ├── make.m │ ├── predict.c │ └── train.c │ ├── predict.c │ ├── python │ ├── Makefile │ ├── README │ ├── liblinear.py │ └── liblinearutil.py │ ├── train.c │ ├── tron.cpp │ └── tron.h ├── model-defs ├── MRCNN_Semantic_Features_model_svm.prototxt ├── MRCNN_model_svm.prototxt ├── Semantic_segmentation_aware_region_deploy_softmax.prototxt ├── 
Semantic_segmentation_aware_region_deploy_svm.prototxt ├── Semantic_segmentation_aware_region_pascal_solver.prototxt ├── Semantic_segmentation_aware_region_pascal_train_test.prototxt ├── VGG16_Region_Adaptation_BBox_Regression_Module_deploy.prototxt ├── VGG16_Region_Adaptation_BBox_Regression_Module_train_test.prototxt ├── VGG16_Region_Adaptation_BBox_Regression_Module_train_test_solver.prototxt ├── VGG16_Region_Adaptation_Module_train_test.prototxt ├── VGG16_Region_Adaptation_Module_train_test_solver.prototxt ├── VGG16_Region_Adaptation_deploy_softmax.prototxt ├── VGG16_Region_Adaptation_deploy_svm.prototxt └── auxiliary_def_files │ ├── Semantic_segmentation_aware_net_pascal_train_test_stream11.prototxt │ ├── VGG_ILSVRC_16_layers_pascal_train_test_stream1.prototxt │ ├── VGG_ILSVRC_16_layers_pascal_train_test_stream10.prototxt │ ├── VGG_ILSVRC_16_layers_pascal_train_test_stream2.prototxt │ ├── VGG_ILSVRC_16_layers_pascal_train_test_stream3.prototxt │ ├── VGG_ILSVRC_16_layers_pascal_train_test_stream4.prototxt │ ├── VGG_ILSVRC_16_layers_pascal_train_test_stream5.prototxt │ ├── VGG_ILSVRC_16_layers_pascal_train_test_stream6.prototxt │ ├── VGG_ILSVRC_16_layers_pascal_train_test_stream7.prototxt │ ├── VGG_ILSVRC_16_layers_pascal_train_test_stream8.prototxt │ └── VGG_ILSVRC_16_layers_pascal_train_test_stream9.prototxt ├── mrcnn_build.m └── startup.m
/.gitignore:
--------------------------------------------------------------------------------
1 | data/
2 | datasets
3 | bin
4 | external/caffe
5 | feat_cache
6 | models-exps
7 | *~
8 | 
--------------------------------------------------------------------------------
/code/adaptive_region_pooling/convFeat_to_poolFeat_multi_region.m:
--------------------------------------------------------------------------------
1 | function [region_feats, regions] = convFeat_to_poolFeat_multi_region(...
2 |     multiple_region_params, conv_feats, boxes, random_scale)
3 | % convFeat_to_poolFeat_multi_region given the convolutional features of an
4 | % image, it adaptively pools fixed size region features for each bounding
5 | % box and for multiple types of regions.
6 | %
7 | % INPUTS:
8 | % 1) multiple_region_params: is a M x 1 vector of objects of type struct that
9 | % specify the region pooling parameters for each of the M types of regions.
10 | % When more than one type of convolutional features is given, the field
11 | % feat_id of each struct selects which entry of conv_feats is pooled.
12 | % 2) conv_feats: is a K x 1 cell vector or vector of objects, with K >= 1
13 | % different types of convolutional features. When K == 1, conv_feats is
14 | % handed to convFeat_to_poolFeat as it is and feat_id is not used.
15 | % 3) boxes: a N x 4 array with the bounding box coordinates in the form of
16 | % [x0,y0,x1,y1] (where (x0,y0) is the top-left corner and (x1,y1) the
17 | % bottom-right corner)
18 | % 4) random_scale: (optional, default false) a boolean value that if set to
19 | % true then each bounding box is projected to the convolutional features
20 | % of a random scale of the image.
21 | %
22 | % OUTPUTS:
23 | % 1) region_feats: is a M x 1 cell array where region_feats{i} is a N x F_i
24 | % array with the region features of each of the N bounding boxes for the
25 | % i-th type of region. F_i is the number of features of the i-th type of
26 | % region.
27 | % 2) regions: is a M x 1 cell array with the region coordinates of each
28 | % bounding box and for each type of region. Specifically, regions{i} is a
29 | % N x 8 array that contains the region coordinates of each of the N bounding
30 | % boxes for the i-th type of region. Note that each region is represented
31 | % by 8 values [xo0, yo0, xo1, yo1, xi0, yi0, xi1, yi1] that correspond to
32 | % its outer rectangle [xo0, yo0, xo1, yo1] and its inner rectangle
33 | % [xi0, yi0, xi1, yi1].
34 | % When boxes is empty, every cell of both outputs is an empty single array.
35 | %
36 | % ERRORS:
37 | % An error is raised when conv_feats has more than one element and is
38 | % neither a cell array nor a struct array.
39 | %
40 | %
41 | % This file is part of the code that implements the following ICCV2015 accepted paper:
42 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
43 | % authors: Spyros Gidaris, Nikos Komodakis
44 | % institution: Universite Paris Est, Ecole des Ponts ParisTech
45 | % Technical report: http://arxiv.org/abs/1505.01749
46 | % code: https://github.com/gidariss/mrcnn-object-detection
47 | %
48 | % Part of the code in this file comes from the SPP-Net code:
49 | % https://github.com/ShaoqingRen/SPP_net
50 | %
51 | % AUTORIGHTS
52 | % --------------------------------------------------------
53 | % Copyright (c) 2015 Spyros Gidaris
54 | %
55 | % "Object detection via a multi-region & semantic segmentation-aware CNN model"
56 | % Technical report: http://arxiv.org/abs/1505.01749
57 | % Licensed under The MIT License [see LICENSE for details]
58 | % ---------------------------------------------------------
59 | % Copyright (c) 2014, Shaoqing Ren
60 | %
61 | % This file is part of the SPP code and is available
62 | % under the terms of the Simplified BSD License provided in
63 | % LICENSE. Please retain this notice and LICENSE if you use
64 | % this file (or any portion of it) in your project.
65 | % ---------------------------------------------------------
66 | 
67 | 
68 | if ~exist('random_scale', 'var'), random_scale = false; end
69 | 
70 | num_regions = length(multiple_region_params);
71 | 
72 | % Pre-fill both outputs with empty single arrays so that the caller always
73 | % receives M x 1 cell arrays, even when there is nothing to pool.
74 | region_feats = repmat({single([])}, num_regions, 1);
75 | regions      = repmat({single([])}, num_regions, 1);
76 | 
77 | if isempty(boxes), return; end
78 | 
79 | % pool the region features of the bounding boxes for each type of region
80 | for p = 1:num_regions
81 |     region_params = multiple_region_params(p);
82 | 
83 |     % pick the convolutional features that this type of region pools from
84 |     if length(conv_feats) == 1
85 |         feats_this_region = conv_feats;
86 |     elseif iscell(conv_feats)
87 |         feats_this_region = conv_feats{region_params.feat_id};
88 |     elseif isstruct(conv_feats)
89 |         feats_this_region = conv_feats(region_params.feat_id);
90 |     else
91 |         % previously this case silently returned empty features
92 |         error('mrcnn:convFeat_to_poolFeat_multi_region:unsupportedConvFeats', ...
93 |             'conv_feats must be a cell array or a struct array, got %s.', ...
94 |             class(conv_feats));
95 |     end
96 | 
97 |     [region_feats{p}, regions{p}] = convFeat_to_poolFeat(...
98 |         region_params, feats_this_region, boxes, random_scale);
99 | end
100 | end
101 | 
--------------------------------------------------------------------------------
/code/adaptive_region_pooling/load_pooling_params.m:
--------------------------------------------------------------------------------
1 | function pool_params = load_pooling_params(pool_params_def, varargin)
2 | % load_pooling_params(pool_params_def,...): it initializes the struct that
3 | % contains the adaptive region max pooling parameters and the parameters
4 | % related to the region type
5 | %
6 | % INPUTS:
7 | % 1) pool_params_def: string with path to the configuration file that
8 | % contains the adaptive region max pooling parameters. If the path has no
9 | % extension then '.m' is appended to it. The file must exist.
10 | % The rest input arguments are given in the form of Name,Value pair
11 | % arguments and are related to the region type:
12 | % 'scale_inner': scalar value with the scaling factor of the inner rectangle
13 | % of the region. In case this value is 0 then actually no inner rectangle
14 | % is being used
15 | % 'scale_outer': scalar value with the scaling factor of the outer rectangle
16 | % of the region.
17 | % 'half_bbox': integer value in the range [1,2,3,4]. If this parameter is set
18 | % to 1, 2, 3, or 4 then each bounding box will be reshaped to its left,
19 | % right, top, or bottom half part correspondingly. This action is performed
20 | % prior to scaling the box according to the scale_inner and scale_outer
21 | % params. If this parameter is missing or if it is empty then the action of
22 | % taking the half part of bounding box is NOT performed.
23 | % 'feat_id': (default 1) numeric index that is stored in the output struct;
24 | % convFeat_to_poolFeat_multi_region uses it to select the type of
25 | % convolutional features that this region pools from.
26 | %
27 | % OUTPUT:
28 | % 1) pool_params: struct that contains the adaptive region max pooling
29 | % parameters and the parameters related to the region type
30 | %
31 | % NOTE: the configuration file is executed with eval() from inside its own
32 | % folder, so it must come from a trusted location. The current folder is
33 | % restored when the function exits, also when the configuration file fails.
34 | %
35 | % This file is part of the code that implements the following ICCV2015 accepted paper:
36 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
37 | % authors: Spyros Gidaris, Nikos Komodakis
38 | % institution: Universite Paris Est, Ecole des Ponts ParisTech
39 | % Technical report: http://arxiv.org/abs/1505.01749
40 | % code: https://github.com/gidariss/mrcnn-object-detection
41 | %
42 | % Part of the code in this file comes from the SPP-Net code:
43 | % https://github.com/ShaoqingRen/SPP_net
44 | %
45 | % AUTORIGHTS
46 | % --------------------------------------------------------
47 | % Copyright (c) 2015 Spyros Gidaris
48 | %
49 | % "Object detection via a multi-region & semantic segmentation-aware CNN model"
50 | % Technical report: http://arxiv.org/abs/1505.01749
51 | % Licensed under The MIT License [see LICENSE for details]
52 | % ---------------------------------------------------------
53 | % Copyright (c) 2014, Shaoqing Ren
54 | %
55 | % This file is part of the SPP code and is available
56 | % under the terms of the Simplified BSD License provided in
57 | % LICENSE. Please retain this notice and LICENSE if you use
58 | % this file (or any portion of it) in your project.
59 | % ---------------------------------------------------------
60 | 
61 | ip = inputParser;
62 | ip.addParamValue('scale_inner', [], @isnumeric);
63 | ip.addParamValue('scale_outer', [], @isnumeric);
64 | ip.addParamValue('half_bbox', [], @isnumeric);
65 | ip.addParamValue('feat_id', 1, @isnumeric);
66 | 
67 | ip.parse(varargin{:});
68 | opts = ip.Results;
69 | 
70 | %% Read the adaptive region max pooling parameters from the configuration file
71 | [~, ~, ext] = fileparts(pool_params_def);
72 | if isempty(ext), pool_params_def = [pool_params_def, '.m']; end
73 | assert(exist(pool_params_def, 'file') ~= 0);
74 | 
75 | [pool_def_dir, pool_def_file] = fileparts(pool_params_def);
76 | 
77 | % change folder to avoid too long path for eval(); the onCleanup object
78 | % brings the original folder back even if the configuration file throws
79 | cur_dir = pwd;
80 | restore_dir = onCleanup(@() cd(cur_dir)); %#ok<NASGU>
81 | % nothing to change into when the path has no directory component
82 | if ~isempty(pool_def_dir), cd(pool_def_dir); end
83 | pool_params = eval(pool_def_file);
84 | 
85 | %% Set the parameters related to the region type
86 | pool_params.scale_inner = opts.scale_inner;
87 | pool_params.scale_outer = opts.scale_outer;
88 | pool_params.half_bbox = opts.half_bbox;
89 | pool_params.feat_id = opts.feat_id;
90 | end
91 | 
--------------------------------------------------------------------------------
/code/adaptive_region_pooling/spm_expected_scale.m:
--------------------------------------------------------------------------------
1 | function expected_scale = spm_expected_scale(min_img_sz, boxes, spm_params)
2 | % expected_scale = spm_expected_scale(min_img_sz, boxes, spm_params)
3 | %
4 | % For each box it returns the rounded value of
5 | % sz_conv_standard * step_standard * min_img_sz / sqrt(box area)
6 | %
7 | % min_img_sz min(size(im, 1), size(im, 2))
8 | % boxes in rows with (l, t, r, b); the box area is computed
9 | % as (r - l + 1) * (b - t + 1)
10 | % spm_params struct with the fields sz_conv_standard and step_standard
11 | %
12 | % expected_scale column vector with one rounded value per box
13 | %
14 | % AUTORIGHTS
15 | % ---------------------------------------------------------
16 | % Copyright (c) 2014, Shaoqing Ren
17 | %
18 | % This file is part of the SPP code and is available
19 | % under the terms of the Simplified BSD License provided in
20 | % LICENSE. Please retain this notice and LICENSE if you use
21 | % this file (or any portion of it) in your project.
22 | % ---------------------------------------------------------
23 | 
24 | area = (boxes(:, 3) - boxes(:, 1) + 1) .* (boxes(:, 4) - boxes(:, 2) + 1);
25 | 
26 | expected_scale = spm_params.sz_conv_standard * spm_params.step_standard * min_img_sz ./ sqrt(area);
27 | % scale_expected = standard_img_size * min_img_sz ./ sqrt(area);
28 | expected_scale = round(expected_scale(:));
29 | 
30 | end
31 | 
--------------------------------------------------------------------------------
/code/bbox-recognition/extract_features.m:
--------------------------------------------------------------------------------
1 | function [ feats, feats_pool ] = extract_features( model, in, bboxes, feat_blob_name )
2 | % extract_features pools region features for a set of bounding boxes and
3 | % forwards them through the network of the model in order to read the
4 | % output of the blob with name feat_blob_name.
5 | %
6 | % INPUTS:
7 | % model: struct with the fields pooler (region pooling parameters that are
8 | % given to convFeat_to_poolFeat_multi_region) and net (the network that is
9 | % given to caffe_forward_net)
10 | % in: the convolutional features of an image
11 | % bboxes: array with one bounding box per row; only the first 4 columns
12 | % are used
13 | % feat_blob_name: string with the name of the blob to extract
14 | %
15 | % OUTPUTS:
16 | % feats: the output of the blob feat_blob_name; single([]) if bboxes is empty
17 | % feats_pool: the pooled region features that were fed to the network;
18 | % {} if bboxes is empty
19 | %
20 | % This file is part of the code that implements the following ICCV2015 accepted paper:
21 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
22 | % authors: Spyros Gidaris, Nikos Komodakis
23 | % institution: Universite Paris Est, Ecole des Ponts ParisTech
24 | % Technical report: http://arxiv.org/abs/1505.01749
25 | % code: https://github.com/gidariss/mrcnn-object-detection
26 | %
27 | % AUTORIGHTS
28 | % --------------------------------------------------------
29 | % Copyright (c) 2015 Spyros Gidaris
30 | %
31 | % "Object detection via a multi-region & semantic segmentation-aware CNN model"
32 | % Technical report: http://arxiv.org/abs/1505.01749
33 | % Licensed under The MIT License [see LICENSE for details]
34 | % ---------------------------------------------------------
35 | 
36 | if ~isempty(bboxes)
37 |     feats_pool = convFeat_to_poolFeat_multi_region(model.pooler, in, bboxes(:,1:4));
38 |     [outputs, out_blob_names_total] = caffe_forward_net(model.net, feats_pool, feat_blob_name);
39 |     % exactly one of the returned blobs must carry the requested name
40 |     idx = find(strcmp(out_blob_names_total,feat_blob_name));
41 |     assert(numel(idx) == 1);
42 |     feats = outputs{idx};
43 | else
44 |     feats_pool = {};
45 |     feats = single([]);
46 | end
47 | 
48 | end
49 | 
--------------------------------------------------------------------------------
/code/bbox-recognition/load_object_recognition_model_on_caffe.m:
--------------------------------------------------------------------------------
1 | function model_obj_rec = load_object_recognition_model_on_caffe(model_obj_rec, use_detection_svms, model_phase, model_dir)
2 | % load_object_recognition_model_on_caffe loads the network of a recognition
3 | % model and stores it in the field net of the model struct.
4 | %
5 | % INPUTS:
6 | % model_obj_rec: struct with the fields net_def_file and net_weights_file
7 | % and, when use_detection_svms is true, svm_weights_file and svm_layer_name
8 | % use_detection_svms: if true, the weights that are read from
9 | % svm_weights_file are set on the layer svm_layer_name of the loaded network
10 | % model_phase: the phase that is given to caffe_load_model
11 | % model_dir: the folder that the function changes into while loading
12 | %
13 | % OUTPUT:
14 | % model_obj_rec: the input struct with the field net set
15 | %
16 | % The current folder is restored when the function exits, also when the
17 | % loading fails.
18 | %
19 | % This file is part of the code that implements the following ICCV2015 accepted paper:
20 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
21 | % authors: Spyros Gidaris, Nikos Komodakis
22 | % institution: Universite Paris Est, Ecole des Ponts ParisTech
23 | % Technical report: http://arxiv.org/abs/1505.01749
24 | % code: https://github.com/gidariss/mrcnn-object-detection
25 | %
26 | %
27 | % AUTORIGHTS
28 | % --------------------------------------------------------
29 | % Copyright (c) 2015 Spyros Gidaris
30 | %
31 | % "Object detection via a multi-region & semantic segmentation-aware CNN model"
32 | % Technical report: http://arxiv.org/abs/1505.01749
33 | % Licensed under The MIT License [see LICENSE for details]
34 | % ---------------------------------------------------------
35 | 
36 | curr_dir = pwd;
37 | % bring the original folder back even if one of the calls below throws
38 | restore_dir = onCleanup(@() cd(curr_dir)); %#ok<NASGU>
39 | cd(model_dir);
40 | model_obj_rec.net = caffe_load_model(model_obj_rec.net_def_file, model_obj_rec.net_weights_file, model_phase);
41 | 
42 | if use_detection_svms
43 |     assert(exist(model_obj_rec.svm_weights_file, 'file')>0);
44 |     weights = read_svm_detection_weights(model_obj_rec.svm_weights_file);
45 |     model_obj_rec.net = set_net_layer_weights(model_obj_rec.net, model_obj_rec.svm_layer_name, weights);
46 | end
47 | end
48 | 
--------------------------------------------------------------------------------
/code/bbox-recognition/load_svm_model.m:
--------------------------------------------------------------------------------
1 | function model = load_svm_model(filename)
2 | % load_svm_model reads the variables weights and bias from the mat file
3 | % filename and returns them as the 2 x 1 cell array {weights; bias}.
4 | %
5 | % This file is part of the code that implements the following ICCV2015 accepted paper:
6 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
7 | % authors: Spyros Gidaris, Nikos Komodakis
8 | % institution: Universite Paris Est, Ecole des Ponts ParisTech
9 | % Technical report: http://arxiv.org/abs/1505.01749
10 | % code: https://github.com/gidariss/mrcnn-object-detection
11 | %
12 | %
13 | % AUTORIGHTS
14 | % --------------------------------------------------------
15 | % Copyright (c) 2015 Spyros Gidaris
16 | %
17 | % "Object detection via a multi-region & semantic segmentation-aware CNN model"
18 | % Technical report: http://arxiv.org/abs/1505.01749
19 | % Licensed under The MIT License [see LICENSE for details]
20 | % ---------------------------------------------------------
21 | 
22 | % load into a struct instead of injecting variables into the workspace
23 | svm_data = load(filename, 'weights', 'bias');
24 | model = {svm_data.weights; svm_data.bias};
25 | end
26 | 
--------------------------------------------------------------------------------
/code/bbox-recognition/prepare_batch_feat_input_data.m:
--------------------------------------------------------------------------------
1 | function data = prepare_batch_feat_input_data(pooler, feature_paths, windows, data_param)
2 | % prepare_batch_feat_input_data pools the region features of a batch of
3 | % windows from the cached convolutional features of their images.
4 | %
5 | % INPUTS:
6 | % pooler: the region pooling parameters that are given to
7 | % convFeat_to_poolFeat_multi_region
8 | % feature_paths: cell array with the paths of the cached convolutional
9 | % features; it is indexed with the values of the first column of windows
10 | % windows: array with one window per row. Column 1 is the index of the image
11 | % of the window and columns 4:7 are given as the bounding box to the pooler.
12 | % data_param: struct with the fields feat_dim, random_scale and, optionally,
13 | % num_threads (default 6; values <= 0 are ignored)
14 | %
15 | % OUTPUT:
16 | % data: a feat_dim x num_windows single array; column k holds the features
17 | % of the k-th row of windows
18 | %
19 | % The images are processed in 3 blocks. With num_threads == 1 a plain for
20 | % loop is used, otherwise a parfor loop with at most num_threads workers.
21 | 
22 | num_windows = size(windows,1);
23 | feat_dim = data_param.feat_dim;
24 | data = zeros(feat_dim, num_windows, 'single');
25 | num_threads = 6;
26 | 
27 | if isfield(data_param, 'num_threads') && data_param.num_threads > 0
28 |     num_threads = data_param.num_threads;
29 | end
30 | 
31 | % group the windows per image
32 | select_img_idx = unique(windows(:,1));
33 | num_sel_img = length(select_img_idx);
34 | feature_paths = feature_paths(select_img_idx);
35 | window_this_img = cell(num_sel_img, 1);
36 | img_window_map = cell(num_sel_img, 1);
37 | for i = 1:num_sel_img
38 |     indices = find(windows(:,1) == select_img_idx(i));
39 |     window_this_img{i} = windows(indices, :);
40 |     img_window_map{i} = indices(:);
41 | end
42 | assert(sum(cellfun(@(x) size(x, 1), window_this_img, 'UniformOutput', true)) == num_windows);
43 | assert(sum(cellfun(@(x) size(x, 1), img_window_map, 'UniformOutput', true)) == num_windows);
44 | 
45 | random_scale = data_param.random_scale;
46 | 
47 | num_blocks = 3;
48 | block_size = ceil(num_sel_img / num_blocks);
49 | for b = 1:num_blocks
50 |     start_idx = (b-1) * block_size + 1;
51 |     stop_idx = min(b*block_size,num_sel_img);
52 |     this_block_size = stop_idx - start_idx + 1;
53 | 
54 |     block_sel_img_idx = select_img_idx(start_idx:stop_idx);
55 |     block_window_this_img = window_this_img(start_idx:stop_idx);
56 |     block_feature_paths = feature_paths(start_idx:stop_idx);
57 | 
58 |     block_feats = cell(1,this_block_size);
59 |     % columns of data that the windows of this block are written to
60 |     block_data_map = cell2mat(img_window_map(start_idx:stop_idx));
61 | 
62 |     assert(all(block_data_map > 0 & block_data_map <= num_windows));
63 |     assert(length(unique(block_data_map)) == length(block_data_map));
64 |     if num_threads == 1
65 |         for i = 1:this_block_size
66 |             block_feats{i} = pool_feats_of_image(pooler, block_feature_paths{i}, ...
67 |                 block_window_this_img{i}, block_sel_img_idx(i), random_scale);
68 |         end
69 |     else
70 |         parfor (i = 1:this_block_size,num_threads)
71 |             block_feats{i} = pool_feats_of_image(pooler, block_feature_paths{i}, ...
72 |                 block_window_this_img{i}, block_sel_img_idx(i), random_scale);
73 |         end
74 |     end
75 |     data(:,block_data_map) = cell2mat(block_feats);
76 |     for i = 1:this_block_size, block_feats{i} = []; end
77 | end
78 | end
79 | 
80 | function feats = pool_feats_of_image(pooler, feature_path, img_windows, img_idx, random_scale)
81 | % pool_feats_of_image reads the cached convolutional features of one image
82 | % and pools the region features of all of its windows.
83 | assert(all(img_windows(:,1) == img_idx));
84 | %feat_cache = load(feature_path, 'feat');
85 | feat_cache = read_feat_conv_data( feature_path, true );
86 | feats = cell2mat(convFeat_to_poolFeat_multi_region(pooler, ...
87 |     feat_cache.feat, img_windows(:, 4:7), random_scale));
88 | end
89 | 
--------------------------------------------------------------------------------
/code/bbox-recognition/read_svm_detection_weights.m:
--------------------------------------------------------------------------------
1 | function model = read_svm_detection_weights( filepath )
2 | % read_svm_detection_weights reads the variables weights and bias from the
3 | % mat file filepath and returns them as the 2 x 1 cell array {weights; bias}.
4 | %
5 | % This file is part of the code that implements the following ICCV2015 accepted paper:
6 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
7 | % authors: Spyros Gidaris, Nikos Komodakis
8 | % institution: Universite Paris Est, Ecole des Ponts ParisTech
9 | % Technical report: http://arxiv.org/abs/1505.01749
10 | % code: https://github.com/gidariss/mrcnn-object-detection
11 | %
12 | %
13 | % AUTORIGHTS
14 | % --------------------------------------------------------
15 | % Copyright (c) 2015 Spyros Gidaris
16 | %
17 | % "Object detection via a multi-region & semantic segmentation-aware CNN model"
18 | % Technical report: http://arxiv.org/abs/1505.01749
19 | % Licensed under The MIT License [see LICENSE for details]
20 | % ---------------------------------------------------------
21 | 
22 | % load into a struct instead of injecting variables into the workspace
23 | svm_data = load(filepath, 'weights', 'bias');
24 | model = {svm_data.weights; svm_data.bias};
25 | end
26 | 
27 | 
--------------------------------------------------------------------------------
/code/bbox-recognition/resolve_cache_dir.m:
--------------------------------------------------------------------------------
1 | function cache_dir = resolve_cache_dir(model, model_dir, opts)
2 | % resolve_cache_dir picks the cache folder of a model and creates it if it
3 | % is missing. The folder is chosen in the following order:
4 | % 1) opts.cache_dir, if it is not empty (opts.cache_dir_name must then be
5 | % empty)
6 | % 2) <model_dir>/cache_dir/<opts.cache_dir_name>, if opts.cache_dir_name is
7 | % not empty
8 | % 3) <model_dir>/<model.svm_cache_dir without its first character>, if
9 | % opts.use_detection_svms is true
10 | % 4) <model_dir>/softmax otherwise
11 | % The folder <model_dir>/cache_dir is created in every case.
12 | %
13 | % This file is part of the code that implements the following ICCV2015 accepted paper:
14 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
15 | % authors: Spyros Gidaris, Nikos Komodakis
16 | % institution: Universite Paris Est, Ecole des Ponts ParisTech
17 | % Technical report: http://arxiv.org/abs/1505.01749
18 | % code: https://github.com/gidariss/mrcnn-object-detection
19 | %
20 | %
21 | % AUTORIGHTS
22 | % --------------------------------------------------------
23 | % Copyright (c) 2015 Spyros Gidaris
24 | %
25 | % "Object detection via a multi-region & semantic segmentation-aware CNN model"
26 | % Technical report: http://arxiv.org/abs/1505.01749
27 | % Licensed under The MIT License [see LICENSE for details]
28 | % ---------------------------------------------------------
29 | 
30 | cache_dir_base = fullfile(model_dir, 'cache_dir');
31 | mkdir_if_missing(cache_dir_base);
32 | if ~isempty(opts.cache_dir)
33 |     assert(isempty(opts.cache_dir_name), ...
34 |         'opts.cache_dir and opts.cache_dir_name must not both be set');
35 |     cache_dir = opts.cache_dir;
36 | elseif ~isempty(opts.cache_dir_name)
37 |     % opts.cache_dir is known to be empty in this branch
38 |     cache_dir = fullfile(cache_dir_base, opts.cache_dir_name);
39 | elseif opts.use_detection_svms
40 |     % NOTE(review): the first character of svm_cache_dir is dropped;
41 |     % presumably a leading '.' or file separator - confirm
42 |     cache_dir = fullfile(model_dir, model.svm_cache_dir(2:end));
43 | else
44 |     cache_dir = fullfile(model_dir, 'softmax');
45 | end
46 | mkdir_if_missing(cache_dir);
47 | end
48 | 
--------------------------------------------------------------------------------
/code/bbox-recognition/scores_bboxes_img.m:
--------------------------------------------------------------------------------
1 | function scores = scores_bboxes_img( model, conv_feat, bboxes )
2 | % scores_bboxes_img given a recognition model, the convolutional features
3 | % of an image and a set of bounding boxes it returns the classification scores
4 | % of each bounding box w.r.t. each of the C categories of the recognition model.
5 | % Those classification scores represent the likelihood of each bounding box
6 | % to tightly enclose an object for each of the C categories.
7 | %
8 | % INPUTS:
9 | % model: (type struct) the bounding box recognition model
10 | % conv_feat: the convolutional features of an image
11 | % bboxes: a N x 4 array with the bounding box coordinates in the form of
12 | % [x0,y0,x1,y1] where (x0,y0) is the top-left corner and (x1,y1) is the
13 | % bottom-right corner. N is the number of bounding boxes
14 | %
15 | % OUTPUT:
16 | % scores: N x C array with the classification scores of each bounding box
17 | % for each of the C categories.
18 | %
19 | % This file is part of the code that implements the following ICCV2015 accepted paper:
20 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
21 | % authors: Spyros Gidaris, Nikos Komodakis
22 | % institution: Universite Paris Est, Ecole des Ponts ParisTech
23 | % Technical report: http://arxiv.org/abs/1505.01749
24 | % code: https://github.com/gidariss/mrcnn-object-detection
25 | %
26 | % AUTORIGHTS
27 | % --------------------------------------------------------
28 | % Copyright (c) 2015 Spyros Gidaris
29 | %
30 | % "Object detection via a multi-region & semantic segmentation-aware CNN model"
31 | % Technical report: http://arxiv.org/abs/1505.01749
32 | % Licensed under The MIT License [see LICENSE for details]
33 | % ---------------------------------------------------------
34 | 
35 | num_classes = length(model.classes);
36 | 
37 | if ~isempty(bboxes)
38 |     % given the convolutional features of an image, adaptively pool fixed
39 |     % size region features for each bounding box and multiple types of
40 |     % regions
41 |     region_feats = convFeat_to_poolFeat_multi_region(model.pooler, conv_feat, bboxes(:,1:4));
42 |     % feed the region features to the fully connected layers in order to
43 |     % score the bounding box proposals
44 |     outputs = caffe_forward_net(model.net, region_feats);
45 |     scores = outputs{1}';
46 | else
47 |     scores = zeros(0,num_classes,'single');
48 | end
49 | 
50 | % in the case that there is one extra column than the number of categories,
51 | % then the first column represents the confidence score of each bounding
52 | % box to belong to the background category and it is removed before the
53 | % score array is returned.
54 | if size(scores,2) == (num_classes + 1), scores = scores(:,2:end); end
55 | end
56 | 
--------------------------------------------------------------------------------
/code/bbox-regression/decode_reg_vals_to_bbox_targets.m:
--------------------------------------------------------------------------------
1 | function bbox_pred = decode_reg_vals_to_bbox_targets(bbox_init, reg_values)
2 | % decode_reg_vals_to_bbox_targets turns regression values back to bounding
3 | % box coordinates; it is the inverse of encode_bbox_targets_to_reg_vals.
4 | %
5 | % INPUTS:
6 | % bbox_init: array with one initial box per row; the first 4 columns are
7 | % read as [x0,y0,x1,y1]
8 | % reg_values: N x (4*C) array; for each of the C classes it holds the 4
9 | % values [center x shift, center y shift, log width ratio, log height ratio],
10 | % where the shifts are expressed in units of the width and the height of
11 | % the initial box
12 | %
13 | % OUTPUT:
14 | % bbox_pred: N x (4*C) array of the same type as reg_values with the
15 | % predicted [x0,y0,x1,y1] box of each class
16 | %
17 | % The code in this file comes from the RCNN code:
18 | % https://github.com/rbgirshick/rcnn
19 | %
20 | % AUTORIGHTS
21 | % ---------------------------------------------------------
22 | % Copyright (c) 2014, Ross Girshick
23 | %
24 | % This file is part of the R-CNN code and is available
25 | % under the terms of the Simplified BSD License provided in
26 | % LICENSE. Please retain this notice and LICENSE if you use
27 | % this file (or any portion of it) in your project.
28 | % ---------------------------------------------------------
29 | 
30 | bbox_pred = zeros(size(reg_values), 'like', reg_values);
31 | num_classes = size(bbox_pred,2) / 4;
32 | for c = 1:num_classes
33 |     reg_values_this = reg_values(:,(c-1)*4 + (1:4));
34 |     bbox_pred(:,(c-1)*4 + (1:4)) = decode_reg_values(bbox_init, reg_values_this);
35 | end
36 | end
37 | 
38 | function bbox_pred = decode_reg_values(bbox_init, reg_values)
39 | % decode_reg_values decodes the N x 4 regression values of a single class
40 | 
41 | dst_ctr_x = reg_values(:,1);
42 | dst_ctr_y = reg_values(:,2);
43 | dst_scl_x = reg_values(:,3);
44 | dst_scl_y = reg_values(:,4);
45 | 
46 | % eps keeps the width and the height of degenerate boxes above zero
47 | src_w = bbox_init(:,3) - bbox_init(:,1) + eps;
48 | src_h = bbox_init(:,4) - bbox_init(:,2) + eps;
49 | src_ctr_x = bbox_init(:,1) + 0.5*src_w;
50 | src_ctr_y = bbox_init(:,2) + 0.5*src_h;
51 | 
52 | pred_ctr_x = (dst_ctr_x .* src_w) + src_ctr_x;
53 | pred_ctr_y = (dst_ctr_y .* src_h) + src_ctr_y;
54 | pred_w = exp(dst_scl_x) .* src_w;
55 | pred_h = exp(dst_scl_y) .* src_h;
56 | 
57 | bbox_pred = [pred_ctr_x - 0.5*pred_w, pred_ctr_y - 0.5*pred_h, ...
58 |     pred_ctr_x + 0.5*pred_w, pred_ctr_y + 0.5*pred_h];
59 | end
60 | 
--------------------------------------------------------------------------------
/code/bbox-regression/encode_bbox_targets_to_reg_vals.m:
--------------------------------------------------------------------------------
1 | function reg_values = encode_bbox_targets_to_reg_vals(bbox_src, bbox_dst_all)
2 | % encode_bbox_targets_to_reg_vals turns target bounding boxes to regression
3 | % values w.r.t. the source boxes.
4 | %
5 | % INPUTS:
6 | % bbox_src: array with one source box per row; the first 4 columns are
7 | % read as [x0,y0,x1,y1]
8 | % bbox_dst_all: N x (4*C) array with the target [x0,y0,x1,y1] box of each
9 | % of the C classes
10 | %
11 | % OUTPUT:
12 | % reg_values: N x (4*C) single array; for each class it holds the 4 values
13 | % [center x shift, center y shift, log width ratio, log height ratio],
14 | % where the shifts are divided by the width and the height of the source box
15 | %
16 | % The code in this file comes from the RCNN code:
17 | % https://github.com/rbgirshick/rcnn
18 | %
19 | % AUTORIGHTS
20 | % ---------------------------------------------------------
21 | % Copyright (c) 2014, Ross Girshick
22 | %
23 | % This file is part of the R-CNN code and is available
24 | % under the terms of the Simplified BSD License provided in
25 | % LICENSE. Please retain this notice and LICENSE if you use
26 | % this file (or any portion of it) in your project.
27 | % ---------------------------------------------------------
28 | 
29 | num_bbox = size(bbox_src,1);
30 | num_targets = size(bbox_dst_all,2);
31 | reg_values = zeros(num_bbox, num_targets, 'single');
32 | num_classes = num_targets / 4;
33 | 
34 | % eps keeps the width and the height of degenerate boxes above zero
35 | src_w = bbox_src(:,3) - bbox_src(:,1) + eps;
36 | src_h = bbox_src(:,4) - bbox_src(:,2) + eps;
37 | src_ctr_x = bbox_src(:,1) + 0.5*src_w;
38 | src_ctr_y = bbox_src(:,2) + 0.5*src_h;
39 | 
40 | for c = 1:num_classes
41 | 
42 |     bbox_dst = bbox_dst_all(:, (c-1)*4 + (1:4));
43 |     gt_w = bbox_dst(:,3) - bbox_dst(:,1) + eps;
44 |     gt_h = bbox_dst(:,4) - bbox_dst(:,2) + eps;
45 |     gt_ctr_x = bbox_dst(:,1) + 0.5*gt_w;
46 |     gt_ctr_y = bbox_dst(:,2) + 0.5*gt_h;
47 | 
48 |     dst_ctr_x = (gt_ctr_x - src_ctr_x) ./ src_w;
49 |     dst_ctr_y = (gt_ctr_y - src_ctr_y) ./ src_h;
50 |     dst_scl_w = log(gt_w ./ src_w);
51 |     dst_scl_h = log(gt_h ./ src_h);
52 | 
53 |     reg_values(:,(c-1)*4 + (1:4)) = [dst_ctr_x, dst_ctr_y, dst_scl_w, dst_scl_h];
54 | end
55 | 
56 | end
57 | 
--------------------------------------------------------------------------------
/code/bbox-regression/load_bbox_loc_model_on_caffe.m:
--------------------------------------------------------------------------------
1 | function model_bbox_loc = load_bbox_loc_model_on_caffe(model_bbox_loc, full_model_loc_dir)
2 | % load_bbox_loc_model_on_caffe loads, in the 'test' phase, the network that
3 | % is defined by the fields net_def_file and net_weights_file of
4 | % model_bbox_loc and stores it in the field net of the struct. The files are
5 | % loaded from inside the folder full_model_loc_dir; the current folder is
6 | % restored when the function exits, also when the loading fails.
7 | curr_dir = pwd;
8 | restore_dir = onCleanup(@() cd(curr_dir)); %#ok<NASGU>
9 | cd(full_model_loc_dir);
10 | model_phase = 'test';
11 | model_bbox_loc.net = caffe_load_model( model_bbox_loc.net_def_file, ...
12 |     model_bbox_loc.net_weights_file, model_phase);
13 | end
--------------------------------------------------------------------------------
/code/bbox-regression/regress_bboxes_img.m:
--------------------------------------------------------------------------------
1 | function bboxes_out = regress_bboxes_img( model, conv_feat, bboxes_in, img_size)
2 | % regress_bboxes_img given a bounding box regression model, the
3 | % convolutional features of an image and a set bounding boxes with the
4 | % category id of each of them, it regresses to new the bounding box
5 | % coordinates such that the new boxes will (ideally) tighter enclose an
6 | % object of the given category
7 | %
8 | % INPUTS:
9 | % 1) model: (type struct) the bounding box regression model
10 | % 2) conv_feat: the convolutional features of an image
11 | % 3) bboxes_in: a N x 5 array with the bounding box coordinates conv_feat
12 | % the form of [x0,y0,x1,y1,c] where (x0,y0) is tot-left corner, (x1,y1) is
13 | % the bottom-right corner, and c is the category id of the bounding box.
14 | % N is the number of bounding boxes
15 | % 4) img_size: a 1 x 2 vector with image size
16 | %
17 | % OUTPUT:
18 | % 1) bboxes_out : a N x 5 array with the refined bounding boxes.
It has the 19 | % same format as bboxes_in 20 | % 21 | % This file is part of the code that implements the following ICCV2015 accepted paper: 22 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model" 23 | % authors: Spyros Gidaris, Nikos Komodakis 24 | % institution: Universite Paris Est, Ecole des Ponts ParisTech 25 | % Technical report: http://arxiv.org/abs/1505.01749 26 | % code: https://github.com/gidariss/mrcnn-object-detection 27 | % 28 | % 29 | % AUTORIGHTS 30 | % -------------------------------------------------------- 31 | % Copyright (c) 2015 Spyros Gidaris 32 | % 33 | % "Object detection via a multi-region & semantic segmentation-aware CNN model" 34 | % Technical report: http://arxiv.org/abs/1505.01749 35 | % Licensed under The MIT License [see LICENSE for details] 36 | % --------------------------------------------------------- 37 | 38 | num_classes = length(model.classes); 39 | if ~isempty(bboxes_in) 40 | % given the convolutional features of an image, adaptively pool fixed 41 | % size region features for each bounding box and multiple type of 42 | % regions 43 | region_feats = convFeat_to_poolFeat_multi_region(model.pooler, conv_feat, bboxes_in(:,1:4)); 44 | % fed the region features to the fully connected layers in order to 45 | % predict the new bounding box regression values. 
Specifically, for 46 | % each bounding box 4*C values are predicted; 4 for each category type 47 | outputs = caffe_forward_net(model.net, region_feats); 48 | pred_vals = outputs{1}'; % array of size N x (4*C) 49 | assert(size(pred_vals,1) == size(bboxes_in,1)); 50 | assert(size(pred_vals,2) == 4*num_classes); 51 | pred_vals = reshape(pred_vals, [size(pred_vals,1),4,num_classes]); % tensor of size N x 4 x C 52 | class_indices = bboxes_in(:,5); 53 | 54 | % for each bounding box keep the 4 regression values that correspond 55 | % to the catogory to which this bounding box belongs 56 | bboxes_out = zeros(size(pred_vals,1),4,'single'); 57 | for i = 1:size(bboxes_in,1) 58 | bboxes_out(i,:) = pred_vals(i,:,class_indices(i)); 59 | end 60 | 61 | % given the the initial bounding boxes coordinates and the regression 62 | % values return the new bounding box coordinates 63 | bboxes_out = decode_reg_vals_to_bbox_targets(bboxes_in(:,1:4), bboxes_out); 64 | bboxes_out = clip_bbox_inside_the_img(bboxes_out, img_size); 65 | bboxes_out = [bboxes_out, class_indices]; 66 | else 67 | bboxes_out = zeros(0,5,'single'); 68 | end 69 | end 70 | 71 | function bboxes = clip_bbox_inside_the_img(bboxes, img_size) 72 | bboxes(:,1) = max(1, bboxes(:,1)); 73 | bboxes(:,2) = max(1, bboxes(:,2)); 74 | bboxes(:,3) = min(img_size(2), bboxes(:,3)); 75 | bboxes(:,4) = min(img_size(1), bboxes(:,4)); 76 | end 77 | -------------------------------------------------------------------------------- /code/caffe-funs/caffe_copy_weights_from_net2net.m: -------------------------------------------------------------------------------- 1 | function net_dst = caffe_copy_weights_from_net2net( net_dst, net_src, layers_dst, layers_src, scl_factor, shape_strict ) 2 | 3 | if ~exist('scl_factor','var'), scl_factor = 1; end 4 | if ~exist('shape_strict','var'), shape_strict = true; end 5 | 6 | assert(iscell(layers_dst)) 7 | assert(iscell(layers_src)) 8 | assert(length(layers_dst) == length(layers_src)); 9 | 10 | if 
numel(scl_factor) == 1 11 | scl_factor = scl_factor * ones(1,length(layers_dst)); 12 | end 13 | scl_factor = single(scl_factor); 14 | 15 | assert(length(layers_dst) == length(scl_factor)); 16 | 17 | for i = 1:length(layers_dst) 18 | try 19 | fprintf('Copying layer %d/%d %s to %s:\n', i, length(layers_dst), layers_src{i}, layers_dst{i}) 20 | 21 | num_params = min(length(net_dst.layers(layers_dst{i}).params),... 22 | length(net_src.layers(layers_src{i}).params)); 23 | 24 | for p = 1:num_params 25 | param_shape_this_dst = net_dst.layers(layers_dst{i}).params(p).shape; 26 | param_shape_this_src = net_src.layers(layers_src{i}).params(p).shape; 27 | 28 | if shape_strict 29 | assert(all(param_shape_this_dst == param_shape_this_src)); 30 | else 31 | assert(prod(param_shape_this_dst) == prod(param_shape_this_src)); 32 | end 33 | 34 | param_shape_this_dst4 = ones(1,4); 35 | param_shape_this_dst4((end-length(param_shape_this_dst)+1):end) = param_shape_this_dst; 36 | 37 | param_shape_this_src4 = ones(1,4); 38 | param_shape_this_src4((end-length(param_shape_this_src)+1):end) = param_shape_this_src; 39 | 40 | scl_factor_this = scl_factor(i); 41 | 42 | fprintf('param[%d]:[%d, %d, %d, %d] --> [%d, %d, %d, %d] | scl_factor = %.4f ', ... 43 | p, ... 44 | param_shape_this_src4(1), param_shape_this_src4(2), param_shape_this_src4(3), param_shape_this_src4(4), ... 
function [outputs, out_blob_names_total] = caffe_forward_net(net, input, out_blob_names_extra)
% CAFFE_FORWARD_NET  Forward a set of features through a network in batches.
%
% INPUTS:
% 1) net: the caffe network
% 2) input: cell array with one element per network input; each element is
%    indexed as input{i}(:, k), i.e. the k-th column holds the k-th sample
% 3) out_blob_names_extra: (optional) cell array with the names of extra
%    blobs whose data are collected next to the regular network outputs
%
% OUTPUTS:
% 1) outputs: cell array (column) with one 'single' matrix per collected
%    blob; each column of a matrix holds the flattened blob data of one
%    sample
% 2) out_blob_names_total: the network output names followed by the extra
%    blob names
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

if nargin < 3, out_blob_names_extra = {}; end
assert(iscell(out_blob_names_extra));

input_size = caffe_get_blobs_size(net, net.inputs);
assert(numel(input_size) == numel(input));
dims_first = size(input{1});
num_feats  = dims_first(end);      % number of samples = last dimension

native_names         = net.outputs;
num_native           = length(native_names);
num_total            = num_native + length(out_blob_names_extra);
out_blob_names_total = [native_names(:); out_blob_names_extra(:)];

output_size  = caffe_get_blobs_size(net, out_blob_names_total);
outputs      = cell(num_total,1);
out_feat_dim = zeros(num_total,1);

% the batch size is the 4th entry of the padded shape of the first input
batch_size  = input_size{1}(4);
num_batches = ceil(num_feats/batch_size);

% pre-allocate the result matrices
for k = 1:num_total
    last_dim        = output_size{k}(end);
    out_feat_dim(k) = prod(output_size{k}(1:(end-1)));
    if last_dim == batch_size
        % one column per sample
        outputs{k} = zeros(out_feat_dim(k), num_feats, 'single');
    else
        % blob whose last dimension is not the batch size: keep the whole
        % blob content of every batch
        outputs{k} = zeros(out_feat_dim(k), last_dim * num_batches, 'single');
    end
end

for b = 1:num_batches
    first_idx = (b-1) * batch_size + 1;
    last_idx  = min(b * batch_size, num_feats);
    num_valid = last_idx - first_idx + 1;

    % forward propagate one batch and read the extra blobs
    out = net.forward(prepare_batch(input, input_size, first_idx, last_idx));
    for k = (num_native+1):num_total
        out{k} = net.blobs(out_blob_names_total{k}).get_data;
    end

    for k = 1:num_total
        last_dim = output_size{k}(end);
        chunk    = reshape(out{k}, [out_feat_dim(k), last_dim]);
        if last_dim == batch_size
            if (b == num_batches)
                % the last batch may be only partially filled
                chunk = chunk(:,1:num_valid);
            end
            outputs{k}(:,first_idx:last_idx) = chunk;
        else
            outputs{k}(:,((b-1) * last_dim + 1):(b * last_dim)) = chunk;
        end
    end
end

end

function batch = prepare_batch(input, input_size, start_idx, stop_idx)
% PREPARE_BATCH  Copy the samples start_idx:stop_idx of every input into a
% zero-initialised 'single' array that has the full input blob size.
assert(numel(input_size) == numel(input));
num_valid = stop_idx - start_idx + 1;

batch = cell(1,numel(input_size));
for k = 1:numel(input_size)
    slab_shape    = input_size{k};
    slab_shape(4) = num_valid;
    padded        = zeros(input_size{k}, 'single');
    padded(:,:,:,1:num_valid) = reshape(input{k}(:,start_idx:stop_idx), slab_shape);
    batch{k}      = padded;
end
end

% ---- /code/caffe-funs/caffe_get_blobs_size.m ----
function blobs_size = caffe_get_blobs_size(net, blob_names)
% CAFFE_GET_BLOBS_SIZE  Return the shape of each named blob as a 1 x 4
% vector; shapes with fewer than 4 entries are left-padded with ones.
%
% INPUTS:
% 1) net: the caffe network
% 2) blob_names: cell array with the names of the blobs
%
% OUTPUT:
% 1) blobs_size: numel(blob_names) x 1 cell array with the padded shapes
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

blobs_size = cell(length(blob_names),1);
for k = 1:numel(blobs_size)
    blob_shape = net.blobs(blob_names{k}).shape;
    padded     = ones(1,4);
    padded((end-length(blob_shape)+1):end) = blob_shape;
    blobs_size{k} = padded;
end
end
function [input, input_blob_names] = caffe_get_input_diff(net)
% CAFFE_GET_INPUT_DIFF  Read the diff of every input blob of a network.
%
% INPUT:
% 1) net: the caffe network
%
% OUTPUTS:
% 1) input: NumInputs x 1 cell array; the i-th element is what get_diff
%    returns for the i-th input blob
% 2) input_blob_names: the names of the input blobs (net.inputs)
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

input_blob_names = net.inputs;
input = cell(length(input_blob_names),1);

for k = 1:numel(input)
    blob_name = input_blob_names{k};
    input{k}  = net.blobs(blob_name).get_diff;
end
end
function net = caffe_load_model( net_def_file, net_weights_file, phase )
% CAFFE_LOAD_MODEL  Create a caffe network and load its weights.
%
% INPUTS:
% 1) net_def_file: path to the network definition file
% 2) net_weights_file: cell array with the paths of the weights files; they
%    are loaded in the given order with net.copy_from. A single path may
%    also be given as a plain char array.
% 3) phase: (optional) the phase in which the network is created; the
%    default value is 'test'
%
% OUTPUT:
% 1) net: the created network
%
% An assertion fails (with the offending path in the message) when the
% definition file or any of the weights files does not exist. All paths are
% checked before the network is created, so no network is built only to be
% thrown away because of a missing weights file.
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

if ~exist('phase', 'var'), phase = 'test'; end

% accept a single weights file given as a char path
if ischar(net_weights_file), net_weights_file = {net_weights_file}; end
assert(iscell(net_weights_file), ...
    'caffe_load_model: net_weights_file must be a cell array of paths or a single char path');

% validate every path up front
assert(exist(net_def_file,'file')>0, ...
    'caffe_load_model: network definition file not found: %s', net_def_file);
num_weights_files = length(net_weights_file);
for i = 1:num_weights_files
    assert(exist(net_weights_file{i},'file')>0, ...
        'caffe_load_model: weights file not found: %s', net_weights_file{i});
end

net = caffe.Net(net_def_file, phase); % create net but not load weights
for i = 1:num_weights_files
    net.copy_from(net_weights_file{i});
end

end

% ---- /code/caffe-funs/caffe_reshape_net.m ----
function net = caffe_reshape_net(net, input_new_size)
% CAFFE_RESHAPE_NET  Reshape the input blobs of a network and then the
% network itself.
%
% INPUTS:
% 1) net: the caffe network
% 2) input_new_size: cell array with one element per network input; the
%    i-th element is the new shape of the i-th input blob
%
% OUTPUT:
% 1) net: the network that was passed in, after net.reshape() was called
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

input_blob_names = net.inputs;
num_inputs       = length(input_blob_names);
assert(numel(input_new_size) == num_inputs, ...
    'caffe_reshape_net: expected one new size per network input');
for i = 1:num_inputs
    net.blobs(input_blob_names{i}).reshape(input_new_size{i}); % reshape input blob
end
net.reshape();
end
function caffe_set_device( device_id )
% CAFFE_SET_DEVICE  Select the device on which caffe runs.
%
% INPUT:
% 1) device_id: when it is greater than zero caffe is put in GPU mode and
%    the GPU with the one-based index device_id is selected; otherwise caffe
%    is put in CPU mode
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

gpu_requested = false;
if device_id > 0, gpu_requested = true; end

if gpu_requested
    caffe.set_mode_gpu();
    % caffe counts the gpus from zero
    caffe.set_device(device_id-1);
    return;
end
caffe.set_mode_cpu();
end

% ---- /code/caffe-funs/caffe_set_input.m ----
function caffe_set_input(net, input)
% CAFFE_SET_INPUT  Write data into the input blobs of a network.
%
% INPUTS:
% 1) net: the caffe network
% 2) input: cell array with one element per network input; the i-th
%    element is reshaped to the shape of the i-th input blob and then
%    stored in it with set_data
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

blob_names = net.inputs;
% one input array is required per input blob
padded_sizes = caffe_get_blobs_size(net, blob_names);
assert(numel(padded_sizes) == numel(input));

for k = 1:length(blob_names)
    blob_name  = blob_names{k};
    blob_shape = net.blobs(blob_name).shape;
    input{k}   = reshape(input{k}, blob_shape);
    net.blobs(blob_name).set_data(input{k});
end
end
function [solver_file, train_net_file, test_net_file, max_iter, snapshot_prefix] = ...
    parse_copy_finetune_prototxt(solver_file_path, dest_dir)
% PARSE_COPY_FINETUNE_PROTOTXT  Parse a caffe solver prototxt file and copy
% it, together with the network definition files that it references, to a
% destination directory.
%
% INPUTS:
% 1) solver_file_path: path to the solver prototxt file
% 2) dest_dir: the directory to which the files are copied
%
% OUTPUTS:
% 1) solver_file: file name (with extension, without folder) of the solver
% 2) train_net_file: the first train network path found in the solver
% 3) test_net_file: cell array with the test network path(s) found in the
%    solver; when only a single net entry is found it is
%    {train_net_file}
% 4) max_iter: the value of the max_iter entry (converted with str2double)
% 5) snapshot_prefix: the value of the snapshot_prefix entry
%
% An error ('invalid solver file ...') is raised when no network entry, no
% max_iter entry or no snapshot_prefix entry can be found.
% The network paths are resolved relative to the folder of the solver file.
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% It is adapted from the SPP-Net code:
% https://github.com/ShaoqingRen/SPP_net
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------
% Copyright (c) 2014, Shaoqing Ren
%
% This file is part of the SPP code and is available
% under the terms of the Simplified BSD License provided in
% LICENSE. Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------


% copy solver, train_net and test_net to destination folder
% ------------------------------------------------

% split the solver path; solver_file becomes "<name><ext>"
[folder, solver_file, ext] = fileparts(solver_file_path);
solver_file = [solver_file, ext];

% read the solver file; the result is processed below with cellfun, one
% regexp per element
% NOTE(review): textread is flagged as "not recommended" in the MATLAB
% documentation -- consider textscan/fileread when this is next touched
solver_prototxt_text = textread(solver_file_path, '%[^\n]');
try % for old caffe
    % solver with separate train_net / test_net entries.
    % ANY error raised inside this try block (including the explicit
    % 'invalid solver file' errors below) switches to the catch branch.
    train_net_file_pattern = '(?<=train_net[ :]*")[^"]*(?=")';
    test_net_file_pattern = '(?<=test_net[ :]*")[^"]*(?=")';

    % match every line, keep only the lines with a match, take the first one
    train_net_file = cellfun(@(x) regexp(x, train_net_file_pattern, 'match'), solver_prototxt_text, 'UniformOutput', false);
    train_net_file = train_net_file(cellfun(@(x) ~isempty(x), train_net_file, 'UniformOutput', true));
    if isempty(train_net_file)
        error('invalid solver file %s \n', solver_file_path);
    end
    train_net_file = train_net_file{1}{1};

    % every line with a test_net match contributes its first match
    test_net_file = cellfun(@(x) regexp(x, test_net_file_pattern, 'match'), solver_prototxt_text, 'UniformOutput', false);
    test_net_file = test_net_file(cellfun(@(x) ~isempty(x), test_net_file, 'UniformOutput', true));
    if isempty(test_net_file)
        error('invalid solver file %s \n', solver_file_path);
    end
    test_net_file = cellfun(@(x) x{1}, test_net_file, 'UniformOutput', false);
catch % for new caffe
    % fallback: look for a quoted value that follows "net"; the first match
    % is used both as the train and as the (single) test network.
    % Note that train_test_net_file_pattern first holds the pattern string
    % and is then overwritten with the match results.
    train_test_net_file_pattern = '(?<=net[ :]*")[^"]*(?=")';
    train_test_net_file_pattern = cellfun(@(x) regexp(x, train_test_net_file_pattern, 'match'), solver_prototxt_text, 'UniformOutput', false);
    train_test_net_file_pattern = train_test_net_file_pattern(cellfun(@(x) ~isempty(x), train_test_net_file_pattern, 'UniformOutput', true));
    if isempty(train_test_net_file_pattern)
        error('invalid solver file %s \n', solver_file_path);
    end
    train_net_file = train_test_net_file_pattern{1}{1};
    test_net_file = {train_net_file};
end
% copy the solver and the network files (paths relative to the solver folder)
mkdir_if_missing(dest_dir);
copyfile(fullfile(folder, solver_file), dest_dir);
copyfile(fullfile(folder, train_net_file), dest_dir);

for i = 1:length(test_net_file), copyfile(fullfile(folder, test_net_file{i}), dest_dir); end

% the digits that follow "max_iter"
max_iter_pattern = '(?<=max_iter[ :]*)[0-9]*';
max_iter = cellfun(@(x) regexp(x, max_iter_pattern, 'match'), solver_prototxt_text, 'UniformOutput', false);
max_iter = max_iter(cellfun(@(x) ~isempty(x), max_iter, 'UniformOutput', true));
if isempty(max_iter)
    error('invalid solver file %s \n', solver_file_path);
end
max_iter = str2double(max_iter{1}{1});

% the quoted value that follows "snapshot_prefix"
snapshot_prefix_pattern = '(?<=snapshot_prefix[ :]*")[^"]*(?=")';
snapshot_prefix = cellfun(@(x) regexp(x, snapshot_prefix_pattern, 'match'), solver_prototxt_text, 'UniformOutput', false);
snapshot_prefix = snapshot_prefix(cellfun(@(x) ~isempty(x), snapshot_prefix, 'UniformOutput', true));
if isempty(snapshot_prefix)
    error('invalid solver file %s \n', solver_file_path);
end
snapshot_prefix = snapshot_prefix{1}{1};
end
function [rsp, scale] = extract_conv_features(CNN, img, scale, mean_pix)
% extract_conv_features extracts the convolutional features of one image
% for the specified scales using the provided convolutional neural network.
%
% INPUTS:
% 1) CNN: the caffe CNN struct with the convolutional neural network that
% implements the activation maps module
% 2) img: a Height x Width x 3 uint8 array that represents the image
% pixels (a single channel image is replicated to 3 channels)
% 3) scale: NumScales x 1 or 1 x NumScales vector with the image scales
% that will be used. The i-th value should be the size in pixels of the
% smallest dimension of the image in the i-th scale. The scales are
% expected to be sorted in ascending order.
% 4) mean_pix: a 3 x 1 or 1 x 3 vector (or a scalar that is used for all
% channels) with the mean pixel value per color channel that is subtracted
% from the scaled image before it is fed to the CNN
%
% OUTPUTS:
% 1) rsp: a 1 x NumScales cell array with the convolutional feature maps of
% each scale. The i-th element is a H_i x W_i x C array with the
% convolutional feature maps of the i-th scale. H_i and W_i are the height
% and width correspondingly of the convolutional feature maps for the i-th
% scale.
% 2) scale: the vector with the image scales that were actually used; the
% first scale whose image exceeds the size limit and all the scales after
% it are dropped
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% Part of the code in this file comes from the SPP-Net code:
% https://github.com/ShaoqingRen/SPP_net
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------
% Copyright (c) 2014, Shaoqing Ren
%
% This file is part of the SPP code and is available
% under the terms of the Simplified BSD License provided in
% LICENSE. Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

% largest input (in number of elements) that is forwarded; this value was
% selected for a GPU with 6Gbytes of memory
max_num_elements = 1200*2400*3;

% gray-scale image -> 3 identical channels
if size(img,3) == 1
    img = cat(3, img, img, img);
end

rsp = {};
for s = 1:length(scale)
    % 1) scale the image such that its smallest dimension becomes scale(s)
    % and 2) subtract the average color value (mean_pix) of each channel
    net_input = preprocess_img(img, scale(s), mean_pix);
    % [height x width x channels] -> [width x height x channels] in order
    % to be compatible with the C++ implementation of CAFFE
    net_input = permute(net_input, [2 1 3]);

    if (numel(net_input) > max_num_elements)
        % it does not fit in the GPU memory: drop this scale and all the
        % scales that follow
        scale = scale(1:(s-1));
        rsp   = rsp(1:(s-1));
        break;
    end

    % reshape the network such that it gets as input one image of size
    % size(net_input)
    CNN = caffe_reshape_net(CNN, {[size(net_input), 1]});
    % get the convolutional feature maps of the image
    net_output = CNN.forward({net_input});
    % [width x height x channels] -> [height x width x channels]
    rsp{s} = permute(net_output{1}, [2, 1, 3]);
end
end

function im_scaled_size = get_scaled_image_size(im_size, scale_sel)
% GET_SCALED_IMAGE_SIZE  [height, width] of the image after scaling it such
% that its smallest dimension becomes scale_sel; the other dimension is
% rounded up.
num_rows = im_size(1);
num_cols = im_size(2);
if (num_cols < num_rows)
    % the width is the smallest dimension
    im_scaled_size = [ceil(scale_sel * num_rows / num_cols), scale_sel];
else
    % the height is the smallest dimension (or the image is square)
    im_scaled_size = [scale_sel, ceil(scale_sel * num_cols / num_rows)];
end
end

function img = preprocess_img(img, scale, mean_pix)
% PREPROCESS_IMG  Resize the image for the given scale, convert it to
% single, reverse the order of its 3 channels and subtract mean_pix.
if numel(mean_pix) == 1
    mean_pix = [mean_pix, mean_pix, mean_pix];
end

target_size = get_scaled_image_size([size(img, 1), size(img, 2)], scale);

if (scale <= 224)
    img = imresize(img, target_size, 'bilinear');
else
    % for the larger scales the antialiasing of imresize is switched off
    img = imresize(img, target_size, 'bilinear', 'antialiasing', false);
end

% reverse the channel order (RGB -> BGR, assuming an RGB input image)
img = single(img);
img = img(:, :, [3 2 1]);
for ch = 1:3
    img(:,:,ch) = img(:,:,ch) - mean_pix(ch);
end
end
function extract_conv_features_all_images( net, input_file_paths, destination_dir, varargin)
% EXTRACT_CONV_FEATURES_ALL_IMAGES Extract the convolutional feature maps of
% a list of images and store them as one .mat file per image.
%
% INPUTS:
% 1) net: the network that is handed to extract_conv_features
% 2) input_file_paths: cell array with the paths of the images
% 3) destination_dir: directory that receives the feature files and the log
% Optional arguments (parsed with inputParser in this order):
%   start    : index of the first image to process (default 1)
%   end      : index of the last image to process; a value <= 0 means the
%              last image of the list (default 0)
%   scales   : image scales (default [480 576 688 874 1200])
%   mean_pix : mean pixel values (default [103.939, 116.779, 123.68])
%   force    : recompute features whose output file exists (default false)
%
% The output file of image i is <destination_dir>/<image id>.mat, where the
% image id is produced by getImageIdsFromImagePaths. A failure on a single
% image is reported and does not stop the loop.
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% Part of the code in this file comes from the SPP-Net code:
% https://github.com/ShaoqingRen/SPP_net
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------
% Copyright (c) 2014, Shaoqing Ren
%
% This file is part of the SPP code and is available
% under the terms of the Simplified BSD License provided in
% LICENSE. Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

ip = inputParser;
ip.addOptional('start',    1,                            @isscalar);
ip.addOptional('end',      0,                            @isscalar);
ip.addOptional('scales',   [480 576 688 874 1200],       @ismatrix);
ip.addOptional('mean_pix', [103.939, 116.779, 123.68],   @isnumeric);
ip.addOptional('force',    false,                        @islogical);

ip.parse(varargin{:});
opts = ip.Results;

num_paths = length(input_file_paths);
if opts.end <= 0
    opts.end = num_paths;
else
    opts.end = min(opts.end, num_paths);
end

% Where to save feature cache
opts.output_dir = destination_dir;
mkdir_if_missing(opts.output_dir);

% Log feature extraction. The log is placed INSIDE destination_dir (fullfile
% inserts the separator when destination_dir has no trailing one) and the
% diary is switched off again when this function exits, normally or not.
timestamp  = datestr(datevec(now()), 'yyyymmdd_HHMMSS');
diary_file = fullfile(destination_dir, ['extract_conv_features_all_images_', timestamp, '.txt']);
diary(diary_file);
stop_logging = onCleanup(@() diary('off')); %#ok<NASGU>
fprintf('Logging output in %s\n', diary_file);

fprintf('\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n');
fprintf('Extract convolutional features options:\n');
disp(opts);
fprintf('~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n');

filenames = getImageIdsFromImagePaths( input_file_paths );
total_time           = 0;
total_file_size_mega = 0;
count                = 0; % number of images actually processed in this run
num_imgs             = opts.end - opts.start + 1;
for i = opts.start:opts.end
    fprintf('%s: extract conv features: %d/%d\n', procid(), i, opts.end);
    output_file_path = [destination_dir, filesep, filenames{i}, '.mat'];

    if (~exist(output_file_path, 'file') || opts.force)
        tot_th = tic;

        try
            count = count + 1;
            file_size_mega = process_image(net, input_file_paths{i}, output_file_path, opts);
        catch exception
            % best effort: report the failure and continue with the next image
            file_size_mega = 0;
            fprintf('Error: Cannot proccess %s.\n', output_file_path);
            fprintf('Exception message %s\n', getReport(exception));
        end

        total_file_size_mega = total_file_size_mega + file_size_mega;
        avg_file_size_mega   = total_file_size_mega/count;
        est_total_size_giga  = num_imgs * avg_file_size_mega / 1024;
        total_time           = total_time + toc(tot_th);
        avg_time             = total_time/count;
        % images still to be visited are i+1..opts.end; using num_imgs - i
        % here would be wrong whenever opts.start > 1
        est_rem_time         = avg_time * (opts.end - i) / 60;
        fprintf('[avg time: %.2fs] [est rem. time: %.2fmins] [avg space %.3fMega] [est total space %.2fGiga]\n', ...
            avg_time, est_rem_time, avg_file_size_mega, est_total_size_giga);
    else
        fprintf(' [already exists]\n');
    end
end

end

function fileSizeInMbs = process_image(net, input_file_path, output_file_path, opts)
% Extract the features of one image, save them to output_file_path and
% return the size of the written file in megabytes.
th = tic;

d = init_feat_data();
img = get_image(input_file_path);
d.feat.im_height = size(img,1);
d.feat.im_width  = size(img,2);
[d.feat.rsp, d.feat.scale] = extract_conv_features(net, img, opts.scales, opts.mean_pix);

fprintf(' [features: %.3fs]', toc(th));
th = tic;
% the fields of d become the variables of the .mat file
save(output_file_path, '-struct', 'd');
fileInfo = dir(output_file_path);
fileSizeInMbs = fileInfo.bytes / (1024*1024);
fprintf(' [saving: %.3fs]', toc(th));
fprintf(' [Mbytes: %.4f]\n', fileSizeInMbs);
end

function d = init_feat_data()
% Empty record with the fields stored in every feature file.
d.gt      = [];
d.overlap = [];
d.boxes   = [];
d.class   = [];
d.feat    = [];
end
function conv_feat_data = extract_image_activation_maps(CNN, image, scales, mean_pix, ...
    Semantic_Aware_CNN, semantic_scales)
% extract_image_activation_maps(CNN, image, scales, mean_pix): extracts the
% activation maps of one image (section 3 of the technical report) for the
% specified scales using the convolutional neural network CNN.
% extract_image_activation_maps(CNN, image, scales, mean_pix, Semantic_Aware_CNN, semantic_scales):
% additionally extracts the semantic segmentation aware activation maps
% (section 4 of the technical report).
%
% INPUTS:
% 1) CNN: the caffe net with the convolutional neural network that
% implements the activation maps module (section 3)
% 2) image: a Height x Width x 3 array that represents the image pixels
% 3) scales: NumScales x 1 or 1 x NumScales vector with the image scales
% that will be used. The i-th value is the size in pixels of the smallest
% dimension of the image in the i-th scale.
% 4) mean_pix: the mean pixel value(s) that extract_conv_features subtracts
% from the scaled image before it is fed to the CNN
% 5) Semantic_Aware_CNN (OPTIONAL): the caffe net that implements the
% activation maps module for the semantic segmentation aware CNN features
% (section 4). It receives the convolutional feature maps that the CNN
% network yields.
% 6) semantic_scales (required when argument 5 is given): vector with the
% image scales used for the semantic segmentation aware features. Scales
% that were not produced by the first network are silently ignored.
%
% OUTPUTS:
% 1) conv_feat_data: a struct with the single field feat.
% 1.a) Called with four arguments, conv_feat_data.feat is a struct with the
% fields im_height, im_width (original image size), rsp (feature maps) and
% scale (the scales they were extracted from).
% 1.b) Called with six arguments, conv_feat_data.feat is a 1 x 2 cell array:
% the first element is the struct of case 1.a, the second element is a
% struct of the same layout holding the feature maps of Semantic_Aware_CNN.
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

% activation maps of the image for the requested scales
base_feat = struct('im_height', size(image,1), 'im_width', size(image,2));
[base_feat.rsp, base_feat.scale] = extract_conv_features(CNN, image, scales, mean_pix);
conv_feat_data.feat = base_feat;

if nargin < 5
    % no semantic segmentation aware network was supplied
    return;
end
assert(nargin >= 6)

% keep only the feature maps of the scales used by the semantic network
sem_feat = pick_scales_if_there(base_feat, semantic_scales);

% do_interleave: use the resolution augmentation technique described in the
% OverFeat paper: http://arxiv.org/abs/1312.6229 (section 3.3)
conf.do_interleave = true;
% interleave_num_steps: number of steps of the above technique
conf.interleave_num_steps = 2;

% feed the previously extracted feature maps to the semantic network
sem_feat.rsp = extract_semantic_seg_features_from_conv5(...
    Semantic_Aware_CNN, sem_feat.rsp, conf);
conv_feat_data.feat = {base_feat, sem_feat};

end

function feat = pick_scales_if_there(feat, scales)
% Restrict feat.scale / feat.rsp to the entries whose scale appears in
% `scales`, in the order of `scales`. Requested scales that are absent from
% feat.scale are skipped.
kept_scales = zeros(size(scales));
kept_rsp    = {};
num_kept    = 0;
for s = 1:length(scales)
    hit = find(feat.scale == scales(s));
    if isempty(hit)
        continue;
    end
    % every scale may be stored at most once
    assert(numel(hit) == 1);
    num_kept = num_kept + 1;
    kept_scales(num_kept) = scales(s);
    kept_rsp{num_kept}    = feat.rsp{hit};
end
feat.scale = kept_scales(1:num_kept);
feat.rsp   = kept_rsp;

end
function extract_semantic_features_all_images(net, conv5_file_paths, destination_dir, varargin)
% EXTRACT_SEMANTIC_FEATURES_ALL_IMAGES Compute the semantic segmentation
% aware feature maps from previously cached convolutional feature files and
% store them as one .mat file per input file.
%
% INPUTS:
% 1) net: the network handed to extract_semantic_seg_features_from_conv5
% 2) conv5_file_paths: cell array with the paths of the cached
% convolutional feature files (read with read_feat_conv_data)
% 3) destination_dir: directory that receives the output files and the log
% Optional arguments (parsed with inputParser in this order):
%   start                : index of the first file to process (default 1)
%   end                  : index of the last file; a value <= 0 means the
%                          last file of the list (default 0)
%   scales               : scales to keep from every input file
%                          (default [576 874 1200])
%   do_interleave        : passed on in the options struct (default 0)
%   interleave_num_steps : passed on in the options struct (default 1)
%   force                : recompute outputs that exist (default false)
%
% A failure on a single file is reported and does not stop the loop.
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

ip = inputParser;
ip.addOptional('start',                1,              @isscalar);
ip.addOptional('end',                  0,              @isscalar);
ip.addOptional('scales',               [576 874 1200], @ismatrix);
ip.addOptional('do_interleave',        0,              @isscalar);
ip.addOptional('interleave_num_steps', 1,              @isscalar);
ip.addOptional('force',                false,          @islogical);

ip.parse(varargin{:});
opts = ip.Results;

num_paths = length(conv5_file_paths);
if opts.end <= 0
    opts.end = num_paths;
else
    opts.end = min(opts.end, num_paths);
end

% Where to save feature cache
opts.output_dir = destination_dir;
mkdir_if_missing(opts.output_dir);

% Log feature extraction. The log is placed INSIDE destination_dir (fullfile
% inserts the separator when destination_dir has no trailing one) and the
% diary is switched off again when this function exits, normally or not.
timestamp  = datestr(datevec(now()), 'yyyymmdd_HHMMSS');
diary_file = fullfile(destination_dir, ['extract_semantic_features_all_images_', timestamp, '.txt']);
diary(diary_file);
stop_logging = onCleanup(@() diary('off')); %#ok<NASGU>
fprintf('Logging output in %s\n', diary_file);

fprintf('\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n');
fprintf('Extract semantic segmentation aware CNN features options:\n');
disp(opts);
fprintf('~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n');

filenames = getImageIdsFromImagePaths( conv5_file_paths );
total_time           = 0;
total_file_size_mega = 0;
count                = 0; % number of files actually processed in this run
num_imgs             = opts.end - opts.start + 1;
for i = opts.start:opts.end
    fprintf('%s: extract semantic segmentation aware conv features: %d/%d\n', procid(), i, opts.end);
    output_file_path = [destination_dir, filesep, filenames{i}, '.mat'];

    if (~exist(output_file_path, 'file') || opts.force)
        tot_th = tic;

        try
            count = count + 1;
            file_size_mega = process_image(net, conv5_file_paths{i}, output_file_path, opts);
        catch exception
            % best effort: report the failure and continue with the next file
            file_size_mega = 0;
            fprintf('Error: Cannot proccess %s.\n', output_file_path);
            fprintf('Exception message %s\n', getReport(exception));
        end

        total_file_size_mega = total_file_size_mega + file_size_mega;
        avg_file_size_mega   = total_file_size_mega/count;
        est_total_size_giga  = num_imgs * avg_file_size_mega / 1024;
        total_time           = total_time + toc(tot_th);
        avg_time             = total_time/count;
        % files still to be visited are i+1..opts.end; using num_imgs - i
        % here would be wrong whenever opts.start > 1
        est_rem_time         = avg_time * (opts.end - i) / 60;
        fprintf('[avg time: %.2fs] [est rem. time: %.2fmins] [avg space %.3fMega] [est total space %.2fGiga]\n', ...
            avg_time, est_rem_time, avg_file_size_mega, est_total_size_giga);
    else
        fprintf(' [already exists]\n');
    end
end

end

function fileSizeInMbs = process_image(net, input_file_path, output_file_path, opts)
% Load one cached feature file, replace its feature maps with the semantic
% segmentation aware ones for the selected scales, save the result and
% return the size of the written file in megabytes.
th = tic;

d = read_feat_conv_data(input_file_path);
d.feat = pick_scales_if_there(d.feat, opts.scales);
d.feat.rsp = extract_semantic_seg_features_from_conv5(net, d.feat.rsp, opts);

fprintf(' [features: %.3fs]', toc(th));
th = tic;
% the fields of d become the variables of the .mat file
save(output_file_path, '-struct', 'd');
fileInfo = dir(output_file_path);
fileSizeInMbs = fileInfo.bytes / (1024*1024);
fprintf(' [saving: %.3fs]', toc(th));
fprintf(' [Mbytes: %.4f]\n', fileSizeInMbs);
end

function feat = pick_scales_if_there(feat, scales)
% Restrict feat.scale / feat.rsp to the entries whose scale appears in
% `scales`, in the order of `scales`. Requested scales that are absent from
% feat.scale are skipped.
num_scales = length(scales);

found_scales = zeros(size(scales));
found_rsp = {};
c = 0;
for s = 1:num_scales
    scale_index = find(feat.scale == scales(s));
    if ~isempty(scale_index)
        % every scale may be stored at most once
        assert(numel(scale_index) == 1);
        c = c + 1;
        found_scales(c) = scales(s);
        found_rsp{c} = feat.rsp{scale_index};
    end
end
feat.scale = found_scales(1:c);
feat.rsp = found_rsp;
end
function rsp = extract_semantic_seg_features_from_conv5(Semantic_Aware_CNN, input_conv_feat_maps, opts)
% extract_semantic_seg_features_from_conv5 computes the semantic
% segmentation aware feature maps from already extracted convolutional
% feature maps, one set per scale.
%
% INPUTS:
% 1) Semantic_Aware_CNN: the caffe net that implements the activation maps
% module for the semantic segmentation aware CNN features (section 4 of
% the technical report)
% 2) input_conv_feat_maps: a 1 x NumScales cell array with the input
% convolutional feature maps of a set of scales (a single numeric array is
% accepted as well and treated as one scale)
% 3) opts: a struct with the options used for extracting the activation
% maps. Its fields are:
% opts.do_interleave: if present and true, the resolution augmentation
% technique described in the OverFeat paper: http://arxiv.org/abs/1312.6229
% (section 3.3 of the OverFeat technical report) is used
% opts.interleave_num_steps: number of steps of the above technique; it is
% only read when opts.do_interleave is set and only has an effect when it
% is greater than 1
%
% OUTPUTS:
% 1) rsp: a 1 x NumScales cell array with the semantic segmentation aware
% convolutional feature maps of each scale. The i-th element holds the
% (post-processed) first output of the network for the i-th input with
% its first two dimensions swapped back.
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

num_steps = 0;
if (isfield(opts, 'do_interleave') && opts.do_interleave)
    num_steps = opts.interleave_num_steps;
end

if isnumeric(input_conv_feat_maps)
    input_conv_feat_maps = {input_conv_feat_maps};
end

rsp = {};
for k = 1:length(input_conv_feat_maps)
    % swap the first two dimensions for compatibility with caffe, then build
    % the (optionally interleaved) network input
    net_in = permute(input_conv_feat_maps{k}, [2, 1, 3]);
    net_in = preprocess_input(net_in, num_steps);

    % reshape the network such that it accepts an input of this size
    Semantic_Aware_CNN = caffe_reshape_net(Semantic_Aware_CNN, ...
        {[size(net_in,1), size(net_in,2), size(net_in,3), size(net_in,4)]});

    net_out = Semantic_Aware_CNN.forward({net_in});
    merged  = postprocess_output(net_out{1}, num_steps);
    rsp{k}  = permute(merged, [2, 1, 3]);
end
rsp = rsp(:)';
end

function input = preprocess_input(input, interleave_num_steps)
% When interleave_num_steps > 1, build a batch (stacked along the 4th
% dimension) of shifted crops of the input: one crop for every combination
% of offsets 0..interleave_num_steps-1 along the first two dimensions
% (resolution augmentation of the OverFeat paper, section 3.3,
% http://arxiv.org/abs/1312.6229). Otherwise the input is returned as is.
if ~(interleave_num_steps > 1)
    return;
end
n = interleave_num_steps;
crop_len_1 = size(input,1) - (n-1);
crop_len_2 = size(input,2) - (n-1);

crops = cell([1,1,1,n*n]);
for b = 1:(n*n)
    % the offset along dimension 1 varies fastest
    [step_1, step_2] = ind2sub([n, n], b);
    crops{b} = input((step_1-1)+(1:crop_len_1), (step_2-1)+(1:crop_len_2), :);
end
input = cell2mat(crops);
end

function final_output = postprocess_output(output, interleave_num_steps)
% Inverse of the batching done by preprocess_input: when
% interleave_num_steps > 1 the outputs of the batch elements are interleaved
% into a single map that is interleave_num_steps times larger along the
% first two dimensions. Otherwise the output is returned as is.
if ~(interleave_num_steps > 1)
    final_output = output;
    return;
end
n = interleave_num_steps;
final_out_size = [size(output,1)*n, size(output,2)*n, size(output,3)];
final_output = zeros(final_out_size, 'like', output);
for b = 1:(n*n)
    % same batch ordering as in preprocess_input
    [step_1, step_2] = ind2sub([n, n], b);
    final_output(step_1:n:end, step_2:n:end, :) = output(:,:,:,b);
end
end
function feat_data = read_feat_conv_data( feature_path, to_cell )
% READ_FEAT_CONV_DATA Load cached feature data from one or several .mat files.
%
% INPUTS:
% 1) feature_path: either the path of a single feature file, or a cell
% array with the paths of several feature files
% 2) to_cell (OPTIONAL, default true): only used when feature_path is a
% cell array. If true the feat variables of the files are collected in a
% cell array, otherwise in an array (which requires compatible contents).
%
% OUTPUTS:
% 1) feat_data: the content of the (first) file as returned by load. When
% feature_path is a cell array its field feat holds the feat variable of
% every listed file: feat{i} if to_cell is true, feat(i) otherwise.
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

if ~iscell(feature_path)
    feat_data = load(feature_path);
    return;
end

if nargin < 2
    to_cell = true;
end

% every field other than feat is taken from the first file only
feat_data = load(feature_path{1});
num_files = length(feature_path);

if to_cell
    first_feat = feat_data.feat;
    % Drop the field and re-create it as a cell array. The original code
    % additionally deleted the first element of the field right before
    % removing it; that had no effect on the result and raised an error
    % whenever the stored feat was empty.
    feat_data = rmfield(feat_data, 'feat');
    feat_data.feat = cell(1, num_files);
    feat_data.feat{1} = first_feat;
    for i = 2:num_files
        tmp = load(feature_path{i});
        feat_data.feat{i} = tmp.feat;
    end
else
    for i = 2:num_files
        tmp = load(feature_path{i});
        feat_data.feat(i) = tmp.feat;
    end
end
end
function all_bbox_proposals = extract_edge_boxes_from_dataset(image_db, edge_boxes_dst_file)
% EXTRACT_EDGE_BOXES_FROM_DATASET Edge Boxes proposals of every image of a
% dataset, cached in edge_boxes_dst_file.
%
% INPUTS:
% 1) image_db: struct with (at least) the fields image_paths (cell array
% with the image paths) and image_set_name (used in the progress messages)
% 2) edge_boxes_dst_file: path of the cache file. If it can be loaded and
% contains the variable all_bbox_proposals, that variable is returned and
% nothing is computed.
%
% OUTPUTS:
% 1) all_bbox_proposals: NumImages x 1 cell array; the i-th element is a
% single precision array with four columns per box, built from the first
% four columns of the edgeBoxes output as
% [c1, c2, c1 + c3 - 1, c2 + c4 - 1] (presumably [x y w h] converted to
% [x0 y0 x1 y1] -- confirm against the edgeBoxes documentation).
%
% The Edge Boxes code and its model are expected under
% <current directory>/external/edges.
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

try
    cached = load(edge_boxes_dst_file);
    all_bbox_proposals = cached.all_bbox_proposals;
catch
    % no usable cache: compute the proposals from scratch
    edge_boxes_path = fullfile(pwd, 'external', 'edges');
    model = load(fullfile(edge_boxes_path,'models/forest/modelBsds'));
    model = model.model;
    model.opts.multiscale = 0;
    model.opts.sharpen    = 2;
    model.opts.nThreads   = 4;

    % set up opts for edgeBoxes
    opts = edgeBoxes;
    opts.alpha    = .65;  % step size of sliding window search
    opts.beta     = .70;  % nms threshold for object proposals
    opts.minScore = .01;  % min score of boxes to detect
    opts.maxBoxes = 2000; % max number of boxes to detect

    chunk_size = 1000;
    num_imgs   = numel(image_db.image_paths);
    all_bbox_proposals = cell(num_imgs,1);

    total_num_elems = 0;
    total_time      = 0;
    % the images are processed in chunks of chunk_size images
    for start_idx = 1:chunk_size:num_imgs
        stop_idx    = min(start_idx + chunk_size - 1, num_imgs);
        chunk_range = start_idx:stop_idx;
        chunk_timer = tic;
        all_bbox_proposals(chunk_range) = edgeBoxes(image_db.image_paths(chunk_range),model,opts);
        for k = chunk_range
            raw_boxes = single(all_bbox_proposals{k}(:,1:4));
            corner    = raw_boxes(:,1:2);
            all_bbox_proposals{k} = [corner, corner + raw_boxes(:,3:4)-1];
            total_num_elems = total_num_elems + numel(all_bbox_proposals{k});
        end
        total_time    = total_time + toc(chunk_timer);
        est_rem_time  = (total_time / stop_idx) * (num_imgs - stop_idx);
        % 4 bytes per (single precision) element
        est_num_bytes = (total_num_elems / stop_idx) * num_imgs * 4 / (1024*1024*1024);
        fprintf('Extract edge boxes %s %d/%d: ET %.2fmin | ETA %.2fmin | EST. NUM BYTES %.2f giga\n', ...
            image_db.image_set_name, stop_idx, num_imgs, ...
            total_time/60, est_rem_time/60, est_num_bytes);
    end

    save(edge_boxes_dst_file, 'all_bbox_proposals', '-v7.3');
end
end
function all_bbox_proposals = extract_selective_search_boxes_from_dataset(image_db, ss_boxes_dst_file)
% EXTRACT_SELECTIVE_SEARCH_BOXES_FROM_DATASET Selective Search proposals of
% every image of a dataset, cached in ss_boxes_dst_file.
%
% INPUTS:
% 1) image_db: struct with (at least) the fields image_paths (cell array
% with the image paths) and image_set_name (used in the progress messages)
% 2) ss_boxes_dst_file: path of the cache file. If it can be loaded and
% contains the variable all_bbox_proposals, that variable is returned and
% nothing is computed.
%
% OUTPUTS:
% 1) all_bbox_proposals: NumImages x 1 cell array with one single precision
% box array per image.
%
% The images are processed in chunks of 1000. After every chunk the progress
% is written to <name>_in_progress.mat next to the destination file (the
% previous progress file is first copied to <name>_in_progress_prev.mat), so
% that an interrupted run resumes after the last completed chunk.
%
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
%
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
%
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

try
    ld = load(ss_boxes_dst_file);
    all_bbox_proposals = ld.all_bbox_proposals;
catch
    % no usable cache: compute (or resume computing) the proposals
    chunk_size = 1000;
    num_imgs   = numel(image_db.image_paths);
    num_chunks = ceil(num_imgs/chunk_size);

    % Derive the progress file names from the file name only. The former
    % regexprep(..., '.mat', ...) used an unescaped, unanchored pattern and
    % therefore rewrote every "<any char>mat" occurring anywhere in the path
    % (e.g. inside a directory called "formats").
    [dst_dir, dst_name] = fileparts(ss_boxes_dst_file);
    in_progress_file      = fullfile(dst_dir, [dst_name, '_in_progress.mat']);
    in_progress_prev_file = fullfile(dst_dir, [dst_name, '_in_progress_prev.mat']);

    [all_bbox_proposals, first_chunk] = load_progress( ...
        {in_progress_file, in_progress_prev_file}, num_imgs);

    total_num_elems = 0;
    total_time      = 0;
    count           = 0; % number of images processed in this run
    for chunk = first_chunk:num_chunks
        start_idx = (chunk-1) * chunk_size + 1;
        stop_idx  = min(chunk * chunk_size, num_imgs);
        th = tic;
        all_bbox_proposals(start_idx:stop_idx) = extract_selective_search_proposals(image_db.image_paths(start_idx:stop_idx));
        for i = start_idx:stop_idx
            count = count + 1;
            total_num_elems = total_num_elems + numel(all_bbox_proposals{i});
        end
        elapsed_time  = toc(th);
        total_time    = total_time + elapsed_time;
        est_rem_time  = (total_time / count) * (num_imgs - stop_idx);
        % 4 bytes per (single precision) element
        est_num_bytes = (total_num_elems / count) * num_imgs * 4 / (1024*1024*1024);
        fprintf('Extract Selective Search boxes %s %d/%d: ET %.2fmin | ETA %.2fmin | EST. NUM BYTES %.2f giga\n', ...
            image_db.image_set_name, stop_idx, num_imgs, ...
            total_time/60, est_rem_time/60, est_num_bytes);

        % keep the previous progress file as a backup in case the run is
        % interrupted while the new one is being written
        if (exist(in_progress_file,'file')>0)
            copyfile(in_progress_file, in_progress_prev_file);
        end

        % the variable name `chunk` is part of the progress file format
        save(in_progress_file, 'all_bbox_proposals', 'chunk', '-v7.3');
    end

    save(ss_boxes_dst_file, 'all_bbox_proposals', '-v7.3');
end
end

function [all_bbox_proposals, first_chunk] = load_progress(progress_files, num_imgs)
% Resume from the first progress file that can be loaded; if none can be
% loaded report the last failure and start from the first chunk.
report = '';
for f = 1:numel(progress_files)
    try
        ld = load(progress_files{f});
        all_bbox_proposals = ld.all_bbox_proposals;
        first_chunk = ld.chunk + 1;
        return;
    catch exception
        report = getReport(exception);
    end
end
fprintf('Exception message %s\n', report);
all_bbox_proposals = cell(num_imgs,1);
first_chunk = 1;
end

function all_box_proposals = extract_selective_search_proposals(image_paths)
% Run selective_search_boxes (fast mode) on every image in parallel and
% return the boxes in single precision with their columns reordered as
% [2 1 4 3] (presumably [y0 x0 y1 x1] -> [x0 y0 x1 y1] -- confirm against
% the selective search code).
fast_mode = true;
num_imgs = length(image_paths);
all_box_proposals = cell(num_imgs,1);
parfor (i = 1:num_imgs)
    img = imread(image_paths{i});
    boxes = selective_search_boxes(img, fast_mode);
    all_box_proposals{i} = single(boxes(:,[2 1 4 3]));
end
end
img_idx = 1:num_imgs 22 | if mod(img_idx,500) == 0, fprintf('Load gt bboxes::%s %d/%d\n', fullimage_set_name, img_idx, num_imgs); end 23 | all_bboxes_gt{img_idx} = getGroundTruthBBoxes(sprintf(VOCopts.annopath, image_ids{img_idx}), class_to_id, with_hard_samples); 24 | end 25 | saveGroundTruthBBoxes(cache_file, all_bboxes_gt); 26 | end 27 | 28 | end 29 | 30 | function gt_bboxes = getGroundTruthBBoxes(filename, class_to_id, with_hard_samples) 31 | try 32 | voc_rec = PASreadrecord(filename); 33 | valid_objects = 1:length(voc_rec.objects(:)); 34 | is_difficult = cat(1, voc_rec.objects(:).difficult); 35 | if ~with_hard_samples 36 | valid_objects = valid_objects(~is_difficult); 37 | is_difficult = is_difficult(~is_difficult); 38 | end 39 | 40 | gt_bboxes = single(cat(1, voc_rec.objects(valid_objects).bbox)); 41 | gt_class_idx = class_to_id.values({voc_rec.objects(valid_objects).class}); 42 | gt_class_idx = single(cat(1, gt_class_idx{:})); 43 | gt_bboxes = single([gt_bboxes, gt_class_idx, is_difficult(:)]); 44 | catch 45 | gt_bboxes = zeros(0, 6, 'single'); 46 | end 47 | 48 | end 49 | 50 | function all_bboxes_gt = loadGroundTruthBBoxes(filename) 51 | load(filename, 'all_bboxes_gt'); 52 | end 53 | 54 | function saveGroundTruthBBoxes(filename, all_bboxes_gt) 55 | save(filename, 'all_bboxes_gt', '-v7.3'); 56 | end 57 | 58 | -------------------------------------------------------------------------------- /code/data-providers/get_image_paths_from_voc.m: -------------------------------------------------------------------------------- 1 | function [ image_paths, image_set_name ] = get_image_paths_from_voc( voc_path, image_set, voc_year ) 2 | 3 | VOCopts = initVOCOpts( voc_path, voc_year ); 4 | VOCopts.testset = image_set; 5 | image_set_name = ['voc_', voc_year, '_' image_set]; 6 | 7 | image_ext = '.jpg'; 8 | image_dir = fileparts(VOCopts.imgpath); 9 | image_ids = textread(sprintf(VOCopts.imgsetpath, image_set), '%s'); 10 | image_paths = strcat([image_dir, filesep], image_ids, 
image_ext); 11 | end 12 | 13 | 14 | -------------------------------------------------------------------------------- /code/data-providers/initVOCOpts.m: -------------------------------------------------------------------------------- 1 | function [ VOCopts ] = initVOCOpts( voc_path, voc_year ) 2 | 3 | VOCopts.datadir = [voc_path, filesep]; 4 | VOCopts.dataset = sprintf('VOC%s', voc_year); 5 | VOCopts.resdir = [voc_path, filesep, 'results', filesep, VOCopts.dataset, filesep]; 6 | 7 | % % change this path to a writable local directory for the example code 8 | % VOCopts.localdir = [voc_path, filesep, 'local', filesep, VOCopts.dataset '/']; 9 | 10 | % initialize the test set 11 | 12 | %VOCopts.testset='val'; % use validation data for development test set 13 | VOCopts.testset='test'; % use test set for final challenge 14 | 15 | % initialize main challenge paths 16 | 17 | VOCopts.annopath = [VOCopts.datadir VOCopts.dataset '/Annotations/%s.xml']; 18 | VOCopts.imgpath = [VOCopts.datadir VOCopts.dataset '/JPEGImages/%s.jpg']; 19 | VOCopts.imgsetpath = [VOCopts.datadir VOCopts.dataset '/ImageSets/Main/%s.txt']; 20 | VOCopts.clsimgsetpath = [VOCopts.datadir VOCopts.dataset '/ImageSets/Main/%s_%s.txt']; 21 | VOCopts.clsrespath = [VOCopts.resdir 'Main/%s_cls_' VOCopts.testset '_%s.txt']; 22 | VOCopts.detrespath = [VOCopts.resdir 'Main/%s_det_' VOCopts.testset '_%s.txt']; 23 | 24 | % initialize segmentation task paths 25 | 26 | VOCopts.seg.clsimgpath = [VOCopts.datadir, VOCopts.dataset, '/SegmentationClass/%s.png']; 27 | VOCopts.seg.instimgpath = [VOCopts.datadir, VOCopts.dataset, '/SegmentationObject/%s.png']; 28 | VOCopts.seg.imgsetpath = [VOCopts.datadir, VOCopts.dataset, '/ImageSets/Segmentation/%s.txt']; 29 | 30 | VOCopts.seg.clsresdir = [VOCopts.resdir, 'Segmentation/%s_%s_cls']; 31 | VOCopts.seg.instresdir = [VOCopts.resdir, 'Segmentation/%s_%s_inst']; 32 | VOCopts.seg.clsrespath = [VOCopts.seg.clsresdir, '/%s.png']; 33 | VOCopts.seg.instrespath = 
[VOCopts.seg.instresdir, '/%s.png']; 34 | 35 | % initialize layout task paths 36 | 37 | VOCopts.layout.imgsetpath = [VOCopts.datadir, VOCopts.dataset, '/ImageSets/Layout/%s.txt']; 38 | VOCopts.layout.respath = [VOCopts.resdir, 'Layout/%s_layout_', VOCopts.testset, '_%s.xml']; 39 | 40 | % initialize the VOC challenge options 41 | 42 | % VOC2007 classes 43 | VOCopts.classes={... 44 | 'aeroplane' 45 | 'bicycle' 46 | 'bird' 47 | 'boat' 48 | 'bottle' 49 | 'bus' 50 | 'car' 51 | 'cat' 52 | 'chair' 53 | 'cow' 54 | 'diningtable' 55 | 'dog' 56 | 'horse' 57 | 'motorbike' 58 | 'person' 59 | 'pottedplant' 60 | 'sheep' 61 | 'sofa' 62 | 'train' 63 | 'tvmonitor'}; 64 | 65 | 66 | VOCopts.nclasses=length(VOCopts.classes); 67 | 68 | VOCopts.poses={... 69 | 'Unspecified' 70 | 'SideFaceLeft' 71 | 'SideFaceRight' 72 | 'Frontal' 73 | 'Rear'}; 74 | 75 | VOCopts.nposes=length(VOCopts.poses); 76 | 77 | VOCopts.parts={... 78 | 'head' 79 | 'hand' 80 | 'foot'}; 81 | 82 | VOCopts.maxparts=[1 2 2]; % max of each of above parts 83 | 84 | VOCopts.nparts=length(VOCopts.parts); 85 | 86 | VOCopts.minoverlap=0.5; 87 | 88 | % % initialize example options 89 | % 90 | % VOCopts.exannocachepath=[VOCopts.localdir '%s_anno.mat']; 91 | % 92 | % VOCopts.exfdpath=[VOCopts.localdir '%s_fd.mat']; 93 | 94 | end 95 | 96 | -------------------------------------------------------------------------------- /code/data-providers/load_box_proposals.m: -------------------------------------------------------------------------------- 1 | function all_box_proposals = load_box_proposals( image_db, method ) 2 | % 3 | % This file is part of the code that implements the following ICCV2015 accepted paper: 4 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model" 5 | % authors: Spyros Gidaris, Nikos Komodakis 6 | % institution: Universite Paris Est, Ecole des Ponts ParisTech 7 | % Technical report: http://arxiv.org/abs/1505.01749 8 | % code: https://github.com/gidariss/mrcnn-object-detection 9 | % 
10 | % AUTORIGHTS 11 | % -------------------------------------------------------- 12 | % Copyright (c) 2015 Spyros Gidaris 13 | % 14 | % "Object detection via a multi-region & semantic segmentation-aware CNN model" 15 | % Technical report: http://arxiv.org/abs/1505.01749 16 | % Licensed under The MIT License [see LICENSE for details] 17 | % --------------------------------------------------------- 18 | 19 | 20 | base_directory = fullfile(pwd,'data/'); 21 | mkdir_if_missing(base_directory); 22 | 23 | selective_search_path = fullfile(base_directory, 'selective_search_data/'); 24 | edge_boxes_path = fullfile(base_directory, 'edge_boxes_data/'); 25 | voc_path = [pwd, '/datasets/VOC%s/']; 26 | 27 | if ischar(method), method = {method}; end 28 | assert(iscell(method)); 29 | num_methods = numel(method); 30 | all_box_proposals_methods = cell(num_methods,1); 31 | 32 | image_set_name = image_db.image_set_name; 33 | 34 | for m = 1:num_methods 35 | switch method{m} 36 | case 'selective_search' 37 | mkdir_if_missing(selective_search_path); 38 | proposals_path = sprintf('%s/%s.mat', selective_search_path, image_set_name); 39 | all_box_proposals = extract_selective_search_boxes_from_dataset(... 
40 | image_db, proposals_path); 41 | case 'edge_boxes' 42 | mkdir_if_missing(edge_boxes_path); 43 | proposals_path = sprintf('%s/%s.mat', edge_boxes_path, image_set_name); 44 | all_box_proposals = extract_edge_boxes_from_dataset(image_db, proposals_path); 45 | otherwise 46 | error('not supported option') 47 | end 48 | all_box_proposals_methods{m} = all_box_proposals; 49 | end 50 | all_box_proposals = merge_bboxes(all_box_proposals_methods); 51 | end 52 | 53 | function all_box_proposals = merge_bboxes(all_box_proposals_methods) 54 | 55 | num_methods = length(all_box_proposals_methods); 56 | num_imgs = length(all_box_proposals_methods{1}); 57 | 58 | if num_methods == 1 59 | all_box_proposals = all_box_proposals_methods{1}; 60 | return; 61 | end 62 | all_box_proposals = cell(num_imgs, 1); 63 | 64 | for i = 1:num_imgs 65 | aboxes_this_img_this = cell(num_methods, 1); 66 | for d = 1:num_methods 67 | aboxes_this_img_this{d} = all_box_proposals_methods{d}{i}; 68 | end 69 | all_box_proposals{i} = cell2mat(aboxes_this_img_this); 70 | end 71 | 72 | end 73 | -------------------------------------------------------------------------------- /code/data-providers/load_feature_paths.m: -------------------------------------------------------------------------------- 1 | function image_db = load_feature_paths(image_db, feat_cache_names) 2 | % 3 | % This file is part of the code that implements the following ICCV2015 accepted paper: 4 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model" 5 | % authors: Spyros Gidaris, Nikos Komodakis 6 | % institution: Universite Paris Est, Ecole des Ponts ParisTech 7 | % Technical report: http://arxiv.org/abs/1505.01749 8 | % code: https://github.com/gidariss/mrcnn-object-detection 9 | % 10 | % AUTORIGHTS 11 | % -------------------------------------------------------- 12 | % Copyright (c) 2015 Spyros Gidaris 13 | % 14 | % "Object detection via a multi-region & semantic segmentation-aware CNN model" 15 | % Technical 
report: http://arxiv.org/abs/1505.01749 16 | % Licensed under The MIT License [see LICENSE for details] 17 | % --------------------------------------------------------- 18 | feat_cache_directory = fullfile(pwd,'feat_cache/'); 19 | 20 | if ischar(feat_cache_names) 21 | feat_cache_names = {feat_cache_names}; 22 | end 23 | assert(iscell(feat_cache_names)); 24 | num_feats = length(feat_cache_names); 25 | image_db.feature_paths_all = cell(num_feats,1); 26 | feat_cache_dir = {}; 27 | for f = 1:num_feats 28 | feat_cache_dir{f} = [feat_cache_directory, feat_cache_names{f}, filesep, image_db.image_set_name, filesep]; 29 | feature_paths_all{f} = strcat(feat_cache_dir{f}, getImageIdsFromImagePaths( image_db.image_paths ),'.mat'); 30 | end 31 | 32 | if num_feats == 1 33 | image_db.feature_paths = feature_paths_all{1}; 34 | else 35 | image_db.feature_paths = cell(length(feature_paths_all{1}), 1); 36 | for i = 1:length(image_db.feature_paths) 37 | image_db.feature_paths{i} = cell(num_feats,1); 38 | for f = 1:num_feats 39 | image_db.feature_paths{i}{f} = feature_paths_all{f}{i}; 40 | end 41 | end 42 | end 43 | image_db.feat_cache_dir = feat_cache_dir; 44 | end 45 | 46 | -------------------------------------------------------------------------------- /code/examples/demo_MRCNN_detection.m: -------------------------------------------------------------------------------- 1 | function demo_MRCNN_detection 2 | % object detection demo using the Multi-Region CNN recognition model 3 | % (section 3 of technical report) only. No localization module is used. 
4 | % 5 | % This file is part of the code that implements the following ICCV2015 accepted paper: 6 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model" 7 | % authors: Spyros Gidaris, Nikos Komodakis 8 | % institution: Universite Paris Est, Ecole des Ponts ParisTech 9 | % Technical report: http://arxiv.org/abs/1505.01749 10 | % code: https://github.com/gidariss/mrcnn-object-detection 11 | % 12 | % 13 | % AUTORIGHTS 14 | % -------------------------------------------------------- 15 | % Copyright (c) 2015 Spyros Gidaris 16 | % 17 | % "Object detection via a multi-region & semantic segmentation-aware CNN model" 18 | % Technical report: http://arxiv.org/abs/1505.01749 19 | % Licensed under The MIT License [see LICENSE for details] 20 | % --------------------------------------------------------- 21 | 22 | gpu_id = 1; % gpu_id is a one-based index; if a non positive value is given 23 | % then the CPU will be used instead. 24 | 25 | caffe_set_device( gpu_id ); 26 | caffe.reset_all(); 27 | 28 | %***************************** LOAD MODEL ********************************* 29 | fprintf('Loading detection models... 
'); th = tic; 30 | 31 | % set the path of the bounding box recognition model for object detection 32 | model_rec_dir_name = 'MRCNN_VOC2007_2012'; % model's directory name 33 | full_model_rec_dir = fullfile(pwd, 'models-exps', model_rec_dir_name); % full path to the model's directory 34 | use_detection_svms = true; 35 | model_rec_mat_name = 'detection_model_svm.mat'; % model's matlab filename 36 | full_model_rec_path = fullfile(full_model_rec_dir, model_rec_mat_name); % full path to the model's matlab file 37 | assert(exist(full_model_rec_dir,'dir')>0); 38 | assert(exist(full_model_rec_path,'file')>0); 39 | 40 | % Load the bounding box recognition model for object detection 41 | ld = load(full_model_rec_path, 'model'); 42 | model_obj_rec = ld.model; 43 | model_phase_rec = 'test'; 44 | clear ld; 45 | 46 | model_obj_rec = load_object_recognition_model_on_caffe(... 47 | model_obj_rec, use_detection_svms, model_phase_rec, full_model_rec_dir); 48 | 49 | % Load the activation maps module that is responsible for producing the 50 | % convolutional features (called activation maps) of an image. 
For the 51 | % activation maps module we use the convolutional layers (till conv5_3) 52 | % of the VGG16 model 53 | 54 | % set the path to the directory that contains the caffe definition and 55 | % weights files of the activation maps module 56 | net_files_dir = fullfile(pwd,'data','vgg_pretrained_models'); 57 | % path to the definition file of the activation maps module 58 | model_obj_rec.act_net_def_file = fullfile( net_files_dir,'vgg16_conv5_deploy.prototxt'); 59 | % path to the weights file of the activation maps module 60 | model_obj_rec.act_net_weights_file = {fullfile(net_files_dir,'VGG_ILSVRC_16_Convolutional_Layers.caffemodel')}; 61 | assert(exist(net_files_dir,'dir')>0); 62 | assert(exist(model_obj_rec.act_net_def_file ,'file')>0); 63 | assert(exist(model_obj_rec.act_net_weights_file{1},'file')>0); 64 | % image scales that are being used for extracting the activation maps 65 | model_obj_rec.scales = [480 576 688 874 1200]; 66 | % mean pixel value per color channel for image pre-processing before 67 | % feeding it to the VGG16 convolutional layers. 68 | model_obj_rec.mean_pix = [103.939, 116.779, 123.68]; 69 | % load the activation maps module on caffe 70 | model_obj_rec.act_maps_net = caffe_load_model( model_obj_rec.act_net_def_file, model_obj_rec.act_net_weights_file); 71 | fprintf(' %.3f sec\n', toc(th)); 72 | %************************************************************************** 73 | 74 | img = imread('./code/examples/images/000084.jpg'); % load image 75 | category_names = model_obj_rec.classes; % a C x 1 cell array with the names 76 | % of the categories that the detection system looks for. C is the number of 77 | % categories. 
78 | num_categories = length(category_names); 79 | 80 | 81 | conf = struct; 82 | % the thresholds that are being used for removing easy negatives before the 83 | % non-max-suppression step 84 | conf.thresh = -3 * ones(num_categories,1); % It contains the 85 | % threshold per category that will be used for removing scored boxes with 86 | % low confidence prior to applying the non-max-suppression step. 87 | conf.nms_iou_thrs = 0.3; % IoU threshold for the non-max-suppression step 88 | conf.box_method = 'edge_boxes'; % string with the box proposals algorithm that 89 | % will be used in order to generate the set of candidate boxes. Currently 90 | % it supports the 'edge_boxes' or the 'selective_search' types only. 91 | 92 | % detect objects in the image 93 | [ bbox_detections ] = demo_object_detection( img, model_obj_rec, conf ); 94 | 95 | % visualize the bounding box detections. 96 | score_thresh = 0.0 * zeros(num_categories, 1); % score threshold per 97 | % category for keeping or discarding a detection. For the purposes of this 98 | % demo we set the score thresholds to 0 value. However, this is not the 99 | % optimal value. Someone should tune those thresholds in order to achieve 100 | % the desired trade-off between precision and recall. 
101 | display_bbox_detections( img, bbox_detections, score_thresh, category_names ); 102 | 103 | 104 | caffe.reset_all(); % free the memory occupied by the caffe models 105 | end -------------------------------------------------------------------------------- /code/examples/display_bbox_detections.m: -------------------------------------------------------------------------------- 1 | function display_bbox_detections( img, bbox_detections, score_thresh, category_names ) 2 | 3 | all_dets = []; 4 | num_categories = length(bbox_detections); 5 | for i = 1:num_categories 6 | bbox_detections_this_category = bbox_detections{i}; 7 | detection_scores_this_category = bbox_detections_this_category(:,5); 8 | is_above_the_thresh = detection_scores_this_category >= score_thresh(i); 9 | bbox_detections_this_category = bbox_detections_this_category(is_above_the_thresh,:); 10 | bbox_detections_this_category = [i * ones(size(bbox_detections_this_category, 1), 1), bbox_detections_this_category]; 11 | all_dets = cat(1, all_dets, bbox_detections_this_category); 12 | end 13 | 14 | fprintf('Visualize the bounding box detections:\n') 15 | [~, ord] = sort(all_dets(:,end), 'descend'); 16 | for i = 1:length(ord) 17 | score_this = all_dets(ord(i), end); 18 | category_name_this = category_names{all_dets(ord(i), 1)}; 19 | showboxes(img, all_dets(ord(i), 2:5)); 20 | title(sprintf('det #%d: %s score = %.3f', i, category_name_this, score_this)); 21 | fprintf('det #%d: %s score = %.3f. press any key to continue\n', ... 
22 | i, category_name_this, score_this); 23 | drawnow; 24 | pause; 25 | end 26 | 27 | 28 | fprintf('No more detection\n'); 29 | 30 | end 31 | 32 | -------------------------------------------------------------------------------- /code/examples/images/000084.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gidariss/mrcnn-object-detection/2f355c0539961aa22f57d31971aa163a35f3152c/code/examples/images/000084.jpg -------------------------------------------------------------------------------- /code/examples/images/fish-bike.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gidariss/mrcnn-object-detection/2f355c0539961aa22f57d31971aa163a35f3152c/code/examples/images/fish-bike.jpg -------------------------------------------------------------------------------- /code/postprocessing/merge_detected_bboxes.m: -------------------------------------------------------------------------------- 1 | function aboxes_out = merge_detected_bboxes(aboxes) 2 | % 3 | % This file is part of the code that implements the following ICCV2015 accepted paper: 4 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model" 5 | % authors: Spyros Gidaris, Nikos Komodakis 6 | % institution: Universite Paris Est, Ecole des Ponts ParisTech 7 | % Technical report: http://arxiv.org/abs/1505.01749 8 | % code: https://github.com/gidariss/mrcnn-object-detection 9 | % 10 | % AUTORIGHTS 11 | % -------------------------------------------------------- 12 | % Copyright (c) 2015 Spyros Gidaris 13 | % 14 | % "Object detection via a multi-region & semantic segmentation-aware CNN model" 15 | % Technical report: http://arxiv.org/abs/1505.01749 16 | % Licensed under The MIT License [see LICENSE for details] 17 | % --------------------------------------------------------- 18 | 19 | num_methods = length(aboxes); 20 | num_classes = length(aboxes{1}); 21 | 
num_imgs = length(aboxes{1}{1}); 22 | 23 | aboxes_out = cell(num_classes, 1); 24 | 25 | for j = 1:num_classes, aboxes_out{j} = cell(num_imgs,1); end 26 | 27 | for i = 1:num_imgs 28 | for j = 1:num_classes 29 | aboxes_this_img_this_cls = cell(num_methods, 1); 30 | for d = 1:num_methods 31 | aboxes_this_img_this_cls{d} = aboxes{d}{j}{i}; 32 | end 33 | aboxes_out{j}{i} = cell2mat(aboxes_this_img_this_cls); 34 | end 35 | end 36 | 37 | aboxes_out = {aboxes_out}; 38 | 39 | end 40 | -------------------------------------------------------------------------------- /code/postprocessing/nms.m: -------------------------------------------------------------------------------- 1 | function pick = nms(boxes, overlap) 2 | % top = nms(boxes, overlap) 3 | % Non-maximum suppression. (FAST VERSION) 4 | % Greedily select high-scoring detections and skip detections 5 | % that are significantly covered by a previously selected 6 | % detection. 7 | % 8 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m), 9 | % but an inner loop has been eliminated to significantly speed it 10 | % up in the case of a large number of boxes 11 | 12 | % Copyright (C) 2011-12 by Tomasz Malisiewicz 13 | % All rights reserved. 14 | % 15 | % This file is part of the Exemplar-SVM library and is made 16 | % available under the terms of the MIT license (see COPYING file). 
17 | % Project homepage: https://github.com/quantombone/exemplarsvm 18 | 19 | 20 | if isempty(boxes) 21 | pick = []; 22 | return; 23 | end 24 | 25 | if size(boxes, 1) < 10000 26 | pick = nms_mex(double(boxes), double(overlap)); 27 | return; 28 | end 29 | 30 | x1 = boxes(:,1); 31 | y1 = boxes(:,2); 32 | x2 = boxes(:,3); 33 | y2 = boxes(:,4); 34 | s = boxes(:,end); 35 | 36 | area = (x2-x1+1) .* (y2-y1+1); 37 | [vals, I] = sort(s); 38 | 39 | pick = s*0; 40 | counter = 1; 41 | while ~isempty(I) 42 | last = length(I); 43 | i = I(last); 44 | pick(counter) = i; 45 | counter = counter + 1; 46 | 47 | xx1 = max(x1(i), x1(I(1:last-1))); 48 | yy1 = max(y1(i), y1(I(1:last-1))); 49 | xx2 = min(x2(i), x2(I(1:last-1))); 50 | yy2 = min(y2(i), y2(I(1:last-1))); 51 | 52 | w = max(0.0, xx2-xx1+1); 53 | h = max(0.0, yy2-yy1+1); 54 | 55 | inter = w.*h; 56 | o = inter ./ (area(i) + area(I(1:last-1)) - inter); 57 | 58 | I = I(find(o<=overlap)); 59 | end 60 | 61 | pick = pick(1:(counter-1)); 62 | -------------------------------------------------------------------------------- /code/postprocessing/nms_mex.cpp: -------------------------------------------------------------------------------- 1 | #include "mex.h" 2 | #ifdef WIN32 3 | #include 4 | #include 5 | #else 6 | #include 7 | #endif 8 | #include 9 | #include 10 | using namespace std; 11 | 12 | struct score { 13 | double s; 14 | int idx; 15 | bool operator() (score i, score j) { return (i.idx < j.idx);} 16 | } score; 17 | 18 | template 19 | void nms(const mxArray *input_boxes, double overlap, vector &vPick, int &nPick) 20 | { 21 | int nSample = (int)mxGetM(input_boxes); 22 | int nDim_boxes = (int)mxGetN(input_boxes); 23 | 24 | T *pBoxes = (T*)mxGetData(input_boxes); 25 | 26 | vector vArea(nSample); 27 | for (int i = 0; i < nSample; ++i) 28 | { 29 | vArea[i] = double(pBoxes[2*nSample + i] - pBoxes[0*nSample + i] + 1) 30 | * (pBoxes[3*nSample + i] - pBoxes[1*nSample + i] + 1); 31 | if (vArea[i] < 0) 32 | mexErrMsgTxt("Boxes area must >= 
0"); 33 | } 34 | 35 | std::multimap scores; 36 | for (int i = 0; i < nSample; ++i) 37 | scores.insert(std::pair(pBoxes[4*nSample + i], i)); 38 | 39 | nPick = 0; 40 | 41 | do 42 | { 43 | int last = scores.rbegin()->second; 44 | vPick[nPick] = last; 45 | nPick += 1; 46 | 47 | for (typename std::multimap::iterator it = scores.begin(); it != scores.end();) 48 | { 49 | int it_idx = it->second; 50 | T xx1 = max(pBoxes[0*nSample + last], pBoxes[0*nSample + it_idx]); 51 | T yy1 = max(pBoxes[1*nSample + last], pBoxes[1*nSample + it_idx]); 52 | T xx2 = min(pBoxes[2*nSample + last], pBoxes[2*nSample + it_idx]); 53 | T yy2 = min(pBoxes[3*nSample + last], pBoxes[3*nSample + it_idx]); 54 | 55 | double w = std::max( (T)0.0, xx2-xx1+1), h = std::max((T)0.0, yy2-yy1+1); 56 | 57 | double ov = w*h / (vArea[last] + vArea[it_idx] - w*h); 58 | 59 | if (ov > overlap) 60 | { 61 | #ifdef WIN32 62 | it = scores.erase(it); 63 | #else 64 | typename std::multimap::iterator save=it; ++save; 65 | scores.erase(it); 66 | it=save; 67 | #endif 68 | } 69 | else 70 | { 71 | it++; 72 | } 73 | } 74 | 75 | } while (scores.size() != 0); 76 | } 77 | 78 | 79 | //void mexFunction(int nlhs, mxArray *plhs[], int nrhs, mxArray *prhs[]) 80 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 81 | { 82 | if (nrhs != 2) 83 | mexErrMsgTxt("Wrong number of inputs"); 84 | if (nlhs != 1) 85 | mexErrMsgTxt("One output"); 86 | 87 | const mxArray *input_boxes = prhs[0]; 88 | if (mxGetClassID(input_boxes) != mxDOUBLE_CLASS && mxGetClassID(input_boxes) != mxSINGLE_CLASS) 89 | mexErrMsgTxt("Input boxes must be Double or Single"); 90 | 91 | const mxArray *input_overlap = prhs[1]; 92 | if (mxGetClassID(input_overlap) != mxDOUBLE_CLASS ) 93 | mexErrMsgTxt("Input overlap must be Double"); 94 | 95 | double overlap = mxGetScalar(input_overlap); 96 | 97 | int nSample = (int)mxGetM(input_boxes); 98 | int nDim_boxes = (int)mxGetN(input_boxes); 99 | 100 | if (nSample * nDim_boxes == 0) 101 | { 102 | plhs[0] 
= mxCreateNumericMatrix(0, 0, mxDOUBLE_CLASS, mxREAL); 103 | return; 104 | } 105 | 106 | if (nDim_boxes != 5) 107 | mexErrMsgTxt("nms_mex boxes must has 5 columns"); 108 | 109 | 110 | int nPick = 0; 111 | vector vPick(nSample); 112 | if(mxGetClassID(input_boxes) == mxDOUBLE_CLASS) 113 | nms(input_boxes, overlap, vPick, nPick); 114 | else 115 | nms(input_boxes, overlap, vPick, nPick); 116 | 117 | plhs[0] = mxCreateNumericMatrix(nPick, 1, mxDOUBLE_CLASS, mxREAL); 118 | double *pRst = mxGetPr(plhs[0]); 119 | for (int i = 0; i < nPick; ++i) 120 | pRst[i] = vPick[i] + 1; 121 | } 122 | -------------------------------------------------------------------------------- /code/script_create_MRCNN_SCNN_VOC2007_2012.m: -------------------------------------------------------------------------------- 1 | function script_create_MRCNN_SCNN_VOC2007_2012 2 | 3 | models_dir = fullfile(pwd, 'models-exps'); 4 | net_def_file_directory = fullfile(pwd, 'model-defs'); 5 | 6 | % create the model directory 7 | model_dir_dst = fullfile(models_dir, 'MRCNN_SEMANTIC_FEATURES_VOC2007_2012'); 8 | mkdir_if_missing(model_dir_dst); 9 | 10 | % path to the model mat file 11 | model_mat_file = fullfile(model_dir_dst, 'detection_model_svm.mat'); 12 | 13 | % path to the model caffe definition file 14 | model_net_def_file = fullfile(model_dir_dst, 'deploy_svm.prototxt'); 15 | % source path to the model caffe definition file 16 | model_net_def_file_scr = fullfile(net_def_file_directory, 'MRCNN_Semantic_Features_model_svm.prototxt'); 17 | 18 | assert(exist(model_net_def_file_scr,'file')>0); 19 | copyfile(model_net_def_file_scr,model_net_def_file); 20 | assert(exist(model_net_def_file,'file')>0); 21 | 22 | region_dir_src = {}; 23 | net_weigths_file_region_dst = {}; 24 | pooler_regions = {}; 25 | 26 | % set the source directory of the region adaptation modules 27 | region_dir_src{end+1} = fullfile(models_dir, 'vgg_R0010_voc2012_2007_EB_ZP'); % region 1 28 | region_dir_src{end+1} = fullfile(models_dir, 
'vgg_RHalf1_voc2012_2007_EB_ZP'); % region 2 29 | region_dir_src{end+1} = fullfile(models_dir, 'vgg_RHalf2_voc2012_2007_EB_ZP'); % region 3 30 | region_dir_src{end+1} = fullfile(models_dir, 'vgg_RHalf3_voc2012_2007_EB_ZP'); % region 4 31 | region_dir_src{end+1} = fullfile(models_dir, 'vgg_RHalf4_voc2012_2007_EB_ZP'); % region 5 32 | region_dir_src{end+1} = fullfile(models_dir, 'vgg_R0005_voc2012_2007_EB_ZP'); % region 6 33 | region_dir_src{end+1} = fullfile(models_dir, 'vgg_R0308_voc2012_2007_EB_ZP'); % region 7 34 | region_dir_src{end+1} = fullfile(models_dir, 'vgg_R0510_voc2012_2007_EB_ZP'); % region 8 35 | region_dir_src{end+1} = fullfile(models_dir, 'vgg_R0815_voc2012_2007_EB_ZP'); % region 9 36 | region_dir_src{end+1} = fullfile(models_dir, 'vgg_R1018_voc2012_2007_EB_ZP'); % region 10 37 | % region 11: Semantic Segmentation Aware Features Region 38 | region_dir_src{end+1} = fullfile(models_dir, 'vgg_RSemSegAware_voc2012_2007_EB_ZP'); 39 | num_regions = length(region_dir_src); 40 | 41 | for r = 1:num_regions 42 | [~, region_name] = fileparts(region_dir_src{r}); 43 | region_model_mat_file = fullfile(region_dir_src{r}, 'detection_model_softmax.mat'); 44 | assert(exist(region_model_mat_file,'file')>0); 45 | 46 | ld = load(region_model_mat_file, 'model'); model_this = ld.model; clear ld; 47 | pooler_regions{r} = model_this.pooler; 48 | 49 | net_weigths_file_region_dst{r} = fullfile(model_dir_dst,sprintf('%s.caffemodel', region_name)); 50 | 51 | if (r == num_regions) % Semantic Segmentation Aware Features Region 52 | layers_region_src_this = {'fc1'}; 53 | net_def_file_region_dst_this1 = fullfile(net_def_file_directory, 'auxiliary_def_files',... 54 | sprintf('Semantic_segmentation_aware_net_pascal_train_test_stream%d.prototxt',r)); 55 | else 56 | layers_region_src_this = {'fc6','fc7'}; 57 | net_def_file_region_dst_this1 = fullfile(net_def_file_directory, 'auxiliary_def_files',... 
58 | sprintf('VGG_ILSVRC_16_layers_pascal_train_test_stream%d.prototxt',r)); 59 | end 60 | 61 | 62 | assert(exist(net_def_file_region_dst_this1, 'file')>0) 63 | 64 | caffe.set_mode_cpu(); 65 | curr_dir = pwd; 66 | cd(region_dir_src{r}); 67 | net_region_src = caffe_load_model(model_this.net_def_file, model_this.net_weights_file); 68 | cd(curr_dir); 69 | net_region_dst = caffe.Net(net_def_file_region_dst_this1, 'test'); 70 | layers_region_dst_this = strcat(layers_region_src_this, sprintf('_s%d',r)); 71 | layers_region_dst_this 72 | net_region_dst = caffe_copy_weights_from_net2net( net_region_dst, net_region_src, layers_region_dst_this, layers_region_src_this); 73 | net_region_dst.save(net_weigths_file_region_dst{r}); 74 | caffe.reset_all(); 75 | end 76 | 77 | % merge region types 78 | pooler = pooler_regions{1}; 79 | for r = 1:num_regions 80 | fprintf('Region #%d - pooler: \n', r); 81 | disp(pooler_regions{r}) 82 | pooler(r) = pooler_regions{r}; 83 | end 84 | pooler(end).feat_id = 2; 85 | 86 | % convert paths from absolute to relative to the model directory 87 | [a,b,c] = fileparts(model_net_def_file); 88 | model_net_def_file = ['./',b,c]; 89 | for i = 1:length(net_weigths_file_region_dst) 90 | [a,b,c] = fileparts(net_weigths_file_region_dst{i}); 91 | net_weigths_file_region_dst{i} = ['./',b,c]; 92 | end 93 | 94 | % prepare and save the model structure of the multi-region cnn model 95 | 96 | feat_blob_name = {'fc_feat'}; % name of the output blob of the last hidden layer of the model 97 | model_feat_cache = {'VGG_ILSVRC_16_layers', 'Semantic_Segmentation_Aware_Feats'}; % code-name of the activation maps that the VGG16 convolutional layers produce 98 | 99 | model = struct; 100 | model.net_def_file = model_net_def_file; 101 | model.net_weights_file = net_weigths_file_region_dst; 102 | model.pooler = pooler; 103 | model.feat_blob_name = feat_blob_name; 104 | model.feat_cache = model_feat_cache; 105 | model.score_out_blob = 'pascal_svm'; 106 | model.svm_layer_name = 
function script_create_MRCNN_VOC2007_2012
% SCRIPT_CREATE_MRCNN_VOC2007_2012 Assemble the multi-region CNN model.
%
% Reads the ten single-region models found under <pwd>/models-exps, copies
% the fc6/fc7 weights of each one into a per-stream .caffemodel file inside
% <pwd>/models-exps/MRCNN_VOC2007_2012, copies the SVM deploy prototxt next
% to them and saves the resulting model structure in
% 'detection_model_svm.mat'.
%
% Must be run from the repository root: every path is built from pwd.

models_dir             = fullfile(pwd, 'models-exps');
net_def_file_directory = fullfile(pwd, 'model-defs');

% create the model directory
model_dir_dst = fullfile(models_dir, 'MRCNN_VOC2007_2012');
mkdir_if_missing(model_dir_dst);

% path to the model mat file
model_mat_file = fullfile(model_dir_dst, 'detection_model_svm.mat');

% destination / source path of the model caffe definition file
model_net_def_file     = fullfile(model_dir_dst, 'deploy_svm.prototxt');
model_net_def_file_scr = fullfile(net_def_file_directory, 'MRCNN_model_svm.prototxt');

assert(exist(model_net_def_file_scr,'file')>0, ...
    'Missing network definition file: %s', model_net_def_file_scr);
copyfile(model_net_def_file_scr, model_net_def_file);
assert(exist(model_net_def_file,'file')>0, ...
    'Failed to copy network definition file to: %s', model_net_def_file);

% directory names of the region adaptation modules (stream r <-> entry r)
region_names = { ...
    'vgg_R0010_voc2012_2007_EB_ZP',  ... % region 1
    'vgg_RHalf1_voc2012_2007_EB_ZP', ... % region 2
    'vgg_RHalf2_voc2012_2007_EB_ZP', ... % region 3
    'vgg_RHalf3_voc2012_2007_EB_ZP', ... % region 4
    'vgg_RHalf4_voc2012_2007_EB_ZP', ... % region 5
    'vgg_R0005_voc2012_2007_EB_ZP',  ... % region 6
    'vgg_R0308_voc2012_2007_EB_ZP',  ... % region 7
    'vgg_R0510_voc2012_2007_EB_ZP',  ... % region 8
    'vgg_R0815_voc2012_2007_EB_ZP',  ... % region 9
    'vgg_R1018_voc2012_2007_EB_ZP'};     % region 10
num_regions = numel(region_names);

region_dir_src              = cell(1, num_regions);
net_weigths_file_region_dst = cell(1, num_regions);
pooler_regions              = cell(1, num_regions);

for r = 1:num_regions
    region_dir_src{r} = fullfile(models_dir, region_names{r});
    region_model_mat_file = fullfile(region_dir_src{r}, 'detection_model_softmax.mat');
    assert(exist(region_model_mat_file,'file')>0, ...
        'Missing region model file: %s', region_model_mat_file);

    ld = load(region_model_mat_file, 'model');
    model_this = ld.model;
    clear ld;
    pooler_regions{r} = model_this.pooler;

    net_weigths_file_region_dst{r} = fullfile(model_dir_dst, ...
        sprintf('%s.caffemodel', region_names{r}));

    net_def_file_region_dst_this1 = fullfile(net_def_file_directory, 'auxiliary_def_files',...
        sprintf('VGG_ILSVRC_16_layers_pascal_train_test_stream%d.prototxt',r));
    assert(exist(net_def_file_region_dst_this1, 'file')>0, ...
        'Missing auxiliary definition file: %s', net_def_file_region_dst_this1);

    caffe.set_mode_cpu();

    % The region model is loaded from inside its own directory (its stored
    % paths are presumably relative to it). Always return to the original
    % directory, even when loading fails, so that the remaining pwd-based
    % paths of the caller are not silently broken.
    curr_dir = pwd;
    cd(region_dir_src{r});
    try
        net_region_src = caffe_load_model(model_this.net_def_file, model_this.net_weights_file);
    catch load_err
        cd(curr_dir);
        rethrow(load_err);
    end
    cd(curr_dir);

    net_region_dst = caffe.Net(net_def_file_region_dst_this1, 'test');
    layers_region_src_this = {'fc6','fc7'};
    % destination layers carry the stream suffix, e.g. fc6_s3 / fc7_s3
    layers_region_dst_this = strcat(layers_region_src_this, sprintf('_s%d',r));
    fprintf('Region #%d - destination layers: %s\n', r, ...
        sprintf('%s ', layers_region_dst_this{:}));

    net_region_dst = caffe_copy_weights_from_net2net( net_region_dst, net_region_src, ...
        layers_region_dst_this, layers_region_src_this);
    net_region_dst.save(net_weigths_file_region_dst{r});
    caffe.reset_all();
end

% merge region types into a single struct array (one element per region)
pooler = pooler_regions{1};
for r = 1:num_regions
    fprintf('Region #%d - pooler: \n', r);
    disp(pooler_regions{r})
    pooler(r) = pooler_regions{r};
end

% convert paths from absolute to relative to the model directory
[~, def_name, def_ext] = fileparts(model_net_def_file);
model_net_def_file = ['./', def_name, def_ext];
for i = 1:length(net_weigths_file_region_dst)
    [~, w_name, w_ext] = fileparts(net_weigths_file_region_dst{i});
    net_weigths_file_region_dst{i} = ['./', w_name, w_ext];
end

% prepare and save the model structure of the multi-region cnn model

feat_blob_name   = {'fc_feat'}; % name of the output blob of the last hidden layer of the model
model_feat_cache = 'VGG_ILSVRC_16_layers'; % code-name of the activation maps that the VGG16 convolutional layers produce

model = struct;
model.net_def_file     = model_net_def_file;
model.net_weights_file = net_weigths_file_region_dst;
model.pooler           = pooler;
model.feat_blob_name   = feat_blob_name;
model.feat_cache       = model_feat_cache;
model.score_out_blob   = 'pascal_svm';
model.svm_layer_name   = 'pascal_svm';

VOCopts = initVOCOpts( '', '2007');
model.classes = VOCopts.classes;
fprintf('model:\n')
disp(model);
save(model_mat_file, 'model', '-v7.3');

end
4 | % 5 | % This file is part of the code that implements the following ICCV2015 accepted paper: 6 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model" 7 | % authors: Spyros Gidaris, Nikos Komodakis 8 | % institutation: Universite Paris Est, Ecole des Ponts ParisTech 9 | % Technical report: http://arxiv.org/abs/1505.01749 10 | % code: https://github.com/gidariss/mrcnn-object-detection 11 | % 12 | % AUTORIGHTS 13 | % -------------------------------------------------------- 14 | % Copyright (c) 2015 Spyros Gidaris 15 | % 16 | % "Object detection via a multi-region & semantic segmentation-aware CNN model" 17 | % Technical report: http://arxiv.org/abs/1505.01749 18 | % Licensed under The MIT License [see LICENSE for details] 19 | % --------------------------------------------------------- 20 | 21 | ip = inputParser; 22 | ip.addOptional('start', 1, @isscalar); 23 | ip.addOptional('end', 0, @isscalar); 24 | ip.addOptional('scales', [576 874 1200], @ismatrix); 25 | ip.addOptional('gpu_id', 0, @isnumeric); 26 | ip.addOptional('feat_cache_name','Semantic_Segmentation_Aware_Feats', @ischar); 27 | ip.addOptional('feat_cache_name_input','VGG_ILSVRC_16_layers', @ischar); 28 | ip.addOptional('use_flips', false, @islogical); 29 | 30 | ip.parse(varargin{:}); 31 | opts = ip.Results; 32 | 33 | net_files_dir = fullfile(pwd,'data', 'vgg_pretrained_models'); 34 | net_def_file = fullfile(net_files_dir,'Semantic_Segmentation_Features_CNN.prototxt'); 35 | net_weights_file = fullfile(net_files_dir,'Semantic_Segmentation_Features_CNN.caffemodel'); 36 | assert(exist(net_files_dir, 'dir')>0); 37 | assert(exist(net_def_file, 'file')>0); 38 | assert(exist(net_weights_file, 'file')>0); 39 | 40 | image_db = load_image_dataset('image_set',image_set,... 41 | 'voc_year',voc_year,'use_flips',opts.use_flips, ... 
function script_extract_vgg16_conv_features(image_set, voc_year, varargin)
% SCRIPT_EXTRACT_VGG16_CONV_FEATURES Extract and cache VGG16 conv feature maps.
%
% Loads the VGG16 convolutional network from <pwd>/data/vgg_pretrained_models,
% builds the image list with load_image_dataset and hands it to
% extract_conv_features_all_images, which writes its results under
% <pwd>/feat_cache/VGG_ILSVRC_16_layers/<image_set_name>.
%
% image_set: string with the PASCAL VOC image set name, e.g. 'test','trainval',...
% voc_year:  string with the PASCAL VOC challenge year of the image set, e.g. '2007','2012',...
% varargin:  optional arguments parsed below (start, end, scales, gpu_id, use_flips).
%
% NOTE(review): the options are registered with addOptional, i.e. as
% positional optionals, while their names suggest name/value usage -
% confirm how callers pass them on the MATLAB version in use.
% 
% This file is part of the code that implements the following ICCV2015 accepted paper:
% title: "Object detection via a multi-region & semantic segmentation-aware CNN model"
% authors: Spyros Gidaris, Nikos Komodakis
% institution: Universite Paris Est, Ecole des Ponts ParisTech
% Technical report: http://arxiv.org/abs/1505.01749
% code: https://github.com/gidariss/mrcnn-object-detection
% 
% AUTORIGHTS
% --------------------------------------------------------
% Copyright (c) 2015 Spyros Gidaris
% 
% "Object detection via a multi-region & semantic segmentation-aware CNN model"
% Technical report: http://arxiv.org/abs/1505.01749
% Licensed under The MIT License [see LICENSE for details]
% ---------------------------------------------------------

ip = inputParser;
ip.addOptional('start', 1, @isscalar); % index of the first image in the set from which it will start extracting the feature maps.
ip.addOptional('end', 0, @isscalar); % index of the last image in the set till which it will extract the feature maps (default 0; presumably "until the last image" - TODO confirm in extract_conv_features_all_images).
ip.addOptional('scales', [480 576 688 874 1200], @ismatrix); % image scales forwarded to the extractor
ip.addOptional('gpu_id', 0, @isnumeric); % device id forwarded to caffe_set_device
ip.addOptional('use_flips', false, @islogical); % forwarded to load_image_dataset

ip.parse(varargin{:});
opts = ip.Results;

% mean pixel values forwarded to the extractor as 'mean_pix'
mean_pix = [103.939, 116.779, 123.68];
net_files_dir = fullfile(pwd,'data','vgg_pretrained_models');
net_def_file = fullfile(net_files_dir,'vgg16_conv5_deploy.prototxt');
net_weights_file = fullfile(net_files_dir,'VGG_ILSVRC_16_Convolutional_Layers.caffemodel');
% fail early when the pre-trained model files are not in place
assert(exist(net_files_dir,'dir')>0);
assert(exist(net_def_file,'file')>0);
assert(exist(net_weights_file,'file')>0);

image_db = load_image_dataset('image_set',image_set,...
    'voc_year',voc_year,'use_flips',opts.use_flips);

% output directory: <pwd>/feat_cache/VGG_ILSVRC_16_layers/<image_set_name>
feat_cache_name = 'VGG_ILSVRC_16_layers';
feat_cache_dir_parent = fullfile(pwd, 'feat_cache');
feat_cache_dir_child = fullfile(feat_cache_dir_parent, feat_cache_name);
feat_cache_dir_image_set = fullfile(feat_cache_dir_child, image_db.image_set_name);

% created level by level, parent first
mkdir_if_missing(feat_cache_dir_parent);
mkdir_if_missing(feat_cache_dir_child);
mkdir_if_missing(feat_cache_dir_image_set);

% select the device, clear any previously loaded nets, then load the model
caffe_set_device( opts.gpu_id );
caffe.reset_all();
net = caffe_load_model( net_def_file, {net_weights_file});

extract_conv_features_all_images(net, image_db.image_paths, feat_cache_dir_image_set, ...
    'start',opts.start,'end',opts.end,'scales',opts.scales,'mean_pix',mean_pix);

% release the network once the extraction is finished
caffe.reset_all();

end
assert(exist(full_model_path, 'file')>0); 23 | 24 | model_obj_rec = load_model(full_model_path); % object recognition model 25 | 26 | feat_cache_names = model_obj_rec.feat_cache; 27 | voc_year_train = opts.voc_year_train; 28 | image_set_train = opts.image_set_train; 29 | proposals_method_train = opts.proposals_method_train; 30 | 31 | svm_C = opts.svm_C; 32 | %************************************************************************** 33 | 34 | %*************************** LOAD DATASET ********************************* 35 | image_db_train = load_image_dataset(... 36 | 'image_set', image_set_train, ... 37 | 'voc_year', voc_year_train, ... 38 | 'proposals_method', proposals_method_train, ... 39 | 'feat_cache_names', feat_cache_names); 40 | 41 | image_paths_train = image_db_train.image_paths; 42 | feature_paths_train = image_db_train.feature_paths; 43 | all_regions_train = image_db_train.all_regions; 44 | all_bbox_gt_train = image_db_train.all_bbox_gt; 45 | proposals_suffix_train = image_db_train.proposals_suffix; 46 | image_set_name_train = image_db_train.image_set_name; 47 | 48 | experiment_name = sprintf('exp_train_svm_%s', proposals_suffix_train); 49 | 50 | cache_directory = fullfile('./', 'cache_dir'); 51 | experiment_dir = fullfile(cache_directory, [image_set_name_train, sprintf('/%s/',experiment_name)]); 52 | model_obj_rec.cache_dir = experiment_dir; 53 | %************************************************************************** 54 | 55 | %***************************** LOAD MODEL ********************************* 56 | caffe_set_device( gpu_id ); 57 | caffe.reset_all(); 58 | curr_dir = pwd; 59 | cd(full_model_dir) 60 | model_obj_rec.net = caffe_load_model( model_obj_rec.net_def_file, model_obj_rec.net_weights_file); 61 | %************************************************************************** 62 | %***************************** TRAIN SVMS ******************************** 63 | mkdir_if_missing(cache_directory); 64 | mkdir_if_missing(experiment_dir); 65 | 66 | 
function o = boxoverlap(a, b, element_wise)
% Compute the intersection-over-union overlap between bounding boxes.
%
% a             a matrix where each row specifies a bounding box [x1 y1 x2 y2 ...]
% b             a matrix where each row specifies a bounding box [x1 y1 x2 y2 ...]
% element_wise  (optional, default false)
%                 false: o(i,j) is the overlap of a(i,:) with b(j,:), so o
%                        has size(a,1) rows and size(b,1) columns
%                 true : a and b must have the same number of rows and
%                        o(i) is the overlap of a(i,:) with b(i,:)
%
% Box width/height are measured inclusively (x2 - x1 + 1). Pairs that do
% not intersect get an overlap of 0.

% AUTORIGHTS
% -------------------------------------------------------
% Copyright (C) 2011-2012 Ross Girshick
% Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick
%
% This file is part of the voc-releaseX code
% (http://people.cs.uchicago.edu/~rbg/latent/)
% and is available under the terms of an MIT-like license
% provided in COPYING. Please retain this notice and
% COPYING if you use this file (or a portion of it) in
% your project.
% -------------------------------------------------------

if nargin < 3
    element_wise = false;
end

if element_wise
    assert(size(b,1) == size(a,1));
    o = iou_against(a, b(:,1), b(:,2), b(:,3), b(:,4));
else
    % one column of overlaps per box in b
    per_box = arrayfun(@(k) iou_against(a, b(k,1), b(k,2), b(k,3), b(k,4)), ...
        1:size(b, 1), 'UniformOutput', false);
    o = cell2mat(per_box);
end

end

function iou = iou_against(a, bx1, by1, bx2, by2)
% IoU of every row of a against box coordinates that are either scalars
% (one box) or column vectors with one entry per row of a.
w = min(a(:,3), bx2) - max(a(:,1), bx1) + 1;
h = min(a(:,4), by2) - max(a(:,2), by1) + 1;
inter = w .* h;
area_a = (a(:,3)-a(:,1)+1) .* (a(:,4)-a(:,2)+1);
area_b = (bx2-bx1+1) .* (by2-by1+1);
% intersection over union overlap
iou = inter ./ (area_a + area_b - inter);
% set invalid (non-intersecting) entries to 0 overlap
iou(w <= 0) = 0;
iou(h <= 0) = 0;
end
function [overlap, recall, AR] = compute_average_recall(unsorted_overlaps)
% Recall-versus-overlap curve and average recall for a set of overlaps.
%
% unsorted_overlaps  array of overlap values in any order and any shape
%
% overlap  1 x 1001 grid of overlap thresholds 0, 0.001, ..., 1
% recall   1001 x 1 fraction of input overlaps that are >= each threshold
% AR       twice the trapezoidal integral of recall over the thresholds
%          that are >= 0.5; 0 when the input is empty
step = 0.001;

overlap = 0:step:1;
overlap(end) = 1; % pin the last grid point to exactly 1

values  = unsorted_overlaps(:)';
num_pos = numel(values);

% counting does not depend on the order of the values, so no sort is needed
hits   = arrayfun(@(t) sum(values >= t), overlap(:));
recall = hits / (num_pos+eps); % eps keeps the empty case at 0 instead of NaN

if num_pos == 0
    AR = 0;
else
    AR = 2 * step * trapz(recall(overlap >= 0.5));
end
end
function [matching, objective] = greedy_matching_rowwise(iou_matrix)
% Greedy one-to-one assignment of rows to columns by decreasing score.
%
% iou_matrix  score matrix with no more rows than columns
%
% matching    n x 1 vector; matching(r) is the column assigned to row r
%             (0 when the row was left unassigned)
% objective   sum of the scores of the selected (row, column) pairs
%
% Repeatedly selects the largest remaining entry, records the pair and
% removes its row and column from further consideration. On ties the
% lowest row wins, then the lowest column within that row.
[num_rows, num_cols] = size(iou_matrix);
assert(num_rows <= num_cols);

matching  = zeros(num_rows, 1);
objective = 0;
remaining = num_rows;
while remaining > 0
    % best column of every row, then the best row among those
    [row_best, row_best_col] = max(iou_matrix, [], 2);
    [best_score, best_row] = max(row_best);
    if best_score == -inf
        % every remaining entry has already been invalidated
        break
    end
    best_col = row_best_col(best_row);

    matching(best_row) = best_col;
    objective = objective + best_score;

    % invalidate the chosen row and column
    iou_matrix(best_row, :) = -inf;
    iou_matrix(:, best_col) = -inf;
    remaining = remaining - 1;
end
end
function [ source_directory, list_of_images ] = createListOfImagesFromVOCOpts( path_to_voc_devkit, set_name )
% CREATELISTOFIMAGESFROMVOCOPTS List the image files of a PASCAL VOC image set.
%
% path_to_voc_devkit  path of the VOC devkit; its 'VOCcode' sub-folder is
%                     added to the MATLAB path
% set_name            name of the image set, substituted into
%                     VOCopts.imgsetpath (e.g. 'trainval', 'test')
%
% source_directory    folder part of the VOCopts.imgpath template
% list_of_images      N x 1 cell array with the path of every image of the
%                     set, built from the VOCopts.imgpath template
%
% NOTE(review): source_directory used to be declared but never assigned,
% which made every call that requested outputs fail. It is now set to the
% folder of the image path template - confirm this is what callers expect.

addpath([path_to_voc_devkit,'/VOCcode']);
VOCopts = VOCInitFrom( path_to_voc_devkit );

source_directory = fileparts(VOCopts.imgpath);

% one image id per line of the image-set file
image_ids = textread(sprintf(VOCopts.imgsetpath,set_name),'%s');
list_of_images = cell(numel(image_ids),1);

for i = 1:numel(image_ids)
    list_of_images{i} = sprintf(VOCopts.imgpath,image_ids{i});
end

end
13 | % --------------------------------------------------------- 14 | 15 | % Add a random string ("salt") to the end of the results file name 16 | % to prevent concurrent evaluations from clobbering each other 17 | use_res_salt = true; 18 | % Delete results files after computing APs 19 | rm_res = true; 20 | % comp4 because we use outside data (ILSVRC2012) 21 | comp_id = 'comp4'; 22 | % draw each class curve 23 | draw_curve = true; 24 | 25 | % save results 26 | test_set = VOCopts.testset; 27 | year = VOCopts.dataset(4:end); 28 | 29 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 30 | 31 | if use_res_salt 32 | prev_rng = rng; 33 | rng shuffle; 34 | salt = sprintf('%d', randi(100000)); 35 | res_id = [comp_id '-' salt]; 36 | rng(prev_rng); 37 | else 38 | res_id = comp_id; 39 | end 40 | res_fn = sprintf(VOCopts.detrespath, res_id, cls); 41 | 42 | % write out detections in PASCAL format and score 43 | fid = fopen(res_fn, 'w'); 44 | for i = 1:length(image_ids); 45 | bbox = boxes{i}; 46 | for j = 1:size(bbox,1) 47 | fprintf(fid, '%s %f %.3f %.3f %.3f %.3f\n', image_ids{i}, bbox(j,end), bbox(j,1:4)); 48 | end 49 | end 50 | fclose(fid); 51 | 52 | recall = []; 53 | prec = []; 54 | ap = 0; 55 | ap_auc = 0; 56 | 57 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 58 | if do_eval 59 | % Bug in VOCevaldet requires that tic has been called first 60 | tic; 61 | [recall, prec, ap] = VOCevaldet(VOCopts, res_id, cls, draw_curve); 62 | ap_auc = xVOCap(recall, prec); 63 | 64 | % force plot limits 65 | ylim([0 1]); 66 | xlim([0 1]); 67 | end 68 | fprintf('!!! 
function image = get_image(image_path)
% GET_IMAGE Read an image, optionally as its horizontally flipped version.
%
% image_path  path of the image file. When the file name ends with
%             '_flip<ext>' (e.g. '000084_flip.jpg') the image is read from
%             the same path without the '_flip' part ('000084.jpg') and
%             mirrored left-right.
%
% image       the image as returned by imread; single-channel images are
%             replicated to three channels.
[~,~,ext] = fileparts(image_path);
flip_suffix = ['_flip',ext];
num_chars = length(flip_suffix);

% Guard on the length first: paths shorter than the suffix (e.g. 'a.png')
% would otherwise be indexed with a non-positive index.
is_flipped = length(image_path) >= num_chars && ...
    strcmp(image_path((end-num_chars+1):end), flip_suffix);

if is_flipped
    % Strip only the trailing suffix, literally (no regular expression, so
    % the '.' of the extension cannot act as a wildcard and other parts of
    % the path are never touched).
    img_name = [image_path(1:(end-num_chars)), ext];
    image = imread(img_name);
    image = image(:, end:-1:1, :); % mirror the columns of every channel
else
    image = imread(image_path);
end

if size(image,3) == 1, image = repmat(image, [1, 1, 3]); end
end
the code that implements the following ICCV2015 accepted paper: 4 | % title: "Object detection via a multi-region & semantic segmentation-aware CNN model" 5 | % authors: Spyros Gidaris, Nikos Komodakis 6 | % institution: Universite Paris Est, Ecole des Ponts ParisTech 7 | % Technical report: http://arxiv.org/abs/1505.01749 8 | % code: https://github.com/gidariss/mrcnn-object-detection 9 | % 10 | % AUTORIGHTS 11 | % -------------------------------------------------------- 12 | % Copyright (c) 2015 Spyros Gidaris 13 | % 14 | % "Object detection via a multi-region & semantic segmentation-aware CNN model" 15 | % Technical report: http://arxiv.org/abs/1505.01749 16 | % Licensed under The MIT License [see LICENSE for details] 17 | % --------------------------------------------------------- 18 | if ~isnumeric(results) 19 | aps = [results(:).ap]'; 20 | else 21 | aps = results; 22 | end 23 | 24 | for i = 1:numel(classes) 25 | class_string = classes{i}(1:min(5,length(classes{i}))); 26 | fprintf('& %5s ', class_string) 27 | end 28 | fprintf('& %5s \\\\ \n', 'mAP') 29 | for i = 1:numel(classes) 30 | fprintf('& %2.3f ', aps(i)) 31 | end 32 | fprintf('& %2.3f \\\\ \n', mean(aps)) 33 | 34 | end 35 | 36 | -------------------------------------------------------------------------------- /code/utils/procid.m: -------------------------------------------------------------------------------- 1 | function s = procid() 2 | % Returns a string identifying the process. 3 | 4 | % AUTORIGHTS 5 | % ------------------------------------------------------- 6 | % Copyright (C) 2009-2012 Ross Girshick 7 | % 8 | % This file is part of the voc-releaseX code 9 | % (http://people.cs.uchicago.edu/~rbg/latent/) 10 | % and is available under the terms of an MIT-like license 11 | % provided in COPYING. Please retain this notice and 12 | % COPYING if you use this file (or a portion of it) in 13 | % your project. 
function [ list_files ] = read_list_of_files( list_file_path )
% READ_LIST_OF_FILES Read a text file into a cell array, one line per cell.
%
% list_file_path  path of the text file to read
%
% list_files      1 x N cell array with the N lines of the file ({} for an
%                 empty file). Lines are read with fgets, so every entry
%                 keeps its line terminator; callers that need clean names
%                 have to strip it themselves.
%
% Raises 'read_list_of_files:cannotOpen' when the file cannot be opened.

[fid, open_msg] = fopen(list_file_path);
if fid == -1
    error('read_list_of_files:cannotOpen', ...
        'Cannot open list file "%s": %s', list_file_path, open_msg);
end
% close the file on every exit path, including errors while reading
close_file = onCleanup(@() fclose(fid));

list_files = {};
tline = fgets(fid);
while ischar(tline) % fgets returns -1 (not a char) at end of file
    list_files{end+1} = tline; %#ok<AGROW>
    tline = fgets(fid);
end
end
23 | 24 | prev_rng = rng; 25 | rng(seed, 'twister') 26 | end 27 | -------------------------------------------------------------------------------- /code/utils/showboxes.m: -------------------------------------------------------------------------------- 1 | function showboxes(im, boxes, out) 2 | % Draw bounding boxes on top of an image. 3 | % showboxes(im, boxes, out) 4 | % 5 | % If out is given, a pdf of the image is generated (requires export_fig). 6 | 7 | % AUTORIGHTS 8 | % ------------------------------------------------------- 9 | % Copyright (C) 2011-2012 Ross Girshick 10 | % Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick 11 | % Copyright (C) 2007 Pedro Felzenszwalb, Deva Ramanan 12 | % 13 | % This file is part of the voc-releaseX code 14 | % (http://people.cs.uchicago.edu/~rbg/latent/) 15 | % and is available under the terms of an MIT-like license 16 | % provided in COPYING. Please retain this notice and 17 | % COPYING if you use this file (or a portion of it) in 18 | % your project. 
19 | % ------------------------------------------------------- 20 | 21 | if nargin > 2 22 | % different settings for producing pdfs 23 | print = true; 24 | %wwidth = 2.25; 25 | %cwidth = 1.25; 26 | cwidth = 1.4; 27 | wwidth = cwidth + 1.1; 28 | imsz = size(im); 29 | % resize so that the image is 300 pixels per inch 30 | % and 1.2 inches tall 31 | scale = 1.2 / (imsz(1)/300); 32 | im = imresize(im, scale, 'method', 'cubic'); 33 | %f = fspecial('gaussian', [3 3], 0.5); 34 | %im = imfilter(im, f); 35 | boxes = (boxes-1)*scale+1; 36 | else 37 | print = false; 38 | cwidth = 2; 39 | end 40 | 41 | image(im); 42 | if print 43 | truesize(gcf); 44 | end 45 | axis image; 46 | axis off; 47 | set(gcf, 'Color', 'white'); 48 | 49 | if ~isempty(boxes) 50 | numfilters = floor(size(boxes, 2)/4); 51 | if print 52 | % if printing, increase the contrast around the boxes 53 | % by printing a white box under each color box 54 | for i = 1:numfilters 55 | x1 = boxes(:,1+(i-1)*4); 56 | y1 = boxes(:,2+(i-1)*4); 57 | x2 = boxes(:,3+(i-1)*4); 58 | y2 = boxes(:,4+(i-1)*4); 59 | % remove unused filters 60 | del = find(((x1 == 0) .* (x2 == 0) .* (y1 == 0) .* (y2 == 0)) == 1); 61 | x1(del) = []; 62 | x2(del) = []; 63 | y1(del) = []; 64 | y2(del) = []; 65 | if i == 1 66 | w = wwidth; 67 | else 68 | w = wwidth; 69 | end 70 | 71 | % if i == 13+1 || i == 14+1 72 | % c = 'k'; 73 | % w = cwidth + 0.5; 74 | % else 75 | c = 'w'; 76 | % end 77 | 78 | line([x1 x1 x2 x2 x1]', [y1 y2 y2 y1 y1]', 'color', c, 'linewidth', w); 79 | end 80 | end 81 | % draw the boxes with the detection window on top (reverse order) 82 | for i = numfilters:-1:1 83 | x1 = boxes(:,1+(i-1)*4); 84 | y1 = boxes(:,2+(i-1)*4); 85 | x2 = boxes(:,3+(i-1)*4); 86 | y2 = boxes(:,4+(i-1)*4); 87 | % remove unused filters 88 | del = find(((x1 == 0) .* (x2 == 0) .* (y1 == 0) .* (y2 == 0)) == 1); 89 | x1(del) = []; 90 | x2(del) = []; 91 | y1(del) = []; 92 | y2(del) = []; 93 | if i == 1 94 | c = 'r'; %[160/255 0 0]; 95 | s = '-'; 96 | % elseif i == 
function showboxesc(im, boxes, color, style)
% showboxesc(im, boxes, color, style)
% Draw boxes on top of an image.
%
% im     image to display; when empty, the boxes are added to the current
%        axes instead (hold on / hold off around the drawing)
% boxes  one row per detection; every consecutive group of four columns
%        [x1 y1 x2 y2] is drawn as one rectangle. Groups that are all
%        zeros are skipped.
% color  (optional) line color used for every box. When omitted, the color
%        and line style are chosen per row from its last column:
%          0 -> cyan, 1 -> red, 2 -> green, 3 -> blue, 4 -> magenta dashed,
%          any other value -> yellow
% style  (optional) line style used together with color, default '-'

% AUTORIGHTS
% -------------------------------------------------------
% Copyright (C) 2009-2012 Ross Girshick
%
% This file is part of the voc-releaseX code
% (http://people.cs.uchicago.edu/~rbg/latent/)
% and is available under the terms of an MIT-like license
% provided in COPYING. Please retain this notice and
% COPYING if you use this file (or a portion of it) in
% your project.
% -------------------------------------------------------

default_color = ~exist('color', 'var');
if ~exist('style', 'var')
    % previously undefined when only color was given
    style = '-';
end

if isempty(im)
    hold on;
else
    image(im);
    axis image;
    axis off;
end

if ~isempty(boxes)
    numfilters = floor(size(boxes, 2)/4);
    for j = 1:size(boxes,1)
        if default_color
            % pick color/style from the label stored in the last column
            style = '-';
            switch boxes(j,end)
                case 0
                    color = 'c';
                case 1
                    color = 'r';
                case 2
                    color = 'g';
                case 3
                    color = 'b';
                case 4
                    color = 'm';
                    style = '--';
                otherwise
                    % unknown label: use a fixed fallback instead of an
                    % undefined or stale color from a previous row
                    color = 'y';
            end
        end

        for i = 1:numfilters
            x1 = boxes(j,1+(i-1)*4);
            y1 = boxes(j,2+(i-1)*4);
            x2 = boxes(j,3+(i-1)*4);
            y2 = boxes(j,4+(i-1)*4);
            % skip unused filters (all four coordinates are zero)
            if x1 == 0 && x2 == 0 && y1 == 0 && y2 == 0
                continue;
            end

            line([x1 x1 x2 x2 x1 x1]', [y1 y2 y2 y1 y1 y2]', 'color', color, ...
                'linewidth', 1, ...
                'linestyle', style);
        end
    end
end
drawnow;
if isempty(im)
    hold off;
end
Please retain this notice and 13 | % COPYING if you use this file (or a portion of it) in 14 | % your project. 15 | % ------------------------------------------------------- 16 | 17 | persistent th; 18 | 19 | if isempty(th) 20 | th = tic(); 21 | end 22 | 23 | if toc(th) > 1 24 | fprintf(fmt, varargin{:}); 25 | drawnow; 26 | th = tic(); 27 | end 28 | -------------------------------------------------------------------------------- /code/utils/writeDetectionsPascalFile.m: -------------------------------------------------------------------------------- 1 | function res_file = writeDetectionsPascalFile(boxes, image_paths, VOCopts, cls, dst_path) 2 | % Write the detections of class CLS to a 'comp4' results file inside DST_PATH and return its path. boxes{i} holds the detections of image i, one per row: columns 1:4 are written with %d and the last column with %f (presumably the detection score). 3 | image_ids = getImageIdsFromImagePaths(image_paths); 4 | 5 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 6 | 7 | res_fn = sprintf(VOCopts.detrespath, 'comp4', cls); 8 | [path, filename, ext] = fileparts(res_fn); 9 | res_file = [dst_path, filesep, filename, ext]; 10 | % only the file name of VOCopts.detrespath is kept; the directory comes from DST_PATH 11 | fid = fopen(res_file, 'w'); if fid == -1, error('writeDetectionsPascalFile:fopen', 'Cannot open %s for writing', res_file); end 12 | for i = 1:length(image_ids); 13 | bbox = boxes{i}; 14 | for j = 1:size(bbox,1) 15 | fprintf(fid, '%s %f %d %d %d %d\n', image_ids{i}, bbox(j,end), bbox(j,1:4)); 16 | end 17 | end 18 | fclose(fid); 19 | 20 | end 21 | 22 | -------------------------------------------------------------------------------- /code/utils/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /external/liblinear-1.94/COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2007-2013 The LIBLINEAR Project. 3 | All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /external/liblinear-1.94/Makefile: -------------------------------------------------------------------------------- 1 | CXX ?= g++ 2 | CC ?= gcc 3 | CFLAGS = -Wall -Wconversion -O3 -fPIC 4 | LIBS = blas/blas.a 5 | SHVER = 1 6 | OS = $(shell uname) 7 | #LIBS = -lblas 8 | 9 | all: train predict 10 | 11 | lib: linear.o tron.o blas/blas.a 12 | if [ "$(OS)" = "Darwin" ]; then \ 13 | SHARED_LIB_FLAG="-dynamiclib -Wl,-install_name,liblinear.so.$(SHVER)"; \ 14 | else \ 15 | SHARED_LIB_FLAG="-shared -Wl,-soname,liblinear.so.$(SHVER)"; \ 16 | fi; \ 17 | $(CXX) $${SHARED_LIB_FLAG} linear.o tron.o blas/blas.a -o liblinear.so.$(SHVER) 18 | 19 | train: tron.o linear.o train.c blas/blas.a 20 | $(CXX) $(CFLAGS) -o train train.c tron.o linear.o $(LIBS) 21 | 22 | predict: tron.o linear.o predict.c blas/blas.a 23 | $(CXX) $(CFLAGS) -o predict predict.c tron.o linear.o $(LIBS) 24 | 25 | tron.o: tron.cpp tron.h 26 | $(CXX) $(CFLAGS) -c -o tron.o tron.cpp 27 | 28 | linear.o: linear.cpp linear.h 29 | $(CXX) $(CFLAGS) -c -o linear.o linear.cpp 30 | 31 | blas/blas.a: blas/*.c blas/*.h 32 | make -C blas OPTFLAGS='$(CFLAGS)' CC='$(CC)'; 33 | 34 | clean: 35 | make -C blas clean 36 | make -C matlab clean 37 | rm -f *~ tron.o linear.o train predict liblinear.so.$(SHVER) 38 | -------------------------------------------------------------------------------- /external/liblinear-1.94/Makefile.win: -------------------------------------------------------------------------------- 1 | #You must ensure nmake.exe, cl.exe, link.exe are in system path. 2 | #VCVARS32.bat 3 | #Under dosbox prompt 4 | #nmake -f Makefile.win 5 | 6 | ########################################## 7 | CXX = cl.exe 8 | CFLAGS = -nologo -O2 -EHsc -I. 
-D __WIN32__ -D _CRT_SECURE_NO_DEPRECATE 9 | TARGET = windows 10 | 11 | all: $(TARGET)\train.exe $(TARGET)\predict.exe 12 | 13 | $(TARGET)\train.exe: tron.obj linear.obj train.c blas\*.c 14 | $(CXX) $(CFLAGS) -Fe$(TARGET)\train.exe tron.obj linear.obj train.c blas\*.c 15 | 16 | $(TARGET)\predict.exe: tron.obj linear.obj predict.c blas\*.c 17 | $(CXX) $(CFLAGS) -Fe$(TARGET)\predict.exe tron.obj linear.obj predict.c blas\*.c 18 | 19 | linear.obj: linear.cpp linear.h 20 | $(CXX) $(CFLAGS) -c linear.cpp 21 | 22 | tron.obj: tron.cpp tron.h 23 | $(CXX) $(CFLAGS) -c tron.cpp 24 | 25 | lib: linear.cpp linear.h linear.def tron.obj 26 | $(CXX) $(CFLAGS) -LD linear.cpp tron.obj blas\*.c -Fe$(TARGET)\liblinear -link -DEF:linear.def 27 | 28 | clean: 29 | -erase /Q *.obj $(TARGET)\. 30 | 31 | -------------------------------------------------------------------------------- /external/liblinear-1.94/blas/Makefile: -------------------------------------------------------------------------------- 1 | AR = ar rcv 2 | RANLIB = ranlib 3 | 4 | HEADERS = blas.h blasp.h 5 | FILES = dnrm2.o daxpy.o ddot.o dscal.o 6 | 7 | CFLAGS = $(OPTFLAGS) 8 | FFLAGS = $(OPTFLAGS) 9 | 10 | blas: $(FILES) $(HEADERS) 11 | $(AR) blas.a $(FILES) 12 | $(RANLIB) blas.a 13 | 14 | clean: 15 | - rm -f *.o 16 | - rm -f *.a 17 | - rm -f *~ 18 | 19 | .c.o: 20 | $(CC) $(CFLAGS) -c $*.c 21 | 22 | 23 | -------------------------------------------------------------------------------- /external/liblinear-1.94/blas/blas.h: -------------------------------------------------------------------------------- 1 | /* blas.h -- C header file for BLAS Ver 1.0 */ 2 | /* Jesse Bennett March 23, 2000 */ 3 | 4 | /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." 
5 | 6 | - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ 7 | 8 | #ifndef BLAS_INCLUDE 9 | #define BLAS_INCLUDE 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | /* Data types specific to BLAS implementation */ 16 | typedef struct { float r, i; } fcomplex; 17 | typedef struct { double r, i; } dcomplex; 18 | typedef int blasbool; 19 | 20 | #include "blasp.h" /* Prototypes for all BLAS functions */ 21 | 22 | #define FALSE 0 23 | #define TRUE 1 24 | 25 | /* Macro functions */ 26 | #define MIN(a,b) ((a) <= (b) ? (a) : (b)) 27 | #define MAX(a,b) ((a) >= (b) ? (a) : (b)) 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /external/liblinear-1.94/blas/daxpy.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy, 8 | int *incy) 9 | { 10 | long int i, m, ix, iy, nn, iincx, iincy; 11 | register double ssa; 12 | 13 | /* constant times a vector plus a vector. 14 | uses unrolled loop for increments equal to one. 15 | jack dongarra, linpack, 3/11/78. 16 | modified 12/3/93, array(1) declarations changed to array(*) */ 17 | 18 | /* Dereference inputs */ 19 | nn = *n; 20 | ssa = *sa; 21 | iincx = *incx; 22 | iincy = *incy; 23 | 24 | if( nn > 0 && ssa != 0.0 ) 25 | { 26 | if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */ 27 | { 28 | m = nn-3; 29 | for (i = 0; i < m; i += 4) 30 | { 31 | sy[i] += ssa * sx[i]; 32 | sy[i+1] += ssa * sx[i+1]; 33 | sy[i+2] += ssa * sx[i+2]; 34 | sy[i+3] += ssa * sx[i+3]; 35 | } 36 | for ( ; i < nn; ++i) /* clean-up loop */ 37 | sy[i] += ssa * sx[i]; 38 | } 39 | else /* code for unequal increments or equal increments not equal to 1 */ 40 | { 41 | ix = iincx >= 0 ? 0 : (1 - nn) * iincx; 42 | iy = iincy >= 0 ? 
0 : (1 - nn) * iincy; 43 | for (i = 0; i < nn; i++) 44 | { 45 | sy[iy] += ssa * sx[ix]; 46 | ix += iincx; 47 | iy += iincy; 48 | } 49 | } 50 | } 51 | 52 | return 0; 53 | } /* daxpy_ */ 54 | 55 | #ifdef __cplusplus 56 | } 57 | #endif 58 | -------------------------------------------------------------------------------- /external/liblinear-1.94/blas/ddot.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | double ddot_(int *n, double *sx, int *incx, double *sy, int *incy) 8 | { 9 | long int i, m, nn, iincx, iincy; 10 | double stemp; 11 | long int ix, iy; 12 | 13 | /* forms the dot product of two vectors. 14 | uses unrolled loops for increments equal to one. 15 | jack dongarra, linpack, 3/11/78. 16 | modified 12/3/93, array(1) declarations changed to array(*) */ 17 | 18 | /* Dereference inputs */ 19 | nn = *n; 20 | iincx = *incx; 21 | iincy = *incy; 22 | 23 | stemp = 0.0; 24 | if (nn > 0) 25 | { 26 | if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */ 27 | { 28 | m = nn-4; 29 | for (i = 0; i < m; i += 5) 30 | stemp += sx[i] * sy[i] + sx[i+1] * sy[i+1] + sx[i+2] * sy[i+2] + 31 | sx[i+3] * sy[i+3] + sx[i+4] * sy[i+4]; 32 | 33 | for ( ; i < nn; i++) /* clean-up loop */ 34 | stemp += sx[i] * sy[i]; 35 | } 36 | else /* code for unequal increments or equal increments not equal to 1 */ 37 | { 38 | ix = 0; 39 | iy = 0; 40 | if (iincx < 0) 41 | ix = (1 - nn) * iincx; 42 | if (iincy < 0) 43 | iy = (1 - nn) * iincy; 44 | for (i = 0; i < nn; i++) 45 | { 46 | stemp += sx[ix] * sy[iy]; 47 | ix += iincx; 48 | iy += iincy; 49 | } 50 | } 51 | } 52 | 53 | return stemp; 54 | } /* ddot_ */ 55 | 56 | #ifdef __cplusplus 57 | } 58 | #endif 59 | -------------------------------------------------------------------------------- /external/liblinear-1.94/blas/dnrm2.c: -------------------------------------------------------------------------------- 1 | #include <math.h> /* 
Needed for fabs() and sqrt() */ 2 | #include "blas.h" 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | /* Euclidean norm of the n elements of x taken with stride incx; returns 0.0 when n <= 0 or incx <= 0. */ 8 | double dnrm2_(int *n, double *x, int *incx) 9 | { 10 | long int ix, nn, iincx; 11 | double norm, scale, absxi, ssq, temp; 12 | 13 | /* DNRM2 returns the euclidean norm of a vector via the function 14 | name, so that 15 | 16 | DNRM2 := sqrt( x'*x ) 17 | 18 | -- This version written on 25-October-1982. 19 | Modified on 14-October-1993 to inline the call to SLASSQ. 20 | Sven Hammarling, Nag Ltd. */ 21 | 22 | /* Dereference inputs */ 23 | nn = *n; 24 | iincx = *incx; 25 | 26 | if( nn > 0 && iincx > 0 ) 27 | { 28 | if (nn == 1) 29 | { 30 | norm = fabs(x[0]); 31 | } 32 | else 33 | { 34 | scale = 0.0; 35 | ssq = 1.0; 36 | 37 | /* The following loop is equivalent to this call to the LAPACK 38 | auxiliary routine: CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */ 39 | 40 | for (ix=(nn-1)*iincx; ix>=0; ix-=iincx) 41 | { 42 | if (x[ix] != 0.0) 43 | { 44 | absxi = fabs(x[ix]); 45 | if (scale < absxi) 46 | { 47 | temp = scale / absxi; 48 | ssq = ssq * (temp * temp) + 1.0; 49 | scale = absxi; 50 | } 51 | else 52 | { 53 | temp = absxi / scale; 54 | ssq += temp * temp; 55 | } 56 | } 57 | } 58 | norm = scale * sqrt(ssq); 59 | } 60 | } 61 | else 62 | norm = 0.0; 63 | 64 | return norm; 65 | 66 | } /* dnrm2_ */ 67 | 68 | #ifdef __cplusplus 69 | } 70 | #endif 71 | -------------------------------------------------------------------------------- /external/liblinear-1.94/blas/dscal.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | int dscal_(int *n, double *sa, double *sx, int *incx) 8 | { 9 | long int i, m, nincx, nn, iincx; 10 | double ssa; 11 | 12 | /* scales a vector by a constant. 13 | uses unrolled loops for increment equal to 1. 14 | jack dongarra, linpack, 3/11/78. 15 | modified 3/93 to return if incx .le. 0. 
16 | modified 12/3/93, array(1) declarations changed to array(*) */ 17 | 18 | /* Dereference inputs */ 19 | nn = *n; 20 | iincx = *incx; 21 | ssa = *sa; 22 | 23 | if (nn > 0 && iincx > 0) 24 | { 25 | if (iincx == 1) /* code for increment equal to 1 */ 26 | { 27 | m = nn-4; 28 | for (i = 0; i < m; i += 5) 29 | { 30 | sx[i] = ssa * sx[i]; 31 | sx[i+1] = ssa * sx[i+1]; 32 | sx[i+2] = ssa * sx[i+2]; 33 | sx[i+3] = ssa * sx[i+3]; 34 | sx[i+4] = ssa * sx[i+4]; 35 | } 36 | for ( ; i < nn; ++i) /* clean-up loop */ 37 | sx[i] = ssa * sx[i]; 38 | } 39 | else /* code for increment not equal to 1 */ 40 | { 41 | nincx = nn * iincx; 42 | for (i = 0; i < nincx; i += iincx) 43 | sx[i] = ssa * sx[i]; 44 | } 45 | } 46 | 47 | return 0; 48 | } /* dscal_ */ 49 | 50 | #ifdef __cplusplus 51 | } 52 | #endif 53 | -------------------------------------------------------------------------------- /external/liblinear-1.94/linear.def: -------------------------------------------------------------------------------- 1 | LIBRARY liblinear 2 | EXPORTS 3 | train @1 4 | cross_validation @2 5 | save_model @3 6 | load_model @4 7 | get_nr_feature @5 8 | get_nr_class @6 9 | get_labels @7 10 | predict_values @8 11 | predict @9 12 | predict_probability @10 13 | free_and_destroy_model @11 14 | free_model_content @12 15 | destroy_param @13 16 | check_parameter @14 17 | check_probability_model @15 18 | set_print_string_function @16 19 | -------------------------------------------------------------------------------- /external/liblinear-1.94/linear.h: -------------------------------------------------------------------------------- 1 | #ifndef _LIBLINEAR_H 2 | #define _LIBLINEAR_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | struct feature_node 9 | { 10 | int index; 11 | double value; 12 | }; 13 | 14 | struct problem 15 | { 16 | int l, n; 17 | double *y; 18 | struct feature_node **x; 19 | double bias; /* < 0 if no bias term */ 20 | }; 21 | 22 | enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, 
L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL }; /* solver_type */ 23 | 24 | struct parameter 25 | { 26 | int solver_type; 27 | 28 | /* these are for training only */ 29 | double eps; /* stopping criteria */ 30 | double C; 31 | int nr_weight; 32 | int *weight_label; 33 | double* weight; 34 | double p; 35 | }; 36 | 37 | struct model 38 | { 39 | struct parameter param; 40 | int nr_class; /* number of classes */ 41 | int nr_feature; 42 | double *w; 43 | int *label; /* label of each class */ 44 | double bias; 45 | }; 46 | 47 | struct model* train(const struct problem *prob, const struct parameter *param); 48 | void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target); 49 | 50 | double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values); 51 | double predict(const struct model *model_, const struct feature_node *x); 52 | double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates); 53 | 54 | int save_model(const char *model_file_name, const struct model *model_); 55 | struct model *load_model(const char *model_file_name); 56 | 57 | int get_nr_feature(const struct model *model_); 58 | int get_nr_class(const struct model *model_); 59 | void get_labels(const struct model *model_, int* label); 60 | 61 | void free_model_content(struct model *model_ptr); 62 | void free_and_destroy_model(struct model **model_ptr_ptr); 63 | void destroy_param(struct parameter *param); 64 | 65 | const char *check_parameter(const struct problem *prob, const struct parameter *param); 66 | int check_probability_model(const struct model *model); 67 | void set_print_string_function(void (*print_func) (const char*)); 68 | 69 | #ifdef __cplusplus 70 | } 71 | #endif 72 | 73 | #endif /* _LIBLINEAR_H */ 74 | 75 | 
-------------------------------------------------------------------------------- /external/liblinear-1.94/matlab/Makefile: -------------------------------------------------------------------------------- 1 | # This Makefile is used under Linux 2 | 3 | MATLABDIR ?= /opt/matlab2012b 4 | CXX ?= g++ 5 | #CXX = g++-3.3 6 | CC ?= gcc 7 | CFLAGS = -Wall -Wconversion -O3 -fPIC -I$(MATLABDIR)/extern/include -I.. 8 | 9 | MEX = $(MATLABDIR)/bin/mex 10 | MEX_OPTION = CC\#$(CXX) CXX\#$(CXX) CFLAGS\#"$(CFLAGS)" CXXFLAGS\#"$(CFLAGS)" 11 | # comment the following line if you use MATLAB on a 32-bit computer 12 | MEX_OPTION += -largeArrayDims 13 | MEX_EXT = $(shell $(MATLABDIR)/bin/mexext) 14 | 15 | OCTAVEDIR ?= /usr/include/octave 16 | OCTAVE_MEX = env CC=$(CXX) mkoctfile 17 | OCTAVE_MEX_OPTION = --mex 18 | OCTAVE_MEX_EXT = mex 19 | OCTAVE_CFLAGS = -Wall -O3 -fPIC -I$(OCTAVEDIR) -I.. 20 | 21 | all: matlab 22 | 23 | matlab: binary 24 | 25 | octave: 26 | @make MEX="$(OCTAVE_MEX)" MEX_OPTION="$(OCTAVE_MEX_OPTION)" \ 27 | MEX_EXT="$(OCTAVE_MEX_EXT)" CFLAGS="$(OCTAVE_CFLAGS)" \ 28 | binary 29 | 30 | binary: train.$(MEX_EXT) predict.$(MEX_EXT) libsvmread.$(MEX_EXT) libsvmwrite.$(MEX_EXT) 31 | 32 | train.$(MEX_EXT): train.c ../linear.h ../tron.o ../linear.o linear_model_matlab.o ../blas/blas.a 33 | $(MEX) $(MEX_OPTION) train.c ../tron.o ../linear.o linear_model_matlab.o ../blas/blas.a 34 | 35 | predict.$(MEX_EXT): predict.c ../linear.h ../tron.o ../linear.o linear_model_matlab.o ../blas/blas.a 36 | $(MEX) $(MEX_OPTION) predict.c ../tron.o ../linear.o linear_model_matlab.o ../blas/blas.a 37 | 38 | libsvmread.$(MEX_EXT): libsvmread.c 39 | $(MEX) $(MEX_OPTION) libsvmread.c 40 | 41 | libsvmwrite.$(MEX_EXT): libsvmwrite.c 42 | $(MEX) $(MEX_OPTION) libsvmwrite.c 43 | 44 | linear_model_matlab.o: linear_model_matlab.c ../linear.h 45 | $(CXX) $(CFLAGS) -c linear_model_matlab.c 46 | 47 | ../linear.o: ../linear.cpp ../linear.h 48 | make -C .. 
linear.o 49 | 50 | ../tron.o: ../tron.cpp ../tron.h 51 | make -C .. tron.o 52 | 53 | ../blas/blas.a: ../blas/*.c ../blas/*.h 54 | make -C ../blas OPTFLAGS='$(CFLAGS)' CC='$(CC)'; 55 | 56 | clean: 57 | make -C ../blas clean 58 | rm -f *~ *.o *.mex* *.obj ../linear.o ../tron.o 59 | -------------------------------------------------------------------------------- /external/liblinear-1.94/matlab/libsvmread.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <string.h> 3 | #include <stdlib.h> 4 | #include <ctype.h> 5 | #include <errno.h> 6 | 7 | #include "mex.h" 8 | 9 | #ifdef MX_API_VER 10 | #if MX_API_VER < 0x07030000 11 | typedef int mwIndex; 12 | #endif 13 | #endif 14 | #ifndef max 15 | #define max(x,y) (((x)>(y))?(x):(y)) 16 | #endif 17 | #ifndef min 18 | #define min(x,y) (((x)<(y))?(x):(y)) 19 | #endif 20 | 21 | void exit_with_help() 22 | { 23 | mexPrintf( 24 | "Usage: [label_vector, instance_matrix] = libsvmread('filename');\n" 25 | ); 26 | } 27 | 28 | static void fake_answer(int nlhs, mxArray *plhs[]) 29 | { 30 | int i; 31 | for(i=0;i start from 0 87 | strtok(line," \t"); // label 88 | while (1) 89 | { 90 | idx = strtok(NULL,":"); // index:value 91 | val = strtok(NULL," \t"); 92 | if(val == NULL) 93 | break; 94 | 95 | errno = 0; 96 | index = (int) strtol(idx,&endptr,10); 97 | if(endptr == idx || errno != 0 || *endptr != '\0' || index <= inst_max_index) 98 | { 99 | mexPrintf("Wrong input format at line %d\n",l+1); 100 | fake_answer(nlhs, plhs); 101 | return; 102 | } 103 | else 104 | inst_max_index = index; 105 | 106 | min_index = min(min_index, index); 107 | elements++; 108 | } 109 | max_index = max(max_index, inst_max_index); 110 | l++; 111 | } 112 | rewind(fp); 113 | 114 | // y 115 | plhs[0] = mxCreateDoubleMatrix(l, 1, mxREAL); 116 | // x^T 117 | if (min_index <= 0) 118 | plhs[1] = mxCreateSparse(max_index-min_index+1, l, elements, mxREAL); 119 | else 120 | plhs[1] = mxCreateSparse(max_index, l, elements, mxREAL); 121 | 122 | labels = 
mxGetPr(plhs[0]); 123 | samples = mxGetPr(plhs[1]); 124 | ir = mxGetIr(plhs[1]); 125 | jc = mxGetJc(plhs[1]); 126 | 127 | k=0; 128 | for(i=0;i start from 0 159 | 160 | errno = 0; 161 | samples[k] = strtod(val,&endptr); 162 | if (endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) 163 | { 164 | mexPrintf("Wrong input format at line %d\n",i+1); 165 | fake_answer(nlhs, plhs); 166 | return; 167 | } 168 | ++k; 169 | } 170 | } 171 | jc[l] = k; 172 | 173 | fclose(fp); 174 | free(line); 175 | 176 | { 177 | mxArray *rhs[1], *lhs[1]; 178 | rhs[0] = plhs[1]; 179 | if(mexCallMATLAB(1, lhs, 1, rhs, "transpose")) 180 | { 181 | mexPrintf("Error: cannot transpose problem\n"); 182 | fake_answer(nlhs, plhs); 183 | return; 184 | } 185 | plhs[1] = lhs[0]; 186 | } 187 | } 188 | // MEX entry point: expects exactly one input (the file name) and two outputs, then hands over to read_problem 189 | void mexFunction( int nlhs, mxArray *plhs[], 190 | int nrhs, const mxArray *prhs[] ) 191 | { 192 | char filename[256]; 193 | 194 | if(nrhs != 1 || nlhs != 2) 195 | { 196 | exit_with_help(); 197 | fake_answer(nlhs, plhs); 198 | return; 199 | } 200 | // bound the copy by the buffer: mxGetString returns non-zero when prhs[0] is not a string or does not fit in filename[] 201 | if(mxGetString(prhs[0], filename, sizeof(filename)) != 0) 202 | { 203 | mexPrintf("Error: filename must be a string of at most %d characters\n", (int)sizeof(filename) - 1); 204 | fake_answer(nlhs, plhs); 205 | return; 206 | } 207 | 208 | 209 | read_problem(filename, nlhs, plhs); 210 | 211 | return; 212 | } 213 | 214 | -------------------------------------------------------------------------------- /external/liblinear-1.94/matlab/libsvmwrite.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | #include "mex.h" 5 | 6 | #ifdef MX_API_VER 7 | #if MX_API_VER < 0x07030000 8 | typedef int mwIndex; 9 | #endif 10 | #endif 11 | 12 | void exit_with_help() 13 | { 14 | mexPrintf( 15 | "Usage: libsvmwrite('filename', label_vector, instance_matrix);\n" 16 | ); 17 | } 18 | 19 | static void fake_answer(int nlhs, mxArray *plhs[]) 20 | { 21 | int i; 22 | for(i=0;i 0) 89 | { 90 | exit_with_help(); 91 | fake_answer(nlhs, plhs); 92 | return; 93 | } 94 | 
95 | // Transform the input Matrix to libsvm format 96 | if(nrhs == 3) 97 | { 98 | char filename[256]; 99 | if(!mxIsDouble(prhs[1]) || !mxIsDouble(prhs[2])) 100 | { 101 | mexPrintf("Error: label vector and instance matrix must be double\n"); 102 | return; 103 | } 104 | 105 | mxGetString(prhs[0], filename, mxGetN(prhs[0])+1); 106 | 107 | if(mxIsSparse(prhs[2])) 108 | libsvmwrite(filename, prhs[1], prhs[2]); 109 | else 110 | { 111 | mexPrintf("Instance_matrix must be sparse\n"); 112 | return; 113 | } 114 | } 115 | else 116 | { 117 | exit_with_help(); 118 | return; 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /external/liblinear-1.94/matlab/linear_model_matlab.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../linear.h" 4 | 5 | #include "mex.h" 6 | 7 | #ifdef MX_API_VER 8 | #if MX_API_VER < 0x07030000 9 | typedef int mwIndex; 10 | #endif 11 | #endif 12 | 13 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 14 | 15 | #define NUM_OF_RETURN_FIELD 6 16 | 17 | static const char *field_names[] = { 18 | "Parameters", 19 | "nr_class", 20 | "nr_feature", 21 | "bias", 22 | "Label", 23 | "w", 24 | }; 25 | 26 | const char *model_to_matlab_structure(mxArray *plhs[], struct model *model_) 27 | { 28 | int i; 29 | int nr_w; 30 | double *ptr; 31 | mxArray *return_model, **rhs; 32 | int out_id = 0; 33 | int n, w_size; 34 | 35 | rhs = (mxArray **)mxMalloc(sizeof(mxArray *)*NUM_OF_RETURN_FIELD); 36 | 37 | // Parameters 38 | // for now, only solver_type is needed 39 | rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); 40 | ptr = mxGetPr(rhs[out_id]); 41 | ptr[0] = model_->param.solver_type; 42 | out_id++; 43 | 44 | // nr_class 45 | rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); 46 | ptr = mxGetPr(rhs[out_id]); 47 | ptr[0] = model_->nr_class; 48 | out_id++; 49 | 50 | if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) 51 | nr_w=1; 52 | 
else 53 | nr_w=model_->nr_class; 54 | 55 | // nr_feature 56 | rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); 57 | ptr = mxGetPr(rhs[out_id]); 58 | ptr[0] = model_->nr_feature; 59 | out_id++; 60 | 61 | // bias 62 | rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); 63 | ptr = mxGetPr(rhs[out_id]); 64 | ptr[0] = model_->bias; 65 | out_id++; 66 | 67 | if(model_->bias>=0) 68 | n=model_->nr_feature+1; 69 | else 70 | n=model_->nr_feature; 71 | 72 | w_size = n; 73 | // Label 74 | if(model_->label) 75 | { 76 | rhs[out_id] = mxCreateDoubleMatrix(model_->nr_class, 1, mxREAL); 77 | ptr = mxGetPr(rhs[out_id]); 78 | for(i = 0; i < model_->nr_class; i++) 79 | ptr[i] = model_->label[i]; 80 | } 81 | else 82 | rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); 83 | out_id++; 84 | 85 | // w 86 | rhs[out_id] = mxCreateDoubleMatrix(nr_w, w_size, mxREAL); 87 | ptr = mxGetPr(rhs[out_id]); 88 | for(i = 0; i < w_size*nr_w; i++) 89 | ptr[i]=model_->w[i]; 90 | out_id++; 91 | 92 | /* Create a struct matrix contains NUM_OF_RETURN_FIELD fields */ 93 | return_model = mxCreateStructMatrix(1, 1, NUM_OF_RETURN_FIELD, field_names); 94 | 95 | /* Fill struct matrix with input arguments */ 96 | for(i = 0; i < NUM_OF_RETURN_FIELD; i++) 97 | mxSetField(return_model,0,field_names[i],mxDuplicateArray(rhs[i])); 98 | /* return */ 99 | plhs[0] = return_model; 100 | mxFree(rhs); 101 | 102 | return NULL; 103 | } 104 | /* Fill *model_ from a MATLAB struct whose fields are read by position in the order Parameters, nr_class, nr_feature, bias, Label, w. model_->label and model_->w are malloc'ed here. Always returns NULL. */ 105 | const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_struct) 106 | { 107 | int i, num_of_fields; 108 | int nr_w; 109 | double *ptr; 110 | int id = 0; 111 | int n, w_size; 112 | mxArray **rhs; 113 | 114 | num_of_fields = mxGetNumberOfFields(matlab_struct); 115 | rhs = (mxArray **) mxMalloc(sizeof(mxArray *)*num_of_fields); 116 | 117 | for(i=0;i<num_of_fields;i++) 118 | rhs[i] = mxGetFieldByNumber(matlab_struct, 0, i); 119 | 120 | model_->nr_class=0; 121 | nr_w=0; 122 | model_->nr_feature=0; 123 | model_->w=NULL; 124 | model_->label=NULL; 125 | 126 | // Parameters 127 | ptr = mxGetPr(rhs[id]); 128 | model_->param.solver_type = (int)ptr[0]; 129 | id++; 130 | 
131 | // nr_class 132 | ptr = mxGetPr(rhs[id]); 133 | model_->nr_class = (int)ptr[0]; 134 | id++; 135 | 136 | if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) 137 | nr_w=1; 138 | else 139 | nr_w=model_->nr_class; 140 | 141 | // nr_feature 142 | ptr = mxGetPr(rhs[id]); 143 | model_->nr_feature = (int)ptr[0]; 144 | id++; 145 | 146 | // bias: a double, copied untruncated -- an (int) cast would turn e.g. -0.5 (no bias term) into 0 (bias term present) and enlarge w_size 147 | ptr = mxGetPr(rhs[id]); 148 | model_->bias = ptr[0]; 149 | id++; 150 | 151 | if(model_->bias>=0) 152 | n=model_->nr_feature+1; 153 | else 154 | n=model_->nr_feature; 155 | w_size = n; 156 | 157 | // Label 158 | if(mxIsEmpty(rhs[id]) == 0) 159 | { 160 | model_->label = Malloc(int, model_->nr_class); 161 | ptr = mxGetPr(rhs[id]); 162 | for(i=0;i<model_->nr_class;i++) 163 | model_->label[i] = (int)ptr[i]; 164 | } 165 | id++; 166 | 167 | ptr = mxGetPr(rhs[id]); 168 | model_->w=Malloc(double, w_size*nr_w); 169 | for(i = 0; i < w_size*nr_w; i++) 170 | model_->w[i]=ptr[i]; 171 | id++; 172 | mxFree(rhs); 173 | 174 | return NULL; 175 | } 176 | 177 | -------------------------------------------------------------------------------- /external/liblinear-1.94/matlab/linear_model_matlab.h: -------------------------------------------------------------------------------- 1 | const char *model_to_matlab_structure(mxArray *plhs[], struct model *model_); 2 | const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_struct); 3 | -------------------------------------------------------------------------------- /external/liblinear-1.94/matlab/make.m: -------------------------------------------------------------------------------- 1 | % This make.m is for MATLAB and OCTAVE under Windows, Mac, and Unix 2 | 3 | try 4 | Type = ver; 5 | % This part is for OCTAVE 6 | if(strcmp(Type(1).Name, 'Octave') == 1) 7 | mex libsvmread.c 8 | mex libsvmwrite.c 9 | mex train.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/*.c 10 | mex predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/*.c 11 | % This part is 
for MATLAB 12 | % Add -largeArrayDims on 64-bit machines of MATLAB 13 | else 14 | mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmread.c 15 | mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmwrite.c 16 | mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims train.c linear_model_matlab.c ../linear.cpp ../tron.cpp "../blas/*.c" 17 | mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp "../blas/*.c" 18 | end 19 | catch 20 | fprintf('If make.m fails, please check README about detailed instructions.\n'); 21 | end 22 | -------------------------------------------------------------------------------- /external/liblinear-1.94/python/Makefile: -------------------------------------------------------------------------------- 1 | all = lib 2 | 3 | lib: 4 | make -C .. lib 5 | -------------------------------------------------------------------------------- /external/liblinear-1.94/tron.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRON_H 2 | #define _TRON_H 3 | 4 | class function 5 | { 6 | public: 7 | virtual double fun(double *w) = 0 ; 8 | virtual void grad(double *w, double *g) = 0 ; 9 | virtual void Hv(double *s, double *Hs) = 0 ; 10 | 11 | virtual int get_nr_variable(void) = 0 ; 12 | virtual ~function(void){} 13 | }; 14 | 15 | class TRON 16 | { 17 | public: 18 | TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000); 19 | ~TRON(); 20 | 21 | void tron(double *w); 22 | void set_print_string(void (*i_print) (const char *buf)); 23 | 24 | private: 25 | int trcg(double delta, double *g, double *s, double *r); 26 | double norm_inf(int n, double *x); 27 | 28 | double eps; 29 | int max_iter; 30 | function *fun_obj; 31 | void info(const char *fmt,...); 32 | void (*tron_print_string)(const char *buf); 33 | }; 34 | #endif 35 | -------------------------------------------------------------------------------- 
/model-defs/Semantic_segmentation_aware_region_deploy_softmax.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape{dim:128 dim:512 dim:9 dim:9} 3 | 4 | layer { name: "fc1" type: "InnerProduct" bottom: "data" top: "fc1" param {lr_mult: 0.01 decay_mult: 1} param {lr_mult: 0.02 decay_mult: 0} 5 | inner_product_param{num_output: 2048 weight_filler{type: "gaussian" std: 0.0001} bias_filler{type: "constant" value: 0}}} 6 | layer {bottom: "fc1" top: "fc1" name: "relu1" type: "ReLU"} 7 | layer {bottom: "fc1" top: "fc1" name: "drop1" type: "Dropout" dropout_param{dropout_ratio: 0.5}} 8 | 9 | layer {name: "fc2_pascal" type: "InnerProduct" bottom: "fc1" top: "fc2_pascal" param {lr_mult: 1 decay_mult: 1} param {lr_mult: 2 decay_mult: 0} 10 | inner_product_param{num_output: 21 weight_filler{type: "gaussian" std: 0.03} bias_filler{type: "constant" value: 0}}} 11 | 12 | layer { name: "pascal_softmax" type: "Softmax" bottom: "fc2_pascal" top: "pascal_softmax"} 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /model-defs/Semantic_segmentation_aware_region_deploy_svm.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape{dim:128 dim:512 dim:9 dim:9} 3 | 4 | layer { name: "fc1" type: "InnerProduct" bottom: "data" top: "fc1" param {lr_mult: 0.01 decay_mult: 1} param {lr_mult: 0.02 decay_mult: 0} 5 | inner_product_param{num_output: 2048 weight_filler{type: "gaussian" std: 0.0001} bias_filler{type: "constant" value: 0}}} 6 | layer {bottom: "fc1" top: "fc1" name: "relu1" type: "ReLU"} 7 | layer {bottom: "fc1" top: "fc1" name: "drop1" type: "Dropout" dropout_param{dropout_ratio: 0.5}} 8 | 9 | layer {name: "pascal_svm" type: "InnerProduct" bottom: "fc1" top: "pascal_svm" param {lr_mult: 1 decay_mult: 1} param {lr_mult: 2 decay_mult: 0} 10 | inner_product_param{num_output: 20 weight_filler{type: 
"gaussian" std: 0.03} bias_filler{type: "constant" value: 0}}} 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /model-defs/Semantic_segmentation_aware_region_pascal_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "Semantic_segmentation_aware_region_pascal_train_test.prototxt" 2 | test_iter: 1 3 | weight_decay: 0.0005 4 | test_interval: 2000 5 | momentum: 0.9 6 | base_lr: 0.001 7 | lr_policy: "step" 8 | gamma: 0.1 9 | stepsize: 50000 10 | display: 2000 11 | max_iter: 250000 12 | snapshot: 2000 13 | snapshot_prefix: "model" 14 | solver_mode: 1 15 | -------------------------------------------------------------------------------- /model-defs/Semantic_segmentation_aware_region_pascal_train_test.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape{dim:128 dim:512 dim:9 dim:9} 3 | input: "label" 4 | input_shape{dim:128 dim:1} 5 | 6 | layer { name: "fc1" type: "InnerProduct" bottom: "data" top: "fc1" param {lr_mult: 0.01 decay_mult: 1} param {lr_mult: 0.02 decay_mult: 0} 7 | inner_product_param{num_output: 2048 weight_filler{type: "gaussian" std: 0.0001} bias_filler{type: "constant" value: 0}}} 8 | layer {bottom: "fc1" top: "fc1" name: "relu1" type: "ReLU"} 9 | layer {bottom: "fc1" top: "fc1" name: "drop1" type: "Dropout" dropout_param{dropout_ratio: 0.5}} 10 | 11 | layer {name: "fc2_pascal" type: "InnerProduct" bottom: "fc1" top: "fc2_pascal" param {lr_mult: 1 decay_mult: 1} param {lr_mult: 2 decay_mult: 0} 12 | inner_product_param{num_output: 21 weight_filler{type: "gaussian" std: 0.03} bias_filler{type: "constant" value: 0}}} 13 | 14 | layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc2_pascal" bottom: "label" top: "loss"} 15 | layer { name: "accuracy" type: "Accuracy" bottom: "fc2_pascal" bottom: "label" top: "accuracy"} 16 | 17 | layer { name: "predictions" type: "Softmax" bottom: 
"fc2_pascal" top: "predictions" include: { phase: TEST } } 18 | layer { name: "predictions_silence" type: "Silence" bottom: "predictions" include: { phase: TEST } } 19 | 20 | 21 | -------------------------------------------------------------------------------- /model-defs/VGG16_Region_Adaptation_BBox_Regression_Module_deploy.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape{dim:128 dim:25088 dim:1 dim:1} 3 | 4 | #--------------------------layer 6------------------------ 5 | layer { name: "fc6" type: "InnerProduct" bottom: "data" top: "fc6" param {lr_mult: 1} param {lr_mult: 2} inner_product_param{ num_output: 4096}} 6 | layer { bottom: "fc6" top: "fc6" name: "relu6" type: "ReLU"} 7 | layer { bottom: "fc6" top: "fc6" name: "drop6" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } 8 | 9 | #--------------------------layer 7------------------------ 10 | layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" param {lr_mult: 1} param {lr_mult: 2} inner_product_param{ num_output: 4096}} 11 | layer { bottom: "fc7" top: "fc7" name: "relu7" type: "ReLU"} 12 | layer { bottom: "fc7" top: "fc7" name: "drop7" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } 13 | 14 | #--------------------------layer 8------------------------ 15 | layer { name: "fc8_bbox_reg" type: "InnerProduct" bottom: "fc7" top: "fc8_bbox_reg" param {lr_mult: 1 decay_mult: 1.} param {lr_mult: 2 decay_mult: 0.} 16 | inner_product_param{ num_output: 80 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 0 }}} 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /model-defs/VGG16_Region_Adaptation_BBox_Regression_Module_train_test.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape{dim:128 dim:25088 dim:1 dim:1} # 25088 = 512 * 7 * 7 3 | input: "mask" 4 | 
input_shape{dim:128 dim:80} # 80 = 4 values per class 5 | input: "targets" 6 | input_shape{dim:128 dim:80} # 80 = 4 values per class 7 | 8 | #--------------------------layer 6------------------------ 9 | layer { name: "fc6" type: "InnerProduct" bottom: "data" top: "fc6" param {lr_mult: 1} param {lr_mult: 2} inner_product_param{ num_output: 4096}} 10 | layer { bottom: "fc6" top: "fc6" name: "relu6" type: "ReLU"} 11 | layer { bottom: "fc6" top: "fc6" name: "drop6" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } 12 | 13 | #--------------------------layer 7------------------------ 14 | layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" param {lr_mult: 1} param {lr_mult: 2} inner_product_param{ num_output: 4096}} 15 | layer { bottom: "fc7" top: "fc7" name: "relu7" type: "ReLU"} 16 | layer { bottom: "fc7" top: "fc7" name: "drop7" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } 17 | 18 | #--------------------------layer 8------------------------ 19 | layer { name: "fc8_bbox_reg" type: "InnerProduct" bottom: "fc7" top: "fc8_bbox_reg" param {lr_mult: 1 decay_mult: 1.} param {lr_mult: 2 decay_mult: 0.} 20 | inner_product_param{ num_output: 80 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 0 }}} 21 | 22 | #-------------------------- loss ------------------------- 23 | # mask the predicted values 24 | layer {name: "fc8_bbox_reg_mask" type: "Eltwise" bottom: "fc8_bbox_reg" bottom: "mask" top: "predictions" eltwise_param{operation: PROD stable_prod_grad: true}} 25 | layer {name: "targets_mask" type: "Eltwise" bottom: "targets" bottom: "mask" top: "targets_mask" eltwise_param{operation: PROD stable_prod_grad: true}} 26 | layer {name: "error" type: "EuclideanLoss" bottom: "targets_mask" bottom: "predictions" top: "error"} 27 | -------------------------------------------------------------------------------- /model-defs/VGG16_Region_Adaptation_BBox_Regression_Module_train_test_solver.prototxt: 
-------------------------------------------------------------------------------- 1 | net: "VGG16_Region_Adaptation_BBox_Regression_Module_train_test.prototxt" 2 | test_iter: 1 3 | test_interval: 2000 4 | momentum: 0.9 5 | weight_decay: 0.0005 6 | base_lr: 0.01 7 | lr_policy: "step" 8 | gamma: 0.1 9 | stepsize: 60000 10 | display: 2000 11 | max_iter: 180000 12 | snapshot: 2000 13 | snapshot_prefix: "model" 14 | # debug_info: true 15 | solver_mode: 1 16 | -------------------------------------------------------------------------------- /model-defs/VGG16_Region_Adaptation_Module_train_test.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape{dim:128 dim:25088 dim:1 dim:1} # 25088 = 512 * 7 * 7 3 | input: "label" 4 | input_shape{dim:128 dim:1} 5 | 6 | layer { name: "fc6" type: "InnerProduct" bottom: "data" top: "fc6" param {lr_mult: 1} param {lr_mult: 2} inner_product_param{ num_output: 4096}} 7 | layer { bottom: "fc6" top: "fc6" name: "relu6" type: "ReLU"} 8 | layer { bottom: "fc6" top: "fc6" name: "drop6" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } 9 | 10 | layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" param {lr_mult: 1} param {lr_mult: 2} inner_product_param{ num_output: 4096}} 11 | layer { bottom: "fc7" top: "fc7" name: "relu7" type: "ReLU"} 12 | layer { bottom: "fc7" top: "fc7" name: "drop7" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } 13 | 14 | layer { name: "fc8_pascal" type: "InnerProduct" bottom: "fc7" top: "fc8_pascal" param {lr_mult: 1 decay_mult: 1.} param {lr_mult: 2 decay_mult: 0.} 15 | inner_product_param{ num_output: 21 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 1 }}} 16 | 17 | layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc8_pascal" bottom: "label" top: "loss"} 18 | layer { name: "accuracy" type: "Accuracy" bottom: "fc8_pascal" bottom: "label" top: "accuracy"} 19 | 20 | layer { name: 
"predictions" type: "Softmax" bottom: "fc8_pascal" top: "predictions" include: { phase: TEST } } 21 | layer { name: "predictions_silence" type: "Silence" bottom: "predictions" include: { phase: TEST } } 22 | -------------------------------------------------------------------------------- /model-defs/VGG16_Region_Adaptation_Module_train_test_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "VGG16_Region_Adaptation_Module_train_test.prototxt" 2 | test_iter: 1 3 | test_interval: 2000 4 | weight_decay: 0.0005 5 | momentum: 0.9 6 | base_lr: 0.001 7 | lr_policy: "step" 8 | gamma: 0.1 9 | stepsize: 80000 10 | display: 2000 11 | max_iter: 180000 12 | snapshot: 2000 13 | snapshot_prefix: "model" 14 | solver_mode: 1 15 | -------------------------------------------------------------------------------- /model-defs/VGG16_Region_Adaptation_deploy_softmax.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape{dim:128 dim:25088 dim:1 dim:1} # 25088 = 512 * 7 * 7 3 | 4 | layer { name: "fc6" type: "InnerProduct" bottom: "data" top: "fc6" param {lr_mult: 1} param {lr_mult: 2} inner_product_param{ num_output: 4096}} 5 | layer { bottom: "fc6" top: "fc6" name: "relu6" type: "ReLU"} 6 | layer { bottom: "fc6" top: "fc6" name: "drop6" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } 7 | 8 | layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" param {lr_mult: 1} param {lr_mult: 2} inner_product_param{ num_output: 4096}} 9 | layer { bottom: "fc7" top: "fc7" name: "relu7" type: "ReLU"} 10 | layer { bottom: "fc7" top: "fc7" name: "drop7" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } 11 | 12 | layer { name: "fc8_pascal" type: "InnerProduct" bottom: "fc7" top: "fc8_pascal" param {lr_mult: 1 decay_mult: 1.} param {lr_mult: 2 decay_mult: 0.} 13 | inner_product_param{ num_output: 21 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: 
"constant" value: 1 }}} 14 | 15 | layer { name: "pascal_softmax" type: "Softmax" bottom: "fc8_pascal" top: "pascal_softmax"} 16 | -------------------------------------------------------------------------------- /model-defs/VGG16_Region_Adaptation_deploy_svm.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape{dim:128 dim:25088 dim:1 dim:1} # 25088 = 512 * 7 * 7 3 | 4 | layer { name: "fc6" type: "InnerProduct" bottom: "data" top: "fc6" param {lr_mult: 1} param {lr_mult: 2} inner_product_param{ num_output: 4096}} 5 | layer { bottom: "fc6" top: "fc6" name: "relu6" type: "ReLU"} 6 | layer { bottom: "fc6" top: "fc6" name: "drop6" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } 7 | 8 | layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" param {lr_mult: 1} param {lr_mult: 2} inner_product_param{ num_output: 4096}} 9 | layer { bottom: "fc7" top: "fc7" name: "relu7" type: "ReLU"} 10 | layer { bottom: "fc7" top: "fc7" name: "drop7" type: "Dropout" dropout_param { dropout_ratio: 0.5 } } 11 | 12 | layer { name: "pascal_svm" type: "InnerProduct" bottom: "fc7" top: "pascal_svm" param {lr_mult: 1 decay_mult: 1.} param {lr_mult: 2 decay_mult: 0.} 13 | inner_product_param{ num_output: 20 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 1 }}} 14 | -------------------------------------------------------------------------------- /model-defs/auxiliary_def_files/Semantic_segmentation_aware_net_pascal_train_test_stream11.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape{dim:128 dim:512 dim:9 dim:9} 3 | input: "label" 4 | input_shape{dim:128 dim:1} 5 | 6 | layer { name: "fc1_s11" type: "InnerProduct" bottom: "data" top: "fc1_s11" param {lr_mult: 0.01 decay_mult: 1} param {lr_mult: 0.02 decay_mult: 0} 7 | inner_product_param{num_output: 2048 weight_filler{type: "gaussian" std: 0.0001} 
bias_filler{type: "constant" value: 0}}} 8 | layer {bottom: "fc1_s11" top: "fc1_s11" name: "relu1_s11" type: "ReLU"} 9 | layer {bottom: "fc1_s11" top: "fc1_s11" name: "drop1_s11" type: "Dropout" dropout_param{dropout_ratio: 0.5}} 10 | 11 | layer {name: "fc2_pascal_s11" type: "InnerProduct" bottom: "fc1_s11" top: "fc2_pascal_s11" param {lr_mult: 1 decay_mult: 1} param {lr_mult: 2 decay_mult: 0} 12 | inner_product_param{num_output: 21 weight_filler{type: "gaussian" std: 0.03} bias_filler{type: "constant" value: 0}}} 13 | 14 | layer { name: "loss" type: "SoftmaxWithLoss" bottom: "fc2_pascal_s11" bottom: "label" top: "loss"} 15 | layer { name: "accuracy" type: "Accuracy" bottom: "fc2_pascal_s11" bottom: "label" top: "accuracy"} 16 | 17 | layer { name: "predictions" type: "Softmax" bottom: "fc2_pascal_s11" top: "predictions" include: { phase: TEST } } 18 | layer { name: "predictions_silence" type: "Silence" bottom: "predictions" include: { phase: TEST } } 19 | -------------------------------------------------------------------------------- /model-defs/auxiliary_def_files/VGG_ILSVRC_16_layers_pascal_train_test_stream1.prototxt: -------------------------------------------------------------------------------- 1 | input: "data_s1" 2 | input_dim: 128 3 | input_dim: 25088 4 | input_dim: 1 5 | input_dim: 1 6 | input: "label" 7 | input_dim: 128 8 | input_dim: 1 9 | input_dim: 1 10 | input_dim: 1 11 | 12 | #--------------------------layer 6------------------------ 13 | layers { name: "fc6_s1" type: INNER_PRODUCT bottom: "data_s1" top: "fc6_s1" blobs_lr: 1.0 blobs_lr: 2.0 inner_product_param{ num_output: 4096}} 14 | layers { name: "relu6_s1" type: RELU bottom: "fc6_s1" top: "fc6_s1" } 15 | layers { name: "drop6_s1" type: DROPOUT bottom: "fc6_s1" top: "fc6_s1" dropout_param{ dropout_ratio: 0.5}} 16 | 17 | #--------------------------layer 7------------------------ 18 | layers {name: "fc7_s1" type: INNER_PRODUCT bottom: "fc6_s1" top: "fc7_s1" blobs_lr: 1.0 blobs_lr: 2.0 
inner_product_param{ num_output: 4096 }} 19 | layers {name: "relu7_s1" type: RELU bottom: "fc7_s1" top: "fc7_s1"} 20 | layers {name: "drop7_s1" type: DROPOUT bottom: "fc7_s1" top: "fc7_s1" dropout_param{ dropout_ratio: 0.5}} 21 | 22 | #--------------------------layer 8------------------------ 23 | layers {name: "fc8_pascal_s1" type: INNER_PRODUCT bottom: "fc7_s1" top: "fc8_pascal_s1" blobs_lr: 1. blobs_lr: 2. weight_decay: 1. weight_decay: 0. 24 | inner_product_param{ num_output: 21 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 1 } }} 25 | 26 | #-----------------------output------------------------ 27 | layers { name: "loss" type: SOFTMAX_LOSS bottom: "fc8_pascal_s1" bottom: "label" top: "loss"} 28 | layers { name: "predictions" type: SOFTMAX bottom: "fc8_pascal_s1" top: "predictions" include: { phase: TEST } } 29 | layers { name: "accuracy" type: ACCURACY bottom: "fc8_pascal_s1" bottom: "label" top: "accuracy"} 30 | -------------------------------------------------------------------------------- /model-defs/auxiliary_def_files/VGG_ILSVRC_16_layers_pascal_train_test_stream10.prototxt: -------------------------------------------------------------------------------- 1 | input: "data_s10" 2 | input_dim: 128 3 | input_dim: 25088 4 | input_dim: 1 5 | input_dim: 1 6 | input: "label" 7 | input_dim: 128 8 | input_dim: 1 9 | input_dim: 1 10 | input_dim: 1 11 | 12 | #--------------------------layer 6------------------------ 13 | layers { name: "fc6_s10" type: INNER_PRODUCT bottom: "data_s10" top: "fc6_s10" blobs_lr: 1.0 blobs_lr: 2.0 inner_product_param{ num_output: 4096}} 14 | layers { name: "relu6_s10" type: RELU bottom: "fc6_s10" top: "fc6_s10" } 15 | layers { name: "drop6_s10" type: DROPOUT bottom: "fc6_s10" top: "fc6_s10" dropout_param{ dropout_ratio: 0.5}} 16 | 17 | #--------------------------layer 7------------------------ 18 | layers {name: "fc7_s10" type: INNER_PRODUCT bottom: "fc6_s10" top: "fc7_s10" blobs_lr: 1.0 blobs_lr: 2.0 
inner_product_param{ num_output: 4096 }} 19 | layers {name: "relu7_s10" type: RELU bottom: "fc7_s10" top: "fc7_s10"} 20 | layers {name: "drop7_s10" type: DROPOUT bottom: "fc7_s10" top: "fc7_s10" dropout_param{ dropout_ratio: 0.5}} 21 | 22 | #--------------------------layer 8------------------------ 23 | layers {name: "fc8_pascal_s10" type: INNER_PRODUCT bottom: "fc7_s10" top: "fc8_pascal_s10" blobs_lr: 1. blobs_lr: 2. weight_decay: 1. weight_decay: 0. 24 | inner_product_param{ num_output: 21 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 1 } }} 25 | 26 | #-----------------------output------------------------ 27 | layers { name: "loss" type: SOFTMAX_LOSS bottom: "fc8_pascal_s10" bottom: "label" top: "loss"} 28 | layers { name: "predictions" type: SOFTMAX bottom: "fc8_pascal_s10" top: "predictions" include: { phase: TEST } } 29 | layers { name: "accuracy" type: ACCURACY bottom: "fc8_pascal_s10" bottom: "label" top: "accuracy"} 30 | -------------------------------------------------------------------------------- /model-defs/auxiliary_def_files/VGG_ILSVRC_16_layers_pascal_train_test_stream2.prototxt: -------------------------------------------------------------------------------- 1 | input: "data_s2" 2 | input_dim: 128 3 | input_dim: 25088 4 | input_dim: 1 5 | input_dim: 1 6 | input: "label" 7 | input_dim: 128 8 | input_dim: 1 9 | input_dim: 1 10 | input_dim: 1 11 | 12 | #--------------------------layer 6------------------------ 13 | layers { name: "fc6_s2" type: INNER_PRODUCT bottom: "data_s2" top: "fc6_s2" blobs_lr: 1.0 blobs_lr: 2.0 inner_product_param{ num_output: 4096}} 14 | layers { name: "relu6_s2" type: RELU bottom: "fc6_s2" top: "fc6_s2" } 15 | layers { name: "drop6_s2" type: DROPOUT bottom: "fc6_s2" top: "fc6_s2" dropout_param{ dropout_ratio: 0.5}} 16 | 17 | #--------------------------layer 7------------------------ 18 | layers {name: "fc7_s2" type: INNER_PRODUCT bottom: "fc6_s2" top: "fc7_s2" blobs_lr: 1.0 blobs_lr: 2.0 
inner_product_param{ num_output: 4096 }} 19 | layers {name: "relu7_s2" type: RELU bottom: "fc7_s2" top: "fc7_s2"} 20 | layers {name: "drop7_s2" type: DROPOUT bottom: "fc7_s2" top: "fc7_s2" dropout_param{ dropout_ratio: 0.5}} 21 | 22 | #--------------------------layer 8------------------------ 23 | layers {name: "fc8_pascal_s2" type: INNER_PRODUCT bottom: "fc7_s2" top: "fc8_pascal_s2" blobs_lr: 1. blobs_lr: 2. weight_decay: 1. weight_decay: 0. 24 | inner_product_param{ num_output: 21 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 1 } }} 25 | 26 | #-----------------------output------------------------ 27 | layers { name: "loss" type: SOFTMAX_LOSS bottom: "fc8_pascal_s2" bottom: "label" top: "loss"} 28 | layers { name: "predictions" type: SOFTMAX bottom: "fc8_pascal_s2" top: "predictions" include: { phase: TEST } } 29 | layers { name: "accuracy" type: ACCURACY bottom: "fc8_pascal_s2" bottom: "label" top: "accuracy"} 30 | -------------------------------------------------------------------------------- /model-defs/auxiliary_def_files/VGG_ILSVRC_16_layers_pascal_train_test_stream3.prototxt: -------------------------------------------------------------------------------- 1 | input: "data_s3" 2 | input_dim: 128 3 | input_dim: 25088 4 | input_dim: 1 5 | input_dim: 1 6 | input: "label" 7 | input_dim: 128 8 | input_dim: 1 9 | input_dim: 1 10 | input_dim: 1 11 | 12 | #--------------------------layer 6------------------------ 13 | layers { name: "fc6_s3" type: INNER_PRODUCT bottom: "data_s3" top: "fc6_s3" blobs_lr: 1.0 blobs_lr: 2.0 inner_product_param{ num_output: 4096}} 14 | layers { name: "relu6_s3" type: RELU bottom: "fc6_s3" top: "fc6_s3" } 15 | layers { name: "drop6_s3" type: DROPOUT bottom: "fc6_s3" top: "fc6_s3" dropout_param{ dropout_ratio: 0.5}} 16 | 17 | #--------------------------layer 7------------------------ 18 | layers {name: "fc7_s3" type: INNER_PRODUCT bottom: "fc6_s3" top: "fc7_s3" blobs_lr: 1.0 blobs_lr: 2.0 
inner_product_param{ num_output: 4096 }} 19 | layers {name: "relu7_s3" type: RELU bottom: "fc7_s3" top: "fc7_s3"} 20 | layers {name: "drop7_s3" type: DROPOUT bottom: "fc7_s3" top: "fc7_s3" dropout_param{ dropout_ratio: 0.5}} 21 | 22 | #--------------------------layer 8------------------------ 23 | layers {name: "fc8_pascal_s3" type: INNER_PRODUCT bottom: "fc7_s3" top: "fc8_pascal_s3" blobs_lr: 1. blobs_lr: 2. weight_decay: 1. weight_decay: 0. 24 | inner_product_param{ num_output: 21 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 1 } }} 25 | 26 | #-----------------------output------------------------ 27 | layers { name: "loss" type: SOFTMAX_LOSS bottom: "fc8_pascal_s3" bottom: "label" top: "loss"} 28 | layers { name: "predictions" type: SOFTMAX bottom: "fc8_pascal_s3" top: "predictions" include: { phase: TEST } } 29 | layers { name: "accuracy" type: ACCURACY bottom: "fc8_pascal_s3" bottom: "label" top: "accuracy"} 30 | -------------------------------------------------------------------------------- /model-defs/auxiliary_def_files/VGG_ILSVRC_16_layers_pascal_train_test_stream4.prototxt: -------------------------------------------------------------------------------- 1 | input: "data_s4" 2 | input_dim: 128 3 | input_dim: 25088 4 | input_dim: 1 5 | input_dim: 1 6 | input: "label" 7 | input_dim: 128 8 | input_dim: 1 9 | input_dim: 1 10 | input_dim: 1 11 | 12 | #--------------------------layer 6------------------------ 13 | layers { name: "fc6_s4" type: INNER_PRODUCT bottom: "data_s4" top: "fc6_s4" blobs_lr: 1.0 blobs_lr: 2.0 inner_product_param{ num_output: 4096}} 14 | layers { name: "relu6_s4" type: RELU bottom: "fc6_s4" top: "fc6_s4" } 15 | layers { name: "drop6_s4" type: DROPOUT bottom: "fc6_s4" top: "fc6_s4" dropout_param{ dropout_ratio: 0.5}} 16 | 17 | #--------------------------layer 7------------------------ 18 | layers {name: "fc7_s4" type: INNER_PRODUCT bottom: "fc6_s4" top: "fc7_s4" blobs_lr: 1.0 blobs_lr: 2.0 
inner_product_param{ num_output: 4096 }} 19 | layers {name: "relu7_s4" type: RELU bottom: "fc7_s4" top: "fc7_s4"} 20 | layers {name: "drop7_s4" type: DROPOUT bottom: "fc7_s4" top: "fc7_s4" dropout_param{ dropout_ratio: 0.5}} 21 | 22 | #--------------------------layer 8------------------------ 23 | layers {name: "fc8_pascal_s4" type: INNER_PRODUCT bottom: "fc7_s4" top: "fc8_pascal_s4" blobs_lr: 1. blobs_lr: 2. weight_decay: 1. weight_decay: 0. 24 | inner_product_param{ num_output: 21 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 1 } }} 25 | 26 | #-----------------------output------------------------ 27 | layers { name: "loss" type: SOFTMAX_LOSS bottom: "fc8_pascal_s4" bottom: "label" top: "loss"} 28 | layers { name: "predictions" type: SOFTMAX bottom: "fc8_pascal_s4" top: "predictions" include: { phase: TEST } } 29 | layers { name: "accuracy" type: ACCURACY bottom: "fc8_pascal_s4" bottom: "label" top: "accuracy"} 30 | -------------------------------------------------------------------------------- /model-defs/auxiliary_def_files/VGG_ILSVRC_16_layers_pascal_train_test_stream5.prototxt: -------------------------------------------------------------------------------- 1 | input: "data_s5" 2 | input_dim: 128 3 | input_dim: 25088 4 | input_dim: 1 5 | input_dim: 1 6 | input: "label" 7 | input_dim: 128 8 | input_dim: 1 9 | input_dim: 1 10 | input_dim: 1 11 | 12 | #--------------------------layer 6------------------------ 13 | layers { name: "fc6_s5" type: INNER_PRODUCT bottom: "data_s5" top: "fc6_s5" blobs_lr: 1.0 blobs_lr: 2.0 inner_product_param{ num_output: 4096}} 14 | layers { name: "relu6_s5" type: RELU bottom: "fc6_s5" top: "fc6_s5" } 15 | layers { name: "drop6_s5" type: DROPOUT bottom: "fc6_s5" top: "fc6_s5" dropout_param{ dropout_ratio: 0.5}} 16 | 17 | #--------------------------layer 7------------------------ 18 | layers {name: "fc7_s5" type: INNER_PRODUCT bottom: "fc6_s5" top: "fc7_s5" blobs_lr: 1.0 blobs_lr: 2.0 
inner_product_param{ num_output: 4096 }} 19 | layers {name: "relu7_s5" type: RELU bottom: "fc7_s5" top: "fc7_s5"} 20 | layers {name: "drop7_s5" type: DROPOUT bottom: "fc7_s5" top: "fc7_s5" dropout_param{ dropout_ratio: 0.5}} 21 | 22 | #--------------------------layer 8------------------------ 23 | layers {name: "fc8_pascal_s5" type: INNER_PRODUCT bottom: "fc7_s5" top: "fc8_pascal_s5" blobs_lr: 1. blobs_lr: 2. weight_decay: 1. weight_decay: 0. 24 | inner_product_param{ num_output: 21 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 1 } }} 25 | 26 | #-----------------------output------------------------ 27 | layers { name: "loss" type: SOFTMAX_LOSS bottom: "fc8_pascal_s5" bottom: "label" top: "loss"} 28 | layers { name: "predictions" type: SOFTMAX bottom: "fc8_pascal_s5" top: "predictions" include: { phase: TEST } } 29 | layers { name: "accuracy" type: ACCURACY bottom: "fc8_pascal_s5" bottom: "label" top: "accuracy"} 30 | -------------------------------------------------------------------------------- /model-defs/auxiliary_def_files/VGG_ILSVRC_16_layers_pascal_train_test_stream6.prototxt: -------------------------------------------------------------------------------- 1 | input: "data_s6" 2 | input_dim: 128 3 | input_dim: 25088 4 | input_dim: 1 5 | input_dim: 1 6 | input: "label" 7 | input_dim: 128 8 | input_dim: 1 9 | input_dim: 1 10 | input_dim: 1 11 | 12 | #--------------------------layer 6------------------------ 13 | layers { name: "fc6_s6" type: INNER_PRODUCT bottom: "data_s6" top: "fc6_s6" blobs_lr: 1.0 blobs_lr: 2.0 inner_product_param{ num_output: 4096}} 14 | layers { name: "relu6_s6" type: RELU bottom: "fc6_s6" top: "fc6_s6" } 15 | layers { name: "drop6_s6" type: DROPOUT bottom: "fc6_s6" top: "fc6_s6" dropout_param{ dropout_ratio: 0.5}} 16 | 17 | #--------------------------layer 7------------------------ 18 | layers {name: "fc7_s6" type: INNER_PRODUCT bottom: "fc6_s6" top: "fc7_s6" blobs_lr: 1.0 blobs_lr: 2.0 
inner_product_param{ num_output: 4096 }} 19 | layers {name: "relu7_s6" type: RELU bottom: "fc7_s6" top: "fc7_s6"} 20 | layers {name: "drop7_s6" type: DROPOUT bottom: "fc7_s6" top: "fc7_s6" dropout_param{ dropout_ratio: 0.5}} 21 | 22 | #--------------------------layer 8------------------------ 23 | layers {name: "fc8_pascal_s6" type: INNER_PRODUCT bottom: "fc7_s6" top: "fc8_pascal_s6" blobs_lr: 1. blobs_lr: 2. weight_decay: 1. weight_decay: 0. 24 | inner_product_param{ num_output: 21 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 1 } }} 25 | 26 | #-----------------------output------------------------ 27 | layers { name: "loss" type: SOFTMAX_LOSS bottom: "fc8_pascal_s6" bottom: "label" top: "loss"} 28 | layers { name: "predictions" type: SOFTMAX bottom: "fc8_pascal_s6" top: "predictions" include: { phase: TEST } } 29 | layers { name: "accuracy" type: ACCURACY bottom: "fc8_pascal_s6" bottom: "label" top: "accuracy"} 30 | -------------------------------------------------------------------------------- /model-defs/auxiliary_def_files/VGG_ILSVRC_16_layers_pascal_train_test_stream7.prototxt: -------------------------------------------------------------------------------- 1 | input: "data_s7" 2 | input_dim: 128 3 | input_dim: 25088 4 | input_dim: 1 5 | input_dim: 1 6 | input: "label" 7 | input_dim: 128 8 | input_dim: 1 9 | input_dim: 1 10 | input_dim: 1 11 | 12 | #--------------------------layer 6------------------------ 13 | layers { name: "fc6_s7" type: INNER_PRODUCT bottom: "data_s7" top: "fc6_s7" blobs_lr: 1.0 blobs_lr: 2.0 inner_product_param{ num_output: 4096}} 14 | layers { name: "relu6_s7" type: RELU bottom: "fc6_s7" top: "fc6_s7" } 15 | layers { name: "drop6_s7" type: DROPOUT bottom: "fc6_s7" top: "fc6_s7" dropout_param{ dropout_ratio: 0.5}} 16 | 17 | #--------------------------layer 7------------------------ 18 | layers {name: "fc7_s7" type: INNER_PRODUCT bottom: "fc6_s7" top: "fc7_s7" blobs_lr: 1.0 blobs_lr: 2.0 
inner_product_param{ num_output: 4096 }} 19 | layers {name: "relu7_s7" type: RELU bottom: "fc7_s7" top: "fc7_s7"} 20 | layers {name: "drop7_s7" type: DROPOUT bottom: "fc7_s7" top: "fc7_s7" dropout_param{ dropout_ratio: 0.5}} 21 | 22 | #--------------------------layer 8------------------------ 23 | layers {name: "fc8_pascal_s7" type: INNER_PRODUCT bottom: "fc7_s7" top: "fc8_pascal_s7" blobs_lr: 1. blobs_lr: 2. weight_decay: 1. weight_decay: 0. 24 | inner_product_param{ num_output: 21 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 1 } }} 25 | 26 | #-----------------------output------------------------ 27 | layers { name: "loss" type: SOFTMAX_LOSS bottom: "fc8_pascal_s7" bottom: "label" top: "loss"} 28 | layers { name: "predictions" type: SOFTMAX bottom: "fc8_pascal_s7" top: "predictions" include: { phase: TEST } } 29 | layers { name: "accuracy" type: ACCURACY bottom: "fc8_pascal_s7" bottom: "label" top: "accuracy"} 30 | -------------------------------------------------------------------------------- /model-defs/auxiliary_def_files/VGG_ILSVRC_16_layers_pascal_train_test_stream8.prototxt: -------------------------------------------------------------------------------- 1 | input: "data_s8" 2 | input_dim: 128 3 | input_dim: 25088 4 | input_dim: 1 5 | input_dim: 1 6 | input: "label" 7 | input_dim: 128 8 | input_dim: 1 9 | input_dim: 1 10 | input_dim: 1 11 | 12 | #--------------------------layer 6------------------------ 13 | layers { name: "fc6_s8" type: INNER_PRODUCT bottom: "data_s8" top: "fc6_s8" blobs_lr: 1.0 blobs_lr: 2.0 inner_product_param{ num_output: 4096}} 14 | layers { name: "relu6_s8" type: RELU bottom: "fc6_s8" top: "fc6_s8" } 15 | layers { name: "drop6_s8" type: DROPOUT bottom: "fc6_s8" top: "fc6_s8" dropout_param{ dropout_ratio: 0.5}} 16 | 17 | #--------------------------layer 7------------------------ 18 | layers {name: "fc7_s8" type: INNER_PRODUCT bottom: "fc6_s8" top: "fc7_s8" blobs_lr: 1.0 blobs_lr: 2.0 
inner_product_param{ num_output: 4096 }} 19 | layers {name: "relu7_s8" type: RELU bottom: "fc7_s8" top: "fc7_s8"} 20 | layers {name: "drop7_s8" type: DROPOUT bottom: "fc7_s8" top: "fc7_s8" dropout_param{ dropout_ratio: 0.5}} 21 | 22 | #--------------------------layer 8------------------------ 23 | layers {name: "fc8_pascal_s8" type: INNER_PRODUCT bottom: "fc7_s8" top: "fc8_pascal_s8" blobs_lr: 1. blobs_lr: 2. weight_decay: 1. weight_decay: 0. 24 | inner_product_param{ num_output: 21 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 1 } }} 25 | 26 | #-----------------------output------------------------ 27 | layers { name: "loss" type: SOFTMAX_LOSS bottom: "fc8_pascal_s8" bottom: "label" top: "loss"} 28 | layers { name: "predictions" type: SOFTMAX bottom: "fc8_pascal_s8" top: "predictions" include: { phase: TEST } } 29 | layers { name: "accuracy" type: ACCURACY bottom: "fc8_pascal_s8" bottom: "label" top: "accuracy"} 30 | -------------------------------------------------------------------------------- /model-defs/auxiliary_def_files/VGG_ILSVRC_16_layers_pascal_train_test_stream9.prototxt: -------------------------------------------------------------------------------- 1 | input: "data_s9" 2 | input_dim: 128 3 | input_dim: 25088 4 | input_dim: 1 5 | input_dim: 1 6 | input: "label" 7 | input_dim: 128 8 | input_dim: 1 9 | input_dim: 1 10 | input_dim: 1 11 | 12 | #--------------------------layer 6------------------------ 13 | layers { name: "fc6_s9" type: INNER_PRODUCT bottom: "data_s9" top: "fc6_s9" blobs_lr: 1.0 blobs_lr: 2.0 inner_product_param{ num_output: 4096}} 14 | layers { name: "relu6_s9" type: RELU bottom: "fc6_s9" top: "fc6_s9" } 15 | layers { name: "drop6_s9" type: DROPOUT bottom: "fc6_s9" top: "fc6_s9" dropout_param{ dropout_ratio: 0.5}} 16 | 17 | #--------------------------layer 7------------------------ 18 | layers {name: "fc7_s9" type: INNER_PRODUCT bottom: "fc6_s9" top: "fc7_s9" blobs_lr: 1.0 blobs_lr: 2.0 
inner_product_param{ num_output: 4096 }} 19 | layers {name: "relu7_s9" type: RELU bottom: "fc7_s9" top: "fc7_s9"} 20 | layers {name: "drop7_s9" type: DROPOUT bottom: "fc7_s9" top: "fc7_s9" dropout_param{ dropout_ratio: 0.5}} 21 | 22 | #--------------------------layer 8------------------------ 23 | layers {name: "fc8_pascal_s9" type: INNER_PRODUCT bottom: "fc7_s9" top: "fc8_pascal_s9" blobs_lr: 1. blobs_lr: 2. weight_decay: 1. weight_decay: 0. 24 | inner_product_param{ num_output: 21 weight_filler { type: "gaussian" std: 0.01} bias_filler { type: "constant" value: 1 } }} 25 | 26 | #-----------------------output------------------------ 27 | layers { name: "loss" type: SOFTMAX_LOSS bottom: "fc8_pascal_s9" bottom: "label" top: "loss"} 28 | layers { name: "predictions" type: SOFTMAX bottom: "fc8_pascal_s9" top: "predictions" include: { phase: TEST } } 29 | layers { name: "accuracy" type: ACCURACY bottom: "fc8_pascal_s9" bottom: "label" top: "accuracy"} 30 | -------------------------------------------------------------------------------- /mrcnn_build.m: -------------------------------------------------------------------------------- 1 | function mrcnn_build() 2 | if ~exist('liblinear_train') 3 | fprintf('Compiling liblinear version 1.94\n'); 4 | fprintf('Source code page:\n'); 5 | fprintf(' http://www.csie.ntu.edu.tw/~cjlin/liblinear/\n'); 6 | mex -outdir bin ... 7 | CFLAGS="\$CFLAGS -std=c99 -O3 -fPIC" -largeArrayDims ... 8 | external/liblinear-1.94/matlab/train.c ... 9 | external/liblinear-1.94/matlab/linear_model_matlab.c ... 10 | external/liblinear-1.94/linear.cpp ... 11 | external/liblinear-1.94/tron.cpp ... 12 | "external/liblinear-1.94/blas/*.c" ... 13 | -output liblinear_train; 14 | end 15 | 16 | if ~exist('adaptive_region_pooling_mex') 17 | fprintf('Compiling adaptive_region_pooling_mex\n'); 18 | 19 | mex -outdir bin ... 20 | -largeArrayDims ... 21 | code/adaptive_region_pooling/adaptive_region_pooling_mex.cpp ... 
22 | -output adaptive_region_pooling_mex; 23 | end 24 | 25 | if ~exist('nms_mex') 26 | fprintf('Compiling nms_mex\n'); 27 | 28 | mex -outdir bin ... 29 | -largeArrayDims ... 30 | code/postprocessing/nms_mex.cpp ... 31 | -output nms_mex; 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /startup.m: -------------------------------------------------------------------------------- 1 | function startup() 2 | % STARTUP Add the optional proposal toolboxes, the project code, bin/ and matcaffe to the MATLAB path, and create the project folders that are missing. 3 | curdir = fileparts(mfilename('fullpath')); 4 | 5 | % set to edge_boxes_path the path where the edge boxes code 6 | % (https://github.com/pdollar/edges) is installed 7 | edge_boxes_path = '../edges/'; 8 | if exist(edge_boxes_path,'dir') > 0 9 | addpath(edge_boxes_path) 10 | else 11 | warning('The Edge Boxes installation directory "%s" is not valid. Please install the Edge boxes code (https://github.com/pdollar/edges) and set the path to its installation directory in the edge_boxes_path variable of the startup.m file if you want to use the Edge Box proposals', edge_boxes_path) 12 | end 13 | 14 | edge_boxes_link_path = fullfile(curdir, 'external', 'edges'); 15 | if exist(edge_boxes_link_path, 'dir') == 0 16 | warning('A link to the edge box installation directory is missing from external/edges; See README.md'); 17 | end 18 | 19 | % set to pdollar_toolbox_path the path where Piotr's Matlab Toolbox 20 | % (http://vision.ucsd.edu/~pdollar/toolbox/doc/index.html) is installed 21 | pdollar_toolbox_path = '../pdollar-toolbox/'; 22 | if exist(pdollar_toolbox_path,'dir') > 0 23 | addpath(genpath(pdollar_toolbox_path)) 24 | else 25 | warning('The installation directory "%s" to Piotrs image processing toolbox (http://vision.ucsd.edu/~pdollar/toolbox/doc/index.html) is not valid.
Please install the toolbox and set the installation directory path to the pdollar_toolbox_path variable of the startup.m file if you want to use the Edge Box proposals', pdollar_toolbox_path) 26 | end 27 | 28 | % set to selective_search_boxes_path the path where the Selective Search code 29 | % (http://huppelen.nl/publications/SelectiveSearchCodeIJCV.zip) is installed 30 | selective_search_boxes_path = '../selective_search/'; 31 | if exist(selective_search_boxes_path,'dir') > 0 32 | addpath(genpath(selective_search_boxes_path)) 33 | else 34 | warning('The installation directory "%s" to the Selective Search code (http://huppelen.nl/publications/SelectiveSearchCodeIJCV.zip) is not valid. Please install the Selective Search code and set the installation directory path to the selective_search_boxes_path variable of the startup.m file if you want to use the Selective Search proposals', selective_search_boxes_path) 35 | end 36 | % project code goes on the path first: mkdir_if_missing, called below, is code/utils/mkdir_if_missing.m 37 | addpath(genpath(fullfile(curdir, 'code'))); 38 | mkdir_if_missing(fullfile(curdir, 'bin')); % create bin/ before adding it: addpath only warns, and adds nothing, for a folder that does not exist 39 | addpath(fullfile(curdir, 'bin')); 40 | mkdir_if_missing(fullfile(curdir, 'external')); 41 | caffe_path = fullfile(curdir, 'external', 'caffe', 'matlab'); 42 | if exist(caffe_path, 'dir') == 0 43 | error('matcaffe is missing from external/caffe/matlab; See README.md'); 44 | end 45 | addpath(genpath(caffe_path)); 46 | 47 | 48 | 49 | % remaining top-level folders, created only when missing (bin/ was already handled above) 50 | mkdir_if_missing(fullfile(curdir, 'models-exps')); 51 | mkdir_if_missing(fullfile(curdir, 'feat_cache')); 52 | mkdir_if_missing(fullfile(curdir, 'data')); 53 | 54 | 55 | end 56 | --------------------------------------------------------------------------------