├── output ├── .gitignore └── README.md ├── experiments ├── logs │ └── .gitignore ├── cfgs │ ├── fc_only.yml │ ├── piecewise.yml │ ├── no_bbox_reg.yml │ ├── multiscale.yml │ └── svm.yml ├── README.md └── scripts │ ├── all_vgg16.sh │ ├── all_caffenet.sh │ ├── all_vgg_cnn_m_1024.sh │ ├── multitask_no_bbox_reg_vgg16.sh │ ├── multitask_no_bbox_reg_caffenet.sh │ ├── multitask_no_bbox_reg_vgg_cnn_m_1024.sh │ ├── default_vgg16.sh │ ├── default_caffenet.sh │ ├── default_vgg_cnn_m_1024.sh │ ├── svd_vgg16.sh │ ├── svd_caffenet.sh │ ├── svm_vgg16.sh │ ├── fc_only_vgg16.sh │ ├── svm_caffenet.sh │ ├── svd_vgg_cnn_m_1024.sh │ ├── multiscale_caffenet.sh │ ├── no_bbox_reg_vgg16.sh │ ├── svm_vgg_cnn_m_1024.sh │ ├── no_bbox_reg_caffenet.sh │ ├── piecewise_vgg16.sh │ ├── multiscale_vgg_cnn_m_1024.sh │ ├── piecewise_caffenet.sh │ ├── no_bbox_reg_vgg_cnn_m_1024.sh │ └── piecewise_vgg_cnn_m_1024.sh ├── lib ├── utils │ ├── .gitignore │ ├── __init__.py │ ├── timer.py │ ├── nms.py │ ├── blob.py │ ├── bbox.pyx │ └── nms.pyx ├── Makefile ├── roi_data_layer │ ├── __init__.py │ └── roidb.py ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── xVOCap.m │ │ └── voc_eval.m │ ├── __init__.py │ └── factory.py ├── fast_rcnn │ ├── __init__.py │ └── train.py └── setup.py ├── selective_search ├── .gitignore ├── cat.jpg ├── 000015.jpg ├── MergeBlobs.p ├── ChangeEdges.p ├── SSSimBoxFill.p ├── SSSimColour.p ├── SSSimTexture.p ├── SSSimColourSize.p ├── Dependencies │ ├── Rgb2C.p │ ├── Rgb2Ooo.p │ ├── Rgb2Rg.p │ ├── Rgb2Rgi.p │ ├── Blob2Image.p │ ├── Blob2Vector.p │ ├── BlobAddSizes.p │ ├── Blobs2Boxes.p │ ├── Vector2Hist.p │ ├── gaussianFilter.p │ ├── Image2ColourSpace.p │ ├── BlobAddTextureHists.p │ ├── SegmentIndices2Blobs.p │ ├── Image2OrientedGradients.p │ ├── BoxUnion.m │ ├── NormalizeArray.m │ ├── FelzenSegment │ │ ├── Makefile │ │ ├── README │ │ ├── segment.cpp │ │ ├── imutil.h │ │ ├── misc.h │ │ ├── disjoint-set.h │ │ ├── convolve.h │ │ ├── segment-graph.h │ │ ├── image.h │ │ 
├── filter.h │ │ └── segment-image.h │ ├── BoxRemoveDuplicates.m │ ├── FilterBoxesWidth.m │ ├── ShowRectsWithinImage.m │ ├── BoxSize.m │ ├── BoxIntersection.m │ ├── GetPascalOverlap.m │ ├── BoxBestOverlap.m │ ├── NormalizeRows.m │ ├── PascalOverlap.m │ ├── CountVisualWordsIndex.m │ ├── ShowBlobs.m │ ├── ShowImageCell.m │ ├── mexCountWordsIndex.cpp │ └── anigaussm │ │ ├── anigauss_mex.c │ │ └── anigauss.m ├── SSSimBoxFillOrig.p ├── SSSimBoxFillSize.p ├── SSSimTextureSize.p ├── BlobStructColourHist.p ├── SSSimBoxFillOrigSize.p ├── SSSimTextureSizeFill.p ├── BlobStructTextureHist.p ├── GroundTruthVOC2007test.mat ├── SSSimColourTextureSizeFill.p ├── SSSimColourTextureSizeFillOrig.p ├── BlobStruct2HierarchicalGrouping.p ├── SSSimSize.m ├── README.md ├── selective_search.m ├── BlobBestOverlap.m ├── RecreateBlobHierarchyIndIm.m ├── RecreateBlobHierarchy.m ├── BoxAverageBestOverlap.m ├── BlobAverageBestOverlap.m ├── License.txt ├── demo.m ├── selective_search_rcnn.m ├── Image2HierarchicalGrouping.m └── demoPascal2007.m ├── .gitignore ├── data ├── pylintrc ├── demo │ ├── 000004.jpg │ ├── 001551.jpg │ ├── 000004_boxes.mat │ └── 001551_boxes.mat ├── .gitignore ├── scripts │ ├── fetch_fast_rcnn_models.sh │ ├── fetch_imagenet_models.sh │ └── fetch_selective_search_data.sh └── README.md ├── tools ├── README.md ├── _init_paths.py ├── test_net.py ├── reval.py ├── train_net.py └── compress_net.py ├── todo.txt ├── .gitmodules ├── help ├── INRIA │ └── VOCcode │ │ ├── PASemptyrecord.m │ │ ├── PASemptyobject.m │ │ ├── PASerrmsg.m │ │ ├── VOCinit.m │ │ ├── VOCroc.m │ │ ├── PASreadrecord.m │ │ ├── VOCpr.m │ │ └── VOCevaldet.m ├── test │ └── README.md └── train │ └── README.md ├── README.md ├── matlab ├── README.md ├── fast_rcnn_load_net.m ├── showboxes.m ├── nms.m ├── fast_rcnn_demo.m └── fast_rcnn_im_detect.m ├── models ├── README.md ├── CaffeNet │ ├── solver.prototxt │ ├── piecewise │ │ └── solver.prototxt │ └── no_bbox_reg │ │ ├── solver.prototxt │ │ ├── test.prototxt │ │ └── 
train.prototxt ├── VGG16 │ ├── solver.prototxt │ ├── fc_only │ │ └── solver.prototxt │ ├── piecewise │ │ └── solver.prototxt │ └── no_bbox_reg │ │ └── solver.prototxt └── VGG_CNN_M_1024 │ ├── solver.prototxt │ ├── piecewise │ └── solver.prototxt │ └── no_bbox_reg │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt ├── commands.txt └── LICENSE /output/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /experiments/logs/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt* 2 | -------------------------------------------------------------------------------- /lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.so 3 | -------------------------------------------------------------------------------- /selective_search/.gitignore: -------------------------------------------------------------------------------- 1 | *.mex* 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .ipynb_checkpoints 3 | utils/*.c 4 | utils/*.so 5 | -------------------------------------------------------------------------------- /data/pylintrc: -------------------------------------------------------------------------------- 1 | [TYPECHECK] 2 | 3 | ignored-modules = numpy, numpy.random, cv2 4 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /tools/README.md: 
-------------------------------------------------------------------------------- 1 | Tools for training, testing, and compressing Fast R-CNN networks. 2 | -------------------------------------------------------------------------------- /experiments/cfgs/fc_only.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: fc_only 2 | TRAIN: 3 | SNAPSHOT_INFIX: fc_only 4 | -------------------------------------------------------------------------------- /data/demo/000004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/data/demo/000004.jpg -------------------------------------------------------------------------------- /data/demo/001551.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/data/demo/001551.jpg -------------------------------------------------------------------------------- /experiments/cfgs/piecewise.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: piecewise 2 | TRAIN: 3 | SNAPSHOT_INFIX: piecewise 4 | -------------------------------------------------------------------------------- /selective_search/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/cat.jpg -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | selective_search* 2 | imagenet_models* 3 | fast_rcnn_models* 4 | VOCdevkit* 5 | cache 6 | -------------------------------------------------------------------------------- /data/demo/000004_boxes.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/data/demo/000004_boxes.mat -------------------------------------------------------------------------------- /data/demo/001551_boxes.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/data/demo/001551_boxes.mat -------------------------------------------------------------------------------- /selective_search/000015.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/000015.jpg -------------------------------------------------------------------------------- /selective_search/MergeBlobs.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/MergeBlobs.p -------------------------------------------------------------------------------- /selective_search/ChangeEdges.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/ChangeEdges.p -------------------------------------------------------------------------------- /selective_search/SSSimBoxFill.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimBoxFill.p -------------------------------------------------------------------------------- /selective_search/SSSimColour.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimColour.p -------------------------------------------------------------------------------- /selective_search/SSSimTexture.p: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimTexture.p -------------------------------------------------------------------------------- /selective_search/SSSimColourSize.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimColourSize.p -------------------------------------------------------------------------------- /selective_search/Dependencies/Rgb2C.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Rgb2C.p -------------------------------------------------------------------------------- /selective_search/SSSimBoxFillOrig.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimBoxFillOrig.p -------------------------------------------------------------------------------- /selective_search/SSSimBoxFillSize.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimBoxFillSize.p -------------------------------------------------------------------------------- /selective_search/SSSimTextureSize.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimTextureSize.p -------------------------------------------------------------------------------- /selective_search/BlobStructColourHist.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/BlobStructColourHist.p -------------------------------------------------------------------------------- /selective_search/Dependencies/Rgb2Ooo.p: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Rgb2Ooo.p -------------------------------------------------------------------------------- /selective_search/Dependencies/Rgb2Rg.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Rgb2Rg.p -------------------------------------------------------------------------------- /selective_search/Dependencies/Rgb2Rgi.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Rgb2Rgi.p -------------------------------------------------------------------------------- /selective_search/SSSimBoxFillOrigSize.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimBoxFillOrigSize.p -------------------------------------------------------------------------------- /selective_search/SSSimTextureSizeFill.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimTextureSizeFill.p -------------------------------------------------------------------------------- /selective_search/BlobStructTextureHist.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/BlobStructTextureHist.p -------------------------------------------------------------------------------- /selective_search/Dependencies/Blob2Image.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Blob2Image.p 
-------------------------------------------------------------------------------- /todo.txt: -------------------------------------------------------------------------------- 1 | - ImageNet ILSVRC detection dataset 2 | - COCO bounding-box detection 3 | - Port PASCAL evaluation code from Matlab to Python 4 | -------------------------------------------------------------------------------- /selective_search/Dependencies/Blob2Vector.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Blob2Vector.p -------------------------------------------------------------------------------- /selective_search/Dependencies/BlobAddSizes.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/BlobAddSizes.p -------------------------------------------------------------------------------- /selective_search/Dependencies/Blobs2Boxes.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Blobs2Boxes.p -------------------------------------------------------------------------------- /selective_search/Dependencies/Vector2Hist.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Vector2Hist.p -------------------------------------------------------------------------------- /selective_search/GroundTruthVOC2007test.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/GroundTruthVOC2007test.mat -------------------------------------------------------------------------------- /selective_search/Dependencies/gaussianFilter.p: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/gaussianFilter.p -------------------------------------------------------------------------------- /selective_search/SSSimColourTextureSizeFill.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimColourTextureSizeFill.p -------------------------------------------------------------------------------- /selective_search/Dependencies/Image2ColourSpace.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Image2ColourSpace.p -------------------------------------------------------------------------------- /selective_search/SSSimColourTextureSizeFillOrig.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimColourTextureSizeFillOrig.p -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "caffe-fast-rcnn"] 2 | path = caffe-fast-rcnn 3 | url = https://github.com/rbgirshick/caffe-fast-rcnn.git 4 | branch = fast-rcnn 5 | -------------------------------------------------------------------------------- /experiments/cfgs/no_bbox_reg.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: "no_bbox_reg" 2 | TRAIN: 3 | BBOX_REG: False 4 | SNAPSHOT_INFIX: no_bbox_reg 5 | TEST: 6 | BBOX_REG: False 7 | -------------------------------------------------------------------------------- /selective_search/BlobStruct2HierarchicalGrouping.p: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/BlobStruct2HierarchicalGrouping.p -------------------------------------------------------------------------------- /selective_search/Dependencies/BlobAddTextureHists.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/BlobAddTextureHists.p -------------------------------------------------------------------------------- /selective_search/Dependencies/SegmentIndices2Blobs.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/SegmentIndices2Blobs.p -------------------------------------------------------------------------------- /selective_search/Dependencies/Image2OrientedGradients.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Image2OrientedGradients.p -------------------------------------------------------------------------------- /help/INRIA/VOCcode/PASemptyrecord.m: -------------------------------------------------------------------------------- 1 | function record=PASemptyrecord 2 | record.imgname=''; 3 | record.imgsize=[]; 4 | record.database=''; 5 | record.objects=PASemptyobject; 6 | return -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Train and Test on Another Dataset 2 | - [Train](https://github.com/zeyuanxy/fast-rcnn/blob/master/help/train/README.md) 3 | - [Test](https://github.com/zeyuanxy/fast-rcnn/blob/master/help/test/README.md) 4 | -------------------------------------------------------------------------------- 
/matlab/README.md: -------------------------------------------------------------------------------- 1 | A basic demo in MATLAB. 2 | 3 | Detection is also implemented in MATLAB (though missing some bells and whistles 4 | compared to the Python version) via the fast_rcnn_im_detect() function. 5 | 6 | See fast_rcnn_demo.m for example usage. 7 | -------------------------------------------------------------------------------- /experiments/cfgs/multiscale.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: multiscale 2 | TRAIN: 3 | SCALES: !!python/tuple [480, 576, 688, 864, 1200] 4 | MAX_SIZE: 2000 5 | SNAPSHOT_INFIX: multiscale 6 | TEST: 7 | SCALES: !!python/tuple [480, 576, 688, 864, 1200] 8 | MAX_SIZE: 2000 9 | -------------------------------------------------------------------------------- /models/README.md: -------------------------------------------------------------------------------- 1 | Prototxt files that define models and solvers. 2 | 3 | Three models are defined, with some variations of each to support experiments 4 | in the paper. 
5 | - CaffeNet (model **S**) 6 | - VGG_CNN_M_1024 (model **M**) 7 | - VGG16 (model **L**) 8 | -------------------------------------------------------------------------------- /experiments/cfgs/svm.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: svm 2 | TRAIN: 3 | # don't use flipped examples when training SVMs for two reasons: 4 | # 1) R-CNN didn't 5 | # 2) I've tried and it doesn't help, yet makes SVM training take 2x longer 6 | USE_FLIPPED: False 7 | TEST: 8 | SVM: True 9 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /experiments/README.md: -------------------------------------------------------------------------------- 1 | Scripts to reproduce most of the experiments in the paper. 2 | 3 | Scripts are under `experiments/scripts`. 4 | 5 | Each script saves a log file under `experiments/logs`. 6 | 7 | Configuration override files used in the experiments are stored in `experiments/cfgs`. 
8 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /help/INRIA/VOCcode/PASemptyobject.m: -------------------------------------------------------------------------------- 1 | function object=PASemptyobject 2 | object.label=''; 3 | object.orglabel=''; 4 | object.bbox=[]; 5 | object.polygon=[]; 6 | object.mask=''; 7 | object.class=''; 8 | object.view=''; 9 | object.truncated=false; 10 | object.difficult=false; 11 | return -------------------------------------------------------------------------------- /selective_search/SSSimSize.m: -------------------------------------------------------------------------------- 1 | function [similarity indSim] = SSSimSize(a, b, blobStruct) 2 | % function similarity = SSSimSize(a, b, blobStruct) 3 | % 4 | % Calculate size similarity 5 | 6 | similarity = (blobStruct.imSize - blobStruct.size(a) - blobStruct.size(b)) ... 
7 | ./ blobStruct.imSize; 8 | 9 | indSim = similarity; -------------------------------------------------------------------------------- /help/INRIA/VOCcode/PASerrmsg.m: -------------------------------------------------------------------------------- 1 | function PASerrmsg(PASerr,SYSerr) 2 | fprintf('Pascal Error Message: %s\n',PASerr); 3 | fprintf('System Error Message: %s\n',SYSerr); 4 | k=input('Enter K for keyboard, any other key to continue or ^C to quit ...','s'); 5 | if (~isempty(k)), if (lower(k)=='k'), keyboard; end; end; 6 | fprintf('\n'); 7 | return -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /commands.txt: -------------------------------------------------------------------------------- 1 | ./tools/train_net.py --gpu 0 --solver models/VGG_CNN_M_1024/solver.prototxt \ 2 | --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel --imdb inria_train 3 | 4 | ./tools/test_net.py --gpu 1 --def models/VGG_CNN_M_1024/test.prototxt \ 5 | 
--net output/default/train/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel --imdb inria_test 6 | -------------------------------------------------------------------------------- /output/README.md: -------------------------------------------------------------------------------- 1 | Artifacts generated by the scripts in `tools` are written in this directory. 2 | 3 | Trained Fast R-CNN networks are saved under: 4 | 5 | ``` 6 | output/<experiment directory>/<dataset name>/ 7 | ``` 8 | 9 | Test outputs are saved under: 10 | 11 | ``` 12 | output/<experiment directory>/<dataset name>/<network snapshot name>/ 13 | ``` 14 | -------------------------------------------------------------------------------- /lib/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from . import config 9 | from . import train 10 | from . 
import test 11 | -------------------------------------------------------------------------------- /experiments/scripts/all_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | GPU=$1 5 | NET=vgg16 6 | ./experiments/scripts/default_${NET}.sh $GPU 7 | ./experiments/scripts/fc_only_${NET}.sh $GPU 8 | ./experiments/scripts/multitask_no_bbox_reg_${NET}.sh $GPU 9 | ./experiments/scripts/no_bbox_reg_${NET}.sh $GPU 10 | ./experiments/scripts/piecewise_${NET}.sh $GPU 11 | ./experiments/scripts/svd_${NET}.sh $GPU 12 | ./experiments/scripts/svm_${NET}.sh $GPU 13 | -------------------------------------------------------------------------------- /experiments/scripts/all_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | GPU=$1 5 | NET=caffenet 6 | ./experiments/scripts/default_${NET}.sh $GPU 7 | ./experiments/scripts/multiscale_${NET}.sh $GPU 8 | ./experiments/scripts/multitask_no_bbox_reg_${NET}.sh $GPU 9 | ./experiments/scripts/no_bbox_reg_${NET}.sh $GPU 10 | ./experiments/scripts/piecewise_${NET}.sh $GPU 11 | ./experiments/scripts/svd_${NET}.sh $GPU 12 | ./experiments/scripts/svm_${NET}.sh $GPU 13 | -------------------------------------------------------------------------------- /experiments/scripts/all_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | GPU=$1 5 | NET=vgg_cnn_m_1024 6 | ./experiments/scripts/default_${NET}.sh $GPU 7 | ./experiments/scripts/multiscale_${NET}.sh $GPU 8 | ./experiments/scripts/multitask_no_bbox_reg_${NET}.sh $GPU 9 | ./experiments/scripts/no_bbox_reg_${NET}.sh $GPU 10 | ./experiments/scripts/piecewise_${NET}.sh $GPU 11 | ./experiments/scripts/svd_${NET}.sh $GPU 12 | ./experiments/scripts/svm_${NET}.sh $GPU 13 | -------------------------------------------------------------------------------- 
/models/CaffeNet/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/CaffeNet/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "caffenet_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /selective_search/Dependencies/BoxUnion.m: -------------------------------------------------------------------------------- 1 | function union = BoxUnion(a, b) 2 | % union = BoxUnion(a, b) 3 | % 4 | % Creates the union box of two bounding boxes. 5 | % 6 | % a: Input bounding box "a" 7 | % b: Input bounding box "b" 8 | % 9 | % union: Smallest box enclosing both box a and box b 10 | % 11 | % Jasper Uijlings - 2013 12 | 13 | union = [min(a(:,1),b(:,1)) min(a(:,2),b(:,2)) ... 
14 | max(a(:,3),b(:,3)) max(a(:,4),b(:,4))]; 15 | -------------------------------------------------------------------------------- /models/VGG16/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG16/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_fast_rcnn" 16 | #debug_info: true 17 | -------------------------------------------------------------------------------- /models/CaffeNet/piecewise/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/CaffeNet/piecewise/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "caffenet_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG_CNN_M_1024/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn" 15 | #debug_info: 
true 16 | -------------------------------------------------------------------------------- /models/CaffeNet/no_bbox_reg/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/CaffeNet/no_bbox_reg/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "caffenet_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /models/VGG16/fc_only/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG16/fc_only/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_fast_rcnn" 16 | #debug_info: true 17 | -------------------------------------------------------------------------------- /models/VGG16/piecewise/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG16/piecewise/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_fast_rcnn" 16 | #debug_info: true 
17 | -------------------------------------------------------------------------------- /models/VGG16/no_bbox_reg/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG16/no_bbox_reg/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | # iter_size: 1 9 | momentum: 0.9 10 | weight_decay: 0.0005 11 | # We disable standard caffe solver snapshotting and implement our own snapshot 12 | # function 13 | snapshot: 0 14 | # We still use the snapshot prefix, though 15 | snapshot_prefix: "vgg16_fast_rcnn" 16 | #debug_info: true 17 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/piecewise/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG_CNN_M_1024/piecewise/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/no_bbox_reg/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/VGG_CNN_M_1024/no_bbox_reg/train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | average_loss: 100 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | # We disable standard caffe solver snapshotting and implement our own snapshot 11 | # function 12 | snapshot: 0 13 | # We still use the snapshot prefix, though 14 | snapshot_prefix: 
"vgg_cnn_m_1024_fast_rcnn" 15 | #debug_info: true 16 | -------------------------------------------------------------------------------- /selective_search/Dependencies/NormalizeArray.m: -------------------------------------------------------------------------------- 1 | function b = NormalizeArray(a) 2 | % Normalizes array a. This means that the minimum value will become 0 and 3 | % the maximum value 1. 4 | % 5 | % a: Input array. 6 | % 7 | % b: Normalized output array 8 | % 9 | % Jasper Uijlings - 2013 10 | 11 | minVal = min(a(:)); 12 | maxVal = max(a(:)); 13 | 14 | diffVal = maxVal - minVal; 15 | 16 | b = a - minVal; 17 | if diffVal ~= 0 18 | b = b ./ diffVal; 19 | end 20 | -------------------------------------------------------------------------------- /selective_search/Dependencies/FelzenSegment/Makefile: -------------------------------------------------------------------------------- 1 | INCDIR = -I. 2 | DBG = -g 3 | OPT = -O3 4 | CPP = g++ 5 | CFLAGS = $(DBG) $(OPT) $(INCDIR) 6 | LINK = -lm 7 | 8 | .cpp.o: 9 | $(CPP) $(CFLAGS) -c $< -o $@ 10 | 11 | all: segment 12 | 13 | segment: segment.cpp segment-image.h segment-graph.h disjoint-set.h 14 | $(CPP) $(CFLAGS) -o segment segment.cpp $(LINK) 15 | 16 | clean: 17 | /bin/rm -f segment *.o 18 | 19 | clean-all: clean 20 | /bin/rm -f *~ 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /experiments/scripts/multitask_no_bbox_reg_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/multitask_no_bbox_reg_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/test_net.py --gpu $1 \ 13 | --def models/VGG16/test.prototxt \ 14 | --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000.caffemodel \ 15 | --imdb voc_2007_test \ 16 | --cfg 
experiments/cfgs/no_bbox_reg.yml 17 | -------------------------------------------------------------------------------- /experiments/scripts/multitask_no_bbox_reg_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/multitask_no_bbox_reg_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/test_net.py --gpu $1 \ 13 | --def models/CaffeNet/test.prototxt \ 14 | --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000.caffemodel \ 15 | --imdb voc_2007_test \ 16 | --cfg experiments/cfgs/no_bbox_reg.yml 17 | -------------------------------------------------------------------------------- /selective_search/README.md: -------------------------------------------------------------------------------- 1 | This is mostly the code from [Segmentation as Selective Search for Object Recognition](http://koen.me/research/selectivesearch/), downloaded November 2013. 2 | I simply needed a way to call this stuff from Python: `selective_search.py` and `selective_search.m` are the only new files. 3 | 4 | import selective_search_ijcv_with_python as selective_search 5 | windows = selective_search.get_windows(image_filenames) 6 | 7 | To make sure this works, simply `python selective_search.py`. 
8 | 9 | Sergey Karayev 10 | 25 Nov 2013 11 | -------------------------------------------------------------------------------- /experiments/scripts/multitask_no_bbox_reg_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/multitask_no_bbox_reg_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/test_net.py --gpu $1 \ 13 | --def models/VGG_CNN_M_1024/test.prototxt \ 14 | --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel \ 15 | --imdb voc_2007_test \ 16 | --cfg experiments/cfgs/no_bbox_reg.yml 17 | -------------------------------------------------------------------------------- /selective_search/selective_search.m: -------------------------------------------------------------------------------- 1 | image_db = '/home/szy/INRIA/'; 2 | image_filenames = textread([image_db '/data/ImageSets/train.txt'], '%s', 'delimiter', '\n'); 3 | for i = 1:length(image_filenames) 4 | if exist([image_db '/data/Images/' image_filenames{i} '.jpg'], 'file') == 2 5 | image_filenames{i} = [image_db '/data/Images/' image_filenames{i} '.jpg']; 6 | end 7 | if exist([image_db '/data/Images/' image_filenames{i} '.png'], 'file') == 2 8 | image_filenames{i} = [image_db '/data/Images/' image_filenames{i} '.png']; 9 | end 10 | end 11 | selective_search_rcnn(image_filenames, 'output.mat'); 12 | -------------------------------------------------------------------------------- /experiments/scripts/default_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/default_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time 
./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG16/solver.prototxt \ 14 | --weights data/imagenet_models/VGG16.v2.caffemodel \ 15 | --imdb voc_2007_trainval 16 | 17 | time ./tools/test_net.py --gpu $1 \ 18 | --def models/VGG16/test.prototxt \ 19 | --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000.caffemodel \ 20 | --imdb voc_2007_test 21 | -------------------------------------------------------------------------------- /selective_search/Dependencies/BoxRemoveDuplicates.m: -------------------------------------------------------------------------------- 1 | function [boxesOut uniqueIdx] = BoxRemoveDuplicates(boxesIn) 2 | % [boxesOut uniqueIdx] = BoxRemoveDuplicates(boxesIn) 3 | % 4 | % Removes duplicate boxes. Leaves the boxes in the same order. 5 | % Keeps the first box of each kind. 6 | % 7 | % boxesIn: N x 4 array containing boxes 8 | % 9 | % boxesOut: M x 4 array of boxes without duplicates. M <= N 10 | % uniqueIdx: Indices of retained boxes from boxesIn 11 | % 12 | % Jasper Uijlings - 2013 13 | 14 | [dummy uniqueIdx] = unique(boxesIn, 'rows', 'first'); 15 | uniqueIdx = sort(uniqueIdx); 16 | boxesOut = boxesIn(uniqueIdx,:); 17 | -------------------------------------------------------------------------------- /selective_search/Dependencies/FilterBoxesWidth.m: -------------------------------------------------------------------------------- 1 | function [outBoxes idsGood]= FilterBoxesWidth(inBoxes, minLen) 2 | % [outBoxes idsGood]= FilterBoxesWidth(inBoxes, minLen) 3 | % 4 | % Filters out small boxes. 
Boxes have to have a width and height 5 | % of at least minLen 6 | % 7 | % inBoxes: M x 4 array of boxes 8 | % minLen: Minimum width and height of boxes 9 | % 10 | % outBoxes: N x 4 array of boxes, N <= M 11 | % idsGood: M x 1 logical array denoting boxes kept 12 | % 13 | % Jasper Uijlings - 2013 14 | 15 | [nr nc] = BoxSize(inBoxes); 16 | 17 | idsGood = (nr >= minLen) & (nc >= minLen); 18 | outBoxes = inBoxes(idsGood,:); -------------------------------------------------------------------------------- /experiments/scripts/default_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/default_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/CaffeNet/solver.prototxt \ 14 | --weights data/imagenet_models/CaffeNet.v2.caffemodel \ 15 | --imdb voc_2007_trainval 16 | 17 | time ./tools/test_net.py --gpu $1 \ 18 | --def models/CaffeNet/test.prototxt \ 19 | --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000.caffemodel \ 20 | --imdb voc_2007_test 21 | -------------------------------------------------------------------------------- /experiments/scripts/default_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/default_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG_CNN_M_1024/solver.prototxt \ 14 | --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel \ 15 | --imdb voc_2007_trainval 16 | 17 | time ./tools/test_net.py --gpu $1 \ 18 | --def models/VGG_CNN_M_1024/test.prototxt \ 19 | --net 
output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel \ 20 | --imdb voc_2007_test 21 | -------------------------------------------------------------------------------- /experiments/scripts/svd_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/svd_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/compress_net.py \ 13 | --def models/VGG16/test.prototxt \ 14 | --def-svd models/VGG16/compressed/test.prototxt \ 15 | --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000.caffemodel 16 | 17 | time ./tools/test_net.py --gpu $1 \ 18 | --def models/VGG16/compressed/test.prototxt \ 19 | --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000_svd_fc6_1024_fc7_256.caffemodel \ 20 | --imdb voc_2007_test 21 | -------------------------------------------------------------------------------- /experiments/scripts/svd_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/svd_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/compress_net.py \ 13 | --def models/CaffeNet/test.prototxt \ 14 | --def-svd models/CaffeNet/compressed/test.prototxt \ 15 | --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000.caffemodel 16 | 17 | time ./tools/test_net.py --gpu $1 \ 18 | --def models/CaffeNet/compressed/test.prototxt \ 19 | --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000_svd_fc6_1024_fc7_256.caffemodel \ 20 | --imdb voc_2007_test 21 | -------------------------------------------------------------------------------- /experiments/scripts/svm_vgg16.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/svm_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_svms.py --gpu $1 \ 13 | --def models/VGG16/test.prototxt \ 14 | --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/svm.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG16/test.prototxt \ 20 | --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000_svm.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/svm.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/fc_only_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/fc_only_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG16/fc_only/solver.prototxt \ 14 | --weights data/imagenet_models/VGG16.v2.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/fc_only.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG16/test.prototxt \ 20 | --net output/fc_only/voc_2007_trainval/vgg16_fast_rcnn_fc_only_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/fc_only.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/svm_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | 
LOG="experiments/logs/svm_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_svms.py --gpu $1 \ 13 | --def models/CaffeNet/test.prototxt \ 14 | --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/svm.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/CaffeNet/test.prototxt \ 20 | --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000_svm.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/svm.yml 23 | -------------------------------------------------------------------------------- /selective_search/Dependencies/ShowRectsWithinImage.m: -------------------------------------------------------------------------------- 1 | function ShowRectsWithinImage(rects, numRow, numCol, image, imageNames) 2 | % ShowRects(Rects, numRow, numCol, image) 3 | % 4 | % Shows only the rectangles of the image 5 | % 6 | % Jasper Uijlings - 2013 7 | 8 | if ~exist('imageNames', 'var') 9 | imageNames = cell(size(rects,1), 1); 10 | for i=1:size(rects,1) 11 | imageNames{i} = sprintf('%d', i); 12 | end 13 | end 14 | 15 | % Convert to images 16 | idx = 1; 17 | images = cell(size(rects,1),1); 18 | for i=1:size(rects,1) 19 | bbox = rects(i,:); 20 | images{idx} = image(bbox(1):bbox(3),bbox(2):bbox(4),:); 21 | idx = idx + 1; 22 | end 23 | 24 | ShowImageCell(images, numRow, numCol, 'rects', imageNames); -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Set up paths for Fast 
R-CNN.""" 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | def add_path(path): 14 | if path not in sys.path: 15 | sys.path.insert(0, path) 16 | 17 | this_dir = osp.dirname(__file__) 18 | 19 | # Add caffe to PYTHONPATH 20 | caffe_path = osp.join(this_dir, '..', 'caffe-fast-rcnn', 'python') 21 | add_path(caffe_path) 22 | 23 | # Add lib to PYTHONPATH 24 | lib_path = osp.join(this_dir, '..', 'lib') 25 | add_path(lib_path) 26 | -------------------------------------------------------------------------------- /experiments/scripts/svd_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/svd_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/compress_net.py \ 13 | --def models/VGG_CNN_M_1024/test.prototxt \ 14 | --def-svd models/VGG_CNN_M_1024/compressed/test.prototxt \ 15 | --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel 16 | 17 | time ./tools/test_net.py --gpu $1 \ 18 | --def models/VGG_CNN_M_1024/compressed/test.prototxt \ 19 | --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000_svd_fc6_1024_fc7_256.caffemodel \ 20 | --imdb voc_2007_test 21 | -------------------------------------------------------------------------------- /experiments/scripts/multiscale_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/multiscale_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/CaffeNet/solver.prototxt \ 14 | --weights data/imagenet_models/CaffeNet.v2.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg 
experiments/cfgs/multiscale.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/CaffeNet/test.prototxt \ 20 | --net output/multiscale/voc_2007_trainval/caffenet_fast_rcnn_multiscale_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/multiscale.yml 23 | -------------------------------------------------------------------------------- /selective_search/Dependencies/BoxSize.m: -------------------------------------------------------------------------------- 1 | function [numRows numColumns area] = BoxSize(bbox) 2 | % [numRows numColumns area] = BoxSize(bbox) 3 | % 4 | % Retrieves number of rows, columns, and surface area from bounding boxes 5 | % 6 | % bbox: N x 4 array of bounding boxes, one per row, as [rowBegin colBegin rowEnd colEnd] 7 | % 8 | % numRows: Number of rows of boxes 9 | % numColumns: Number of columns of boxes 10 | % area: Area of boxes 11 | % 12 | % Jasper Uijlings - 2013 13 | 14 | % Box is empty 15 | if isempty(bbox) 16 | numRows = 0; 17 | numColumns = 0; 18 | area = 0; 19 | return 20 | end 21 | 22 | numRows = bbox(:,3) - bbox(:,1) + 1; 23 | numColumns = bbox(:,4) - bbox(:,2) + 1; 24 | area = numRows .* numColumns; 25 | 26 | -------------------------------------------------------------------------------- /experiments/scripts/no_bbox_reg_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/no_bbox_reg_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG16/no_bbox_reg/solver.prototxt \ 14 | --weights data/imagenet_models/VGG16.v2.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/no_bbox_reg.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG16/no_bbox_reg/test.prototxt \ 20 | --net 
output/no_bbox_reg/voc_2007_trainval/vgg16_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/no_bbox_reg.yml 23 | -------------------------------------------------------------------------------- /selective_search/Dependencies/BoxIntersection.m: -------------------------------------------------------------------------------- 1 | function intersection = BoxIntersection(a, b) 2 | % intersection = BoxIntersection(a, b) 3 | % 4 | % Creates the intersection of two bounding boxes. Returns minus ones if 5 | % there is no intersection. 6 | % 7 | % a: Input bounding box "a" 8 | % b: Input bounding box "b" 9 | % 10 | % intersection: Intersection of box a and b 11 | % 12 | % Jasper Uijlings - 2013 13 | 14 | intersection = [max(a(:,1),b(:,1)) max(a(:,2),b(:,2)) ... 15 | min(a(:,3),b(:,3)) min(a(:,4),b(:,4))]; 16 | 17 | [numRows numColumns] = BoxSize(intersection); 18 | 19 | % There is no intersection box 20 | negIds = numRows < 1 | numColumns < 1; 21 | intersection(negIds,:) = -1; 22 | 23 | 24 | -------------------------------------------------------------------------------- /experiments/scripts/svm_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/svm_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_svms.py --gpu $1 \ 13 | --def models/VGG_CNN_M_1024/test.prototxt \ 14 | --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/svm.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG_CNN_M_1024/test.prototxt \ 20 | --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000_svm.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/svm.yml 23 | 
-------------------------------------------------------------------------------- /experiments/scripts/no_bbox_reg_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/no_bbox_reg_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/CaffeNet/no_bbox_reg/solver.prototxt \ 14 | --weights data/imagenet_models/CaffeNet.v2.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/no_bbox_reg.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/CaffeNet/no_bbox_reg/test.prototxt \ 20 | --net output/no_bbox_reg/voc_2007_trainval/caffenet_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/no_bbox_reg.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/piecewise_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/piecewise_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG16/piecewise/solver.prototxt \ 14 | --weights output/no_bbox_reg/voc_2007_trainval/vgg16_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/piecewise.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG16/test.prototxt \ 20 | --net output/piecewise/voc_2007_trainval/vgg16_fast_rcnn_piecewise_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/piecewise.yml 23 | 
-------------------------------------------------------------------------------- /matlab/fast_rcnn_load_net.m: -------------------------------------------------------------------------------- 1 | % -------------------------------------------------------- 2 | % Fast R-CNN 3 | % Copyright (c) 2015 Microsoft 4 | % Licensed under The MIT License [see LICENSE for details] 5 | % Written by Ross Girshick 6 | % -------------------------------------------------------- 7 | 8 | function model = fast_rcnn_load_net(def, net, use_gpu) 9 | % Load a Fast R-CNN network. 10 | 11 | init_key = caffe('init', def, net, 'test'); 12 | if exist('use_gpu', 'var') && ~use_gpu 13 | caffe('set_mode_cpu'); 14 | else 15 | caffe('set_mode_gpu'); 16 | end 17 | 18 | model.init_key = init_key; 19 | % model.stride is correct for the included models, but may not be correct 20 | % for other models! 21 | model.stride = 16; 22 | model.pixel_means = reshape([102.9801, 115.9465, 122.7717], [1 1 3]); 23 | -------------------------------------------------------------------------------- /experiments/scripts/multiscale_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/multiscale_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG_CNN_M_1024/solver.prototxt \ 14 | --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/multiscale.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG_CNN_M_1024/test.prototxt \ 20 | --net output/multiscale/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_multiscale_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/multiscale.yml 23 | 
-------------------------------------------------------------------------------- /experiments/scripts/piecewise_caffenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/piecewise_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/CaffeNet/piecewise/solver.prototxt \ 14 | --weights output/no_bbox_reg/voc_2007_trainval/caffenet_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/piecewise.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/CaffeNet/test.prototxt \ 20 | --net output/piecewise/voc_2007_trainval/caffenet_fast_rcnn_piecewise_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/piecewise.yml 23 | -------------------------------------------------------------------------------- /experiments/scripts/no_bbox_reg_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/no_bbox_reg_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG_CNN_M_1024/no_bbox_reg/solver.prototxt \ 14 | --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/no_bbox_reg.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG_CNN_M_1024/no_bbox_reg/test.prototxt \ 20 | --net output/no_bbox_reg/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/no_bbox_reg.yml 23 | 
-------------------------------------------------------------------------------- /selective_search/Dependencies/GetPascalOverlap.m: -------------------------------------------------------------------------------- 1 | function score = GetPascalOverlap(bb, bbgt) 2 | % Directly copied from Pascal code 3 | % 4 | % Gets the overlap measure according to Pascal 5 | % 6 | % bb: Bounding Box 7 | % bbgt: Ground truth bounding box 8 | % 9 | % score: Score between 0 and 1. 1 is complete overlap. 10 | 11 | score = 0; 12 | 13 | % intersection bbox 14 | bi=[max(bb(1),bbgt(1)) ; max(bb(2),bbgt(2)) ; min(bb(3),bbgt(3)) ; min(bb(4),bbgt(4))]; 15 | iw=bi(3)-bi(1)+1; 16 | ih=bi(4)-bi(2)+1; 17 | if iw>0 & ih>0 % intersection should be non-zero 18 | % compute overlap as area of intersection / area of union 19 | ua=(bb(3)-bb(1)+1)*(bb(4)-bb(2)+1)+... 20 | (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1)-... 21 | iw*ih; 22 | score=iw*ih/ua; 23 | end -------------------------------------------------------------------------------- /experiments/scripts/piecewise_vgg_cnn_m_1024.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | LOG="experiments/logs/piecewise_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 9 | exec &> >(tee -a "$LOG") 10 | echo Logging output to "$LOG" 11 | 12 | time ./tools/train_net.py --gpu $1 \ 13 | --solver models/VGG_CNN_M_1024/piecewise/solver.prototxt \ 14 | --weights output/no_bbox_reg/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \ 15 | --imdb voc_2007_trainval \ 16 | --cfg experiments/cfgs/piecewise.yml 17 | 18 | time ./tools/test_net.py --gpu $1 \ 19 | --def models/VGG_CNN_M_1024/test.prototxt \ 20 | --net output/piecewise/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_piecewise_iter_40000.caffemodel \ 21 | --imdb voc_2007_test \ 22 | --cfg experiments/cfgs/piecewise.yml 23 | 
-------------------------------------------------------------------------------- /selective_search/Dependencies/BoxBestOverlap.m: -------------------------------------------------------------------------------- 1 | function [scores index] = BoxBestOverlap(gtBoxes, testBoxes) 2 | % [scores index] = BoxBestOverlap(gtBoxes, testBoxes) 3 | % 4 | % Get the best overlap score (Pascal-wise) of each ground truth box against testBoxes 5 | % 6 | % gtBoxes: Ground truth bounding boxes 7 | % testBoxes: Test bounding boxes 8 | % 9 | % scores: Highest overlap score for each gtBoxes box (one row per ground truth box). 10 | % index: For each gtBoxes box, index of the testBoxes box with the highest overlap 11 | % 12 | % Jasper Uijlings - 2013 13 | 14 | numGT = size(gtBoxes,1); 15 | numTest = size(testBoxes,1); 16 | 17 | scoreM = zeros(numGT, numTest); 18 | 19 | 20 | for j=1:numGT 21 | scoreM(j,:) = PascalOverlap(gtBoxes(j,:), testBoxes); 22 | end 23 | 24 | 25 | [scores index] = max(scoreM, [], 2); 26 | 27 | 28 | -------------------------------------------------------------------------------- /matlab/showboxes.m: -------------------------------------------------------------------------------- 1 | % -------------------------------------------------------- 2 | % Fast R-CNN 3 | % Copyright (c) 2015 Microsoft 4 | % Licensed under The MIT License [see LICENSE for details] 5 | % Written by Ross Girshick 6 | % -------------------------------------------------------- 7 | 8 | function showboxes(im, boxes) 9 | 10 | image(im); 11 | axis image; 12 | axis off; 13 | set(gcf, 'Color', 'white'); 14 | 15 | if ~isempty(boxes) 16 | x1 = boxes(:, 1); 17 | y1 = boxes(:, 2); 18 | x2 = boxes(:, 3); 19 | y2 = boxes(:, 4); 20 | c = 'r'; 21 | s = '-'; 22 | line([x1 x1 x2 x2 x1]', [y1 y2 y2 y1 y1]', ... 23 | 'color', c, 'linewidth', 2, 'linestyle', s); 24 | for i = 1:size(boxes, 1) 25 | text(double(x1(i)), double(y1(i)) - 2, ... 26 | sprintf('%.3f', boxes(i, end)), ... 
function [scores index] = BlobBestOverlap(gtBlobs, testBlobs)
% [scores index] = BlobBestOverlap(gtBlobs, testBlobs)
%
% Get the best overlap score (Pascal-wise) for every test blob.
%
% gtBlobs:    Cell array with ground truth blobs
% testBlobs:  Cell array with test blobs
%
% scores:     Highest overlap score for each test blob.
% index:      For each test blob, the index of the ground truth blob
%             that gives this score.
%
% Jasper Uijlings - 2013

nTest = length(testBlobs);
nGT = length(gtBlobs);

% One row per test blob, one column per ground truth blob
overlapM = zeros(nTest, nGT);

for t = 1:nTest
    currentBlob = testBlobs{t};
    for g = 1:nGT
        overlapM(t, g) = PascalOverlapBlob(gtBlobs{g}, currentBlob);
    end
end

% Best ground truth blob per row
[scores index] = max(overlapM, [], 2);
#!/bin/bash
# Download and unpack the Fast R-CNN demo models into the directory one
# level above this script. If the archive is already present and its md5
# matches CHECKSUM, nothing is downloaded.

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
# Abort instead of downloading into whatever the current directory is
cd "$DIR" || exit 1

FILE=fast_rcnn_models.tgz
URL=http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/$FILE
CHECKSUM=5f7dde9f5376e18c8e065338cc5df3f7

if [ -f "$FILE" ]; then
  echo "File already exists. Checking md5..."
  # Start empty so a value inherited from the environment is never compared
  # on platforms for which no md5 tool is known
  checksum=""
  os=`uname -s`
  if [ "$os" = "Linux" ]; then
    checksum=`md5sum "$FILE" | awk '{ print $1 }'`
  elif [ "$os" = "Darwin" ]; then
    checksum=`cat "$FILE" | md5`
  fi
  if [ "$checksum" = "$CHECKSUM" ]; then
    echo "Checksum is correct. No need to download."
    exit 0
  else
    echo "Checksum is incorrect. Need to download again."
  fi
fi

echo "Downloading Fast R-CNN demo models (0.96G)..."

# Do not try to unpack a missing or partial archive
wget "$URL" -O "$FILE" || { echo "Download failed." >&2; exit 1; }

echo "Unzipping..."

tar zxvf "$FILE" || { echo "Unzipping failed." >&2; exit 1; }

echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
function b = NormalizeRows(a, n)
% b = NormalizeRows(a, n)
%
% Rescales every row of a so that it sums to n. Rows that sum to zero are
% divided by one instead, so b never contains NaN entries caused by a
% division by zero.
%
% a:  data with row vectors
% n:  Target row sum (optional). By default n = 1
%
% b:  normalized data with row vectors. All rows sum to n, except the
%     ones whose sum was zero in the first place.
%
% Jasper Uijlings - 2013

% Per-row divisor
rowDivisor = sum(a, 2);

% Rows with zero sum are divided by one: avoids division by zero
rowDivisor(rowDivisor == 0) = 1;

% Dividing by (sum / n) makes the rows sum to n
if nargin ~= 1
    rowDivisor = rowDivisor / n;
end

b = bsxfun(@rdivide, a, rowDivisor);
# Cython extensions compiled from the .pyx sources under utils/. Both are
# built with the same warning-suppression flags.
ext_modules = [
    Extension(
        module_name,
        [pyx_source],
        extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
    )
    for module_name, pyx_source in (
        ("utils.cython_bbox", "utils/bbox.pyx"),
        ("utils.cython_nms", "utils/nms.pyx"),
    )
]

# Route the build_ext command through Cython so the .pyx files get compiled
cmdclass = {'build_ext': build_ext}

setup(
    name='fast_rcnn',
    cmdclass=cmdclass,
    ext_modules=ext_modules,
    include_dirs=[np.get_include()]
)
SegmentIndices2Blobs(blobIndIm, blobBoxes); 18 | 19 | % Add sizes 20 | blobsInit = BlobAddSizes(blobsInit); 21 | 22 | % Reconstruct hierarchy 23 | hBlobs = RecreateBlobHierarchy(blobsInit, hierarchy); 24 | 25 | if nargout == 3 26 | blobsRest = hBlobs(length(blobsInit)+1:end); 27 | end -------------------------------------------------------------------------------- /selective_search/RecreateBlobHierarchy.m: -------------------------------------------------------------------------------- 1 | function hBlobs = RecreateBlobHierarchy(blobs, hierarchy) 2 | % [blobs hierarchy] = RecreateBlobHierarchy(blobs, hierarchy) 3 | % 4 | % Recreates the hierarchical grouping using the starting blobs and the 5 | % resulting hierarchy. This allows one to save the grouping using 6 | % relatively small disk space while still being able to fastly recreate the 7 | % complete grouping. 8 | % 9 | % blobs: Input cell array with blobs 10 | % hierarchy: Hierarchy of the blobs as created by 11 | % HierarchicalGrouping.m 12 | % 13 | % hBlobs: All segments of the hierarchical grouping. 
class Timer(object):
    """Stopwatch that accumulates wall-clock time over tic/toc pairs."""

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        """Record the current wall-clock time as the start of an interval."""
        # time.time is used instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """Close the interval opened by the last ``tic`` and update the totals.

        Returns the running average over all calls when ``average`` is true,
        otherwise the duration of this interval alone.
        """
        elapsed = time.time() - self.start_time
        self.diff = elapsed
        self.total_time += elapsed
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff
def nms(dets, thresh):
    """Greedy non-maximum suppression.

    Each row of ``dets`` is read as ``x1, y1, x2, y2, score``. Boxes are
    visited from highest to lowest score; a box is kept, and every remaining
    box whose overlap (intersection over union, with the +1 pixel
    convention) with it exceeds ``thresh`` is discarded.

    Returns the list of kept row indices, in order of decreasing score.
    """
    left, top = dets[:, 0], dets[:, 1]
    right, bottom = dets[:, 2], dets[:, 3]

    box_areas = (right - left + 1) * (bottom - top + 1)
    # Candidate indices, best score first
    remaining = dets[:, 4].argsort()[::-1]

    keep = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        keep.append(best)

        # Intersection of the kept box with every other candidate
        inter_w = np.maximum(
            0.0,
            np.minimum(right[best], right[rest])
            - np.maximum(left[best], left[rest]) + 1)
        inter_h = np.maximum(
            0.0,
            np.minimum(bottom[best], bottom[rest])
            - np.maximum(top[best], top[rest]) + 1)
        inter = inter_w * inter_h
        iou = inter / (box_areas[best] + box_areas[rest] - inter)

        # Only candidates that do not overlap too much survive this round
        remaining = rest[iou <= thresh]

    return keep
function [cb counts] = CountVisualWordsIndex(indexIm, wordIm, numIndex, numWords)
% [cb counts] = CountVisualWordsIndex(indexIm, wordIm, numIndex, numWords)
%
% Counts, per region, the visual words in wordIm.
% wordIm is an array with visual word identities. Zeros will be ignored.
% indexIm is an array with regions to which visual words belong.
%
% WARNING: VERY FEW CHECKS FOR INTEGRITY. WRONG INPUT WILL CRASH THE SYSTEM
%
% indexIm:   Array with indices. Range: [1,numIndex]
% wordIm:    Array with visual word identities. Range: [0,numWords]
% numIndex:  Number of indices in indexIm.
% numWords:  Number of visual words.
%
% cb:        numIndex x numWords array with histogram counts
% counts:    numIndex x 1 array with counts per row of cb.
%
% Jasper Uijlings - 2013

% Both images must agree in their first two dimensions
sameRows = size(indexIm,1) == size(wordIm,1);
sameCols = size(indexIm,2) == size(wordIm,2);
if ~(sameRows & sameCols)
    error('First two input arguments should have the same 2D dimension');
end

% The mex routine is handed the word image as doubles
[cb counts] = mexCountWordsIndex(indexIm, double(wordIm), numIndex, numWords);
# http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
def _which(program):
    """Locate an executable, similar to the shell's ``which``.

    If ``program`` contains a directory component it is checked as given.
    Otherwise every directory listed in the PATH environment variable is
    searched in order.

    Returns the path of the first match that is a regular file with execute
    permission, or None when there is no such file.
    """
    import os

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    if os.path.dirname(program):
        # Explicit location given: no PATH search
        return program if is_exe(program) else None

    # PATH can be missing from the environment; fall back to the platform
    # default search path instead of failing with a KeyError
    search_path = os.environ.get("PATH", os.defpath)
    for path in search_path.split(os.pathsep):
        # Some platforms allow quoted PATH entries
        exe_file = os.path.join(path.strip('"'), program)
        if is_exe(exe_file):
            return exe_file

    return None
" 42 | "Please add '{}' to your PATH.").format(MATLAB, MATLAB) 43 | raise EnvironmentError(msg) 44 | -------------------------------------------------------------------------------- /matlab/nms.m: -------------------------------------------------------------------------------- 1 | function pick = nms(boxes, overlap) 2 | % top = nms(boxes, overlap) 3 | % Non-maximum suppression. (FAST VERSION) 4 | % Greedily select high-scoring detections and skip detections 5 | % that are significantly covered by a previously selected 6 | % detection. 7 | % 8 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m), 9 | % but an inner loop has been eliminated to significantly speed it 10 | % up in the case of a large number of boxes 11 | 12 | % Copyright (C) 2011-12 by Tomasz Malisiewicz 13 | % All rights reserved. 14 | % 15 | % This file is part of the Exemplar-SVM library and is made 16 | % available under the terms of the MIT license (see COPYING file). 17 | % Project homepage: https://github.com/quantombone/exemplarsvm 18 | 19 | 20 | if isempty(boxes) 21 | pick = []; 22 | return; 23 | end 24 | 25 | x1 = boxes(:,1); 26 | y1 = boxes(:,2); 27 | x2 = boxes(:,3); 28 | y2 = boxes(:,4); 29 | s = boxes(:,end); 30 | 31 | area = (x2-x1+1) .* (y2-y1+1); 32 | [vals, I] = sort(s); 33 | 34 | pick = s*0; 35 | counter = 1; 36 | while ~isempty(I) 37 | last = length(I); 38 | i = I(last); 39 | pick(counter) = i; 40 | counter = counter + 1; 41 | 42 | xx1 = max(x1(i), x1(I(1:last-1))); 43 | yy1 = max(y1(i), y1(I(1:last-1))); 44 | xx2 = min(x2(i), x2(I(1:last-1))); 45 | yy2 = min(y2(i), y2(I(1:last-1))); 46 | 47 | w = max(0.0, xx2-xx1+1); 48 | h = max(0.0, yy2-yy1+1); 49 | 50 | inter = w.*h; 51 | o = inter ./ (area(i) + area(I(1:last-1)) - inter); 52 | 53 | I = I(find(o<=overlap)); 54 | end 55 | 56 | pick = pick(1:(counter-1)); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: 
function res = voc_eval(path, comp_id, test_set, output_dir, rm_res)
% res = voc_eval(path, comp_id, test_set, output_dir, rm_res)
%
% Evaluates every class listed in the VOC options obtained from
% get_voc_opts(path) on the image set test_set and prints the average
% precision per class followed by their mean (both as percentages).
%
% res:  Struct array with one element per class, as returned by
%       voc_eval_cls.

opts = get_voc_opts(path);
opts.testset = test_set;

numClasses = length(opts.classes);
for c = 1:numClasses
    res(c) = voc_eval_cls(opts.classes{c}, opts, comp_id, output_dir, rm_res);
end

% Summary: one AP per line, then the mean AP
aps = [res(:).ap]';
fprintf('\n~~~~~~~~~~~~~~~~~~~~\nResults:\n');
fprintf('%.1f\n', aps * 100);
fprintf('%.1f\n', mean(aps) * 100);
fprintf('~~~~~~~~~~~~~~~~~~~~\n');
function [fp,tp,auc] = VOCroc(VOCopts,id,cls,draw)
% [fp,tp,auc] = VOCroc(VOCopts,id,cls,draw)
%
% Computes the ROC curve of classification results for class cls.
%
% VOCopts:  Options struct; the fields clsimgsetpath, clsrespath and
%           testset are used here.
% id:       Identifier inserted into the results file name
% cls:      Class name
% draw:     If true, the curve is plotted in the current axes
%
% fp, tp:   False and true positive rates, padded with a leading 0 and a
%           trailing 1
% auc:      Area under the lower envelope of the curve

% load test set
% (gt > 0 is counted as positive, gt < 0 as negative below)
[gtids,gt]=textread(sprintf(VOCopts.clsimgsetpath,cls,VOCopts.testset),'%s %d');

% load results
[ids,confidence]=textread(sprintf(VOCopts.clsrespath,id,cls),'%s %f');

% map results to ground truth images
% images without a result keep a confidence of -inf
out=ones(size(gt))*-inf;
tic;
for i=1:length(ids)
    % display progress
    % (at most about once per second: the timer is reset after each message)
    if toc>1
        fprintf('%s: roc: %d/%d\n',cls,i,length(ids));
        drawnow;
        tic;
    end

    % find ground truth image
    % each result id must match exactly one ground truth id
    j=strmatch(ids{i},gtids,'exact');
    if isempty(j)
        error('unrecognized image "%s"',ids{i});
    elseif length(j)>1
        error('multiple image "%s"',ids{i});
    else
        out(j)=confidence(i);
    end
end

% compute true and false positive rates
% sorting -out ascending visits the images by decreasing confidence
[so,si]=sort(-out);
tp=cumsum(gt(si)>0)/sum(gt>0);
fp=cumsum(gt(si)<0)/sum(gt<0);
% keep a single curve point per distinct confidence value
[uo,ui]=unique(so);
tp=[0;tp(ui);1];
fp=[0;fp(ui);1];

% compute lower envelope and area under curve
% di marks the points at which tp differs from the preceding point
di=[true ; tp(2:end-1)~=tp(1:end-2) ; true];
x=fp(di);
y=tp(di);
% sum of rectangles: width of each fp step times the tp value at its start
auc=(x(2:end)-x(1:end-1))'*y(1:end-1);

if draw
    % plot lower envelope
    % every point is duplicated so the curve is drawn as a staircase
    xp=[0 ; reshape([x x]',[],1) ; 1 ; 1];
    yp=[0 ; 0 ; reshape([y y]',[],1) ; 1];

    plot(xp,yp,'-');
    grid;
    axis([0 1 0 1]);
    xlabel 'false positive rate'
    ylabel 'true positive rate'
    title(sprintf('class: %s, subset: %s, AUC = %.3f',cls,VOCopts.testset,auc));
end
function [abo mabo boScores avgNumBoxes] = BoxAverageBestOverlap(gtBoxes, gtNrs, testBoxes)
% [abo mabo boScores avgNumBoxes] = BoxAverageBestOverlap(gtBoxes, gtNrs, testBoxes)
%
% Calculate Average Best Overlap scores
%
% gtBoxes:      Cell array of ground truth boxes per class (see
%               GetAllObjectBoxes)
% gtNrs:        Cell array with image nrs corresponding to ground truth.
% testBoxes:    Cell array of testboxes per image.
%
% abo:          Average Best Overlap per class (Pascal Overlap criterion)
% mabo:         Mean Average Best Overlap (mean(abo))
% boScores:     Best Overlap Score per GT box.
% avgNumBoxes:  Average number of boxes per image
%
% Jasper Uijlings - 2013

nClasses = length(gtBoxes);

boScores = cell(1, nClasses);
abo = zeros(nClasses, 1);

for c = 1:nClasses
    classBoxes = gtBoxes{c};
    classNrs = gtNrs{c};
    nGT = size(classBoxes, 1);

    % Best overlap of each ground truth box with the test boxes of the
    % image it belongs to
    best = zeros(nGT, 1);
    for g = 1:nGT
        best(g) = BoxBestOverlap(classBoxes(g,:), testBoxes{classNrs(g)});
    end

    boScores{c} = best;
    abo(c) = mean(best);
end
mabo = mean(abo);

% Average number of test boxes per image
boxCounts = zeros(length(testBoxes), 1);
for imI = 1:length(testBoxes)
    boxCounts(imI) = size(testBoxes{imI}, 1);
end
avgNumBoxes = mean(boxCounts);
function ShowBlobs(blobs, numRow, numCol, image, minSize, imNames)
% ShowBlobs(blobs, numRow, numCol, image, minSize, imNames)
%
% Shows all blobs larger than minSize as thumbnails, numRow x numCol
% thumbnails per figure.
%
% blobs:    Cell array with blobs. A blob without a 'size' field gets the
%           sum of its 'mask' field as size.
% numRow:   Number of thumbnail rows per figure
% numCol:   Number of thumbnail columns per figure
% image:    Image passed to Blob2Image together with each blob
% minSize:  Only blobs with size > minSize are shown (optional, default 0)
% imNames:  One name per blob, as cell array or numeric array (optional).
%           Names are shown as label of the thumbnails.
%
% Jasper Uijlings - 2013

if nargin < 5
    minSize = 0;
end

doNames = exist('imNames', 'var');

% Numeric names are converted to strings
if doNames && ~iscell(imNames)
    imageNamesC = cell(size(imNames));
    for i=1:length(imNames)
        imageNamesC{i} = sprintf('%g', imNames(i));
    end
    imNames = imageNamesC;
end

% Start with empty lists: the call to ShowImageCell below must also work
% when not a single blob passes the size filter
images = {};
iiNames = {};

% Convert to images
idx = 1;
for i=1:length(blobs)
    if ~isfield(blobs{i}, 'size')
        blobs{i}.size = sum(sum(blobs{i}.mask));
    end
    if blobs{i}.size > minSize
        images{idx} = Blob2Image(blobs{i}, image);
        if doNames
            iiNames{idx} = imNames{i};
        end
        idx = idx + 1;
    end
end

if doNames
    ShowImageCell(images, numRow, numCol, '', iiNames);
else
    ShowImageCell(images, numRow, numCol);
end
def im_list_to_blob(ims):
    """Convert a list of images into a network input.

    Assumes images are already prepared (means subtracted, BGR order, ...).

    Every image is copied into the top-left corner of a zero-filled slot
    that is as large as the tallest and widest image in the list, so images
    of different sizes can share one array.

    Returns a float32 array with axis order
    (batch elem, channel, height, width) and 3 channels.
    """
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    blob = np.zeros((len(ims), max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    # enumerate works on Python 2 and Python 3 alike (xrange does not)
    for i, im in enumerate(ims):
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1
    # Axis order will become: (batch elem, channel, height, width)
    channel_swap = (0, 3, 1, 2)
    return blob.transpose(channel_swap)
22 | 23 | To download Fast R-CNN models trained on VOC 2007, run: 24 | 25 | ``` 26 | ./data/scripts/fetch_fast_rcnn_models.sh 27 | ``` 28 | 29 | This script will populate `data/fast_rcnn_models`. 30 | 31 | In order to train and test with PASCAL VOC, you will need to establish symlinks. 32 | From the `data` directory (`cd data`): 33 | 34 | ``` 35 | # For VOC 2007 36 | ln -s /your/path/to/VOC2007/VOCdevkit VOCdevkit2007 37 | 38 | # For VOC 2012 39 | ln -s /your/path/to/VOC2012/VOCdevkit VOCdevkit2012 40 | ``` 41 | 42 | Since you'll likely be experimenting with multiple installs of Fast R-CNN in 43 | parallel, you'll probably want to keep all of this data in a shared place and 44 | use symlinks. On my system I create the following symlinks inside `data`: 45 | 46 | ``` 47 | # data/cache holds various outputs created by the datasets package 48 | ln -s /data/fast_rcnn_shared/cache 49 | 50 | # move the imagenet_models to shared location and symlink to them 51 | ln -s /data/fast_rcnn_shared/imagenet_models 52 | 53 | # move the selective search data to a shared location and symlink to them 54 | ln -s /data/fast_rcnn_shared/selective_search_data 55 | 56 | ln -s /data/VOC2007/VOCdevkit VOCdevkit2007 57 | ln -s /data/VOC2012/VOCdevkit VOCdevkit2012 58 | ``` 59 | -------------------------------------------------------------------------------- /selective_search/Dependencies/mexCountWordsIndex.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mex.h" 3 | void mexFunction(int nlhs, mxArray *out[], int nrhs, const mxArray *input[]) 4 | { 5 | // Checking number of arguments 6 | if (nlhs > 2){ 7 | mexErrMsgTxt("Error: function has only two output parameters"); 8 | return; 9 | } 10 | 11 | if (nrhs != 4){ 12 | mexErrMsgTxt("Error: Needs exactly two four input parameters"); 13 | return; 14 | } 15 | 16 | int numWords = (int) mxGetScalar(input[3]); 17 | int numIndices = (int) mxGetScalar(input[2]); 18 | 19 | // Load in arrays 
20 | double* indices = mxGetPr(input[0]); 21 | double* a = mxGetPr( input[1] ); 22 | int aNum = (int) mxGetNumberOfElements(input[1]); 23 | int totIndices = (int) mxGetNumberOfElements(input[0]); // number of elements. Not confuse with max 24 | int numLoops = aNum / totIndices; 25 | 26 | // Create output histogram 27 | out[0] = mxCreateDoubleMatrix(numIndices, numWords, mxREAL); 28 | double* histogram = mxGetPr(out[0]); 29 | //histogram = histogram - 1; 30 | 31 | out[1] = mxCreateDoubleMatrix(numIndices, 1, mxREAL); 32 | double* count = mxGetPr(out[1]); 33 | 34 | double* aP = a; 35 | int iPval; 36 | for(int j=0; j < numLoops; j++){ 37 | double* iP = indices; 38 | for(int i=0;i < totIndices; i++){ 39 | //mexPrintf("%d\n", i); 40 | if (*aP){ 41 | //(*(histogram + (((int) *aP) -1) * numIndices + ((int) *iP - 1)))++; 42 | //count++; 43 | iPval = ((int) *iP) -1; 44 | histogram[(((int) *aP) - 1) * numIndices + iPval]++; 45 | count[iPval]++; 46 | } 47 | 48 | //arrayI = (int) *aP; 49 | //histogram[arrayI]++; 50 | aP++; 51 | iP++; 52 | } 53 | } 54 | 55 | return; 56 | } 57 | -------------------------------------------------------------------------------- /lib/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and 
query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /selective_search/Dependencies/FelzenSegment/imutil.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 
13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | /* some image utilities */ 20 | 21 | #ifndef IMUTIL_H 22 | #define IMUTIL_H 23 | 24 | #include "image.h" 25 | #include "misc.h" 26 | 27 | /* compute minimum and maximum value in an image */ 28 | template 29 | void min_max(image *im, T *ret_min, T *ret_max) { 30 | int width = im->width(); 31 | int height = im->height(); 32 | 33 | T min = imRef(im, 0, 0); 34 | T max = imRef(im, 0, 0); 35 | for (int y = 0; y < height; y++) { 36 | for (int x = 0; x < width; x++) { 37 | T val = imRef(im, x, y); 38 | if (min > val) 39 | min = val; 40 | if (max < val) 41 | max = val; 42 | } 43 | } 44 | 45 | *ret_min = min; 46 | *ret_max = max; 47 | } 48 | 49 | /* threshold image */ 50 | template 51 | image *threshold(image *src, int t) { 52 | int width = src->width(); 53 | int height = src->height(); 54 | image *dst = new image(width, height); 55 | 56 | for (int y = 0; y < height; y++) { 57 | for (int x = 0; x < width; x++) { 58 | imRef(dst, x, y) = (imRef(src, x, y) >= t); 59 | } 60 | } 61 | 62 | return dst; 63 | } 64 | 65 | #endif 66 | 67 | -------------------------------------------------------------------------------- /matlab/fast_rcnn_demo.m: -------------------------------------------------------------------------------- 1 | % -------------------------------------------------------- 2 | % Fast R-CNN 3 | % Copyright (c) 2015 Microsoft 4 | % Licensed under The MIT License [see LICENSE for details] 5 | % Written by Ross Girshick 6 | % -------------------------------------------------------- 7 | 8 | function fast_rcnn_demo() 9 | % Fast R-CNN demo (in matlab). 
10 | 11 | [folder, name, ext] = fileparts(mfilename('fullpath')); 12 | 13 | caffe_path = fullfile(folder, '..', 'caffe-fast-rcnn', 'matlab', 'caffe'); 14 | addpath(caffe_path); 15 | 16 | use_gpu = true; 17 | % You can try other models here: 18 | def = fullfile(folder, '..', 'models', 'VGG16', 'test.prototxt');; 19 | net = fullfile(folder, '..', 'data', 'fast_rcnn_models', ... 20 | 'vgg16_fast_rcnn_iter_40000.caffemodel'); 21 | model = fast_rcnn_load_net(def, net, use_gpu); 22 | 23 | car_ind = 7; 24 | sofa_ind = 18; 25 | tv_ind = 20; 26 | 27 | demo(model, '000004', [car_ind], {'car'}); 28 | demo(model, '001551', [sofa_ind, tv_ind], {'sofa', 'tvmonitor'}); 29 | fprintf('\n'); 30 | 31 | % ------------------------------------------------------------------------ 32 | function demo(model, im_id, cls_inds, cls_names) 33 | % ------------------------------------------------------------------------ 34 | [folder, name, ext] = fileparts(mfilename('fullpath')); 35 | box_file = fullfile(folder, '..', 'data', 'demo', [im_id '_boxes.mat']); 36 | % Boxes were saved with 0-based indexing 37 | ld = load(box_file); boxes = single(ld.boxes) + 1; clear ld; 38 | im_file = fullfile(folder, '..', 'data', 'demo', [im_id '.jpg']); 39 | im = imread(im_file); 40 | dets = fast_rcnn_im_detect(model, im, boxes); 41 | 42 | THRESH = 0.8; 43 | for j = 1:length(cls_inds) 44 | cls_ind = cls_inds(j); 45 | cls_name = cls_names{j}; 46 | I = find(dets{cls_ind}(:, end) >= THRESH); 47 | showboxes(im, dets{cls_ind}(I, :)); 48 | title(sprintf('%s detections with p(%s | box) >= %.3f', ... 49 | cls_name, cls_name, THRESH)) 50 | fprintf('\n> Press any key to continue'); 51 | pause; 52 | end 53 | -------------------------------------------------------------------------------- /selective_search/Dependencies/anigaussm/anigauss_mex.c: -------------------------------------------------------------------------------- 1 | /* 2 | The Matlab mex function. 
3 | If necessary to recompile, type: 4 | mex -v -g anigauss_mex.c anigauss.c 5 | from within matlab. 6 | For windows platforms, you may want to use the provided "anigauss.dll" file. 7 | */ 8 | 9 | 10 | #include "mex.h" 11 | 12 | extern void anigauss(double *input, double *output, int sizex, int sizey, 13 | double sigmav, double sigmau, double phi, int orderv, int orderu); 14 | 15 | void mexFunction(int nlhs,mxArray *plhs[],int nrhs, const mxArray *prhs[]) 16 | { 17 | double *in, *out; 18 | double sigmav, sigmau, phi = 0.0; 19 | int orderv = 0, orderu = 0; 20 | int m, n; 21 | 22 | /* 23 | * Check the input arguments and the output argument 24 | */ 25 | if ((nrhs<2) || (nrhs>6) || (nrhs==5) || (nlhs!=1)) 26 | mexErrMsgTxt( 27 | "use: out = anigauss(in, sigmav, sigmau, phi, orderv, orderu);"); 28 | 29 | if ( mxGetNumberOfDimensions(prhs[0]) != 2 ) 30 | { mexErrMsgTxt("anigauss: input array should be of dimension 2"); } 31 | 32 | if (nrhs>=2) { 33 | in = mxGetPr(prhs[0]); 34 | sigmav = mxGetScalar(prhs[1]); 35 | sigmau = sigmav; 36 | } 37 | if (nrhs>=3) 38 | sigmau = mxGetScalar(prhs[2]); 39 | if (nrhs>=4) 40 | phi = mxGetScalar(prhs[3]); 41 | if (nrhs==6) { 42 | orderv = (int)(mxGetScalar(prhs[4])+0.5); 43 | orderu = (int)(mxGetScalar(prhs[5])+0.5); 44 | } 45 | 46 | if ((orderv<0) || (orderu<0)) 47 | { mexErrMsgTxt("anigauss: derivative orders should be positive"); } 48 | 49 | m = mxGetM(prhs[0]); 50 | n = mxGetN(prhs[0]); 51 | 52 | /* pointers to output array */ 53 | 54 | plhs[0]=mxCreateDoubleMatrix(m, n, mxREAL ); 55 | if ( plhs[0] == NULL ) 56 | { mexErrMsgTxt("No more memory for out array"); } 57 | out = (double *)mxGetPr( plhs[0] ); 58 | 59 | anigauss(in, out, m, n, sigmav, sigmau, phi-90.0, orderv, orderu); 60 | } 61 | -------------------------------------------------------------------------------- /selective_search/Dependencies/FelzenSegment/misc.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 
Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | /* random stuff */ 20 | 21 | #ifndef MISC_H 22 | #define MISC_H 23 | 24 | #include 25 | 26 | #ifndef M_PI 27 | #define M_PI 3.141592653589793 28 | #endif 29 | 30 | typedef unsigned char uchar; 31 | 32 | typedef struct { uchar r, g, b; } rgb; 33 | 34 | inline bool operator==(const rgb &a, const rgb &b) { 35 | return ((a.r == b.r) && (a.g == b.g) && (a.b == b.b)); 36 | } 37 | 38 | template 39 | inline T abs(const T &x) { return (x > 0 ? x : -x); }; 40 | 41 | template 42 | inline int sign(const T &x) { return (x >= 0 ? 1 : -1); }; 43 | 44 | template 45 | inline T square(const T &x) { return x*x; }; 46 | 47 | template 48 | inline T bound(const T &x, const T &min, const T &max) { 49 | return (x < min ? min : (x > max ? 
// disjoint-set forests using union-by-rank and path compression.

typedef struct {
  int rank;  // used by join() to decide which root becomes the parent
  int p;     // parent index; an element is a root when p is its own index
  int size;  // number of elements in the set; kept up to date for roots
} uni_elt;

class universe {
public:
  // Creates `elements` singleton sets numbered 0 .. elements-1.
  universe(int elements);
  ~universe();
  // Returns the root of the set containing x.
  int find(int x);
  // Merges the sets rooted at x and y; both must be roots (see find()).
  void join(int x, int y);
  // Size of the set rooted at x (meaningful when x is a root).
  int size(int x) const { return elts[x].size; }
  // Number of disjoint sets currently in the forest.
  int num_sets() const { return num; }

private:
  // The class owns `elts`; copying would delete the array twice, so the
  // copy operations are declared but intentionally never defined.
  universe(const universe &);
  universe &operator=(const universe &);

  uni_elt *elts;
  int num;
};

// The definitions live in a header, so they are marked inline to avoid
// multiple-definition link errors when several files include it.

inline universe::universe(int elements) {
  elts = new uni_elt[elements];
  num = elements;
  for (int i = 0; i < elements; i++) {
    elts[i].rank = 0;
    elts[i].size = 1;
    elts[i].p = i;
  }
}

inline universe::~universe() {
  delete [] elts;
}

inline int universe::find(int x) {
  // First pass: walk up to the root.
  int root = x;
  while (root != elts[root].p)
    root = elts[root].p;
  // Second pass: point every element on the path directly at the root so
  // later lookups are short.  The returned root is unchanged by this.
  while (x != root) {
    int next = elts[x].p;
    elts[x].p = root;
    x = next;
  }
  return root;
}

inline void universe::join(int x, int y) {
  // Joining a set with itself must not change the sizes or the count.
  if (x == y)
    return;
  if (elts[x].rank > elts[y].rank) {
    elts[y].p = x;
    elts[x].size += elts[y].size;
  } else {
    elts[x].p = y;
    elts[y].size += elts[x].size;
    if (elts[x].rank == elts[y].rank)
      elts[y].rank++;
  }
  num--;
}
13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | /* convolution */ 20 | 21 | #ifndef CONVOLVE_H 22 | #define CONVOLVE_H 23 | 24 | #include 25 | #include 26 | #include 27 | #include "image.h" 28 | 29 | /* convolve src with mask. dst is flipped! */ 30 | static void convolve_even(image *src, image *dst, 31 | std::vector &mask) { 32 | int width = src->width(); 33 | int height = src->height(); 34 | int len = mask.size(); 35 | 36 | for (int y = 0; y < height; y++) { 37 | for (int x = 0; x < width; x++) { 38 | float sum = mask[0] * imRef(src, x, y); 39 | for (int i = 1; i < len; i++) { 40 | sum += mask[i] * 41 | (imRef(src, std::max(x-i,0), y) + 42 | imRef(src, std::min(x+i, width-1), y)); 43 | } 44 | imRef(dst, y, x) = sum; 45 | } 46 | } 47 | } 48 | 49 | /* convolve src with mask. dst is flipped! */ 50 | static void convolve_odd(image *src, image *dst, 51 | std::vector &mask) { 52 | int width = src->width(); 53 | int height = src->height(); 54 | int len = mask.size(); 55 | 56 | for (int y = 0; y < height; y++) { 57 | for (int x = 0; x < width; x++) { 58 | float sum = mask[0] * imRef(src, x, y); 59 | for (int i = 1; i < len; i++) { 60 | sum += mask[i] * 61 | (imRef(src, std::max(x-i,0), y) - 62 | imRef(src, std::min(x+i, width-1), y)); 63 | } 64 | imRef(dst, y, x) = sum; 65 | } 66 | } 67 | } 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory 
# Registry mapping a dataset name to a zero-argument callable that builds
# the corresponding imdb.
__sets = {}

import datasets.pascal_voc
import datasets.inria
import numpy as np

def _selective_search_IJCV_top_k(split, year, top_k):
    """Return an imdb that uses the top k proposals from the selective search
    IJCV code.
    """
    imdb = datasets.pascal_voc(split, year)
    imdb.roidb_handler = imdb.selective_search_IJCV_roidb
    imdb.config['top_k'] = top_k
    return imdb

# Set up voc_<year>_<split> using selective search "fast" mode.
# Loop variables are bound as lambda defaults so every entry keeps its own
# split/year instead of the values from the final iteration.
for year in ['2007', '2012']:
    for split in ['train', 'val', 'trainval', 'test']:
        name = 'voc_{}_{}'.format(year, split)
        __sets[name] = (lambda split=split, year=year:
                datasets.pascal_voc(split, year))

# Set up voc_<year>_<split>_top_<k> using selective search "quality" mode
# but only returning the first k boxes
for top_k in np.arange(1000, 11000, 1000):
    for year in ['2007', '2012']:
        for split in ['train', 'val', 'trainval', 'test']:
            name = 'voc_{}_{}_top_{:d}'.format(year, split, top_k)
            __sets[name] = (lambda split=split, year=year, top_k=top_k:
                    _selective_search_IJCV_top_k(split, year, top_k))

# Set up inria_<split>
# NOTE(review): the devkit paths below are machine-specific absolute paths.
inria_devkit_path = '/home/szy/INRIA'
for split in ['train', 'test']:
    name = '{}_{}'.format('inria', split)
    __sets[name] = (lambda split=split: datasets.inria(split, inria_devkit_path))

# Set up towncenter_<split>; it is loaded through the inria dataset class.
towncenter_devkit_path = '/home/szy/TownCenter'
for split in ['test']:
    name = '{}_{}'.format('towncenter', split)
    __sets[name] = (lambda split=split: datasets.inria(split, towncenter_devkit_path))

def get_imdb(name):
    """Get an imdb (image database) by name.

    Raises KeyError if no dataset is registered under `name`.
    """
    # `in` works on Python 2 and 3; dict.has_key() exists only on Python 2.
    if name not in __sets:
        raise KeyError('Unknown dataset: {}'.format(name))
    return __sets[name]()

def list_imdbs():
    """List all registered imdbs."""
    # list() keeps the return type a list on Python 3, where keys() is a view.
    return list(__sets.keys())
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    return a if a >= b else b

cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    return a if a <= b else b

def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
    """Greedy non-maximum suppression.

    Parameters
    ----------
    dets : float32 ndarray
        One detection per row; columns 0-3 are x1, y1, x2, y2 and column 4
        is the score.
    thresh : float
        A box is suppressed when its overlap (intersection over union,
        with +1 added to widths and heights) with an already kept,
        higher-scoring box is >= thresh.

    Returns
    -------
    keep : list of int
        Row indices of the kept detections, in decreasing score order.
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort() yields pointer-sized integers (intp).  Declaring the buffer
    # as int_t (C long) fails with a dtype mismatch on platforms where long
    # is narrower than a pointer.
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # A fixed-width flag array avoids the np.int alias, which NumPy removed.
    cdef np.ndarray[np.uint8_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.uint8)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        # Only boxes that score lower than box i can be suppressed by it.
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            # Clamp at zero so disjoint boxes give an empty intersection.
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
def _str2bool(value):
    """Convert a command-line string into a bool for argparse."""
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value))

def parse_args():
    """
    Parse input arguments

    Reads the options from sys.argv and returns the argparse namespace
    with the attributes gpu_id, prototxt, caffemodel, cfg_file, wait,
    imdb_name and comp_mode.  When the script is started without any
    argument the help text is printed and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use',
                        default=0, type=int)
    parser.add_argument('--def', dest='prototxt',
                        help='prototxt file defining the network',
                        default=None, type=str)
    parser.add_argument('--net', dest='caffemodel',
                        help='model to test',
                        default=None, type=str)
    parser.add_argument('--cfg', dest='cfg_file',
                        help='optional config file', default=None, type=str)
    # type=bool would turn every non-empty string, including "False", into
    # True, so the value is parsed explicitly.
    parser.add_argument('--wait', dest='wait',
                        help='wait until net file exists',
                        default=True, type=_str2bool)
    parser.add_argument('--imdb', dest='imdb_name',
                        help='dataset to test',
                        default='voc_2007_test', type=str)
    parser.add_argument('--comp', dest='comp_mode', help='competition mode',
                        action='store_true')

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args
blobIndIm, blobBoxes, hierarchy, minWidth) 2 | % [abo mabo boScores avgNumSegments] = BlobAverageBestOverlap(gtBlobs, 3 | % gtNrs, blobIndIm, blobBoxes, hierarchy, minWidth) 4 | % 5 | % Calculate Average Best Overlap scores 6 | % 7 | % gtBlobs: Cell array of ground truth segments per class (see 8 | % GetAllObjectBoxes) 9 | % gtNrs: Cell array with image nrs corresponding to ground truth. 10 | % blobIndIm: Image with indices per blob (mexFelzenSegmentIndex) 11 | % blobBoxes: Boxes corresponding to blobs in blobIndIm 12 | % hierarchy: Hierarchy necessary to reconstruct all blobs in grouping 13 | % minWidth: (optional) Filter out blobs with a width smaller than minWidth. 14 | % 15 | % abo: Average Best Overlap per class (Pascal Overlap criterion) 16 | % mabo: Mean Average Best Overlap (mean(abo)) 17 | % boScores: Best Overlap Score per GT segment. 18 | % avgNumBlobs: Average number of blobs per image 19 | % 20 | % Jasper Uijlings - 2013 21 | 22 | if ~exist('minWidth', 'var') 23 | minWidth = 0; 24 | end 25 | 26 | nClasses = length(gtBlobs); 27 | 28 | % Memory initialization 29 | numSegments = zeros(length(blobIndIm), 1); 30 | boScores = cell(1, nClasses); 31 | for cI = 1:nClasses 32 | boScores{cI} = length(gtBlobs{cI}); 33 | end 34 | 35 | % indices per class 36 | classIdx = ones(1, nClasses); 37 | 38 | for cI=1:length(gtBlobs) 39 | for i=1:length(gtBlobs{cI}) 40 | testImNr = gtNrs{cI}(i); 41 | 42 | % the hierarchy here contains possibly multiple groupings with 43 | % different initial measures 44 | testBlobsT = cell(length(hierarchy{testImNr}), 1); 45 | testBlobsT{1} = RecreateBlobHierarchyIndIm(blobIndIm{testImNr}, blobBoxes{testImNr}, hierarchy{testImNr}{1}); 46 | for j=2:length(hierarchy{testImNr}) % Without initial blobs here 47 | [aa bb testBlobsT{j}] = RecreateBlobHierarchyIndIm(blobIndIm{testImNr}, blobBoxes{testImNr}, hierarchy{testImNr}{j}); 48 | end 49 | testBlobs = cat(1, testBlobsT{:}); 50 | 51 | % Get rid of too small blobs 52 | testBlobs = 
FilterBlobsWidth(testBlobs, minWidth); 53 | numSegments(testImNr) = length(testBlobs); 54 | 55 | % Calculate overlap scores 56 | boScores{cI}(classIdx(cI)) = BlobBestOverlap(testBlobs, gtBlobs{cI}(i)); 57 | 58 | classIdx(cI) = classIdx(cI) + 1; 59 | end 60 | end 61 | 62 | abo = zeros(nClasses, 1); 63 | 64 | for cI = 1:nClasses 65 | abo(cI) = mean(boScores{cI}); 66 | end 67 | 68 | mabo = mean(abo); 69 | 70 | % Average of numSegments. Make sure that only images for which the 71 | % numSegments are actually calculated are taken into account. 72 | avgNumSegments = mean(numSegments(numSegments > 0)); 73 | -------------------------------------------------------------------------------- /selective_search/License.txt: -------------------------------------------------------------------------------- 1 | Copyright University of Amsterdam. All rights reserved. 2 | 3 | Contact persons: 4 | Jasper Uijlings (jrr disi.unitn.it) 5 | Koen van de Sande (ksande uva.nl) 6 | 7 | This software is being made available for individual research use only. 8 | Any commercial use or redistribution of this software requires a license from 9 | the University of Amsterdam. 10 | 11 | You may use this work subject to the following conditions: 12 | 13 | 1. This work is provided "as is" by the copyright holder, with 14 | absolutely no warranties of correctness, fitness, intellectual property 15 | ownership, or anything else whatsoever. You use the work 16 | entirely at your own risk. The copyright holder will not be liable for 17 | any legal damages whatsoever connected with the use of this work. 18 | 19 | 2. The copyright holder retain all copyright to the work. All copies of 20 | the work and all works derived from it must contain (1) this copyright 21 | notice, and (2) additional notices describing the content, dates and 22 | copyright holder of modifications or additions made to the work, if 23 | any, including distribution and use conditions and intellectual property 24 | claims. 
Derived works must be clearly distinguished from the original 25 | work, both by name and by the prominent inclusion of explicit 26 | descriptions of overlaps and differences. 27 | 28 | 3. The names and trademarks of the copyright holder may not be used in 29 | advertising or publicity related to this work without specific prior 30 | written permission. 31 | 32 | 4. In return for the free use of this work, you are requested, but not 33 | legally required, to do the following: 34 | 35 | * If you become aware of factors that may significantly affect other 36 | users of the work, for example major bugs or 37 | deficiencies or possible intellectual property issues, you are 38 | requested to report them to the copyright holder, if possible 39 | including redistributable fixes or workarounds. 40 | 41 | * If you use the work in scientific research or as part of a larger 42 | software system, you are requested to cite the use in any related 43 | publications or technical documentation. The work is based upon: 44 | 45 | J.R.R. Uijlings, K.E.A. van de Sande, T. Gevers, and A.W.M. Smeulders. 46 | Selective Search for Object Recognition 47 | IJCV, 2013. 48 | 49 | and uses 50 | 51 | J. M. Geusebroek, A. W. M. Smeulders, and J. van de Weijer. 52 | Fast anisotropic gauss filtering. IEEE Trans. Image Processing, 53 | vol. 12, no. 8, pp. 938-943, 2003. 54 | 55 | P. Felzenszwalb and D. Huttenlocher. 56 | Efficient graph-based image segmentation, 57 | International Journal of Computer Vision, 2004. 58 | 59 | 60 | This copyright notice must be retained with all copies of the software, 61 | including any modified or derived versions. 
62 | 63 | -------------------------------------------------------------------------------- /selective_search/Dependencies/FelzenSegment/image.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | /* a simple image class */ 20 | 21 | #ifndef IMAGE_H 22 | #define IMAGE_H 23 | 24 | #include <cstring> 25 | 26 | template <class T> 27 | class image { 28 | public: 29 | /* create an image */ 30 | image(const int width, const int height, const bool init = true); 31 | 32 | /* delete an image */ 33 | ~image(); 34 | 35 | /* init an image */ 36 | void init(const T &val); 37 | 38 | /* copy an image */ 39 | image<T> *copy() const; 40 | 41 | /* get the width of an image. */ 42 | int width() const { return w; } 43 | 44 | /* get the height of an image. */ 45 | int height() const { return h; } 46 | 47 | /* image data. */ 48 | T *data; 49 | 50 | /* row pointers. */ 51 | T **access; 52 | 53 | private: 54 | int w, h; 55 | }; 56 | 57 | /* use imRef to access image data. */ 58 | #define imRef(im, x, y) (im->access[y][x]) 59 | 60 | /* use imPtr to get pointer to image data. 
*/ 61 | #define imPtr(im, x, y) &(im->access[y][x]) 62 | 63 | template <class T> 64 | image<T>::image(const int width, const int height, const bool init) { 65 | w = width; 66 | h = height; 67 | data = new T[w * h]; // allocate space for image data 68 | access = new T*[h]; // allocate space for row pointers 69 | 70 | // initialize row pointers 71 | for (int i = 0; i < h; i++) 72 | access[i] = data + (i * w); 73 | 74 | if (init) 75 | memset(data, 0, w * h * sizeof(T)); 76 | } 77 | 78 | template <class T> 79 | image<T>::~image() { 80 | delete [] data; 81 | delete [] access; 82 | } 83 | 84 | template <class T> 85 | void image<T>::init(const T &val) { 86 | T *ptr = imPtr(this, 0, 0); 87 | T *end = imPtr(this, w-1, h-1); 88 | while (ptr <= end) 89 | *ptr++ = val; 90 | } 91 | 92 | 93 | template <class T> 94 | image<T> *image<T>::copy() const { 95 | image<T> *im = new image<T>(w, h, false); 96 | memcpy(im->data, data, w * h * sizeof(T)); 97 | return im; 98 | } 99 | 100 | #endif 101 | 102 | -------------------------------------------------------------------------------- /tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Reval = re-eval. 
Re-evaluate saved detections.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.test import apply_nms 14 | from fast_rcnn.config import cfg 15 | from datasets.factory import get_imdb 16 | import cPickle 17 | import os, sys, argparse 18 | import numpy as np 19 | 20 | def parse_args(): 21 | """ 22 | Parse input arguments 23 | """ 24 | parser = argparse.ArgumentParser(description='Re-evaluate results') 25 | parser.add_argument('output_dir', nargs=1, help='results directory', 26 | type=str) 27 | parser.add_argument('--rerun', dest='rerun', 28 | help=('re-run evaluation code ' 29 | '(otherwise: results are loaded from file)'), 30 | action='store_true') 31 | parser.add_argument('--imdb', dest='imdb_name', 32 | help='dataset to re-evaluate', 33 | default='voc_2007_test', type=str) 34 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 35 | action='store_true') 36 | 37 | if len(sys.argv) == 1: 38 | parser.print_help() 39 | sys.exit(1) 40 | 41 | args = parser.parse_args() 42 | return args 43 | 44 | 45 | def from_mats(imdb_name, output_dir): 46 | import scipy.io as sio 47 | 48 | imdb = get_imdb(imdb_name) 49 | 50 | aps = [] 51 | for i, cls in enumerate(imdb.classes[1:]): 52 | mat = sio.loadmat(os.path.join(output_dir, cls + '_pr.mat')) 53 | ap = mat['ap'][0, 0] * 100 54 | apAuC = mat['ap_auc'][0, 0] * 100 55 | print '!!! 
{} : {:.1f} {:.1f}'.format(cls, ap, apAuC) 56 | aps.append(ap) 57 | 58 | print '~~~~~~~~~~~~~~~~~~~' 59 | print 'Results (from mat files):' 60 | for ap in aps: 61 | print '{:.1f}'.format(ap) 62 | print '{:.1f}'.format(np.array(aps).mean()) 63 | print '~~~~~~~~~~~~~~~~~~~' 64 | 65 | 66 | def from_dets(imdb_name, output_dir, comp_mode): 67 | imdb = get_imdb(imdb_name) 68 | imdb.competition_mode(comp_mode) 69 | with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f: 70 | dets = cPickle.load(f) 71 | 72 | print 'Applying NMS to all detections' 73 | nms_dets = apply_nms(dets, cfg.TEST.NMS) 74 | 75 | print 'Evaluating detections' 76 | imdb.evaluate_detections(nms_dets, output_dir) 77 | 78 | if __name__ == '__main__': 79 | args = parse_args() 80 | 81 | output_dir = os.path.abspath(args.output_dir[0]) 82 | imdb_name = args.imdb_name 83 | 84 | if args.comp_mode and not args.rerun: 85 | raise ValueError('--rerun must be used with --comp') 86 | 87 | if args.rerun: 88 | from_dets(imdb_name, output_dir, args.comp_mode) 89 | else: 90 | from_mats(imdb_name, output_dir) 91 | -------------------------------------------------------------------------------- /selective_search/demo.m: -------------------------------------------------------------------------------- 1 | % This demo shows how to use the software described in our IJCV paper: 2 | % Selective Search for Object Recognition, 3 | % J.R.R. Uijlings, K.E.A. van de Sande, T. Gevers, A.W.M. Smeulders, IJCV 2013 4 | %% 5 | addpath('Dependencies'); 6 | 7 | fprintf('Demo of how to run the code for:\n'); 8 | fprintf(' J. Uijlings, K. van de Sande, T. Gevers, A. Smeulders\n'); 9 | fprintf(' Segmentation as Selective Search for Object Recognition\n'); 10 | fprintf(' IJCV 2013\n\n'); 11 | 12 | % Compile anisotropic gaussian filter 13 | if(~exist('anigauss')) 14 | fprintf('Compiling the anisotropic gauss filtering of:\n'); 15 | fprintf(' J. Geusebroek, A. Smeulders, and J. 
van de Weijer\n'); 16 | fprintf(' Fast anisotropic gauss filtering\n'); 17 | fprintf(' IEEE Transactions on Image Processing, 2003\n'); 18 | fprintf('Source code/Project page:\n'); 19 | fprintf(' http://staff.science.uva.nl/~mark/downloads.html#anigauss\n\n'); 20 | mex Dependencies/anigaussm/anigauss_mex.c Dependencies/anigaussm/anigauss.c -output anigauss 21 | end 22 | 23 | if(~exist('mexCountWordsIndex')) 24 | mex Dependencies/mexCountWordsIndex.cpp 25 | end 26 | 27 | % Compile the code of Felzenszwalb and Huttenlocher, IJCV 2004. 28 | if(~exist('mexFelzenSegmentIndex')) 29 | fprintf('Compiling the segmentation algorithm of:\n'); 30 | fprintf(' P. Felzenszwalb and D. Huttenlocher\n'); 31 | fprintf(' Efficient Graph-Based Image Segmentation\n'); 32 | fprintf(' International Journal of Computer Vision, 2004\n'); 33 | fprintf('Source code/Project page:\n'); 34 | fprintf(' http://www.cs.brown.edu/~pff/segment/\n'); 35 | fprintf('Note: A small Matlab wrapper was made.\n'); 36 | % fprintf(' 37 | mex Dependencies/FelzenSegment/mexFelzenSegmentIndex.cpp -output mexFelzenSegmentIndex; 38 | end 39 | 40 | %% 41 | % Parameters. Note that this controls the number of hierarchical 42 | % segmentations which are combined. 43 | colorTypes = {'Hsv', 'Lab', 'RGI', 'H', 'Intensity'}; 44 | colorType = colorTypes{1}; % Single color space for demo 45 | 46 | % Here you specify which similarity functions to use in merging 47 | simFunctionHandles = {@SSSimColourTextureSizeFillOrig, @SSSimTextureSizeFill, @SSSimBoxFillOrig, @SSSimSize}; 48 | simFunctionHandles = simFunctionHandles(1:2); % Two different merging strategies 49 | 50 | % Thresholds for the Felzenszwalb and Huttenlocher segmentation algorithm. 51 | % Note that by default, we set minSize = k, and sigma = 0.8. 52 | k = 200; % controls size of segments of initial segmentation. 
53 | minSize = k; 54 | sigma = 0.8; 55 | 56 | % As an example, use a single image 57 | images = {'000015.jpg'}; 58 | im = imread(images{1}); 59 | 60 | % Perform Selective Search 61 | [boxes blobIndIm blobBoxes hierarchy] = Image2HierarchicalGrouping(im, sigma, k, minSize, colorType, simFunctionHandles); 62 | boxes = BoxRemoveDuplicates(boxes); 63 | 64 | % Show boxes 65 | ShowRectsWithinImage(boxes, 5, 5, im); 66 | 67 | % Show blobs which result from first similarity function 68 | hBlobs = RecreateBlobHierarchyIndIm(blobIndIm, blobBoxes, hierarchy{1}); 69 | ShowBlobs(hBlobs, 5, 5, im); -------------------------------------------------------------------------------- /tools/train_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Train a Fast R-CNN network on a region of interest database.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.train import get_training_roidb, train_net 14 | from fast_rcnn.config import cfg, cfg_from_file, get_output_dir 15 | from datasets.factory import get_imdb 16 | import caffe 17 | import argparse 18 | import pprint 19 | import numpy as np 20 | import sys 21 | 22 | def parse_args(): 23 | """ 24 | Parse input arguments 25 | """ 26 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 27 | parser.add_argument('--gpu', dest='gpu_id', 28 | help='GPU device id to use [0]', 29 | default=0, type=int) 30 | parser.add_argument('--solver', dest='solver', 31 | help='solver prototxt', 32 | default=None, type=str) 33 | parser.add_argument('--iters', dest='max_iters', 34 | help='number of iterations to train', 35 | default=40000, type=int) 36 | parser.add_argument('--weights', 
dest='pretrained_model', 37 | help='initialize with pretrained model weights', 38 | default=None, type=str) 39 | parser.add_argument('--cfg', dest='cfg_file', 40 | help='optional config file', 41 | default=None, type=str) 42 | parser.add_argument('--imdb', dest='imdb_name', 43 | help='dataset to train on', 44 | default='voc_2007_trainval', type=str) 45 | parser.add_argument('--rand', dest='randomize', 46 | help='randomize (do not use a fixed seed)', 47 | action='store_true') 48 | 49 | if len(sys.argv) == 1: 50 | parser.print_help() 51 | sys.exit(1) 52 | 53 | args = parser.parse_args() 54 | return args 55 | 56 | if __name__ == '__main__': 57 | args = parse_args() 58 | 59 | print('Called with args:') 60 | print(args) 61 | 62 | if args.cfg_file is not None: 63 | cfg_from_file(args.cfg_file) 64 | 65 | print('Using config:') 66 | pprint.pprint(cfg) 67 | 68 | if not args.randomize: 69 | # fix the random seeds (numpy and caffe) for reproducibility 70 | np.random.seed(cfg.RNG_SEED) 71 | caffe.set_random_seed(cfg.RNG_SEED) 72 | 73 | # set up caffe 74 | caffe.set_mode_gpu() 75 | if args.gpu_id is not None: 76 | caffe.set_device(args.gpu_id) 77 | 78 | imdb = get_imdb(args.imdb_name) 79 | print 'Loaded dataset `{:s}` for training'.format(imdb.name) 80 | roidb = get_training_roidb(imdb) 81 | 82 | output_dir = get_output_dir(imdb, None) 83 | print 'Output will be saved to `{:s}`'.format(output_dir) 84 | 85 | train_net(args.solver, roidb, output_dir, 86 | pretrained_model=args.pretrained_model, 87 | max_iters=args.max_iters) 88 | -------------------------------------------------------------------------------- /selective_search/selective_search_rcnn.m: -------------------------------------------------------------------------------- 1 | function all_boxes = selective_search_rcnn(image_filenames, output_filename) 2 | 3 | % Based on the demo.m file included in the Selective Search 4 | % IJCV code, and on selective_search_boxes.m from R-CNN. 
5 | 6 | % Load dependencies and compile if needed. 7 | 8 | addpath('Dependencies'); 9 | 10 | if(~exist('anigauss')) 11 | mex Dependencies/anigaussm/anigauss_mex.c Dependencies/anigaussm/anigauss.c -output anigauss 12 | end 13 | 14 | if(~exist('mexCountWordsIndex')) 15 | mex Dependencies/mexCountWordsIndex.cpp 16 | end 17 | 18 | if(~exist('mexFelzenSegmentIndex')) 19 | mex Dependencies/FelzenSegment/mexFelzenSegmentIndex.cpp -output mexFelzenSegmentIndex; 20 | end 21 | 22 | % Configure 23 | im_width = 500; 24 | 25 | % Parameters. Note that this controls the number of hierarchical 26 | % segmentations which are combined. 27 | colorTypes = {'Hsv', 'Lab', 'RGI', 'H', 'Intensity'}; 28 | 29 | % Here you specify which similarity functions to use in merging 30 | simFunctionHandles = {@SSSimColourTextureSizeFillOrig, ... 31 | @SSSimTextureSizeFill, ... 32 | @SSSimBoxFillOrig, ... 33 | @SSSimSize}; 34 | 35 | % Thresholds for the Felzenszwalb and Huttenlocher segmentation algorithm. 36 | % Note that by default, we set minSize = k, and sigma = 0.8. 37 | % controls size of segments of initial segmentation. 38 | ks = [50 100 150 300]; 39 | sigma = 0.8; 40 | 41 | % After segmentation, filter out boxes which have a width/height smaller 42 | % than minBoxWidth (default = 20 pixels). 43 | minBoxWidth = 20; 44 | 45 | % Comment the following three lines for the 'quality' version 46 | colorTypes = colorTypes(1:2); % 'Fast' uses HSV and Lab 47 | simFunctionHandles = simFunctionHandles(1:2); % Two different merging strategies 48 | ks = ks(1:2); 49 | 50 | % Process all images. 51 | all_boxes = {}; 52 | for i=1:length(image_filenames) 53 | im = imread(image_filenames{i}); 54 | % Resize image to canonical dimensions since proposals aren't scale invariant. 
55 | scale = size(im, 2) / im_width; 56 | im = imresize(im, [NaN im_width]); 57 | 58 | idx = 1; 59 | for j = 1:length(ks) 60 | k = ks(j); % Segmentation threshold k 61 | minSize = k; % We set minSize = k 62 | for n = 1:length(colorTypes) 63 | colorType = colorTypes{n}; 64 | [boxesT{idx} blobIndIm blobBoxes hierarchy priorityT{idx}] = ... 65 | Image2HierarchicalGrouping(im, sigma, k, minSize, colorType, simFunctionHandles); 66 | idx = idx + 1; 67 | end 68 | end 69 | boxes = cat(1, boxesT{:}); % Concatenate boxes from all hierarchies 70 | priority = cat(1, priorityT{:}); % Concatenate priorities 71 | 72 | % Do pseudo random sorting as in paper 73 | priority = priority .* rand(size(priority)); 74 | [priority sortIds] = sort(priority, 'ascend'); 75 | boxes = boxes(sortIds,:); 76 | 77 | boxes = FilterBoxesWidth(boxes, minBoxWidth); 78 | boxes = BoxRemoveDuplicates(boxes); 79 | 80 | % Adjust boxes to cancel effect of canonical scaling. 81 | boxes = (boxes - 1) * scale + 1; 82 | 83 | boxes = FilterBoxesWidth(boxes, minBoxWidth); 84 | boxes = BoxRemoveDuplicates(boxes); 85 | all_boxes{i} = boxes; 86 | end 87 | 88 | if nargin > 1 89 | all_boxes 90 | save(output_filename, 'all_boxes', '-v7'); 91 | end 92 | -------------------------------------------------------------------------------- /help/test/README.md: -------------------------------------------------------------------------------- 1 | # Test Fast-RCNN on Another Dataset 2 | 3 | We will illustrate how to test Fast-RCNN on another dataset in the following steps, and we will take **INRIA Person** as the example dataset. 4 | 5 | ### Format Your Dataset 6 | 7 | First, the dataset must be organized in the required format. 
8 | ``` 9 | INRIA 10 | |-- data 11 | |-- Annotations 12 | |-- *.txt (Annotation files) 13 | |-- Images 14 | |-- *.png (Image files) 15 | |-- ImageSets 16 | |-- test.txt 17 | |-- results 18 | |-- test (empty before test) 19 | |-- VOCcode (optional) 20 | ``` 21 | 22 | The `test.txt` file contains the names (without extensions) of all image files that will be used for testing. For example, here are a few lines from `test.txt`. 23 | 24 | ``` 25 | crop_000001 26 | crop_000002 27 | crop_000003 28 | crop_000004 29 | crop_000005 30 | ``` 31 | 32 | ### Construct IMDB 33 | 34 | See https://github.com/EdisonResearch/fast-rcnn/tree/master/help/train. 35 | 36 | You do not actually need to implement `_load_inria_annotation` yourself; you can just use `inria.py` to construct the IMDB for your own dataset. For example, to test on a dataset named **TownCenter**, just add the following to `factory.py`. 37 | 38 | ```python 39 | towncenter_devkit_path = '/home/szy/TownCenter' 40 | for split in ['test']: 41 | name = '{}_{}'.format('towncenter', split) 42 | __sets[name] = (lambda split=split: datasets.inria(split, towncenter_devkit_path)) 43 | ``` 44 | 45 | ### Run Selective Search 46 | 47 | See https://github.com/EdisonResearch/fast-rcnn/tree/master/help/train. 48 | 49 | Note that it should be `test.mat` rather than `train.mat`. 50 | 51 | ### Modify Prototxt 52 | 53 | For example, if you want to use the model **VGG_CNN_M_1024**, then you should modify `test.prototxt` in `$FRCNN_ROOT/models/VGG_CNN_M_1024`; the change mainly concerns the number of classes. Let's assume that the number of classes is `C` (do not forget to count the `background` class). Then you should: 54 | - Modify `num_output` in the `cls_score` layer to `C` 55 | - Modify `num_output` in the `bbox_pred` layer to `4 * C` 56 | 57 | See https://github.com/rbgirshick/fast-rcnn/issues/11 for more details. 
58 | 59 | ### Prepare Your Evaluation Code 60 | 61 | The original **Fast-RCNN** framework uses MATLAB wrappers to evaluate the results. As the evaluation process is not very difficult, you could modify the function `evaluate_detections` in `inria.py`. 62 | 63 | As **INRIA Person** provides some MATLAB files in the **PASCAL-VOC** format, you could adapt them slightly and use them directly. See https://github.com/EdisonResearch/fast-rcnn/tree/master/help/INRIA/VOCcode for the VOCcode. 64 | 65 | If you do not want to use the evaluation function in the **Fast-RCNN** framework, you can find the results in the directory `results/test` in the root directory of your dataset. 66 | 67 | ### Test! 68 | 69 | In the directory **$FRCNN_ROOT**, run the following command in the shell. 70 | 71 | ```sh 72 | ./tools/test_net.py --gpu 1 --def models/VGG_CNN_M_1024/test.prototxt \ 73 | --net output/default/train/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel --imdb inria_test 74 | ``` 75 | 76 | Be careful with the **imdb** argument, as it specifies the dataset you will test on. 77 | 78 | ### References 79 | 80 | [Fast-RCNN] https://github.com/rbgirshick/fast-rcnn 81 | 82 | ### Contact 83 | 84 | Feel free to contact me at . 
-------------------------------------------------------------------------------- /help/INRIA/VOCcode/PASreadrecord.m: -------------------------------------------------------------------------------- 1 | function record=PASreadrecord(filename) 2 | [fd,syserrmsg]=fopen(filename,'rt'); 3 | if (fd==-1), 4 | PASmsg=sprintf('Could not open %s for reading',filename); 5 | PASerrmsg(PASmsg,syserrmsg); 6 | end; 7 | 8 | matchstrs=initstrings; 9 | record=PASemptyrecord; 10 | notEOF=1; 11 | while (notEOF), 12 | line=fgetl(fd); 13 | notEOF=ischar(line); 14 | if (notEOF), 15 | matchnum=match(line,matchstrs); 16 | switch matchnum, 17 | case 1, [imgname]=strread(line,matchstrs(matchnum).str); 18 | record.imgname=char(imgname); 19 | case 2, [x,y,c]=strread(line,matchstrs(matchnum).str); 20 | record.imgsize=[x y c]; 21 | case 3, [database]=strread(line,matchstrs(matchnum).str); 22 | record.database=char(database); 23 | case 4, [obj,lbl,xmin,ymin,xmax,ymax]=strread(line,matchstrs(matchnum).str); 24 | record.objects(obj).label=char(lbl); 25 | record.objects(obj).bbox=[min(xmin,xmax),min(ymin,ymax),max(xmin,xmax),max(ymin,ymax)]; 26 | case 5, tmp=findstr(line,' : '); 27 | [obj,lbl]=strread(line(1:tmp),matchstrs(matchnum).str); 28 | record.objects(obj).label=char(lbl); 29 | record.objects(obj).polygon=sscanf(line(tmp+3:end),'(%d, %d) ')'; 30 | case 6, [obj,lbl,mask]=strread(line,matchstrs(matchnum).str); 31 | record.objects(obj).label=char(lbl); 32 | record.objects(obj).mask=char(mask); 33 | case 7, [obj,lbl,orglbl]=strread(line,matchstrs(matchnum).str); 34 | lbl=char(lbl); 35 | record.objects(obj).label=lbl; 36 | record.objects(obj).orglabel=char(orglbl); 37 | if strcmp(lbl(max(end-8,1):end),'Difficult') 38 | record.objects(obj).difficult=true; 39 | lbl(end-8:end)=[]; 40 | else 41 | record.objects(obj).difficult=false; 42 | end 43 | if strcmp(lbl(max(end-4,1):end),'Trunc') 44 | record.objects(obj).truncated=true; 45 | lbl(end-4:end)=[]; 46 | else 47 | 
record.objects(obj).truncated=false; 48 | end 49 | t=find(lbl>='A'&lbl<='Z'); 50 | t=t(t>=4); 51 | if ~isempty(t) 52 | record.objects(obj).view=lbl(t(1):end); 53 | lbl(t(1):end)=[]; 54 | else 55 | record.objects(obj).view=''; 56 | end 57 | record.objects(obj).class=lbl(4:end); 58 | 59 | otherwise, %fprintf('Skipping: %s\n',line); 60 | end; 61 | end; 62 | end; 63 | fclose(fd); 64 | return 65 | 66 | function matchnum=match(line,matchstrs) 67 | for i=1:length(matchstrs), 68 | matched(i)=strncmp(line,matchstrs(i).str,matchstrs(i).matchlen); 69 | end; 70 | matchnum=find(matched); 71 | if isempty(matchnum), matchnum=0; end; 72 | if (length(matchnum)~=1), 73 | PASerrmsg('Multiple matches while parsing',''); 74 | end; 75 | return 76 | 77 | function s=initstrings 78 | s(1).matchlen=14; 79 | s(1).str='Image filename : %q'; 80 | 81 | s(2).matchlen=10; 82 | s(2).str='Image size (X x Y x C) : %d x %d x %d'; 83 | 84 | s(3).matchlen=8; 85 | s(3).str='Database : %q'; 86 | 87 | s(4).matchlen=8; 88 | s(4).str='Bounding box for object %d %q (Xmin, Ymin) - (Xmax, Ymax) : (%d, %d) - (%d, %d)'; 89 | 90 | s(5).matchlen=7; 91 | s(5).str='Polygon for object %d %q (X, Y)'; 92 | 93 | s(6).matchlen=5; 94 | s(6).str='Pixel mask for object %d %q : %q'; 95 | 96 | s(7).matchlen=8; 97 | s(7).str='Original label for object %d %q : %q'; 98 | 99 | return -------------------------------------------------------------------------------- /selective_search/Dependencies/FelzenSegment/filter.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 
8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | /* simple filters */ 20 | 21 | #ifndef FILTER_H 22 | #define FILTER_H 23 | 24 | #include <vector> 25 | #include <cmath> 26 | #include "image.h" 27 | #include "misc.h" 28 | #include "convolve.h" 29 | #include "imconv.h" 30 | 31 | #define WIDTH 4.0 32 | 33 | /* normalize mask so it integrates to one */ 34 | static void normalize(std::vector<float> &mask) { 35 | int len = mask.size(); 36 | float sum = 0; 37 | for (int i = 1; i < len; i++) { 38 | sum += fabs(mask[i]); 39 | } 40 | sum = 2*sum + fabs(mask[0]); 41 | for (int i = 0; i < len; i++) { 42 | mask[i] /= sum; 43 | } 44 | } 45 | 46 | /* make filters */ 47 | #define MAKE_FILTER(name, fun) \ 48 | static std::vector<float> make_ ## name (float sigma) { \ 49 | sigma = std::max(sigma, 0.01F); \ 50 | int len = (int)ceil(sigma * WIDTH) + 1; \ 51 | std::vector<float> mask(len); \ 52 | for (int i = 0; i < len; i++) { \ 53 | mask[i] = fun; \ 54 | } \ 55 | return mask; \ 56 | } 57 | 58 | MAKE_FILTER(fgauss, exp(-0.5*square(i/sigma))); 59 | 60 | /* convolve image with gaussian filter */ 61 | static image<float> *smooth(image<float> *src, float sigma) { 62 | std::vector<float> mask = make_fgauss(sigma); 63 | normalize(mask); 64 | 65 | image<float> *tmp = new image<float>(src->height(), src->width(), false); 66 | image<float> *dst = new image<float>(src->width(), src->height(), false); 67 | convolve_even(src, tmp, mask); 68 | convolve_even(tmp, dst, mask); 69 | 70 | delete tmp; 71 | return dst; 72 | } 73 | 74 | /* convolve image with gaussian filter */ 75 | image<float> *smooth(image<uchar> *src, float sigma) { 76 | image<float> *tmp = 
imageUCHARtoFLOAT(src); 77 | image<float> *dst = smooth(tmp, sigma); 78 | delete tmp; 79 | return dst; 80 | } 81 | 82 | /* compute laplacian */ 83 | static image<float> *laplacian(image<float> *src) { 84 | int width = src->width(); 85 | int height = src->height(); 86 | image<float> *dst = new image<float>(width, height); 87 | 88 | for (int y = 1; y < height-1; y++) { 89 | for (int x = 1; x < width-1; x++) { 90 | float d2x = imRef(src, x-1, y) + imRef(src, x+1, y) - 91 | 2*imRef(src, x, y); 92 | float d2y = imRef(src, x, y-1) + imRef(src, x, y+1) - 93 | 2*imRef(src, x, y); 94 | imRef(dst, x, y) = d2x + d2y; 95 | } 96 | } 97 | return dst; 98 | } 99 | 100 | #endif 101 | -------------------------------------------------------------------------------- /help/INRIA/VOCcode/VOCpr.m: -------------------------------------------------------------------------------- 1 | function [rec,prec,ap] = VOCpr(VOCopts,id,cls,draw) 2 | 3 | % load test set 4 | [gtids,t]=textread(sprintf(VOCopts.imgsetpath,VOCopts.testset),'%s %d'); 5 | 6 | % load ground truth objects 7 | tic; 8 | npos=0; 9 | for i=1:length(gtids) 10 | % display progress 11 | if toc>1 12 | fprintf('%s: pr: load: %d/%d\n',cls,i,length(gtids)); 13 | drawnow; 14 | tic; 15 | end 16 | 17 | % read annotation 18 | rec=PASreadrecord(sprintf(VOCopts.annopath,gtids{i})); 19 | 20 | % extract objects of class 21 | clsinds=strmatch(cls,{rec.objects(:).class},'exact'); 22 | gt(i).BB=cat(1,rec.objects(clsinds).bbox)'; 23 | gt(i).diff=[rec.objects(clsinds).difficult]; 24 | gt(i).det=false(length(clsinds),1); 25 | npos=npos+sum(~gt(i).diff); 26 | end 27 | 28 | % load results 29 | [ids,confidence,b1,b2,b3,b4]=textread(sprintf(VOCopts.detrespath,id,cls),'%s %f %f %f %f %f'); 30 | BB=[b1 b2 b3 b4]'; 31 | 32 | % sort detections by decreasing confidence 33 | [sc,si]=sort(-confidence); 34 | ids=ids(si); 35 | BB=BB(:,si); 36 | 37 | % assign detections to ground truth objects 38 | nd=length(confidence); 39 | tp=zeros(nd,1); 40 | fp=zeros(nd,1); 41 | tic; 42 | for d=1:nd 43 | % 
function [rec,prec,ap] = VOCpr(VOCopts,id,cls,draw)
% Compute the precision/recall curve and 11-point average precision of the
% detections stored for class CLS.
%
%   VOCopts  options struct; this function reads the fields imgsetpath,
%            testset, annopath, detrespath and minoverlap
%   id       identifier substituted into VOCopts.detrespath
%   cls      name of the class to evaluate
%   draw     if true, plot the precision/recall curve (default: false)
%
%   rec,prec cumulative recall and precision after each detection, with
%            detections sorted by decreasing confidence
%   ap       mean over t = 0:0.1:1 of the maximum precision at recall >= t
%
% NOTE(review): this file is VOCevaldet.m but the function is declared as
% VOCpr; MATLAB dispatches on the file name -- confirm which is intended.

if nargin<4
    draw=false;
end

% load test set
[gtids,t]=textread(sprintf(VOCopts.imgsetpath,VOCopts.testset),'%s %d');

% load ground truth objects
tic;
npos=0;
for i=1:length(gtids)
    % display progress
    if toc>1
        fprintf('%s: pr: load: %d/%d\n',cls,i,length(gtids));
        drawnow;
        tic;
    end

    % read annotation
    anno=PASreadrecord(sprintf(VOCopts.annopath,gtids{i}));

    % extract objects of class
    clsinds=strmatch(cls,{anno.objects(:).class},'exact');
    gt(i).BB=cat(1,anno.objects(clsinds).bbox)';
    gt(i).diff=[anno.objects(clsinds).difficult];
    gt(i).det=false(length(clsinds),1);
    % objects flagged difficult do not count as positives
    npos=npos+sum(~gt(i).diff);
end

% load results (the path is built once and no longer echoed to the console)
detfile=sprintf(VOCopts.detrespath,id,cls);
[ids,confidence,b1,b2,b3,b4]=textread(detfile,'%s %f %f %f %f %f');
BB=[b1 b2 b3 b4]';

% sort detections by decreasing confidence
[sc,si]=sort(-confidence);
ids=ids(si);
BB=BB(:,si);

% assign detections to ground truth objects
nd=length(confidence);
tp=zeros(nd,1);
fp=zeros(nd,1);
tic;
for d=1:nd
    % display progress
    if toc>1
        fprintf('%s: pr: compute: %d/%d\n',cls,d,nd);
        drawnow;
        tic;
    end

    % find ground truth image
    i=strmatch(ids{d},gtids,'exact');
    if isempty(i)
        error('unrecognized image "%s"',ids{d});
    elseif length(i)>1
        error('multiple image "%s"',ids{d});
    end

    % assign detection to ground truth object if any
    bb=BB(:,d);
    ovmax=-inf;
    for j=1:size(gt(i).BB,2)
        bbgt=gt(i).BB(:,j);
        bi=[max(bb(1),bbgt(1)) ; max(bb(2),bbgt(2)) ; min(bb(3),bbgt(3)) ; min(bb(4),bbgt(4))];
        iw=bi(3)-bi(1)+1;
        ih=bi(4)-bi(2)+1;
        % scalar test, so use the short-circuit operator
        if iw>0 && ih>0
            % compute overlap as area of intersection / area of union
            ua=(bb(3)-bb(1)+1)*(bb(4)-bb(2)+1)+...
               (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1)-...
               iw*ih;
            ov=iw*ih/ua;
            if ov>ovmax
                ovmax=ov;
                jmax=j;
            end
        end
    end
    % assign detection as true positive/don't care/false positive
    if ovmax>=VOCopts.minoverlap
        if ~gt(i).det(jmax)
            if ~gt(i).diff(jmax)
                tp(d)=1;            % true positive
            end
            % a first match on a difficult object is neither tp nor fp
            gt(i).det(jmax)=true;
        else
            fp(d)=1;                % false positive (multiple detection)
        end
    else
        fp(d)=1;                    % false positive
    end
end

% compute precision/recall
fp=cumsum(fp);
tp=cumsum(tp);
rec=tp/npos;
prec=tp./(fp+tp);

% compute average precision

ap=0;
for t=0:0.1:1
    p=max(prec(rec>=t));
    % no detection reaches this recall level
    if isempty(p)
        p=0;
    end
    ap=ap+p/11;
end

if draw
    % plot precision/recall
    plot(rec,prec,'-');
    grid;
    xlabel 'recall'
    ylabel 'precision'
    title(sprintf('class: %s, subset: %s, AP = %.3f',cls,VOCopts.testset,ap));
end
Function 13 | % creates as many hierarchies as there are functionHandles 14 | % 15 | % boxes: N x 4 array with boxes of all hierarchical groupings 16 | % blobIndIm: Index image with the initial segmentation 17 | % blobBoxes: Boxes belonging to the indices in blobIndIm 18 | % hierarchy: M x 1 cell array with hierarchies. M = 19 | % length(functionHandles) 20 | % 21 | % Jasper Uijlings - 2013 22 | 23 | % Change colour space 24 | [colourIm imageToSegment] = Image2ColourSpace(im, colourType); 25 | 26 | % Get initial segmentation, boxes, and neighbouring blobs 27 | [blobIndIm blobBoxes neighbours] = mexFelzenSegmentIndex(imageToSegment, sigma, k, minSize); 28 | numBlobs = size(blobBoxes,1); 29 | 30 | % Skip hierarchical grouping if segmentation results in single region only 31 | if numBlobs == 1 32 | warning('Oversegmentation results in a single region only'); 33 | boxes = blobBoxes; 34 | hierarchy = []; 35 | priority = 1; % priority is legacy 36 | return; 37 | end 38 | 39 | %%% Calculate histograms and sizes as prerequisite for grouping procedure 40 | 41 | % Get colour histogram 42 | [colourHist blobSizes] = BlobStructColourHist(blobIndIm, colourIm); 43 | 44 | % Get texture histogram 45 | textureHist = BlobStructTextureHist(blobIndIm, colourIm); 46 | % textureHist = BlobStructTextureHistLBP(blobIndIm, colourIm); 47 | 48 | % Allocate memory for complete hierarchy. 
49 | blobStruct.colourHist = zeros(size(colourHist,2), numBlobs * 2 - 1); 50 | blobStruct.textureHist = zeros(size(textureHist,2), numBlobs * 2 - 1); 51 | blobStruct.size = zeros(numBlobs * 2 -1, 1); 52 | blobStruct.boxes = zeros(numBlobs * 2 - 1, 4); 53 | 54 | % Insert calculated histograms, sizes, and boxes 55 | blobStruct.colourHist(:,1:numBlobs) = colourHist'; 56 | blobStruct.textureHist(:,1:numBlobs) = textureHist'; 57 | blobStruct.size(1:numBlobs) = blobSizes ./ 3; 58 | blobStruct.boxes(1:numBlobs,:) = blobBoxes; 59 | 60 | blobStruct.imSize = size(im,1) * size(im,2); 61 | 62 | %%% If you want to use original blobs in similarity functions, uncomment 63 | %%% these lines. 64 | % blobStruct.blobs = cell(numBlobs * 2 - 1, 1); 65 | % initialBlobs = SegmentIndices2Blobs(blobIndIm, blobBoxes); 66 | % blobStruct.blobs(1:numBlobs) = initialBlobs; 67 | 68 | 69 | % Loop over all merging strategies. Perform them one by one. 70 | boxes = cell(1, length(functionHandles)+1); 71 | priority = cell(1, length(functionHandles) + 1); 72 | hierarchy = cell(1, length(functionHandles)); 73 | for i=1:length(functionHandles) 74 | [boxes{i} hierarchy{i} blobStructT mergeThreshold] = BlobStruct2HierarchicalGrouping(blobStruct, neighbours, numBlobs, functionHandles{i}); 75 | boxes{i} = boxes{i}(numBlobs+1:end,:); 76 | priority{i} = (size(boxes{i}, 1):-1:1)'; 77 | end 78 | 79 | % Also save the initial boxes 80 | i = i+1; 81 | boxes{i} = blobBoxes; 82 | priority{i} = ones(size(boxes{i}, 1), 1) * (size(boxes{1}, 1)+1); 83 | 84 | % Concatenate boxes and priorities resulting from the different merging 85 | % strategies 86 | boxes = cat(1, boxes{:}); 87 | priority = cat(1, priority{:}); 88 | [priority ids] = sort(priority, 'ascend'); 89 | boxes = boxes(ids,:); 90 | 91 | 92 | -------------------------------------------------------------------------------- /help/train/README.md: -------------------------------------------------------------------------------- 1 | # Train Fast-RCNN on Another 
Dataset 2 | 3 | We will illustrate how to train Fast-RCNN on another dataset in the following steps, and we will take **INRIA Person** as the example dataset. 4 | 5 | ### Format Your Dataset 6 | 7 | First, the dataset must be organized in the required format. 8 | ``` 9 | INRIA 10 | |-- data 11 | |-- Annotations 12 | |-- *.txt (Annotation files) 13 | |-- Images 14 | |-- *.png (Image files) 15 | |-- ImageSets 16 | |-- train.txt 17 | ``` 18 | 19 | The `train.txt` file contains the names (without extensions) of all the image files that will be used for training. For example, here are a few lines from `train.txt`: 20 | 21 | ``` 22 | crop_000011 23 | crop_000603 24 | crop_000606 25 | crop_000607 26 | crop_000608 27 | ``` 28 | 29 | ### Construct IMDB 30 | 31 | You need to add a new python file describing the dataset we will use to the directory `$FRCNN_ROOT/lib/datasets`, see `inria.py`. Then the following steps should be taken. 32 | - Modify `self._classes` in the constructor function to fit your dataset. 33 | - Be careful with the extensions of your image files. See `image_path_from_index` in `inria.py`. 34 | - Write the function for parsing annotations. See `_load_inria_annotation` in `inria.py`. 35 | - Do not forget to add the necessary `import` statements in your own python file and other python files in the same directory. 36 | 37 | Then you should modify the `factory.py` in the same directory. For example, to add **INRIA Person**, we should add 38 | 39 | ```python 40 | inria_devkit_path = '/home/szy/INRIA' 41 | for split in ['train', 'test']: 42 | name = '{}_{}'.format('inria', split) 43 | __sets[name] = (lambda split=split: datasets.inria(split, inria_devkit_path)) 44 | ``` 45 | 46 | See the example `inria.py` at https://github.com/EdisonResearch/fast-rcnn/blob/master/lib/datasets/inria.py. 
47 | 48 | ### Run Selective Search 49 | 50 | Modify the matlab file `selective_search.m` in the directory `$FRCNN_ROOT/selective_search`. If you do not have that directory, you can find it at https://github.com/EdisonResearch/fast-rcnn/tree/master/selective_search. 51 | 52 | ```matlab 53 | image_db = '/home/szy/INRIA/'; 54 | image_filenames = textread([image_db '/data/ImageSets/train.txt'], '%s', 'delimiter', '\n'); 55 | for i = 1:length(image_filenames) 56 | if exist([image_db '/data/Images/' image_filenames{i} '.jpg'], 'file') == 2 57 | image_filenames{i} = [image_db '/data/Images/' image_filenames{i} '.jpg']; 58 | end 59 | if exist([image_db '/data/Images/' image_filenames{i} '.png'], 'file') == 2 60 | image_filenames{i} = [image_db '/data/Images/' image_filenames{i} '.png']; 61 | end 62 | end 63 | selective_search_rcnn(image_filenames, 'train.mat'); 64 | ``` 65 | 66 | Run this matlab file and then move the output `train.mat` to the root directory of your dataset, here it should be `/home/szy/INRIA/`. As it is a time consuming process, please be patient. 67 | 68 | ### Modify Prototxt 69 | 70 | For example, if you want to use the model **VGG_CNN_M_1024**, then you should modify `train.prototxt` in `$FRCNN_ROOT/models/VGG_CNN_M_1024`. The changes mainly concern the number of classes you want to train. Let's assume that the number of classes is `C` (do not forget to count the `background` class). Then you should: 71 | - Modify `num_classes` to `C`; 72 | - Modify `num_output` in the `cls_score` layer to `C` 73 | - Modify `num_output` in the `bbox_pred` layer to `4 * C` 74 | 75 | See https://github.com/rbgirshick/fast-rcnn/issues/11 for more details. 76 | 77 | ### Train! 78 | 79 | In the directory **$FRCNN_ROOT**, run the following command in the shell. 
def parse_args():
    """Parse input arguments.

    Prints the help text and exits with status 1 when the script is run
    without any command-line arguments.

    Returns:
        The argparse namespace with attributes `prototxt`, `prototxt_svd`
        and `caffemodel` (each None unless given on the command line).
    """
    parser = argparse.ArgumentParser(description='Compress a Fast R-CNN network')
    parser.add_argument('--def', dest='prototxt',
                        help='prototxt file defining the uncompressed network',
                        default=None, type=str)
    parser.add_argument('--def-svd', dest='prototxt_svd',
                        help='prototxt file defining the SVD compressed network',
                        default=None, type=str)
    parser.add_argument('--net', dest='caffemodel',
                        help='model to compress',
                        default=None, type=str)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args


def compress_weights(W, l):
    r"""Compress the weight matrix W of an inner product (fully connected)
    layer using truncated SVD.

    Parameters:
        W: N x M weights matrix
        l: number of singular values to retain

    Returns:
        Ul, L: matrices such that W \approx Ul*L
    """

    # numpy doesn't seem to have a fast truncated SVD algorithm...
    # this could be faster
    U, s, V = np.linalg.svd(W, full_matrices=False)

    Ul = U[:, :l]
    # Scale row i of V by singular value i; this equals diag(s[:l]).dot(V[:l])
    # without building the l x l diagonal matrix.
    L = s[:l, np.newaxis] * V[:l, :]
    return Ul, L


def _compress_layer(net, net_svd, name):
    """Install the truncated-SVD factors of layer `name` into net_svd.

    Reads the weights and biases of `name` from net and writes the factors
    into the '<name>_L' and '<name>_U' layers of net_svd; the original
    biases go on '<name>_U'.

    Returns:
        The bottleneck size (rows of the '<name>_L' weights), or None when
        net_svd has no '<name>_L' layer and nothing was done.
    """
    lower = name + '_L'
    upper = name + '_U'
    if lower not in net_svd.params:
        return None

    rank = net_svd.params[lower][0].data.shape[0]
    print('  {} bottleneck size: {}'.format(lower, rank))

    # The L layer is expected to carry weights only (no bias blob).
    assert len(net_svd.params[lower]) == 1

    # uncompressed weights and biases
    weights = net.params[name][0].data
    biases = net.params[name][1].data

    print('  compressing {}...'.format(name))
    Ul, L = compress_weights(weights, rank)

    # install compressed matrix factors (and original biases)
    net_svd.params[lower][0].data[...] = L
    net_svd.params[upper][0].data[...] = Ul
    net_svd.params[upper][1].data[...] = biases
    return rank


def main():
    """Compress fc6/fc7 of the given network and save the result.

    The output is written next to the input caffemodel, with '_svd' and one
    '_<layer>_<size>' suffix per compressed layer appended to its base name.
    """
    args = parse_args()

    # prototxt = 'models/VGG16/test.prototxt'
    # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel'
    net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)

    # prototxt_svd = 'models/VGG16/svd/test_fc6_fc7.prototxt'
    # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel'
    net_svd = caffe.Net(args.prototxt_svd, args.caffemodel, caffe.TEST)

    print('Uncompressed network {} : {}'.format(args.prototxt, args.caffemodel))
    print('Compressed network prototxt {}'.format(args.prototxt_svd))

    out = os.path.splitext(os.path.basename(args.caffemodel))[0] + '_svd'
    out_dir = os.path.dirname(args.caffemodel)

    for name in ('fc6', 'fc7'):
        rank = _compress_layer(net, net_svd, name)
        if rank is not None:
            out += '_{}_{}'.format(name, rank)

    # os.path.join keeps the path relative when out_dir is empty; a plain
    # '{}/{}' format would have produced a path under the filesystem root.
    filename = os.path.join(out_dir, out + '.caffemodel')
    net_svd.save(filename)
    print('Wrote svd model to: {:s}'.format(filename))


if __name__ == '__main__':
    main()
van de Weijer\n'); 16 | fprintf(' Fast anisotropic gauss filtering\n'); 17 | fprintf(' IEEE Transactions on Image Processing, 2003\n'); 18 | fprintf('Source code/Project page:\n'); 19 | fprintf(' http://staff.science.uva.nl/~mark/downloads.html#anigauss\n\n'); 20 | mex Dependencies/anigaussm/anigauss_mex.c Dependencies/anigaussm/anigauss.c -output anigauss 21 | end 22 | 23 | if(~exist('mexCountWordsIndex')) 24 | mex Dependencies/mexCountWordsIndex.cpp 25 | end 26 | 27 | % Compile the code of Felzenszwalb and Huttenlocher, IJCV 2004. 28 | if(~exist('mexFelzenSegmentIndex')) 29 | fprintf('Compiling the segmentation algorithm of:\n'); 30 | fprintf(' P. Felzenszwalb and D. Huttenlocher\n'); 31 | fprintf(' Efficient Graph-Based Image Segmentation\n'); 32 | fprintf(' International Journal of Computer Vision, 2004\n'); 33 | fprintf('Source code/Project page:\n'); 34 | fprintf(' http://www.cs.brown.edu/~pff/segment/\n'); 35 | fprintf('Note: A small Matlab wrapper was made. See demo.m for usage\n\n'); 36 | % fprintf(' 37 | mex Dependencies/FelzenSegment/mexFelzenSegmentIndex.cpp -output mexFelzenSegmentIndex; 38 | end 39 | 40 | %% 41 | % Parameters. Note that this controls the number of hierarchical 42 | % segmentations which are combined. 43 | colorTypes = {'Hsv', 'Lab', 'RGI', 'H', 'Intensity'}; 44 | 45 | % Here you specify which similarity functions to use in merging 46 | simFunctionHandles = {@SSSimColourTextureSizeFillOrig, @SSSimTextureSizeFill, @SSSimBoxFillOrig, @SSSimSize}; 47 | 48 | % Thresholds for the Felzenszwalb and Huttenlocher segmentation algorithm. 49 | % Note that by default, we set minSize = k, and sigma = 0.8. 50 | ks = [50 100 150 300]; % controls size of segments of initial segmentation. 51 | sigma = 0.8; 52 | 53 | % After segmentation, filter out boxes which have a width/height smaller 54 | % than minBoxWidth (default = 20 pixels). 
55 | minBoxWidth = 20; 56 | 57 | % Comment the following three lines for the 'quality' version 58 | % colorTypes = colorTypes(1:2); % 'Fast' uses HSV and Lab 59 | % simFunctionHandles = simFunctionHandles(1:2); % Two different merging strategies 60 | % ks = ks(1:2); 61 | 62 | % Test the boxes 63 | load('GroundTruthVOC2007test.mat'); % Load ground truth boxes and images and image names 64 | VOCImgPath = '/media/Data/Databases/VOCdevkit/VOC2007/JPEGImages/%s.jpg' 65 | fprintf('After box extraction, boxes smaller than %d pixels will be removed\n', minBoxWidth); 66 | fprintf('Obtaining boxes for Pascal 2007 test set:\n'); 67 | totalTime = 0; 68 | for i=1:length(testIms) 69 | fprintf('%d ', i); 70 | 71 | % VOCopts.img 72 | im = imread(sprintf(VOCImgPath, testIms{i})); 73 | idx = 1; 74 | for j=1:length(ks) 75 | k = ks(j); % Segmentation threshold k 76 | minSize = k; % We set minSize = k 77 | for n = 1:length(colorTypes) 78 | colorType = colorTypes{n}; 79 | tic; 80 | [boxesT{idx} blobIndIm blobBoxes hierarchy priorityT{idx}] = Image2HierarchicalGrouping(im, sigma, k, minSize, colorType, simFunctionHandles); 81 | totalTime = totalTime + toc; 82 | idx = idx + 1; 83 | end 84 | end 85 | boxes{i} = cat(1, boxesT{:}); % Concatenate boxes from all hierarchies 86 | priority = cat(1, priorityT{:}); % Concatenate priorities 87 | 88 | % Do pseudo random sorting as in paper 89 | priority = priority .* rand(size(priority)); 90 | [priority sortIds] = sort(priority, 'ascend'); 91 | boxes{i} = boxes{i}(sortIds,:); 92 | end 93 | fprintf('\n'); 94 | 95 | %% 96 | tic 97 | for i=1:length(boxes) 98 | boxes{i} = FilterBoxesWidth(boxes{i}, minBoxWidth); 99 | boxes{i} = BoxRemoveDuplicates(boxes{i}); 100 | end 101 | totalTime = totalTime + toc; 102 | 103 | fprintf('Time per image: %.2f\nNow evaluating the boxes on Pascal 2007...\n', totalTime ./ length(testIms)); 104 | 105 | %% 106 | [boxAbo boxMabo boScores avgNumBoxes] = BoxAverageBestOverlap(gtBoxes, gtImIds, boxes); 107 | 108 | fprintf('Mean 
function dets = fast_rcnn_im_detect(model, im, boxes)
% Run a Fast R-CNN network on one image and a set of object proposals.
%
%   model  struct; the fields init_key and pixel_means are read here
%   im     input image
%   boxes  object proposals, one box per row
%   dets   cell array with one entry per non-background class; each entry
%          holds the rows [predicted box, class probability] kept by nms

if caffe('get_init_key') ~= model.init_key
  error('You probably need call fast_rcnn_load_net() first.');
end

% Single-scale pyramid (multiscale = false)
[im_batch, scales] = image_pyramid(im, model.pixel_means, false);
[roi_boxes, roi_levels] = project_im_rois(boxes, scales);

% Rows of [level, box], adjusted to 0-based indexing, then permuted so that
% the roi info is the fastest dimension
rois = permute(cat(2, roi_levels, roi_boxes) - 1, [2 1]);

net_inputs = {im_batch; rois};
fwd_timer = tic();
net_outputs = caffe('forward', net_inputs);
fprintf('fwd: %.3fs\n', toc(fwd_timer));

bbox_deltas = squeeze(net_outputs{1})';
probs = squeeze(net_outputs{2})';

NMS_THRESH = 0.3;
num_classes = size(probs, 2);
dets = cell(num_classes - 1, 1);
% class index 1 is __background__, so we don't return it
for cls = 2:num_classes
  % the four regression outputs belonging to this class
  delta_cols = (1 + (cls - 1) * 4):(cls * 4);
  scored_boxes = [bbox_pred(boxes, bbox_deltas(:, delta_cols)) probs(:, cls)];
  keep = nms(scored_boxes, NMS_THRESH);
  dets{cls - 1} = scored_boxes(keep, :);
end
% ------------------------------------------------------------------------
function [batch, scales] = image_pyramid(im, pixel_means, multiscale)
% ------------------------------------------------------------------------
% Construct an image pyramid that's ready for feeding directly into caffe
%
%   im           input image (channels are reversed to BGR here)
%   pixel_means  mean that is subtracted from the image
%   multiscale   false: one level (600, capped at 1000);
%                true: five levels (capped at 2000)
%   batch        single array, one pyramid level per 4th-dim slice, with
%                width as the first dimension; smaller levels are zero padded
%   scales       column vector with the scale factor used for each level
if multiscale
  SCALES = [1200 864 688 576 480];
  MAX_SIZE = 2000;
else
  SCALES = [600];
  MAX_SIZE = 1000;
end
num_levels = length(SCALES);

% Single precision, converted to BGR, mean subtracted
im_orig = bsxfun(@minus, single(im(:, :, [3 2 1])), pixel_means);

% Scale so that the shorter side matches each target size ...
short_side = min([size(im_orig, 1) size(im_orig, 2)]);
long_side = max([size(im_orig, 1) size(im_orig, 2)]);
scale_factors = SCALES ./ short_side;

ims = cell(1, num_levels);
max_size = [0 0 0];
for lvl = 1:num_levels
  % ... unless that would push the longer side past MAX_SIZE
  if round(long_side * scale_factors(lvl)) > MAX_SIZE
    scale_factors(lvl) = MAX_SIZE / long_side;
  end
  ims{lvl} = imresize(im_orig, scale_factors(lvl), 'bilinear', ...
                      'antialiasing', false);
  max_size = max(cat(1, max_size, size(ims{lvl})), [], 1);
end

batch = zeros(max_size(2), max_size(1), 3, num_levels, 'single');
for lvl = 1:num_levels
  level_sz = size(ims{lvl});
  % Make width the fastest dimension (for caffe)
  batch(1:level_sz(2), 1:level_sz(1), :, lvl) = permute(ims{lvl}, [2 1 3]);
end
scales = scale_factors';

% ------------------------------------------------------------------------
function [boxes, levels] = project_im_rois(boxes, scales)
% ------------------------------------------------------------------------
% Assign each box to a pyramid level and scale it to that level.
%
%   boxes   one box per row; columns 1/3 and 2/4 are differenced to get
%           width and height
%   scales  column vector of per-level scale factors
%   levels  for each box, the index of the level at which its scaled area
%           is closest to 224 * 224
TARGET_AREA = 224 * 224;

box_w = boxes(:,3) - boxes(:,1) + 1;
box_h = boxes(:,4) - boxes(:,2) + 1;

% One column per pyramid level
area_at_level = bsxfun(@times, box_w .* box_h, (scales.^2)');
[~, levels] = min(abs(area_at_level - TARGET_AREA), [], 2);

% Scale in 0-based coordinates, then shift back to 1-based
boxes = bsxfun(@times, boxes - 1, scales(levels)) + 1;
class SolverWrapper(object):
    """A simple wrapper around Caffe's solver.
    This wrapper gives us control over the snapshotting process, which we
    use to unnormalize the learned bounding-box regression weights.
    """

    def __init__(self, solver_prototxt, roidb, output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper.

        Computes the bounding-box regression targets for roidb, builds the
        SGD solver from solver_prototxt, optionally copies weights from
        pretrained_model, parses the solver parameters and hands roidb to
        the first layer of the solver's net.
        """
        self.output_dir = output_dir

        print('Computing bounding-box regression targets...')
        self.bbox_means, self.bbox_stds = \
                rdl_roidb.add_bbox_regression_targets(roidb)
        print('done')

        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            print('Loading pretrained model '
                  'weights from {:s}'.format(pretrained_model))
            self.solver.net.copy_from(pretrained_model)

        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            pb2.text_format.Merge(f.read(), self.solver_param)

        self.solver.net.layers[0].set_roidb(roidb)

    def snapshot(self):
        """Take a snapshot of the network after unnormalizing the learned
        bounding-box regression weights. This enables easy use at test-time.
        """
        net = self.solver.net
        unnormalize = cfg.TRAIN.BBOX_REG

        if unnormalize:
            # save original values
            orig_0 = net.params['bbox_pred'][0].data.copy()
            orig_1 = net.params['bbox_pred'][1].data.copy()

            # scale and shift with bbox reg unnormalization; then save snapshot
            net.params['bbox_pred'][0].data[...] = \
                    (net.params['bbox_pred'][0].data *
                     self.bbox_stds[:, np.newaxis])
            net.params['bbox_pred'][1].data[...] = \
                    (net.params['bbox_pred'][1].data *
                     self.bbox_stds + self.bbox_means)

        try:
            if not os.path.exists(self.output_dir):
                os.makedirs(self.output_dir)

            infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                     if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
            filename = (self.solver_param.snapshot_prefix + infix +
                        '_iter_{:d}'.format(self.solver.iter) + '.caffemodel')
            filename = os.path.join(self.output_dir, filename)

            net.save(str(filename))
            print('Wrote snapshot to: {:s}'.format(filename))
        finally:
            if unnormalize:
                # restore net to original state, even if saving failed, so
                # training never continues on the unnormalized weights
                net.params['bbox_pred'][0].data[...] = orig_0
                net.params['bbox_pred'][1].data[...] = orig_1

    def train_model(self, max_iters):
        """Network training loop.

        Steps the solver one iteration at a time until max_iters is reached,
        snapshotting every cfg.TRAIN.SNAPSHOT_ITERS iterations and once more
        at the end if the last iteration was not already snapshotted.
        """
        last_snapshot_iter = -1
        timer = Timer()
        # A display interval of 0 would make the modulo below divide by
        # zero; in that case the speed report is simply skipped.
        report_every = 10 * self.solver_param.display
        while self.solver.iter < max_iters:
            # Make one SGD update
            timer.tic()
            self.solver.step(1)
            timer.toc()
            if report_every > 0 and self.solver.iter % report_every == 0:
                print('speed: {:.3f}s / iter'.format(timer.average_time))

            if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = self.solver.iter
                self.snapshot()

        if last_snapshot_iter != self.solver.iter:
            self.snapshot()

def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Appends horizontally-flipped images to imdb first when
    cfg.TRAIN.USE_FLIPPED is set.
    """
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb

def train_net(solver_prototxt, roidb, output_dir,
              pretrained_model=None, max_iters=40000):
    """Train a Fast R-CNN network.

    Builds a SolverWrapper from the arguments and runs its training loop
    for max_iters iterations.
    """
    sw = SolverWrapper(solver_prototxt, roidb, output_dir,
                       pretrained_model=pretrained_model)

    print('Solving...')
    sw.train_model(max_iters)
    print('done solving')
19 | % 20 | % Examples: 21 | % 22 | % isotropic filtering: 23 | % a=zeros(512,512); 24 | % a(256,256)=1; 25 | % tic;c=anigauss(a,10);toc 26 | % elapsed_time = 27 | % 0.0500 28 | % 29 | % anisotropic filtering: 30 | % a=zeros(512,512); 31 | % a(256,256)=1; 32 | % tic;c=anigauss(a,10,3,30);toc 33 | % elapsed_time = 34 | % 0.0600 35 | % 36 | % Usage: 37 | % 38 | % isotropic data smoothing: 39 | % out = anigauss(in, 3.0); 40 | % 41 | % isotropic data differentiation along y-axis: 42 | % out = anigauss(in, 3.0, 3.0, 0.0, 0, 1); 43 | % 44 | % anisotropic data smoothing: 45 | % out = anigauss(in, 3.0, 7.0, 30.0); 46 | % 47 | % anisotropic edge detection: 48 | % out = anigauss(in, 3.0, 7.0, 30.0, 1, 0); 49 | % 50 | % anisotropic line detection: 51 | % out = anigauss(in, 3.0, 7.0, 30.0, 2, 0); 52 | % 53 | % 54 | % 55 | % Copyright University of Amsterdam, 2002-2004. All rights reserved. 56 | % 57 | % Contact person: 58 | % Jan-Mark Geusebroek (mark@science.uva.nl, http://www.science.uva.nl/~mark) 59 | % Intelligent Systems Lab Amsterdam 60 | % Informatics Institute, Faculty of Science, University of Amsterdam 61 | % Kruislaan 403, 1098 SJ Amsterdam, The Netherlands. 62 | % 63 | % 64 | % This software is being made available for individual research use only. 65 | % Any commercial use or redistribution of this software requires a license from 66 | % the University of Amsterdam. 67 | % 68 | % You may use this work subject to the following conditions: 69 | % 70 | % 1. This work is provided "as is" by the copyright holder, with 71 | % absolutely no warranties of correctness, fitness, intellectual property 72 | % ownership, or anything else whatsoever. You use the work 73 | % entirely at your own risk. The copyright holder will not be liable for 74 | % any legal damages whatsoever connected with the use of this work. 75 | % 76 | % 2. The copyright holder retain all copyright to the work. 
All copies of 77 | % the work and all works derived from it must contain (1) this copyright 78 | % notice, and (2) additional notices describing the content, dates and 79 | % copyright holder of modifications or additions made to the work, if 80 | % any, including distribution and use conditions and intellectual property 81 | % claims. Derived works must be clearly distinguished from the original 82 | % work, both by name and by the prominent inclusion of explicit 83 | % descriptions of overlaps and differences. 84 | % 85 | % 3. The names and trademarks of the copyright holder may not be used in 86 | % advertising or publicity related to this work without specific prior 87 | % written permission. 88 | % 89 | % 4. In return for the free use of this work, you are requested, but not 90 | % legally required, to do the following: 91 | % 92 | % - If you become aware of factors that may significantly affect other 93 | % users of the work, for example major bugs or 94 | % deficiencies or possible intellectual property issues, you are 95 | % requested to report them to the copyright holder, if possible 96 | % including redistributable fixes or workarounds. 97 | % 98 | % - If you use the work in scientific research or as part of a larger 99 | % software system, you are requested to cite the use in any related 100 | % publications or technical documentation. The work is based upon: 101 | % 102 | % J. M. Geusebroek, A. W. M. Smeulders, and J. van de Weijer. 103 | % Fast anisotropic gauss filtering. IEEE Trans. Image Processing, 104 | % vol. 12, no. 8, pp. 938-943, 2003. 105 | % 106 | % related work: 107 | % 108 | % I.T. Young and L.J. van Vliet. Recursive implementation 109 | % of the Gaussian filter. Signal Processing, vol. 44, pp. 139-151, 1995. 110 | % 111 | % B. Triggs and M. Sdika. Boundary conditions for Young-van Vliet 112 | % recursive filtering. IEEE Trans. Signal Processing, 113 | % vol. 54, pp. 2365-2367, 2006. 
114 | % 115 | % This copyright notice must be retained with all copies of the software, 116 | % including any modified or derived versions. 117 | -------------------------------------------------------------------------------- /selective_search/Dependencies/FelzenSegment/segment-image.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | #ifndef SEGMENT_IMAGE 20 | #define SEGMENT_IMAGE 21 | 22 | #include 23 | #include "image.h" 24 | #include "misc.h" 25 | #include "filter.h" 26 | #include "segment-graph.h" 27 | 28 | // random color 29 | rgb random_rgb(){ 30 | rgb c; 31 | double r; 32 | 33 | c.r = (uchar)rand(); 34 | c.g = (uchar)rand(); 35 | c.b = (uchar)rand(); 36 | 37 | return c; 38 | } 39 | 40 | // dissimilarity measure between pixels 41 | static inline float diff(image *r, image *g, image *b, 42 | int x1, int y1, int x2, int y2) { 43 | return sqrt(square(imRef(r, x1, y1)-imRef(r, x2, y2)) + 44 | square(imRef(g, x1, y1)-imRef(g, x2, y2)) + 45 | square(imRef(b, x1, y1)-imRef(b, x2, y2))); 46 | } 47 | 48 | /* 49 | * Segment an image 50 | * 51 | * Returns a color image representing the segmentation. 52 | * 53 | * im: image to segment. 
/*
 * Segment an image
 *
 * Returns a color image representing the segmentation.
 *
 * im: image to segment.
 * sigma: to smooth the image.
 * c: constant for threshold function.
 * min_size: minimum component size (enforced by post-processing stage).
 * num_ccs: number of connected components in the segmentation.
 *
 * The returned image is allocated with new and is not freed here.
 *
 * NOTE(review): `image` is used below both for the rgb input (elements
 * with .r/.g/.b members) and for scalar channel planes; element-type
 * template arguments may have been lost from this copy -- confirm against
 * image.h.
 */
image *segment_image(image *im, float sigma, float c, int min_size,
                     int *num_ccs) {
  int width = im->width();
  int height = im->height();

  // one scalar plane per colour channel
  image *r = new image(width, height);
  image *g = new image(width, height);
  image *b = new image(width, height);

  // smooth each color channel
  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
      imRef(r, x, y) = imRef(im, x, y).r;
      imRef(g, x, y) = imRef(im, x, y).g;
      imRef(b, x, y) = imRef(im, x, y).b;
    }
  }
  // smooth() hands back a separate image per plane; the unsmoothed planes
  // are no longer needed once it has run.
  image *smooth_r = smooth(r, sigma);
  image *smooth_g = smooth(g, sigma);
  image *smooth_b = smooth(b, sigma);
  delete r;
  delete g;
  delete b;

  // build graph
  // Each pixel (node id y * width + x) contributes at most four edges --
  // right, down, down-right and up-right -- so every pair of 8-connected
  // neighbours appears exactly once and width*height*4 is an upper bound.
  edge *edges = new edge[width*height*4];
  int num = 0;
  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
      // right neighbour
      if (x < width-1) {
        edges[num].a = y * width + x;
        edges[num].b = y * width + (x+1);
        edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y);
        num++;
      }

      // neighbour below
      if (y < height-1) {
        edges[num].a = y * width + x;
        edges[num].b = (y+1) * width + x;
        edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x, y+1);
        num++;
      }

      // diagonal neighbour below-right
      if ((x < width-1) && (y < height-1)) {
        edges[num].a = y * width + x;
        edges[num].b = (y+1) * width + (x+1);
        edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y+1);
        num++;
      }

      // diagonal neighbour above-right
      if ((x < width-1) && (y > 0)) {
        edges[num].a = y * width + x;
        edges[num].b = (y-1) * width + (x+1);
        edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y-1);
        num++;
      }
    }
  }
  // edge weights are computed; the smoothed planes can go
  delete smooth_r;
  delete smooth_g;
  delete smooth_b;

  // segment
  universe *u = segment_graph(width*height, num, edges, c);

  // post process small components
  // Walk the edges in whatever order segment_graph left them (it may have
  // reordered the array -- confirm in segment-graph.h) and merge the two
  // end components whenever they differ and either is below min_size.
  // The locals a and b are component ids; b shadows the channel pointer
  // declared above, which has already been deleted.
  for (int i = 0; i < num; i++) {
    int a = u->find(edges[i].a);
    int b = u->find(edges[i].b);
    if ((a != b) && ((u->size(a) < min_size) || (u->size(b) < min_size)))
      u->join(a, b);
  }
  delete [] edges;
  *num_ccs = u->num_sets();

  image *output = new image(width, height);

  // pick random colors for each component
  // A colour is drawn for every node id; only the entries indexed by the
  // ids that u->find() returns are actually used when painting below.
  rgb *colors = new rgb[width*height];
  for (int i = 0; i < width*height; i++)
    colors[i] = random_rgb();

  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
      int comp = u->find(y * width + x);
      imRef(output, x, y) = colors[comp];
    }
  }

  delete [] colors;
  delete u;

  return output;
}
51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "norm1" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 3 59 | stride: 2 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | param { 68 | lr_mult: 1 69 | decay_mult: 1 70 | } 71 | param { 72 | lr_mult: 2 73 | decay_mult: 0 74 | } 75 | convolution_param { 76 | num_output: 256 77 | pad: 1 78 | kernel_size: 5 79 | stride: 2 80 | } 81 | } 82 | layer { 83 | name: "relu2" 84 | type: "ReLU" 85 | bottom: "conv2" 86 | top: "conv2" 87 | } 88 | layer { 89 | name: "norm2" 90 | type: "LRN" 91 | bottom: "conv2" 92 | top: "norm2" 93 | lrn_param { 94 | local_size: 5 95 | alpha: 0.0005 96 | beta: 0.75 97 | k: 2 98 | } 99 | } 100 | layer { 101 | name: "pool2" 102 | type: "Pooling" 103 | bottom: "norm2" 104 | top: "pool2" 105 | pooling_param { 106 | pool: MAX 107 | kernel_size: 3 108 | stride: 2 109 | } 110 | } 111 | layer { 112 | name: "conv3" 113 | type: "Convolution" 114 | bottom: "pool2" 115 | top: "conv3" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 512 126 | pad: 1 127 | kernel_size: 3 128 | } 129 | } 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | layer { 137 | name: "conv4" 138 | type: "Convolution" 139 | bottom: "conv3" 140 | top: "conv4" 141 | param { 142 | lr_mult: 1 143 | decay_mult: 1 144 | } 145 | param { 146 | lr_mult: 2 147 | decay_mult: 0 148 | } 149 | convolution_param { 150 | num_output: 512 151 | pad: 1 152 | kernel_size: 3 153 | } 154 | } 155 | layer { 156 | name: "relu4" 157 | type: "ReLU" 158 | bottom: "conv4" 159 | top: "conv4" 160 | } 161 | layer { 162 | name: "conv5" 163 | type: "Convolution" 164 | bottom: "conv4" 165 | top: "conv5" 166 | param { 167 | lr_mult: 1 168 | decay_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | 
decay_mult: 0 173 | } 174 | convolution_param { 175 | num_output: 512 176 | pad: 1 177 | kernel_size: 3 178 | } 179 | } 180 | layer { 181 | name: "relu5" 182 | type: "ReLU" 183 | bottom: "conv5" 184 | top: "conv5" 185 | } 186 | layer { 187 | name: "roi_pool5" 188 | type: "ROIPooling" 189 | bottom: "conv5" 190 | bottom: "rois" 191 | top: "pool5" 192 | roi_pooling_param { 193 | pooled_w: 6 194 | pooled_h: 6 195 | spatial_scale: 0.0625 # 1/16 196 | } 197 | } 198 | layer { 199 | name: "fc6" 200 | type: "InnerProduct" 201 | bottom: "pool5" 202 | top: "fc6" 203 | param { 204 | lr_mult: 1 205 | decay_mult: 1 206 | } 207 | param { 208 | lr_mult: 2 209 | decay_mult: 0 210 | } 211 | inner_product_param { 212 | num_output: 4096 213 | } 214 | } 215 | layer { 216 | name: "relu6" 217 | type: "ReLU" 218 | bottom: "fc6" 219 | top: "fc6" 220 | } 221 | layer { 222 | name: "drop6" 223 | type: "Dropout" 224 | bottom: "fc6" 225 | top: "fc6" 226 | dropout_param { 227 | dropout_ratio: 0.5 228 | } 229 | } 230 | layer { 231 | name: "fc7" 232 | type: "InnerProduct" 233 | bottom: "fc6" 234 | top: "fc7" 235 | param { 236 | lr_mult: 1 237 | decay_mult: 1 238 | } 239 | param { 240 | lr_mult: 2 241 | decay_mult: 0 242 | } 243 | inner_product_param { 244 | num_output: 1024 245 | } 246 | } 247 | layer { 248 | name: "relu7" 249 | type: "ReLU" 250 | bottom: "fc7" 251 | top: "fc7" 252 | } 253 | layer { 254 | name: "drop7" 255 | type: "Dropout" 256 | bottom: "fc7" 257 | top: "fc7" 258 | dropout_param { 259 | dropout_ratio: 0.5 260 | } 261 | } 262 | layer { 263 | name: "cls_score" 264 | type: "InnerProduct" 265 | bottom: "fc7" 266 | top: "cls_score" 267 | param { 268 | lr_mult: 1 269 | decay_mult: 1 270 | } 271 | param { 272 | lr_mult: 2 273 | decay_mult: 0 274 | } 275 | inner_product_param { 276 | num_output: 21 277 | weight_filler { 278 | type: "gaussian" 279 | std: 0.01 280 | } 281 | bias_filler { 282 | type: "constant" 283 | value: 0 284 | } 285 | } 286 | } 287 | layer { 288 | name: "cls_prob" 289 
| type: "Softmax" 290 | bottom: "cls_score" 291 | top: "cls_prob" 292 | } 293 | -------------------------------------------------------------------------------- /models/CaffeNet/no_bbox_reg/test.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 227 7 | dim: 227 8 | } 9 | input: "rois" 10 | input_shape { 11 | dim: 1 # to be changed on-the-fly to num ROIs 12 | dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 11 30 | pad: 5 31 | stride: 4 32 | } 33 | } 34 | layer { 35 | name: "relu1" 36 | type: "ReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | layer { 41 | name: "pool1" 42 | type: "Pooling" 43 | bottom: "conv1" 44 | top: "pool1" 45 | pooling_param { 46 | pool: MAX 47 | kernel_size: 3 48 | pad: 1 49 | stride: 2 50 | } 51 | } 52 | layer { 53 | name: "norm1" 54 | type: "LRN" 55 | bottom: "pool1" 56 | top: "norm1" 57 | lrn_param { 58 | local_size: 5 59 | alpha: 0.0001 60 | beta: 0.75 61 | } 62 | } 63 | layer { 64 | name: "conv2" 65 | type: "Convolution" 66 | bottom: "norm1" 67 | top: "conv2" 68 | param { 69 | lr_mult: 1 70 | decay_mult: 1 71 | } 72 | param { 73 | lr_mult: 2 74 | decay_mult: 0 75 | } 76 | convolution_param { 77 | num_output: 256 78 | kernel_size: 5 79 | pad: 2 80 | group: 2 81 | } 82 | } 83 | layer { 84 | name: "relu2" 85 | type: "ReLU" 86 | bottom: "conv2" 87 | top: "conv2" 88 | } 89 | layer { 90 | name: "pool2" 91 | type: "Pooling" 92 | bottom: "conv2" 93 | top: "pool2" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | pad: 1 98 | stride: 2 99 | } 100 | } 101 | layer { 102 | name: "norm2" 103 | type: "LRN" 104 | bottom: "pool2" 105 
| top: "norm2" 106 | lrn_param { 107 | local_size: 5 108 | alpha: 0.0001 109 | beta: 0.75 110 | } 111 | } 112 | layer { 113 | name: "conv3" 114 | type: "Convolution" 115 | bottom: "norm2" 116 | top: "conv3" 117 | param { 118 | lr_mult: 1 119 | decay_mult: 1 120 | } 121 | param { 122 | lr_mult: 2 123 | decay_mult: 0 124 | } 125 | convolution_param { 126 | num_output: 384 127 | kernel_size: 3 128 | pad: 1 129 | } 130 | } 131 | layer { 132 | name: "relu3" 133 | type: "ReLU" 134 | bottom: "conv3" 135 | top: "conv3" 136 | } 137 | layer { 138 | name: "conv4" 139 | type: "Convolution" 140 | bottom: "conv3" 141 | top: "conv4" 142 | param { 143 | lr_mult: 1 144 | decay_mult: 1 145 | } 146 | param { 147 | lr_mult: 2 148 | decay_mult: 0 149 | } 150 | convolution_param { 151 | num_output: 384 152 | kernel_size: 3 153 | pad: 1 154 | group: 2 155 | } 156 | } 157 | layer { 158 | name: "relu4" 159 | type: "ReLU" 160 | bottom: "conv4" 161 | top: "conv4" 162 | } 163 | layer { 164 | name: "conv5" 165 | type: "Convolution" 166 | bottom: "conv4" 167 | top: "conv5" 168 | param { 169 | lr_mult: 1 170 | decay_mult: 1 171 | } 172 | param { 173 | lr_mult: 2 174 | decay_mult: 0 175 | } 176 | convolution_param { 177 | num_output: 256 178 | kernel_size: 3 179 | pad: 1 180 | group: 2 181 | } 182 | } 183 | layer { 184 | name: "relu5" 185 | type: "ReLU" 186 | bottom: "conv5" 187 | top: "conv5" 188 | } 189 | layer { 190 | name: "roi_pool5" 191 | type: "ROIPooling" 192 | bottom: "conv5" 193 | bottom: "rois" 194 | top: "pool5" 195 | roi_pooling_param { 196 | pooled_w: 6 197 | pooled_h: 6 198 | spatial_scale: 0.0625 # 1/16 199 | } 200 | } 201 | layer { 202 | name: "fc6" 203 | type: "InnerProduct" 204 | bottom: "pool5" 205 | top: "fc6" 206 | param { 207 | lr_mult: 1 208 | decay_mult: 1 209 | } 210 | param { 211 | lr_mult: 2 212 | decay_mult: 0 213 | } 214 | inner_product_param { 215 | num_output: 4096 216 | } 217 | } 218 | layer { 219 | name: "relu6" 220 | type: "ReLU" 221 | bottom: "fc6" 222 | top: 
"fc6" 223 | } 224 | layer { 225 | name: "drop6" 226 | type: "Dropout" 227 | bottom: "fc6" 228 | top: "fc6" 229 | dropout_param { 230 | dropout_ratio: 0.5 231 | } 232 | } 233 | layer { 234 | name: "fc7" 235 | type: "InnerProduct" 236 | bottom: "fc6" 237 | top: "fc7" 238 | param { 239 | lr_mult: 1 240 | decay_mult: 1 241 | } 242 | param { 243 | lr_mult: 2 244 | decay_mult: 0 245 | } 246 | inner_product_param { 247 | num_output: 4096 248 | } 249 | } 250 | layer { 251 | name: "relu7" 252 | type: "ReLU" 253 | bottom: "fc7" 254 | top: "fc7" 255 | } 256 | layer { 257 | name: "drop7" 258 | type: "Dropout" 259 | bottom: "fc7" 260 | top: "fc7" 261 | dropout_param { 262 | dropout_ratio: 0.5 263 | } 264 | } 265 | layer { 266 | name: "cls_score" 267 | type: "InnerProduct" 268 | bottom: "fc7" 269 | top: "cls_score" 270 | param { 271 | lr_mult: 1 272 | decay_mult: 1 273 | } 274 | param { 275 | lr_mult: 2 276 | decay_mult: 0 277 | } 278 | inner_product_param { 279 | num_output: 21 280 | weight_filler { 281 | type: "gaussian" 282 | std: 0.01 283 | } 284 | bias_filler { 285 | type: "constant" 286 | value: 0 287 | } 288 | } 289 | } 290 | layer { 291 | name: "cls_prob" 292 | type: "Softmax" 293 | bottom: "cls_score" 294 | top: "cls_prob" 295 | } 296 | -------------------------------------------------------------------------------- /models/VGG_CNN_M_1024/no_bbox_reg/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_CNN_M_1024" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | python_param { 9 | module: 'roi_data_layer.layer' 10 | layer: 'RoIDataLayer' 11 | param_str: "'num_classes': 21" 12 | } 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 7 30 | 
stride: 2 31 | } 32 | } 33 | layer { 34 | name: "relu1" 35 | type: "ReLU" 36 | bottom: "conv1" 37 | top: "conv1" 38 | } 39 | layer { 40 | name: "norm1" 41 | type: "LRN" 42 | bottom: "conv1" 43 | top: "norm1" 44 | lrn_param { 45 | local_size: 5 46 | alpha: 0.0005 47 | beta: 0.75 48 | k: 2 49 | } 50 | } 51 | layer { 52 | name: "pool1" 53 | type: "Pooling" 54 | bottom: "norm1" 55 | top: "pool1" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 3 59 | stride: 2 60 | } 61 | } 62 | layer { 63 | name: "conv2" 64 | type: "Convolution" 65 | bottom: "pool1" 66 | top: "conv2" 67 | param { 68 | lr_mult: 1 69 | decay_mult: 1 70 | } 71 | param { 72 | lr_mult: 2 73 | decay_mult: 0 74 | } 75 | convolution_param { 76 | num_output: 256 77 | pad: 1 78 | kernel_size: 5 79 | stride: 2 80 | } 81 | } 82 | layer { 83 | name: "relu2" 84 | type: "ReLU" 85 | bottom: "conv2" 86 | top: "conv2" 87 | } 88 | layer { 89 | name: "norm2" 90 | type: "LRN" 91 | bottom: "conv2" 92 | top: "norm2" 93 | lrn_param { 94 | local_size: 5 95 | alpha: 0.0005 96 | beta: 0.75 97 | k: 2 98 | } 99 | } 100 | layer { 101 | name: "pool2" 102 | type: "Pooling" 103 | bottom: "norm2" 104 | top: "pool2" 105 | pooling_param { 106 | pool: MAX 107 | kernel_size: 3 108 | stride: 2 109 | } 110 | } 111 | layer { 112 | name: "conv3" 113 | type: "Convolution" 114 | bottom: "pool2" 115 | top: "conv3" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 512 126 | pad: 1 127 | kernel_size: 3 128 | } 129 | } 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | layer { 137 | name: "conv4" 138 | type: "Convolution" 139 | bottom: "conv3" 140 | top: "conv4" 141 | param { 142 | lr_mult: 1 143 | decay_mult: 1 144 | } 145 | param { 146 | lr_mult: 2 147 | decay_mult: 0 148 | } 149 | convolution_param { 150 | num_output: 512 151 | pad: 1 152 | kernel_size: 3 153 | } 154 | } 155 
| layer { 156 | name: "relu4" 157 | type: "ReLU" 158 | bottom: "conv4" 159 | top: "conv4" 160 | } 161 | layer { 162 | name: "conv5" 163 | type: "Convolution" 164 | bottom: "conv4" 165 | top: "conv5" 166 | param { 167 | lr_mult: 1 168 | decay_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | decay_mult: 0 173 | } 174 | convolution_param { 175 | num_output: 512 176 | pad: 1 177 | kernel_size: 3 178 | } 179 | } 180 | layer { 181 | name: "relu5" 182 | type: "ReLU" 183 | bottom: "conv5" 184 | top: "conv5" 185 | } 186 | layer { 187 | name: "roi_pool5" 188 | type: "ROIPooling" 189 | bottom: "conv5" 190 | bottom: "rois" 191 | top: "pool5" 192 | roi_pooling_param { 193 | pooled_w: 6 194 | pooled_h: 6 195 | spatial_scale: 0.0625 # 1/16 196 | } 197 | } 198 | layer { 199 | name: "fc6" 200 | type: "InnerProduct" 201 | bottom: "pool5" 202 | top: "fc6" 203 | param { 204 | lr_mult: 1 205 | decay_mult: 1 206 | } 207 | param { 208 | lr_mult: 2 209 | decay_mult: 0 210 | } 211 | inner_product_param { 212 | num_output: 4096 213 | } 214 | } 215 | layer { 216 | name: "relu6" 217 | type: "ReLU" 218 | bottom: "fc6" 219 | top: "fc6" 220 | } 221 | layer { 222 | name: "drop6" 223 | type: "Dropout" 224 | bottom: "fc6" 225 | top: "fc6" 226 | dropout_param { 227 | dropout_ratio: 0.5 228 | } 229 | } 230 | layer { 231 | name: "fc7" 232 | type: "InnerProduct" 233 | bottom: "fc6" 234 | top: "fc7" 235 | param { 236 | lr_mult: 1 237 | decay_mult: 1 238 | } 239 | param { 240 | lr_mult: 2 241 | decay_mult: 0 242 | } 243 | inner_product_param { 244 | num_output: 1024 245 | } 246 | } 247 | layer { 248 | name: "relu7" 249 | type: "ReLU" 250 | bottom: "fc7" 251 | top: "fc7" 252 | } 253 | layer { 254 | name: "drop7" 255 | type: "Dropout" 256 | bottom: "fc7" 257 | top: "fc7" 258 | dropout_param { 259 | dropout_ratio: 0.5 260 | } 261 | } 262 | layer { 263 | name: "cls_score" 264 | type: "InnerProduct" 265 | bottom: "fc7" 266 | top: "cls_score" 267 | param { 268 | lr_mult: 1 269 | decay_mult: 1 270 | } 271 | 
param { 272 | lr_mult: 2 273 | decay_mult: 0 274 | } 275 | inner_product_param { 276 | num_output: 21 277 | weight_filler { 278 | type: "gaussian" 279 | std: 0.01 280 | } 281 | bias_filler { 282 | type: "constant" 283 | value: 0 284 | } 285 | } 286 | } 287 | layer { 288 | name: "loss_cls" 289 | type: "SoftmaxWithLoss" 290 | bottom: "cls_score" 291 | bottom: "labels" 292 | top: "loss_cls" 293 | loss_weight: 1 294 | } 295 | -------------------------------------------------------------------------------- /models/CaffeNet/no_bbox_reg/train.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: 'data' 4 | type: 'Python' 5 | top: 'data' 6 | top: 'rois' 7 | top: 'labels' 8 | python_param { 9 | module: 'roi_data_layer.layer' 10 | layer: 'RoIDataLayer' 11 | param_str: "'num_classes': 21" 12 | } 13 | } 14 | layer { 15 | name: "conv1" 16 | type: "Convolution" 17 | bottom: "data" 18 | top: "conv1" 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | param { 24 | lr_mult: 0 25 | decay_mult: 0 26 | } 27 | convolution_param { 28 | num_output: 96 29 | kernel_size: 11 30 | pad: 5 31 | stride: 4 32 | } 33 | } 34 | layer { 35 | name: "relu1" 36 | type: "ReLU" 37 | bottom: "conv1" 38 | top: "conv1" 39 | } 40 | layer { 41 | name: "pool1" 42 | type: "Pooling" 43 | bottom: "conv1" 44 | top: "pool1" 45 | pooling_param { 46 | pool: MAX 47 | kernel_size: 3 48 | pad: 1 49 | stride: 2 50 | } 51 | } 52 | layer { 53 | name: "norm1" 54 | type: "LRN" 55 | bottom: "pool1" 56 | top: "norm1" 57 | lrn_param { 58 | local_size: 5 59 | alpha: 0.0001 60 | beta: 0.75 61 | } 62 | } 63 | layer { 64 | name: "conv2" 65 | type: "Convolution" 66 | bottom: "norm1" 67 | top: "conv2" 68 | param { 69 | lr_mult: 1 70 | decay_mult: 1 71 | } 72 | param { 73 | lr_mult: 2 74 | decay_mult: 0 75 | } 76 | convolution_param { 77 | num_output: 256 78 | kernel_size: 5 79 | pad: 2 80 | group: 2 81 | } 82 | } 83 | layer { 84 | name: "relu2" 85 
| type: "ReLU" 86 | bottom: "conv2" 87 | top: "conv2" 88 | } 89 | layer { 90 | name: "pool2" 91 | type: "Pooling" 92 | bottom: "conv2" 93 | top: "pool2" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | pad: 1 98 | stride: 2 99 | } 100 | } 101 | layer { 102 | name: "norm2" 103 | type: "LRN" 104 | bottom: "pool2" 105 | top: "norm2" 106 | lrn_param { 107 | local_size: 5 108 | alpha: 0.0001 109 | beta: 0.75 110 | } 111 | } 112 | layer { 113 | name: "conv3" 114 | type: "Convolution" 115 | bottom: "norm2" 116 | top: "conv3" 117 | param { 118 | lr_mult: 1 119 | decay_mult: 1 120 | } 121 | param { 122 | lr_mult: 2 123 | decay_mult: 0 124 | } 125 | convolution_param { 126 | num_output: 384 127 | kernel_size: 3 128 | pad: 1 129 | } 130 | } 131 | layer { 132 | name: "relu3" 133 | type: "ReLU" 134 | bottom: "conv3" 135 | top: "conv3" 136 | } 137 | layer { 138 | name: "conv4" 139 | type: "Convolution" 140 | bottom: "conv3" 141 | top: "conv4" 142 | param { 143 | lr_mult: 1 144 | decay_mult: 1 145 | } 146 | param { 147 | lr_mult: 2 148 | decay_mult: 0 149 | } 150 | convolution_param { 151 | num_output: 384 152 | kernel_size: 3 153 | pad: 1 154 | group: 2 155 | } 156 | } 157 | layer { 158 | name: "relu4" 159 | type: "ReLU" 160 | bottom: "conv4" 161 | top: "conv4" 162 | } 163 | layer { 164 | name: "conv5" 165 | type: "Convolution" 166 | bottom: "conv4" 167 | top: "conv5" 168 | param { 169 | lr_mult: 1 170 | decay_mult: 1 171 | } 172 | param { 173 | lr_mult: 2 174 | decay_mult: 0 175 | } 176 | convolution_param { 177 | num_output: 256 178 | kernel_size: 3 179 | pad: 1 180 | group: 2 181 | } 182 | } 183 | layer { 184 | name: "relu5" 185 | type: "ReLU" 186 | bottom: "conv5" 187 | top: "conv5" 188 | } 189 | layer { 190 | name: "roi_pool5" 191 | type: "ROIPooling" 192 | bottom: "conv5" 193 | bottom: "rois" 194 | top: "pool5" 195 | roi_pooling_param { 196 | pooled_w: 6 197 | pooled_h: 6 198 | spatial_scale: 0.0625 # 1/16 199 | } 200 | } 201 | layer { 202 | name: "fc6" 203 | 
type: "InnerProduct" 204 | bottom: "pool5" 205 | top: "fc6" 206 | param { 207 | lr_mult: 1 208 | decay_mult: 1 209 | } 210 | param { 211 | lr_mult: 2 212 | decay_mult: 0 213 | } 214 | inner_product_param { 215 | num_output: 4096 216 | } 217 | } 218 | layer { 219 | name: "relu6" 220 | type: "ReLU" 221 | bottom: "fc6" 222 | top: "fc6" 223 | } 224 | layer { 225 | name: "drop6" 226 | type: "Dropout" 227 | bottom: "fc6" 228 | top: "fc6" 229 | dropout_param { 230 | dropout_ratio: 0.5 231 | } 232 | } 233 | layer { 234 | name: "fc7" 235 | type: "InnerProduct" 236 | bottom: "fc6" 237 | top: "fc7" 238 | param { 239 | lr_mult: 1 240 | decay_mult: 1 241 | } 242 | param { 243 | lr_mult: 2 244 | decay_mult: 0 245 | } 246 | inner_product_param { 247 | num_output: 4096 248 | } 249 | } 250 | layer { 251 | name: "relu7" 252 | type: "ReLU" 253 | bottom: "fc7" 254 | top: "fc7" 255 | } 256 | layer { 257 | name: "drop7" 258 | type: "Dropout" 259 | bottom: "fc7" 260 | top: "fc7" 261 | dropout_param { 262 | dropout_ratio: 0.5 263 | } 264 | } 265 | layer { 266 | name: "cls_score" 267 | type: "InnerProduct" 268 | bottom: "fc7" 269 | top: "cls_score" 270 | param { 271 | lr_mult: 1 272 | decay_mult: 1 273 | } 274 | param { 275 | lr_mult: 2 276 | decay_mult: 0 277 | } 278 | inner_product_param { 279 | num_output: 21 280 | weight_filler { 281 | type: "gaussian" 282 | std: 0.01 283 | } 284 | bias_filler { 285 | type: "constant" 286 | value: 0 287 | } 288 | } 289 | } 290 | layer { 291 | name: "loss_cls" 292 | type: "SoftmaxWithLoss" 293 | bottom: "cls_score" 294 | bottom: "labels" 295 | top: "loss_cls" 296 | loss_weight: 1 297 | } 298 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for 
def prepare_roidb(imdb):
    """Enrich the imdb's roidb with derived quantities used for training.

    For every index ``i`` of ``imdb.image_index`` the matching roidb entry
    gains three keys:

    * ``'image'``: the value of ``imdb.image_path_at(i)``.
    * ``'max_overlaps'``: for each ROI, the maximum of its row in the
      densified ``'gt_overlaps'`` matrix, i.e. the max taken over classes
      (columns).
    * ``'max_classes'``: for each ROI, the column (class) holding that
      maximum.

    The roidb entries are modified in place and nothing is returned.

    Raises:
        AssertionError: if an ROI whose max overlap is 0 is not assigned
            class 0 (background), or an ROI with positive max overlap is
            assigned class 0.
    """
    roidb = imdb.roidb
    # range() instead of xrange(): identical iteration here, and the name
    # also exists on Python 3.
    for i in range(len(imdb.image_index)):
        entry = roidb[i]
        entry['image'] = imdb.image_path_at(i)
        # need gt_overlaps as a dense array for argmax
        gt_overlaps = entry['gt_overlaps'].toarray()
        # max overlap with gt over classes (columns)
        max_overlaps = gt_overlaps.max(axis=1)
        # gt class that had the max overlap
        max_classes = gt_overlaps.argmax(axis=1)
        entry['max_classes'] = max_classes
        entry['max_overlaps'] = max_overlaps
        # sanity checks
        # max overlap of 0 => class should be zero (background)
        zero_inds = np.where(max_overlaps == 0)[0]
        assert all(max_classes[zero_inds] == 0)
        # max overlap > 0 => class should not be zero (must be a fg class)
        nonzero_inds = np.where(max_overlaps > 0)[0]
        assert all(max_classes[nonzero_inds] != 0)
def add_bbox_regression_targets(roidb):
    """Add information needed to train bounding-box regressors.

    Works in three passes over ``roidb`` (which must already carry the
    ``'max_classes'`` / ``'max_overlaps'`` keys added by ``prepare_roidb``):

    1. Store the result of ``_compute_targets`` for every image under the
       ``'bbox_targets'`` key.
    2. Accumulate, per foreground class (classes ``1 .. num_classes - 1``),
       the count, sum and sum of squares of the four target columns and
       derive per-class means and standard deviations from them.
    3. Normalise every stored target in place: subtract the class mean and
       divide by the class standard deviation.

    Row 0 (background) is never accumulated, so its mean and standard
    deviation stay 0 in the returned arrays.

    Returns:
        tuple: ``(means.ravel(), stds.ravel())`` -- the flattened
        ``(num_classes, 4)`` mean and standard-deviation tables.

    Raises:
        AssertionError: if ``roidb`` is empty or ``prepare_roidb`` has not
            been run on it.
    """
    assert len(roidb) > 0
    assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'

    num_images = len(roidb)
    # Infer number of classes from the number of columns in gt_overlaps
    num_classes = roidb[0]['gt_overlaps'].shape[1]
    # range() instead of xrange() throughout: same iteration, and the name
    # also exists on Python 3.
    for im_i in range(num_images):
        entry = roidb[im_i]
        entry['bbox_targets'] = _compute_targets(entry['boxes'],
                                                 entry['max_overlaps'],
                                                 entry['max_classes'])

    # Compute values needed for means and stds
    # var(x) = E(x^2) - E(x)^2
    # Counts start at EPS so the divisions below never divide by exactly 0.
    class_counts = np.zeros((num_classes, 1)) + cfg.EPS
    sums = np.zeros((num_classes, 4))
    squared_sums = np.zeros((num_classes, 4))
    for im_i in range(num_images):
        targets = roidb[im_i]['bbox_targets']
        for cls in range(1, num_classes):
            # column 0 of the targets holds the class label
            cls_inds = np.where(targets[:, 0] == cls)[0]
            if cls_inds.size > 0:
                class_counts[cls] += cls_inds.size
                sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
                squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0)

    means = sums / class_counts
    stds = np.sqrt(squared_sums / class_counts - means ** 2)

    # Normalize targets
    for im_i in range(num_images):
        # ``targets`` is the stored array itself, so the in-place updates
        # below modify the roidb entry directly.
        targets = roidb[im_i]['bbox_targets']
        for cls in range(1, num_classes):
            cls_inds = np.where(targets[:, 0] == cls)[0]
            targets[cls_inds, 1:] -= means[cls, :]
            targets[cls_inds, 1:] /= stds[cls, :]

    # These values will be needed for making predictions
    # (the predicts will need to be unnormalized and uncentered)
    return means.ravel(), stds.ravel()
def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image.

    Args:
        rois: ``(N, 4)`` array of boxes, columns ``x1, y1, x2, y2``.
        overlaps: length-``N`` array; entries equal to 1 mark ground-truth
            ROIs, entries ``>= cfg.TRAIN.BBOX_THRESH`` mark the examples
            that receive a regression target.
        labels: length-``N`` array of class labels.

    Returns:
        ``(N, 5)`` ``float32`` array.  For every example row, column 0 is
        its label and columns 1-4 are ``dx, dy, dw, dh`` relative to the
        ground-truth ROI it overlaps most: centre offsets divided by the
        example's width/height, and the log of the width/height ratios.
        All other rows are zero.  If the image has no ground-truth ROI the
        whole array is zero.
    """
    # Ensure ROIs are floats.  The builtin ``float`` is what the ``np.float``
    # alias (removed from recent NumPy releases) pointed to, so the
    # resulting dtype is unchanged.
    rois = rois.astype(float, copy=False)

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if gt_inds.size == 0:
        # Bail out if the image has no ground-truth ROIs: there is nothing
        # to regress towards, and the argmax below would raise on an empty
        # axis.
        return np.zeros((rois.shape[0], 5), dtype=np.float32)

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = utils.cython_bbox.bbox_overlaps(rois[ex_inds, :],
                                                     rois[gt_inds, :])

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    # EPS keeps widths/heights strictly positive for the divisions and
    # logarithms below.
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1] = targets_dx
    targets[ex_inds, 2] = targets_dy
    targets[ex_inds, 3] = targets_dw
    targets[ex_inds, 4] = targets_dh
    return targets