├── output
    ├── .gitignore
    └── README.md
├── experiments
    ├── logs
    │   └── .gitignore
    ├── cfgs
    │   ├── fc_only.yml
    │   ├── piecewise.yml
    │   ├── no_bbox_reg.yml
    │   ├── multiscale.yml
    │   └── svm.yml
    ├── README.md
    └── scripts
    │   ├── all_vgg16.sh
    │   ├── all_caffenet.sh
    │   ├── all_vgg_cnn_m_1024.sh
    │   ├── multitask_no_bbox_reg_vgg16.sh
    │   ├── multitask_no_bbox_reg_caffenet.sh
    │   ├── multitask_no_bbox_reg_vgg_cnn_m_1024.sh
    │   ├── default_vgg16.sh
    │   ├── default_caffenet.sh
    │   ├── default_vgg_cnn_m_1024.sh
    │   ├── svd_vgg16.sh
    │   ├── svd_caffenet.sh
    │   ├── svm_vgg16.sh
    │   ├── fc_only_vgg16.sh
    │   ├── svm_caffenet.sh
    │   ├── svd_vgg_cnn_m_1024.sh
    │   ├── multiscale_caffenet.sh
    │   ├── no_bbox_reg_vgg16.sh
    │   ├── svm_vgg_cnn_m_1024.sh
    │   ├── no_bbox_reg_caffenet.sh
    │   ├── piecewise_vgg16.sh
    │   ├── multiscale_vgg_cnn_m_1024.sh
    │   ├── piecewise_caffenet.sh
    │   ├── no_bbox_reg_vgg_cnn_m_1024.sh
    │   └── piecewise_vgg_cnn_m_1024.sh
├── lib
    ├── utils
    │   ├── .gitignore
    │   ├── __init__.py
    │   ├── timer.py
    │   ├── nms.py
    │   ├── blob.py
    │   ├── bbox.pyx
    │   └── nms.pyx
    ├── Makefile
    ├── roi_data_layer
    │   ├── __init__.py
    │   └── roidb.py
    ├── datasets
    │   ├── VOCdevkit-matlab-wrapper
    │   │   ├── get_voc_opts.m
    │   │   ├── xVOCap.m
    │   │   └── voc_eval.m
    │   ├── __init__.py
    │   └── factory.py
    ├── fast_rcnn
    │   ├── __init__.py
    │   └── train.py
    └── setup.py
├── selective_search
    ├── .gitignore
    ├── cat.jpg
    ├── 000015.jpg
    ├── MergeBlobs.p
    ├── ChangeEdges.p
    ├── SSSimBoxFill.p
    ├── SSSimColour.p
    ├── SSSimTexture.p
    ├── SSSimColourSize.p
    ├── Dependencies
    │   ├── Rgb2C.p
    │   ├── Rgb2Ooo.p
    │   ├── Rgb2Rg.p
    │   ├── Rgb2Rgi.p
    │   ├── Blob2Image.p
    │   ├── Blob2Vector.p
    │   ├── BlobAddSizes.p
    │   ├── Blobs2Boxes.p
    │   ├── Vector2Hist.p
    │   ├── gaussianFilter.p
    │   ├── Image2ColourSpace.p
    │   ├── BlobAddTextureHists.p
    │   ├── SegmentIndices2Blobs.p
    │   ├── Image2OrientedGradients.p
    │   ├── BoxUnion.m
    │   ├── NormalizeArray.m
    │   ├── FelzenSegment
    │   │   ├── Makefile
    │   │   ├── README
    │   │   ├── segment.cpp
    │   │   ├── imutil.h
    │   │   ├── misc.h
    │   │   ├── disjoint-set.h
    │   │   ├── convolve.h
    │   │   ├── segment-graph.h
    │   │   ├── image.h
    │   │   ├── filter.h
    │   │   └── segment-image.h
    │   ├── BoxRemoveDuplicates.m
    │   ├── FilterBoxesWidth.m
    │   ├── ShowRectsWithinImage.m
    │   ├── BoxSize.m
    │   ├── BoxIntersection.m
    │   ├── GetPascalOverlap.m
    │   ├── BoxBestOverlap.m
    │   ├── NormalizeRows.m
    │   ├── PascalOverlap.m
    │   ├── CountVisualWordsIndex.m
    │   ├── ShowBlobs.m
    │   ├── ShowImageCell.m
    │   ├── mexCountWordsIndex.cpp
    │   └── anigaussm
    │   │   ├── anigauss_mex.c
    │   │   └── anigauss.m
    ├── SSSimBoxFillOrig.p
    ├── SSSimBoxFillSize.p
    ├── SSSimTextureSize.p
    ├── BlobStructColourHist.p
    ├── SSSimBoxFillOrigSize.p
    ├── SSSimTextureSizeFill.p
    ├── BlobStructTextureHist.p
    ├── GroundTruthVOC2007test.mat
    ├── SSSimColourTextureSizeFill.p
    ├── SSSimColourTextureSizeFillOrig.p
    ├── BlobStruct2HierarchicalGrouping.p
    ├── SSSimSize.m
    ├── README.md
    ├── selective_search.m
    ├── BlobBestOverlap.m
    ├── RecreateBlobHierarchyIndIm.m
    ├── RecreateBlobHierarchy.m
    ├── BoxAverageBestOverlap.m
    ├── BlobAverageBestOverlap.m
    ├── License.txt
    ├── demo.m
    ├── selective_search_rcnn.m
    ├── Image2HierarchicalGrouping.m
    └── demoPascal2007.m
├── .gitignore
├── data
    ├── pylintrc
    ├── demo
    │   ├── 000004.jpg
    │   ├── 001551.jpg
    │   ├── 000004_boxes.mat
    │   └── 001551_boxes.mat
    ├── .gitignore
    ├── scripts
    │   ├── fetch_fast_rcnn_models.sh
    │   ├── fetch_imagenet_models.sh
    │   └── fetch_selective_search_data.sh
    └── README.md
├── tools
    ├── README.md
    ├── _init_paths.py
    ├── test_net.py
    ├── reval.py
    ├── train_net.py
    └── compress_net.py
├── todo.txt
├── .gitmodules
├── help
    ├── INRIA
    │   └── VOCcode
    │   │   ├── PASemptyrecord.m
    │   │   ├── PASemptyobject.m
    │   │   ├── PASerrmsg.m
    │   │   ├── VOCinit.m
    │   │   ├── VOCroc.m
    │   │   ├── PASreadrecord.m
    │   │   ├── VOCpr.m
    │   │   └── VOCevaldet.m
    ├── test
    │   └── README.md
    └── train
    │   └── README.md
├── README.md
├── matlab
    ├── README.md
    ├── fast_rcnn_load_net.m
    ├── showboxes.m
    ├── nms.m
    ├── fast_rcnn_demo.m
    └── fast_rcnn_im_detect.m
├── models
    ├── README.md
    ├── CaffeNet
    │   ├── solver.prototxt
    │   ├── piecewise
    │   │   └── solver.prototxt
    │   └── no_bbox_reg
    │   │   ├── solver.prototxt
    │   │   ├── test.prototxt
    │   │   └── train.prototxt
    ├── VGG16
    │   ├── solver.prototxt
    │   ├── fc_only
    │   │   └── solver.prototxt
    │   ├── piecewise
    │   │   └── solver.prototxt
    │   └── no_bbox_reg
    │   │   └── solver.prototxt
    └── VGG_CNN_M_1024
    │   ├── solver.prototxt
    │   ├── piecewise
    │       └── solver.prototxt
    │   └── no_bbox_reg
    │       ├── solver.prototxt
    │       ├── test.prototxt
    │       └── train.prototxt
├── commands.txt
└── LICENSE


/output/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | 


--------------------------------------------------------------------------------
/experiments/logs/.gitignore:
--------------------------------------------------------------------------------
1 | *.txt*
2 | 


--------------------------------------------------------------------------------
/lib/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.so
3 | 


--------------------------------------------------------------------------------
/selective_search/.gitignore:
--------------------------------------------------------------------------------
1 | *.mex*
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .ipynb_checkpoints
3 | # Generated C sources and shared objects under lib/utils. A pattern with a
4 | # slash in it is anchored to this file's directory, so the lib/ prefix is
5 | # required for these to match anything.
6 | lib/utils/*.c
7 | lib/utils/*.so
8 | 


--------------------------------------------------------------------------------
/data/pylintrc:
--------------------------------------------------------------------------------
1 | [TYPECHECK]
2 | 
3 | ignored-modules = numpy, numpy.random, cv2
4 | 


--------------------------------------------------------------------------------
/lib/Makefile:
--------------------------------------------------------------------------------
1 | # Build the extension modules declared in setup.py in place, then remove the
2 | # temporary build directory. Declared phony so that a file named "all" can
3 | # never make the target look up to date.
4 | .PHONY: all
5 | all:
6 | 	python setup.py build_ext --inplace
7 | 	rm -rf build
8 | 


--------------------------------------------------------------------------------
/tools/README.md:
--------------------------------------------------------------------------------
1 | Tools for training, testing, and compressing Fast R-CNN networks.
2 | 


--------------------------------------------------------------------------------
/experiments/cfgs/fc_only.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: fc_only
2 | TRAIN:
3 |   SNAPSHOT_INFIX: fc_only
4 | 


--------------------------------------------------------------------------------
/data/demo/000004.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/data/demo/000004.jpg


--------------------------------------------------------------------------------
/data/demo/001551.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/data/demo/001551.jpg


--------------------------------------------------------------------------------
/experiments/cfgs/piecewise.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: piecewise
2 | TRAIN:
3 |   SNAPSHOT_INFIX: piecewise
4 | 


--------------------------------------------------------------------------------
/selective_search/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/cat.jpg


--------------------------------------------------------------------------------
/data/.gitignore:
--------------------------------------------------------------------------------
1 | selective_search*
2 | imagenet_models*
3 | fast_rcnn_models*
4 | VOCdevkit*
5 | cache
6 | 


--------------------------------------------------------------------------------
/data/demo/000004_boxes.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/data/demo/000004_boxes.mat


--------------------------------------------------------------------------------
/data/demo/001551_boxes.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/data/demo/001551_boxes.mat


--------------------------------------------------------------------------------
/selective_search/000015.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/000015.jpg


--------------------------------------------------------------------------------
/selective_search/MergeBlobs.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/MergeBlobs.p


--------------------------------------------------------------------------------
/selective_search/ChangeEdges.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/ChangeEdges.p


--------------------------------------------------------------------------------
/selective_search/SSSimBoxFill.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimBoxFill.p


--------------------------------------------------------------------------------
/selective_search/SSSimColour.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimColour.p


--------------------------------------------------------------------------------
/selective_search/SSSimTexture.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimTexture.p


--------------------------------------------------------------------------------
/selective_search/SSSimColourSize.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimColourSize.p


--------------------------------------------------------------------------------
/selective_search/Dependencies/Rgb2C.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Rgb2C.p


--------------------------------------------------------------------------------
/selective_search/SSSimBoxFillOrig.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimBoxFillOrig.p


--------------------------------------------------------------------------------
/selective_search/SSSimBoxFillSize.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimBoxFillSize.p


--------------------------------------------------------------------------------
/selective_search/SSSimTextureSize.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimTextureSize.p


--------------------------------------------------------------------------------
/selective_search/BlobStructColourHist.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/BlobStructColourHist.p


--------------------------------------------------------------------------------
/selective_search/Dependencies/Rgb2Ooo.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Rgb2Ooo.p


--------------------------------------------------------------------------------
/selective_search/Dependencies/Rgb2Rg.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Rgb2Rg.p


--------------------------------------------------------------------------------
/selective_search/Dependencies/Rgb2Rgi.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Rgb2Rgi.p


--------------------------------------------------------------------------------
/selective_search/SSSimBoxFillOrigSize.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimBoxFillOrigSize.p


--------------------------------------------------------------------------------
/selective_search/SSSimTextureSizeFill.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimTextureSizeFill.p


--------------------------------------------------------------------------------
/selective_search/BlobStructTextureHist.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/BlobStructTextureHist.p


--------------------------------------------------------------------------------
/selective_search/Dependencies/Blob2Image.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Blob2Image.p


--------------------------------------------------------------------------------
/todo.txt:
--------------------------------------------------------------------------------
1 | - ImageNet ILSVRC detection dataset
2 | - COCO bounding-box detection
3 | - Port PASCAL evaluation code from Matlab to Python
4 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/Blob2Vector.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Blob2Vector.p


--------------------------------------------------------------------------------
/selective_search/Dependencies/BlobAddSizes.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/BlobAddSizes.p


--------------------------------------------------------------------------------
/selective_search/Dependencies/Blobs2Boxes.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Blobs2Boxes.p


--------------------------------------------------------------------------------
/selective_search/Dependencies/Vector2Hist.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Vector2Hist.p


--------------------------------------------------------------------------------
/selective_search/GroundTruthVOC2007test.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/GroundTruthVOC2007test.mat


--------------------------------------------------------------------------------
/selective_search/Dependencies/gaussianFilter.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/gaussianFilter.p


--------------------------------------------------------------------------------
/selective_search/SSSimColourTextureSizeFill.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimColourTextureSizeFill.p


--------------------------------------------------------------------------------
/selective_search/Dependencies/Image2ColourSpace.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Image2ColourSpace.p


--------------------------------------------------------------------------------
/selective_search/SSSimColourTextureSizeFillOrig.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/SSSimColourTextureSizeFillOrig.p


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "caffe-fast-rcnn"]
2 | 	path = caffe-fast-rcnn
3 | 	url = https://github.com/rbgirshick/caffe-fast-rcnn.git
4 | 	branch = fast-rcnn
5 | 


--------------------------------------------------------------------------------
/experiments/cfgs/no_bbox_reg.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: "no_bbox_reg"
2 | TRAIN:
3 |   BBOX_REG: False
4 |   SNAPSHOT_INFIX: no_bbox_reg
5 | TEST:
6 |   BBOX_REG: False
7 | 


--------------------------------------------------------------------------------
/selective_search/BlobStruct2HierarchicalGrouping.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/BlobStruct2HierarchicalGrouping.p


--------------------------------------------------------------------------------
/selective_search/Dependencies/BlobAddTextureHists.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/BlobAddTextureHists.p


--------------------------------------------------------------------------------
/selective_search/Dependencies/SegmentIndices2Blobs.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/SegmentIndices2Blobs.p


--------------------------------------------------------------------------------
/selective_search/Dependencies/Image2OrientedGradients.p:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zeyuanxy/fast-rcnn/HEAD/selective_search/Dependencies/Image2OrientedGradients.p


--------------------------------------------------------------------------------
/help/INRIA/VOCcode/PASemptyrecord.m:
--------------------------------------------------------------------------------
1 | function record=PASemptyrecord
2 | % record = PASemptyrecord
3 | % Returns a record struct whose fields are all empty placeholders; the
4 | % objects field is seeded with the struct returned by PASemptyobject.
5 |   record.imgname='';
6 |   record.imgsize=[];
7 |   record.database='';
8 |   record.objects=PASemptyobject;
9 | return


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### Train and Test on Another Dataset
2 | - [Train](https://github.com/zeyuanxy/fast-rcnn/blob/master/help/train/README.md)
3 | - [Test](https://github.com/zeyuanxy/fast-rcnn/blob/master/help/test/README.md)
4 | 


--------------------------------------------------------------------------------
/matlab/README.md:
--------------------------------------------------------------------------------
1 | A basic demo in MATLAB.
2 | 
3 | Detection is also implemented in MATLAB (though missing some bells and whistles
4 | compared to the Python version) via the fast_rcnn_im_detect() function.
5 | 
6 | See fast_rcnn_demo.m for example usage.
7 | 


--------------------------------------------------------------------------------
/experiments/cfgs/multiscale.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: multiscale
 2 | TRAIN:
 3 |   # NOTE(review): !!python/tuple is a Python-specific YAML tag. A safe YAML
 4 |   # loader rejects it, so whatever reads this file presumably uses a loader
 5 |   # that constructs Python objects -- only load trusted config files.
 6 |   SCALES: !!python/tuple [480, 576, 688, 864, 1200]
 7 |   MAX_SIZE: 2000
 8 |   SNAPSHOT_INFIX: multiscale
 9 | TEST:
10 |   SCALES: !!python/tuple [480, 576, 688, 864, 1200]
11 |   MAX_SIZE: 2000
12 | 


--------------------------------------------------------------------------------
/models/README.md:
--------------------------------------------------------------------------------
1 | Prototxt files that define models and solvers.
2 | 
3 | Three models are defined, with some variations of each to support experiments
4 | in the paper.
5 |  - CaffeNet (model **S**)
6 |  - VGG_CNN_M_1024 (model **M**)
7 |  - VGG16 (model **L**)
8 | 


--------------------------------------------------------------------------------
/experiments/cfgs/svm.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: svm
2 | TRAIN:
3 |   # don't use flipped examples when training SVMs for two reasons:
4 |   # 1) R-CNN didn't
5 |   # 2) I've tried and it doesn't help, yet makes SVM training take 2x longer
6 |   USE_FLIPPED: False
7 | TEST:
8 |   SVM: True
9 | 


--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/experiments/README.md:
--------------------------------------------------------------------------------
1 | Scripts to reproduce most of the experiments in the paper.
2 | 
3 | Scripts are under `experiments/scripts`.
4 | 
5 | Each script saves a log file under `experiments/logs`.
6 | 
7 | Configuration override files used in the experiments are stored in `experiments/cfgs`.
8 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/help/INRIA/VOCcode/PASemptyobject.m:
--------------------------------------------------------------------------------
 1 | function object=PASemptyobject
 2 | % object = PASemptyobject
 3 | % Returns an object struct with default field values: empty strings or
 4 | % empty arrays everywhere, and the truncated/difficult flags set to false.
 5 |   object.label='';
 6 |   object.orglabel='';
 7 |   object.bbox=[];
 8 |   object.polygon=[];
 9 |   object.mask='';
10 |   object.class='';
11 |   object.view='';
12 |   object.truncated=false;
13 |   object.difficult=false;
14 | return


--------------------------------------------------------------------------------
/selective_search/SSSimSize.m:
--------------------------------------------------------------------------------
1 | function [similarity indSim] = SSSimSize(a, b, blobStruct)
2 | % [similarity, indSim] = SSSimSize(a, b, blobStruct)
3 | %
4 | % Calculate size similarity: (imSize - size(a) - size(b)) / imSize; indSim is a copy of it.
5 | 
6 | similarity = (blobStruct.imSize - blobStruct.size(a) - blobStruct.size(b)) ...
7 |            ./ blobStruct.imSize;
8 | 
9 | indSim = similarity;


--------------------------------------------------------------------------------
/help/INRIA/VOCcode/PASerrmsg.m:
--------------------------------------------------------------------------------
1 | function PASerrmsg(PASerr,SYSerr)
2 | % Prints both messages, then waits for a line of input: entering k or K
3 | % invokes keyboard; any other input (or none) just continues.
4 |   fprintf('Pascal Error Message: %s\n',PASerr);
5 |   fprintf('System Error Message: %s\n',SYSerr);
6 |   k=input('Enter K for keyboard, any other key to continue or ^C to quit ...','s');
7 |   if (~isempty(k)), if (lower(k)=='k'), keyboard; end; end;
8 |   fprintf('\n');
9 | return


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m:
--------------------------------------------------------------------------------
 1 | function VOCopts = get_voc_opts(path)
 2 | % VOCopts = get_voc_opts(path)
 3 | %
 4 | % Runs the VOCinit script from inside the directory `path` and returns the
 5 | % VOCopts variable that script is expected to define.
 6 | %
 7 | % path:     Directory that contains the VOCcode folder
 8 | %
 9 | % The working directory and the MATLAB path entry are restored before
10 | % returning, on both the success and the failure path.
11 | 
12 | tmp = pwd;
13 | cd(path);
14 | try
15 |   addpath('VOCcode');
16 |   VOCinit;
17 | catch err
18 |   rmpath('VOCcode');
19 |   cd(tmp);
20 |   % Append the underlying message: VOCinit can fail for reasons other than
21 |   % a missing VOCcode directory, and hiding them makes setup errors opaque.
22 |   error('VOCcode directory not found under %s (%s)', path, err.message);
23 | end
24 | rmpath('VOCcode');
25 | cd(tmp);
26 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m:
--------------------------------------------------------------------------------
 1 | function ap = xVOCap(rec,prec)
 2 | % From the PASCAL VOC 2011 devkit
 3 | %
 4 | % ap = xVOCap(rec,prec) sums precision over the steps where recall changes.
 5 | % rec and prec are concatenated vertically with sentinel values below, so
 6 | % they are expected to be column vectors (presumably of equal length).
 7 | 
 8 | % pad with sentinels: recall runs from 0 to 1, precision is 0 at both ends
 9 | mrec=[0 ; rec ; 1];
10 | mpre=[0 ; prec ; 0];
11 | % sweep right to left so each entry becomes the max of itself and all later ones
12 | for i=numel(mpre)-1:-1:1
13 |     mpre(i)=max(mpre(i),mpre(i+1));
14 | end
15 | % indices whose recall differs from the previous entry
16 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
17 | % sum of (recall step) times (precision at that step)
18 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
19 | 


--------------------------------------------------------------------------------
/commands.txt:
--------------------------------------------------------------------------------
1 | ./tools/train_net.py --gpu 0 --solver models/VGG_CNN_M_1024/solver.prototxt \
2 |     --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel --imdb inria_train
3 | 
4 | ./tools/test_net.py --gpu 1 --def models/VGG_CNN_M_1024/test.prototxt \
5 |     --net output/default/train/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel --imdb inria_test
6 | 


--------------------------------------------------------------------------------
/output/README.md:
--------------------------------------------------------------------------------
 1 | Artifacts generated by the scripts in `tools` are written to this directory.
 2 | 
 3 | Trained Fast R-CNN networks are saved under:
 4 | 
 5 | ```
 6 | output/<experiment directory>/<dataset name>/
 7 | ```
 8 | 
 9 | Test outputs are saved under:
10 | 
11 | ```
12 | output/<experiment directory>/<dataset name>/<network snapshot name>/
13 | ```
14 | 


--------------------------------------------------------------------------------
/lib/fast_rcnn/__init__.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | from . import config
 9 | from . import train
10 | from . import test
11 | 


--------------------------------------------------------------------------------
/experiments/scripts/all_vgg16.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | GPU=$1
 5 | NET=vgg16
 6 | ./experiments/scripts/default_${NET}.sh $GPU
 7 | ./experiments/scripts/fc_only_${NET}.sh $GPU
 8 | ./experiments/scripts/multitask_no_bbox_reg_${NET}.sh $GPU
 9 | ./experiments/scripts/no_bbox_reg_${NET}.sh $GPU
10 | ./experiments/scripts/piecewise_${NET}.sh $GPU
11 | ./experiments/scripts/svd_${NET}.sh $GPU
12 | ./experiments/scripts/svm_${NET}.sh $GPU
13 | 


--------------------------------------------------------------------------------
/experiments/scripts/all_caffenet.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | GPU=$1
 5 | NET=caffenet
 6 | ./experiments/scripts/default_${NET}.sh $GPU
 7 | ./experiments/scripts/multiscale_${NET}.sh $GPU
 8 | ./experiments/scripts/multitask_no_bbox_reg_${NET}.sh $GPU
 9 | ./experiments/scripts/no_bbox_reg_${NET}.sh $GPU
10 | ./experiments/scripts/piecewise_${NET}.sh $GPU
11 | ./experiments/scripts/svd_${NET}.sh $GPU
12 | ./experiments/scripts/svm_${NET}.sh $GPU
13 | 


--------------------------------------------------------------------------------
/experiments/scripts/all_vgg_cnn_m_1024.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | GPU=$1
 5 | NET=vgg_cnn_m_1024
 6 | ./experiments/scripts/default_${NET}.sh $GPU
 7 | ./experiments/scripts/multiscale_${NET}.sh $GPU
 8 | ./experiments/scripts/multitask_no_bbox_reg_${NET}.sh $GPU
 9 | ./experiments/scripts/no_bbox_reg_${NET}.sh $GPU
10 | ./experiments/scripts/piecewise_${NET}.sh $GPU
11 | ./experiments/scripts/svd_${NET}.sh $GPU
12 | ./experiments/scripts/svm_${NET}.sh $GPU
13 | 


--------------------------------------------------------------------------------
/models/CaffeNet/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "models/CaffeNet/train.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 30000
 6 | display: 20
 7 | average_loss: 100
 8 | momentum: 0.9
 9 | weight_decay: 0.0005
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | # function
12 | snapshot: 0
13 | # We still use the snapshot prefix, though
14 | snapshot_prefix: "caffenet_fast_rcnn"
15 | #debug_info: true
16 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/BoxUnion.m:
--------------------------------------------------------------------------------
 1 | function union = BoxUnion(a, b)
 2 | % union = BoxUnion(a, b)
 3 | %
 4 | % Creates the union box of two bounding boxes.
 5 | %
 6 | % a:            Input bounding box "a"
 7 | % b:            Input bounding box "b"
 8 | %
 9 | % union:        Union box: min of columns 1-2 and max of columns 3-4 of a and b
10 | %
11 | %     Jasper Uijlings - 2013
12 | 
13 | union = [min(a(:,1),b(:,1)) min(a(:,2),b(:,2)) ...
14 |          max(a(:,3),b(:,3)) max(a(:,4),b(:,4))];
15 | 


--------------------------------------------------------------------------------
/models/VGG16/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "models/VGG16/train.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 30000
 6 | display: 20
 7 | average_loss: 100
 8 | # iter_size: 1
 9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | # We disable standard caffe solver snapshotting and implement our own snapshot
12 | # function
13 | snapshot: 0
14 | # We still use the snapshot prefix, though
15 | snapshot_prefix: "vgg16_fast_rcnn"
16 | #debug_info: true
17 | 


--------------------------------------------------------------------------------
/models/CaffeNet/piecewise/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "models/CaffeNet/piecewise/train.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 30000
 6 | display: 20
 7 | average_loss: 100
 8 | momentum: 0.9
 9 | weight_decay: 0.0005
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | # function
12 | snapshot: 0
13 | # We still use the snapshot prefix, though
14 | snapshot_prefix: "caffenet_fast_rcnn"
15 | #debug_info: true
16 | 


--------------------------------------------------------------------------------
/models/VGG_CNN_M_1024/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "models/VGG_CNN_M_1024/train.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 30000
 6 | display: 20
 7 | average_loss: 100
 8 | momentum: 0.9
 9 | weight_decay: 0.0005
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | # function
12 | snapshot: 0
13 | # We still use the snapshot prefix, though
14 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn"
15 | #debug_info: true
16 | 


--------------------------------------------------------------------------------
/models/CaffeNet/no_bbox_reg/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "models/CaffeNet/no_bbox_reg/train.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 30000
 6 | display: 20
 7 | average_loss: 100
 8 | momentum: 0.9
 9 | weight_decay: 0.0005
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | # function
12 | snapshot: 0
13 | # We still use the snapshot prefix, though
14 | snapshot_prefix: "caffenet_fast_rcnn"
15 | #debug_info: true
16 | 


--------------------------------------------------------------------------------
/models/VGG16/fc_only/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "models/VGG16/fc_only/train.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 30000
 6 | display: 20
 7 | average_loss: 100
 8 | # iter_size: 1
 9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | # We disable standard caffe solver snapshotting and implement our own snapshot
12 | # function
13 | snapshot: 0
14 | # We still use the snapshot prefix, though
15 | snapshot_prefix: "vgg16_fast_rcnn"
16 | #debug_info: true
17 | 


--------------------------------------------------------------------------------
/models/VGG16/piecewise/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "models/VGG16/piecewise/train.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 30000
 6 | display: 20
 7 | average_loss: 100
 8 | # iter_size: 1
 9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | # We disable standard caffe solver snapshotting and implement our own snapshot
12 | # function
13 | snapshot: 0
14 | # We still use the snapshot prefix, though
15 | snapshot_prefix: "vgg16_fast_rcnn"
16 | #debug_info: true
17 | 


--------------------------------------------------------------------------------
/models/VGG16/no_bbox_reg/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "models/VGG16/no_bbox_reg/train.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 30000
 6 | display: 20
 7 | average_loss: 100
 8 | # iter_size: 1
 9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | # We disable standard caffe solver snapshotting and implement our own snapshot
12 | # function
13 | snapshot: 0
14 | # We still use the snapshot prefix, though
15 | snapshot_prefix: "vgg16_fast_rcnn"
16 | #debug_info: true
17 | 


--------------------------------------------------------------------------------
/models/VGG_CNN_M_1024/piecewise/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "models/VGG_CNN_M_1024/piecewise/train.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 30000
 6 | display: 20
 7 | average_loss: 100
 8 | momentum: 0.9
 9 | weight_decay: 0.0005
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | # function
12 | snapshot: 0
13 | # We still use the snapshot prefix, though
14 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn"
15 | #debug_info: true
16 | 


--------------------------------------------------------------------------------
/models/VGG_CNN_M_1024/no_bbox_reg/solver.prototxt:
--------------------------------------------------------------------------------
 1 | train_net: "models/VGG_CNN_M_1024/no_bbox_reg/train.prototxt"
 2 | base_lr: 0.001
 3 | lr_policy: "step"
 4 | gamma: 0.1
 5 | stepsize: 30000
 6 | display: 20
 7 | average_loss: 100
 8 | momentum: 0.9
 9 | weight_decay: 0.0005
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | # function
12 | snapshot: 0
13 | # We still use the snapshot prefix, though
14 | snapshot_prefix: "vgg_cnn_m_1024_fast_rcnn"
15 | #debug_info: true
16 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/NormalizeArray.m:
--------------------------------------------------------------------------------
 1 | function b = NormalizeArray(a)
 2 | % b = NormalizeArray(a)
 3 | %
 4 | % Shifts and rescales array a so that its minimum maps to 0 and its maximum
 5 | % maps to 1. When all elements are equal only the shift is applied, so the
 6 | % result is all zeros.
 7 | %
 8 | % a:            Input array.
 9 | %
10 | % b:            Normalized output array
11 | %
12 | %     Jasper Uijlings - 2013
13 | 
14 | lowest = min(a(:));
15 | spread = max(a(:)) - lowest;
16 | 
17 | b = a - lowest;
18 | % Skip the division for a constant array to avoid dividing by zero.
19 | if spread ~= 0
20 |     b = b ./ spread;
21 | end
22 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/FelzenSegment/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds the `segment` executable from segment.cpp and its headers.
 2 | INCDIR = -I.
 3 | DBG    = -g
 4 | OPT    = -O3
 5 | CPP    = g++
 6 | CFLAGS = $(DBG) $(OPT) $(INCDIR)
 7 | LINK   = -lm
 8 | 
 9 | # all/clean/clean-all name actions, not files. Declaring them phony keeps a
10 | # stray file with one of these names from making the target look up to date.
11 | .PHONY: all clean clean-all
12 | 
13 | .cpp.o:
14 | 	$(CPP) $(CFLAGS) -c $< -o $@
15 | 
16 | all: segment
17 | 
18 | # segment is compiled and linked in one step straight from segment.cpp.
19 | segment: segment.cpp segment-image.h segment-graph.h disjoint-set.h
20 | 	$(CPP) $(CFLAGS) -o segment segment.cpp $(LINK)
21 | 
22 | clean:
23 | 	/bin/rm -f segment *.o
24 | 
25 | clean-all: clean
26 | 	/bin/rm -f *~
27 | 


--------------------------------------------------------------------------------
/experiments/scripts/multitask_no_bbox_reg_vgg16.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/multitask_no_bbox_reg_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/test_net.py --gpu $1 \
13 |   --def models/VGG16/test.prototxt \
14 |   --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000.caffemodel \
15 |   --imdb voc_2007_test \
16 |   --cfg experiments/cfgs/no_bbox_reg.yml
17 | 


--------------------------------------------------------------------------------
/experiments/scripts/multitask_no_bbox_reg_caffenet.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/multitask_no_bbox_reg_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/test_net.py --gpu $1 \
13 |   --def models/CaffeNet/test.prototxt \
14 |   --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000.caffemodel \
15 |   --imdb voc_2007_test \
16 |   --cfg experiments/cfgs/no_bbox_reg.yml
17 | 


--------------------------------------------------------------------------------
/selective_search/README.md:
--------------------------------------------------------------------------------
 1 | This is mostly the code from [Segmentation as Selective Search for Object Recognition](http://koen.me/research/selectivesearch/), downloaded November 2013.
 2 | I simply needed a way to call this stuff from Python: `selective_search.py` and `selective_search.m` are the only new files.
 3 | 
 4 |     import selective_search_ijcv_with_python as selective_search
 5 |     windows = selective_search.get_windows(image_filenames)
 6 | 
 7 | To make sure this works, simply run `python selective_search.py`.
 8 | 
 9 | Sergey Karayev
10 | 25 Nov 2013
11 | 


--------------------------------------------------------------------------------
/experiments/scripts/multitask_no_bbox_reg_vgg_cnn_m_1024.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/multitask_no_bbox_reg_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/test_net.py --gpu $1 \
13 |   --def models/VGG_CNN_M_1024/test.prototxt \
14 |   --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel \
15 |   --imdb voc_2007_test \
16 |   --cfg experiments/cfgs/no_bbox_reg.yml
17 | 


--------------------------------------------------------------------------------
/selective_search/selective_search.m:
--------------------------------------------------------------------------------
 1 | image_db = '/home/szy/INRIA/';
 2 | image_filenames = textread([image_db '/data/ImageSets/train.txt'], '%s', 'delimiter', '\n');
 3 | for i = 1:length(image_filenames)
 4 |     % Expand each listed name to an existing .jpg (preferred) or .png file.
 5 |     % Both tests use the untouched name; entries with neither file stay as is.
 6 |     stem = [image_db '/data/Images/' image_filenames{i}];
 7 |     if exist([stem '.jpg'], 'file') == 2
 8 |         image_filenames{i} = [stem '.jpg'];
 9 |     elseif exist([stem '.png'], 'file') == 2
10 |         image_filenames{i} = [stem '.png'];
11 |     end
12 | end
13 | selective_search_rcnn(image_filenames, 'output.mat');
14 | 


--------------------------------------------------------------------------------
/experiments/scripts/default_vgg16.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/default_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/train_net.py --gpu $1 \
13 |   --solver models/VGG16/solver.prototxt \
14 |   --weights data/imagenet_models/VGG16.v2.caffemodel \
15 |   --imdb voc_2007_trainval
16 | 
17 | time ./tools/test_net.py --gpu $1 \
18 |   --def models/VGG16/test.prototxt \
19 |   --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000.caffemodel \
20 |   --imdb voc_2007_test
21 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/BoxRemoveDuplicates.m:
--------------------------------------------------------------------------------
 1 | function [boxesOut uniqueIdx] = BoxRemoveDuplicates(boxesIn)
 2 | % [boxesOut uniqueIdx] = BoxRemoveDuplicates(boxesIn)
 3 | %
 4 | % Removes duplicate boxes. The surviving boxes keep their original order
 5 | % and the first occurrence of each distinct box is the one retained.
 6 | %
 7 | % boxesIn:          N x 4 array containing boxes
 8 | %
 9 | % boxesOut:         M x 4 array of boxes without duplicates. M <= N
10 | % uniqueIdx:        Indices of retained boxes from boxesIn
11 | %
12 | %     Jasper Uijlings - 2013
13 | 
14 | % unique() orders by row value; re-sorting the first-occurrence indices
15 | % restores the input order.
16 | [unusedRows firstIdx] = unique(boxesIn, 'rows', 'first');
17 | uniqueIdx = sort(firstIdx);
18 | boxesOut = boxesIn(uniqueIdx, :);
19 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/FilterBoxesWidth.m:
--------------------------------------------------------------------------------
 1 | function [outBoxes idsGood]= FilterBoxesWidth(inBoxes, minLen)
 2 | % [outBoxes idsGood]= FilterBoxesWidth(inBoxes, minLen)
 3 | %
 4 | % Filters out small boxes. A box is kept when both its height and its
 5 | % width, as reported by BoxSize, are at least minLen.
 6 | %
 7 | % inBoxes:       M x 4 array of boxes
 8 | % minLen:        Minimum width and height of boxes
 9 | %
10 | % outBoxes:      N x 4 array of boxes, N <= M
11 | % idsGood:       M x 1 logical array denoting boxes kept
12 | %
13 | %     Jasper Uijlings - 2013
14 | 
15 | [boxRows boxCols] = BoxSize(inBoxes);
16 | 
17 | tallEnough = boxRows >= minLen;
18 | wideEnough = boxCols >= minLen;
19 | 
20 | idsGood = tallEnough & wideEnough;
21 | outBoxes = inBoxes(idsGood, :);


--------------------------------------------------------------------------------
/experiments/scripts/default_caffenet.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/default_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/train_net.py --gpu $1 \
13 |   --solver models/CaffeNet/solver.prototxt \
14 |   --weights data/imagenet_models/CaffeNet.v2.caffemodel \
15 |   --imdb voc_2007_trainval
16 | 
17 | time ./tools/test_net.py --gpu $1 \
18 |   --def models/CaffeNet/test.prototxt \
19 |   --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000.caffemodel \
20 |   --imdb voc_2007_test
21 | 


--------------------------------------------------------------------------------
/experiments/scripts/default_vgg_cnn_m_1024.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/default_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/train_net.py --gpu $1 \
13 |   --solver models/VGG_CNN_M_1024/solver.prototxt \
14 |   --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel \
15 |   --imdb voc_2007_trainval
16 | 
17 | time ./tools/test_net.py --gpu $1 \
18 |   --def models/VGG_CNN_M_1024/test.prototxt \
19 |   --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel \
20 |   --imdb voc_2007_test
21 | 


--------------------------------------------------------------------------------
/experiments/scripts/svd_vgg16.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/svd_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/compress_net.py \
13 |   --def models/VGG16/test.prototxt \
14 |   --def-svd models/VGG16/compressed/test.prototxt \
15 |   --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000.caffemodel
16 | 
17 | time ./tools/test_net.py --gpu $1 \
18 |   --def models/VGG16/compressed/test.prototxt \
19 |   --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000_svd_fc6_1024_fc7_256.caffemodel \
20 |   --imdb voc_2007_test
21 | 


--------------------------------------------------------------------------------
/experiments/scripts/svd_caffenet.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/svd_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/compress_net.py \
13 |   --def models/CaffeNet/test.prototxt \
14 |   --def-svd models/CaffeNet/compressed/test.prototxt \
15 |   --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000.caffemodel
16 | 
17 | time ./tools/test_net.py --gpu $1 \
18 |   --def models/CaffeNet/compressed/test.prototxt \
19 |   --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000_svd_fc6_1024_fc7_256.caffemodel \
20 |   --imdb voc_2007_test
21 | 


--------------------------------------------------------------------------------
/experiments/scripts/svm_vgg16.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/svm_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/train_svms.py --gpu $1 \
13 |   --def models/VGG16/test.prototxt \
14 |   --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000.caffemodel \
15 |   --imdb voc_2007_trainval \
16 |   --cfg experiments/cfgs/svm.yml
17 | 
18 | time ./tools/test_net.py --gpu $1 \
19 |   --def models/VGG16/test.prototxt \
20 |   --net output/default/voc_2007_trainval/vgg16_fast_rcnn_iter_40000_svm.caffemodel \
21 |   --imdb voc_2007_test \
22 |   --cfg experiments/cfgs/svm.yml
23 | 


--------------------------------------------------------------------------------
/experiments/scripts/fc_only_vgg16.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/fc_only_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/train_net.py --gpu $1 \
13 |   --solver models/VGG16/fc_only/solver.prototxt \
14 |   --weights data/imagenet_models/VGG16.v2.caffemodel \
15 |   --imdb voc_2007_trainval \
16 |   --cfg experiments/cfgs/fc_only.yml
17 | 
18 | time ./tools/test_net.py --gpu $1 \
19 |   --def models/VGG16/test.prototxt \
20 |   --net output/fc_only/voc_2007_trainval/vgg16_fast_rcnn_fc_only_iter_40000.caffemodel \
21 |   --imdb voc_2007_test \
22 |   --cfg experiments/cfgs/fc_only.yml
23 | 


--------------------------------------------------------------------------------
/experiments/scripts/svm_caffenet.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/svm_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/train_svms.py --gpu $1 \
13 |   --def models/CaffeNet/test.prototxt \
14 |   --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000.caffemodel \
15 |   --imdb voc_2007_trainval \
16 |   --cfg experiments/cfgs/svm.yml
17 | 
18 | time ./tools/test_net.py --gpu $1 \
19 |   --def models/CaffeNet/test.prototxt \
20 |   --net output/default/voc_2007_trainval/caffenet_fast_rcnn_iter_40000_svm.caffemodel \
21 |   --imdb voc_2007_test \
22 |   --cfg experiments/cfgs/svm.yml
23 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/ShowRectsWithinImage.m:
--------------------------------------------------------------------------------
 1 | function ShowRectsWithinImage(rects, numRow, numCol, image, imageNames)
 2 | % ShowRectsWithinImage(rects, numRow, numCol, image, imageNames)
 3 | %
 4 | % Crops every rectangle out of the image and hands the crops to
 5 | % ShowImageCell for display.
 6 | %
 7 | % rects:        N x 4 array of boxes, indexed into image as
 8 | %               image(r(1):r(3), r(2):r(4), :)
 9 | % numRow:       Forwarded to ShowImageCell
10 | % numCol:       Forwarded to ShowImageCell
11 | % image:        Image the rectangles are cropped from
12 | % imageNames:   Optional N x 1 cell of names, one per rectangle. Defaults
13 | %               to the rectangle index written as a string.
14 | %
15 | %     Jasper Uijlings - 2013
16 | 
17 | numRects = size(rects, 1);
18 | 
19 | % Fall back to '1', '2', ... when the caller supplied no names.
20 | if ~exist('imageNames', 'var')
21 |     imageNames = cell(numRects, 1);
22 |     for k = 1:numRects
23 |         imageNames{k} = sprintf('%d', k);
24 |     end
25 | end
26 | 
27 | % Cut one sub-image per rectangle
28 | crops = cell(numRects, 1);
29 | for k = 1:numRects
30 |     r = rects(k, :);
31 |     crops{k} = image(r(1):r(3), r(2):r(4), :);
32 | end
33 | 
34 | ShowImageCell(crops, numRow, numCol, 'rects', imageNames);


--------------------------------------------------------------------------------
/tools/_init_paths.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Set up paths for Fast R-CNN."""
 9 | 
10 | import os.path as osp
11 | import sys
12 | 
13 | def add_path(path):
14 |     if path not in sys.path:
15 |         sys.path.insert(0, path)
16 | 
17 | this_dir = osp.dirname(__file__)
18 | 
19 | # Add caffe to PYTHONPATH
20 | caffe_path = osp.join(this_dir, '..', 'caffe-fast-rcnn', 'python')
21 | add_path(caffe_path)
22 | 
23 | # Add lib to PYTHONPATH
24 | lib_path = osp.join(this_dir, '..', 'lib')
25 | add_path(lib_path)
26 | 


--------------------------------------------------------------------------------
/experiments/scripts/svd_vgg_cnn_m_1024.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/svd_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/compress_net.py \
13 |   --def models/VGG_CNN_M_1024/test.prototxt \
14 |   --def-svd models/VGG_CNN_M_1024/compressed/test.prototxt \
15 |   --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel
16 | 
17 | time ./tools/test_net.py --gpu $1 \
18 |   --def models/VGG_CNN_M_1024/compressed/test.prototxt \
19 |   --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000_svd_fc6_1024_fc7_256.caffemodel \
20 |   --imdb voc_2007_test
21 | 


--------------------------------------------------------------------------------
/experiments/scripts/multiscale_caffenet.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/multiscale_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/train_net.py --gpu $1 \
13 |   --solver models/CaffeNet/solver.prototxt \
14 |   --weights data/imagenet_models/CaffeNet.v2.caffemodel \
15 |   --imdb voc_2007_trainval \
16 |   --cfg experiments/cfgs/multiscale.yml
17 | 
18 | time ./tools/test_net.py --gpu $1 \
19 |   --def models/CaffeNet/test.prototxt \
20 |   --net output/multiscale/voc_2007_trainval/caffenet_fast_rcnn_multiscale_iter_40000.caffemodel \
21 |   --imdb voc_2007_test \
22 |   --cfg experiments/cfgs/multiscale.yml
23 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/BoxSize.m:
--------------------------------------------------------------------------------
 1 | function [numRows numColumns area] = BoxSize(bbox)
 2 | % [numRows numColumns area] = BoxSize(bbox)
 3 | %
 4 | % Retrieves number of rows, columns, and surface area of bounding boxes.
 5 | % Begin and end coordinates are both inclusive, hence the +1.
 6 | %
 7 | % bbox:         N x 4 bounding boxes as [rowBegin colBegin rowEnd colEnd]
 8 | %
 9 | % numRows:      Number of rows of each box
10 | % numColumns:   Number of columns of each box
11 | % area:         Area of each box
12 | %
13 | % All three outputs are the scalar 0 when bbox is empty.
14 | %
15 | %     Jasper Uijlings - 2013
16 | 
17 | % Nothing to measure
18 | if isempty(bbox)
19 |     area = 0;
20 |     numColumns = 0;
21 |     numRows = 0;
22 |     return
23 | end
24 | 
25 | rowBegin = bbox(:,1);
26 | colBegin = bbox(:,2);
27 | rowEnd   = bbox(:,3);
28 | colEnd   = bbox(:,4);
29 | 
30 | numRows    = rowEnd - rowBegin + 1;
31 | numColumns = colEnd - colBegin + 1;
32 | area       = numRows .* numColumns;
33 | 


--------------------------------------------------------------------------------
/experiments/scripts/no_bbox_reg_vgg16.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/no_bbox_reg_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/train_net.py --gpu $1 \
13 |   --solver models/VGG16/no_bbox_reg/solver.prototxt \
14 |   --weights data/imagenet_models/VGG16.v2.caffemodel \
15 |   --imdb voc_2007_trainval \
16 |   --cfg experiments/cfgs/no_bbox_reg.yml
17 | 
18 | time ./tools/test_net.py --gpu $1 \
19 |   --def models/VGG16/no_bbox_reg/test.prototxt \
20 |   --net output/no_bbox_reg/voc_2007_trainval/vgg16_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \
21 |   --imdb voc_2007_test \
22 |   --cfg experiments/cfgs/no_bbox_reg.yml
23 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/BoxIntersection.m:
--------------------------------------------------------------------------------
 1 | function intersection = BoxIntersection(a, b)
 2 | % intersection = BoxIntersection(a, b)
 3 | %
 4 | % Creates the intersection of two bounding boxes. Rows for which there is
 5 | % no intersection are filled with minus ones.
 6 | %
 7 | % a:            Input bounding box "a"
 8 | % b:            Input bounding box "b"
 9 | %
10 | % intersection: Intersection of box a and b
11 | %
12 | %     Jasper Uijlings - 2013
13 | 
14 | beginFirst  = max(a(:,1), b(:,1));
15 | beginSecond = max(a(:,2), b(:,2));
16 | endFirst    = min(a(:,3), b(:,3));
17 | endSecond   = min(a(:,4), b(:,4));
18 | intersection = [beginFirst beginSecond endFirst endSecond];
19 | 
20 | [numRows numColumns] = BoxSize(intersection);
21 | 
22 | % A candidate with fewer than one row or column is not a real box
23 | noOverlap = (numRows < 1) | (numColumns < 1);
24 | intersection(noOverlap, :) = -1;
25 | 


--------------------------------------------------------------------------------
/experiments/scripts/svm_vgg_cnn_m_1024.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/svm_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/train_svms.py --gpu $1 \
13 |   --def models/VGG_CNN_M_1024/test.prototxt \
14 |   --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel \
15 |   --imdb voc_2007_trainval \
16 |   --cfg experiments/cfgs/svm.yml
17 | 
18 | time ./tools/test_net.py --gpu $1 \
19 |   --def models/VGG_CNN_M_1024/test.prototxt \
20 |   --net output/default/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_iter_40000_svm.caffemodel \
21 |   --imdb voc_2007_test \
22 |   --cfg experiments/cfgs/svm.yml
23 | 


--------------------------------------------------------------------------------
/experiments/scripts/no_bbox_reg_caffenet.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/no_bbox_reg_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/train_net.py --gpu $1 \
13 |   --solver models/CaffeNet/no_bbox_reg/solver.prototxt \
14 |   --weights data/imagenet_models/CaffeNet.v2.caffemodel \
15 |   --imdb voc_2007_trainval \
16 |   --cfg experiments/cfgs/no_bbox_reg.yml
17 | 
18 | time ./tools/test_net.py --gpu $1 \
19 |   --def models/CaffeNet/no_bbox_reg/test.prototxt \
20 |   --net output/no_bbox_reg/voc_2007_trainval/caffenet_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \
21 |   --imdb voc_2007_test \
22 |   --cfg experiments/cfgs/no_bbox_reg.yml
23 | 


--------------------------------------------------------------------------------
/experiments/scripts/piecewise_vgg16.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/piecewise_vgg16.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/train_net.py --gpu $1 \
13 |   --solver models/VGG16/piecewise/solver.prototxt \
14 |   --weights output/no_bbox_reg/voc_2007_trainval/vgg16_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \
15 |   --imdb voc_2007_trainval \
16 |   --cfg experiments/cfgs/piecewise.yml
17 | 
18 | time ./tools/test_net.py --gpu $1 \
19 |   --def models/VGG16/test.prototxt \
20 |   --net output/piecewise/voc_2007_trainval/vgg16_fast_rcnn_piecewise_iter_40000.caffemodel \
21 |   --imdb voc_2007_test \
22 |   --cfg experiments/cfgs/piecewise.yml
23 | 


--------------------------------------------------------------------------------
/matlab/fast_rcnn_load_net.m:
--------------------------------------------------------------------------------
 1 | % --------------------------------------------------------
 2 | % Fast R-CNN
 3 | % Copyright (c) 2015 Microsoft
 4 | % Licensed under The MIT License [see LICENSE for details]
 5 | % Written by Ross Girshick
 6 | % --------------------------------------------------------
 7 | 
 8 | function model = fast_rcnn_load_net(def, net, use_gpu)
 9 | % model = fast_rcnn_load_net(def, net, use_gpu)
10 | %
11 | % Load a Fast R-CNN network.
12 | %
13 | % def, net:     Forwarded unchanged to caffe('init', def, net, 'test')
14 | % use_gpu:      Optional. CPU mode is selected only when this is supplied
15 | %               and false; otherwise GPU mode is selected.
16 | %
17 | % model:        Struct with fields init_key (value returned by the caffe
18 | %               init call), stride, and pixel_means (1 x 1 x 3 array)
19 | 
20 | init_key = caffe('init', def, net, 'test');
21 | 
22 | % GPU is the default when use_gpu is omitted
23 | if ~exist('use_gpu', 'var') || use_gpu
24 |   caffe('set_mode_gpu');
25 | else
26 |   caffe('set_mode_cpu');
27 | end
28 | 
29 | model.init_key = init_key;
30 | % model.stride is correct for the included models, but may not be correct
31 | % for other models!
32 | model.stride = 16;
33 | means = [102.9801, 115.9465, 122.7717];
34 | model.pixel_means = reshape(means, [1 1 3]);
35 | 


--------------------------------------------------------------------------------
/experiments/scripts/multiscale_vgg_cnn_m_1024.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | LOG="experiments/logs/multiscale_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
 9 | exec &> >(tee -a "$LOG")
10 | echo Logging output to "$LOG"
11 | 
12 | time ./tools/train_net.py --gpu $1 \
13 |   --solver models/VGG_CNN_M_1024/solver.prototxt \
14 |   --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel \
15 |   --imdb voc_2007_trainval \
16 |   --cfg experiments/cfgs/multiscale.yml
17 | 
18 | time ./tools/test_net.py --gpu $1 \
19 |   --def models/VGG_CNN_M_1024/test.prototxt \
20 |   --net output/multiscale/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_multiscale_iter_40000.caffemodel \
21 |   --imdb voc_2007_test \
22 |   --cfg experiments/cfgs/multiscale.yml
23 | 


--------------------------------------------------------------------------------
/experiments/scripts/piecewise_caffenet.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Train CaffeNet with the piecewise config, starting from the no_bbox_reg
 3 | # iter_40000 snapshot, then test the piecewise iter_40000 snapshot on
 4 | # voc_2007_test.
 5 | # Usage: piecewise_caffenet.sh GPU_ID
 6 | # All paths below are relative to the current working directory.
 7 | 
 8 | set -x
 9 | set -e
10 | 
11 | # Stop before a log file is created or training starts if the GPU id
12 | # argument is missing.
13 | GPU_ID="${1:?usage: $0 GPU_ID}"
14 | 
15 | export PYTHONUNBUFFERED="True"
16 | 
17 | # Send stdout and stderr to the terminal and to a timestamped log file.
18 | LOG="experiments/logs/piecewise_caffenet.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
19 | exec &> >(tee -a "$LOG")
20 | echo Logging output to "$LOG"
21 | 
22 | time ./tools/train_net.py --gpu "$GPU_ID" \
23 |   --solver models/CaffeNet/piecewise/solver.prototxt \
24 |   --weights output/no_bbox_reg/voc_2007_trainval/caffenet_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \
25 |   --imdb voc_2007_trainval \
26 |   --cfg experiments/cfgs/piecewise.yml
27 | 
28 | time ./tools/test_net.py --gpu "$GPU_ID" \
29 |   --def models/CaffeNet/test.prototxt \
30 |   --net output/piecewise/voc_2007_trainval/caffenet_fast_rcnn_piecewise_iter_40000.caffemodel \
31 |   --imdb voc_2007_test \
32 |   --cfg experiments/cfgs/piecewise.yml
33 | 


--------------------------------------------------------------------------------
/experiments/scripts/no_bbox_reg_vgg_cnn_m_1024.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Train VGG_CNN_M_1024 with the no_bbox_reg config, then test the iter_40000
 3 | # snapshot under output/no_bbox_reg/ on voc_2007_test.
 4 | # Usage: no_bbox_reg_vgg_cnn_m_1024.sh GPU_ID
 5 | # All paths below are relative to the current working directory.
 6 | 
 7 | set -x
 8 | set -e
 9 | 
10 | # Stop before a log file is created or training starts if the GPU id
11 | # argument is missing.
12 | GPU_ID="${1:?usage: $0 GPU_ID}"
13 | 
14 | export PYTHONUNBUFFERED="True"
15 | 
16 | # Send stdout and stderr to the terminal and to a timestamped log file.
17 | LOG="experiments/logs/no_bbox_reg_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
18 | exec &> >(tee -a "$LOG")
19 | echo Logging output to "$LOG"
20 | 
21 | time ./tools/train_net.py --gpu "$GPU_ID" \
22 |   --solver models/VGG_CNN_M_1024/no_bbox_reg/solver.prototxt \
23 |   --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel \
24 |   --imdb voc_2007_trainval \
25 |   --cfg experiments/cfgs/no_bbox_reg.yml
26 | 
27 | time ./tools/test_net.py --gpu "$GPU_ID" \
28 |   --def models/VGG_CNN_M_1024/no_bbox_reg/test.prototxt \
29 |   --net output/no_bbox_reg/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \
30 |   --imdb voc_2007_test \
31 |   --cfg experiments/cfgs/no_bbox_reg.yml
32 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/GetPascalOverlap.m:
--------------------------------------------------------------------------------
 1 | function score = GetPascalOverlap(bb, bbgt)
 2 | % score = GetPascalOverlap(bb, bbgt)
 3 | %
 4 | % Pascal overlap measure of two boxes: area of their intersection divided
 5 | % by area of their union. The computation follows the Pascal code.
 6 | %
 7 | % bb:           Bounding Box
 8 | % bbgt:         Ground truth bounding box
 9 | %
10 | % score:        Score between 0 and 1. 1 is complete overlap.
11 | 
12 | % Edges of the intersection box
13 | lo1 = max(bb(1), bbgt(1));
14 | lo2 = max(bb(2), bbgt(2));
15 | hi1 = min(bb(3), bbgt(3));
16 | hi2 = min(bb(4), bbgt(4));
17 | iw = hi1 - lo1 + 1;
18 | ih = hi2 - lo2 + 1;
19 | 
20 | % Boxes without a positive-size intersection score zero
21 | score = 0;
22 | if ~(iw > 0 && ih > 0)
23 |     return;
24 | end
25 | 
26 | % Overlap is area of intersection / area of union
27 | areaBb = (bb(3)-bb(1)+1) * (bb(4)-bb(2)+1);
28 | areaGt = (bbgt(3)-bbgt(1)+1) * (bbgt(4)-bbgt(2)+1);
29 | score = iw*ih / (areaBb + areaGt - iw*ih);


--------------------------------------------------------------------------------
/experiments/scripts/piecewise_vgg_cnn_m_1024.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Train VGG_CNN_M_1024 with the piecewise config, starting from the
 3 | # no_bbox_reg iter_40000 snapshot, then test the piecewise iter_40000
 4 | # snapshot on voc_2007_test.
 5 | # Usage: piecewise_vgg_cnn_m_1024.sh GPU_ID
 6 | # All paths below are relative to the current working directory.
 7 | 
 8 | set -x
 9 | set -e
10 | 
11 | # Stop before a log file is created or training starts if the GPU id
12 | # argument is missing.
13 | GPU_ID="${1:?usage: $0 GPU_ID}"
14 | 
15 | export PYTHONUNBUFFERED="True"
16 | 
17 | # Send stdout and stderr to the terminal and to a timestamped log file.
18 | LOG="experiments/logs/piecewise_vgg_cnn_m_1024.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
19 | exec &> >(tee -a "$LOG")
20 | echo Logging output to "$LOG"
21 | 
22 | time ./tools/train_net.py --gpu "$GPU_ID" \
23 |   --solver models/VGG_CNN_M_1024/piecewise/solver.prototxt \
24 |   --weights output/no_bbox_reg/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_no_bbox_reg_iter_40000.caffemodel \
25 |   --imdb voc_2007_trainval \
26 |   --cfg experiments/cfgs/piecewise.yml
27 | 
28 | time ./tools/test_net.py --gpu "$GPU_ID" \
29 |   --def models/VGG_CNN_M_1024/test.prototxt \
30 |   --net output/piecewise/voc_2007_trainval/vgg_cnn_m_1024_fast_rcnn_piecewise_iter_40000.caffemodel \
31 |   --imdb voc_2007_test \
32 |   --cfg experiments/cfgs/piecewise.yml
33 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/BoxBestOverlap.m:
--------------------------------------------------------------------------------
 1 | function [scores index] = BoxBestOverlap(gtBoxes, testBoxes)
 2 | % [scores index] = BoxBestOverlap(gtBoxes, testBoxes)
 3 | % 
 4 | % Get the best overlap score (Pascal-wise) of each ground truth box
 5 | %
 6 | % gtBoxes:                 Ground truth bounding boxes, one per row
 7 | % testBoxes:               Test bounding boxes, one per row
 8 | %
 9 | % scores:                  Highest overlap score for each gtBoxes bbox.
10 | % index:                   Index for each gtBoxes box which testBoxes box is best
11 | %
12 | %     Jasper Uijlings - 2013
13 | 
14 | numGT = size(gtBoxes,1);
15 | numTest = size(testBoxes,1);
16 | 
17 | scoreM = zeros(numGT, numTest);
18 | 
19 | % Row j holds the overlap of ground truth box j with every test box
20 | for j=1:numGT
21 |     scoreM(j,:) = PascalOverlap(gtBoxes(j,:), testBoxes);
22 | end
23 | 
24 | % Maximum along each row, i.e. over the test boxes
25 | [scores index] = max(scoreM, [], 2);
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/matlab/showboxes.m:
--------------------------------------------------------------------------------
 1 | % --------------------------------------------------------
 2 | % Fast R-CNN
 3 | % Copyright (c) 2015 Microsoft
 4 | % Licensed under The MIT License [see LICENSE for details]
 5 | % Written by Ross Girshick
 6 | % --------------------------------------------------------
 7 | 
 8 | function showboxes(im, boxes)
 9 | 
10 | image(im);
11 | axis image;
12 | axis off;
13 | set(gcf, 'Color', 'white');
14 | 
15 | if ~isempty(boxes)
16 |   x1 = boxes(:, 1);
17 |   y1 = boxes(:, 2);
18 |   x2 = boxes(:, 3);
19 |   y2 = boxes(:, 4);
20 |   c = 'r';
21 |   s = '-';
22 |   line([x1 x1 x2 x2 x1]', [y1 y2 y2 y1 y1]', ...
23 |        'color', c, 'linewidth', 2, 'linestyle', s);
24 |   for i = 1:size(boxes, 1)
25 |     text(double(x1(i)), double(y1(i)) - 2, ...
26 |          sprintf('%.3f', boxes(i, end)), ...
27 |          'backgroundcolor', 'r', 'color', 'w');
28 |   end
29 | end
30 | 


--------------------------------------------------------------------------------
/selective_search/BlobBestOverlap.m:
--------------------------------------------------------------------------------
 1 | function [scores index] = BlobBestOverlap(gtBlobs, testBlobs)
 2 | % [scores index] = BlobBestOverlap(gtBlobs, testBlobs)
 3 | % 
 4 | % Get overlap scores (Pascal-wise) for test blobs
 5 | %
 6 | % gtBlobs:                 Cell array of ground truth blobs
 7 | % testBlobs:               Cell array of test blobs
 8 | %
 9 | % scores:                  Highest overlap scores for each test blob.
10 | % index:                   Index for each test blob which ground truth blob
11 | %                          is best
12 | %
13 | %     Jasper Uijlings - 2013
14 | 
15 | numTarget = length(gtBlobs);
16 | numTest = length(testBlobs);
17 | 
18 | scoreM = zeros(numTest, numTarget);
19 | % scoreM(i,j): overlap of test blob i with ground truth blob j
20 | for i=1:numTest
21 |     for j=1:numTarget
22 |         scoreM(i,j) = PascalOverlapBlob(gtBlobs{j}, testBlobs{i});
23 |     end
24 | end
25 | % Maximum along each row, i.e. over the ground truth blobs
26 | [scores index] = max(scoreM, [], 2);
27 | 
28 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/FelzenSegment/README:
--------------------------------------------------------------------------------
 1 | 
 2 | Implementation of the segmentation algorithm described in:
 3 | 
 4 | Efficient Graph-Based Image Segmentation
 5 | Pedro F. Felzenszwalb and Daniel P. Huttenlocher
 6 | International Journal of Computer Vision, 59(2) September 2004.
 7 | 
 8 | The program takes a color image (PPM format) and produces a segmentation
 9 | with a random color assigned to each region.
10 | 
11 | 1) Type "make" to compile "segment".
12 | 
13 | 2) Run "segment sigma k min input output".
14 | 
15 | The parameters are: (see the paper for details)
16 | 
17 | sigma: Used to smooth the input image before segmenting it.
18 | k: Value for the threshold function.
19 | min: Minimum component size enforced by post-processing.
20 | input: Input image.
21 | output: Output image.
22 | 
23 | Typical parameters are sigma = 0.5, k = 500, min = 20.
24 | Larger values for k result in larger components in the result.
25 | 
26 | 


--------------------------------------------------------------------------------
/data/scripts/fetch_fast_rcnn_models.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Download and unpack the Fast R-CNN demo models into the parent directory
 3 | # of this script's directory. The download is skipped when the archive is
 4 | # already present there with the expected md5 checksum.
 5 | 
 6 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
 7 | # Quoted so that checkouts under a path containing spaces work; stop rather
 8 | # than download into whatever directory we happen to be in.
 9 | cd "$DIR" || exit 1
10 | 
11 | FILE=fast_rcnn_models.tgz
12 | URL=http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/$FILE
13 | CHECKSUM=5f7dde9f5376e18c8e065338cc5df3f7
14 | 
15 | if [ -f "$FILE" ]; then
16 |   echo "File already exists. Checking md5..."
17 |   os=`uname -s`
18 |   # Stays empty on any other OS, which forces a fresh download below.
19 |   checksum=""
20 |   if [ "$os" = "Linux" ]; then
21 |     checksum=`md5sum "$FILE" | awk '{ print $1 }'`
22 |   elif [ "$os" = "Darwin" ]; then
23 |     checksum=`cat "$FILE" | md5`
24 |   fi
25 |   if [ "$checksum" = "$CHECKSUM" ]; then
26 |     echo "Checksum is correct. No need to download."
27 |     exit 0
28 |   else
29 |     echo "Checksum is incorrect. Need to download again."
30 |   fi
31 | fi
32 | 
33 | echo "Downloading Fast R-CNN demo models (0.96G)..."
34 | 
35 | # Do not try to unpack a missing or partial archive when the download fails.
36 | wget "$URL" -O "$FILE" || { echo "Download failed." >&2; exit 1; }
37 | 
38 | echo "Unzipping..."
39 | 
40 | tar zxvf "$FILE" || { echo "Unpacking $FILE failed." >&2; exit 1; }
41 | 
42 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
43 | 


--------------------------------------------------------------------------------
/data/scripts/fetch_imagenet_models.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Download and unpack the pretrained ImageNet models into the parent
 3 | # directory of this script's directory. The download is skipped when the
 4 | # archive is already present there with the expected md5 checksum.
 5 | 
 6 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
 7 | # Quoted so that checkouts under a path containing spaces work; stop rather
 8 | # than download into whatever directory we happen to be in.
 9 | cd "$DIR" || exit 1
10 | 
11 | FILE=imagenet_models.tgz
12 | URL=http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/$FILE
13 | CHECKSUM=8b1d4b9da0593fc70ef403284f810adc
14 | 
15 | if [ -f "$FILE" ]; then
16 |   echo "File already exists. Checking md5..."
17 |   os=`uname -s`
18 |   # Stays empty on any other OS, which forces a fresh download below.
19 |   checksum=""
20 |   if [ "$os" = "Linux" ]; then
21 |     checksum=`md5sum "$FILE" | awk '{ print $1 }'`
22 |   elif [ "$os" = "Darwin" ]; then
23 |     checksum=`cat "$FILE" | md5`
24 |   fi
25 |   if [ "$checksum" = "$CHECKSUM" ]; then
26 |     echo "Checksum is correct. No need to download."
27 |     exit 0
28 |   else
29 |     echo "Checksum is incorrect. Need to download again."
30 |   fi
31 | fi
32 | 
33 | echo "Downloading pretrained ImageNet models (1G)..."
34 | 
35 | # Do not try to unpack a missing or partial archive when the download fails.
36 | wget "$URL" -O "$FILE" || { echo "Download failed." >&2; exit 1; }
37 | 
38 | echo "Unzipping..."
39 | 
40 | tar zxvf "$FILE" || { echo "Unpacking $FILE failed." >&2; exit 1; }
41 | 
42 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
43 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/NormalizeRows.m:
--------------------------------------------------------------------------------
 1 | function b = NormalizeRows(a, n)
 2 | % b = NormalizeRows(a, n)
 3 | %
 4 | % Rescales the rows of a so that each row sums to n. A row sum of zero is
 5 | % never used as divisor, so the division itself does not create NaN entries.
 6 | %
 7 | % a:            data with row vectors
 8 | % n:            The rows will sum to n. By default n = 1
 9 | %
10 | % b:            normalized data with row vectors. All rows sum to n except
11 | %               the ones that are zero in the first place: these remain
12 | %               zero.
13 | %
14 | %     Jasper Uijlings - 2013
15 | 
16 | % Per-row divisor; a zero sum is replaced by 1 to avoid division by zero
17 | rowDivisor = sum(a,2);
18 | rowDivisor(rowDivisor == 0) = 1;
19 | 
20 | % Dividing by (sum / n) makes the rows sum to n instead of 1
21 | if nargin ~= 1
22 |     rowDivisor = rowDivisor / n;
23 | end
24 | 
25 | b = bsxfun(@rdivide, a, rowDivisor);


--------------------------------------------------------------------------------
/data/scripts/fetch_selective_search_data.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Download and unpack the precomputed selective search boxes into the parent
 3 | # directory of this script's directory. The download is skipped when the
 4 | # archive is already present there with the expected md5 checksum.
 5 | 
 6 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
 7 | # Quoted so that checkouts under a path containing spaces work; stop rather
 8 | # than download into whatever directory we happen to be in.
 9 | cd "$DIR" || exit 1
10 | 
11 | FILE=selective_search_data.tgz
12 | URL=http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/$FILE
13 | CHECKSUM=7078c1db87a7851b31966b96774cd9b9
14 | 
15 | if [ -f "$FILE" ]; then
16 |   echo "File already exists. Checking md5..."
17 |   os=`uname -s`
18 |   # Stays empty on any other OS, which forces a fresh download below.
19 |   checksum=""
20 |   if [ "$os" = "Linux" ]; then
21 |     checksum=`md5sum "$FILE" | awk '{ print $1 }'`
22 |   elif [ "$os" = "Darwin" ]; then
23 |     checksum=`cat "$FILE" | md5`
24 |   fi
25 |   if [ "$checksum" = "$CHECKSUM" ]; then
26 |     echo "Checksum is correct. No need to download."
27 |     exit 0
28 |   else
29 |     echo "Checksum is incorrect. Need to download again."
30 |   fi
31 | fi
32 | 
33 | echo "Downloading precomputed selective search boxes (0.5G)..."
34 | 
35 | # Do not try to unpack a missing or partial archive when the download fails.
36 | wget "$URL" -O "$FILE" || { echo "Download failed." >&2; exit 1; }
37 | 
38 | echo "Unzipping..."
39 | 
40 | tar zxvf "$FILE" || { echo "Unpacking $FILE failed." >&2; exit 1; }
41 | 
42 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
43 | 


--------------------------------------------------------------------------------
/lib/setup.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | from distutils.core import setup
10 | from distutils.extension import Extension
11 | from Cython.Distutils import build_ext
12 | 
13 | cmdclass = {}
14 | ext_modules = [
15 |     Extension(
16 |         "utils.cython_bbox",
17 |         ["utils/bbox.pyx"],
18 |         extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
19 |     ),
20 |     Extension(
21 |         "utils.cython_nms",
22 |         ["utils/nms.pyx"],
23 |         extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
24 |     )
25 | ]
26 | cmdclass.update({'build_ext': build_ext})
27 | 
28 | setup(
29 |     name='fast_rcnn',
30 |     cmdclass=cmdclass,
31 |     ext_modules=ext_modules,
32 |     include_dirs=[np.get_include()]
33 | )
34 | 


--------------------------------------------------------------------------------
/selective_search/RecreateBlobHierarchyIndIm.m:
--------------------------------------------------------------------------------
 1 | function [hBlobs blobsInit blobsRest] = RecreateBlobHierarchyIndIm(blobIndIm, blobBoxes, hierarchy)
 2 | % [hBlobs blobsInit blobsRest] = RecreateBlobHierarchyIndIm(blobIndIm, blobBoxes, hierarchy)
 3 | %
 4 | % Recreate hierarchy from the initial segmentation image
 5 | %
 6 | % blobIndIm:            Image with indices denoting segments
 7 | % blobBoxes:            Boxes belonging to blobs in blobIndIm
 8 | % hierarchy:            Hierarchy denoting hierarchical merging
 9 | % 
10 | % hBlobs:               All blobs in the hierarchy
11 | % blobsInit:            The initial blobs
12 | % blobsRest:            All blobs but the initial blobs
13 | %
14 | %     Jasper Uijlings - 2013
15 | 
16 | % Get blobs of initial segmentation
17 | blobsInit = SegmentIndices2Blobs(blobIndIm, blobBoxes);
18 | 
19 | % Add sizes
20 | blobsInit = BlobAddSizes(blobsInit);
21 | 
22 | % Reconstruct hierarchy
23 | hBlobs = RecreateBlobHierarchy(blobsInit, hierarchy);
24 | % blobsRest is only built when the caller asks for all three outputs
25 | if nargout == 3
26 |     blobsRest = hBlobs(length(blobsInit)+1:end);
27 | end


--------------------------------------------------------------------------------
/selective_search/RecreateBlobHierarchy.m:
--------------------------------------------------------------------------------
 1 | function hBlobs = RecreateBlobHierarchy(blobs, hierarchy)
 2 | % hBlobs = RecreateBlobHierarchy(blobs, hierarchy)
 3 | %
 4 | % Recreates the hierarchical grouping using the starting blobs and the
 5 | % resulting hierarchy. This allows one to save the grouping using
 6 | % relatively small disk space while still being able to quickly recreate
 7 | % the complete grouping.
 8 | %
 9 | % blobs:            Input cell array with blobs
10 | % hierarchy:        Hierarchy of the blobs as created by
11 | %                   HierarchicalGrouping.m. As used here, hierarchy(k) == i
12 | %                   means that blob k is merged into blob i.
13 | %
14 | % hBlobs:           All segments of the hierarchical grouping: the input
15 | %                   blobs first, followed by the merged blobs.
16 | %
17 | % Raises an error when a merged blob does not have exactly two children.
18 | %
19 | %     Jasper Uijlings - 2013
20 | 
21 | hBlobs = cell(length(hierarchy) + 1,1);
22 | 
23 | hBlobs(1:length(blobs)) = blobs;
24 | 
25 | for i=length(blobs)+1:length(hBlobs)
26 |     % Children of blob i: the entries of hierarchy that name i
27 |     n = find(hierarchy == i);
28 |     
29 |     % Every merge combines exactly two blobs; fewer is as wrong as more
30 |     if length(n) ~= 2
31 |         error('Blob %d must be merged from exactly 2 blobs, found %d.', i, length(n));
32 |     end
33 |     
34 |     hBlobs{i} = MergeBlobs(hBlobs{n(1)}, hBlobs{n(2)});
35 | end


--------------------------------------------------------------------------------
/lib/utils/timer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import time
 9 | 
10 | class Timer(object):
11 |     """A simple timer."""
12 |     def __init__(self):
13 |         self.total_time = 0.
14 |         self.calls = 0
15 |         self.start_time = 0.
16 |         self.diff = 0.
17 |         self.average_time = 0.
18 | 
19 |     def tic(self):
20 |         # using time.time instead of time.clock because time time.clock
21 |         # does not normalize for multithreading
22 |         self.start_time = time.time()
23 | 
24 |     def toc(self, average=True):
25 |         self.diff = time.time() - self.start_time
26 |         self.total_time += self.diff
27 |         self.calls += 1
28 |         self.average_time = self.total_time / self.calls
29 |         if average:
30 |             return self.average_time
31 |         else:
32 |             return self.diff
33 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/PascalOverlap.m:
--------------------------------------------------------------------------------
 1 | function scores = PascalOverlap(targetBox, testBoxes)
 2 | % scores = PascalOverlap(targetBox, testBoxes)
 3 | %
 4 | % Function obtains the pascal overlap scores between the targetBox and
 5 | % all testBoxes
 6 | %
 7 | % targetBox:            1 x 4 array containing target box
 8 | % testBoxes:            N x 4 array containing test boxes
 9 | %
10 | % scores:               N x 1 array containing for each testBox the pascal
11 | %                       overlap score.
12 | %
13 | %     Jasper Uijlings - 2013
14 | 
15 | intersectBoxes = BoxIntersection(targetBox, testBoxes);
16 | overlapI = intersectBoxes(:,1) ~= -1; % Get which boxes overlap
17 | 
18 | % Intersection size
19 | [nr nc intersectionSize] = BoxSize(intersectBoxes(overlapI,:));
20 | 
21 | % Union size
22 | [nr nc testBoxSize] = BoxSize(testBoxes(overlapI,:));
23 | [nr nc targetBoxSize] = BoxSize(targetBox);
24 | unionSize = testBoxSize + targetBoxSize - intersectionSize;
25 | 
26 | scores = zeros(size(testBoxes,1),1);
27 | scores(overlapI) = intersectionSize ./ unionSize;
28 | 


--------------------------------------------------------------------------------
/lib/utils/nms.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | 
10 | def nms(dets, thresh):
11 |     x1 = dets[:, 0]
12 |     y1 = dets[:, 1]
13 |     x2 = dets[:, 2]
14 |     y2 = dets[:, 3]
15 |     scores = dets[:, 4]
16 | 
17 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
18 |     order = scores.argsort()[::-1]
19 | 
20 |     keep = []
21 |     while order.size > 0:
22 |         i = order[0]
23 |         keep.append(i)
24 |         xx1 = np.maximum(x1[i], x1[order[1:]])
25 |         yy1 = np.maximum(y1[i], y1[order[1:]])
26 |         xx2 = np.minimum(x2[i], x2[order[1:]])
27 |         yy2 = np.minimum(y2[i], y2[order[1:]])
28 | 
29 |         w = np.maximum(0.0, xx2 - xx1 + 1)
30 |         h = np.maximum(0.0, yy2 - yy1 + 1)
31 |         inter = w * h
32 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
33 | 
34 |         inds = np.where(ovr <= thresh)[0]
35 |         order = order[inds + 1]
36 | 
37 |     return keep
38 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Fast R-CNN
 2 | 
 3 | Copyright (c) Microsoft Corporation
 4 | 
 5 | All rights reserved.
 6 | 
 7 | MIT License
 8 | 
 9 | Permission is hereby granted, free of charge, to any person obtaining a
10 | copy of this software and associated documentation files (the "Software"),
11 | to deal in the Software without restriction, including without limitation
12 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 | and/or sell copies of the Software, and to permit persons to whom the
14 | Software is furnished to do so, subject to the following conditions:
15 | 
16 | The above copyright notice and this permission notice shall be included
17 | in all copies or substantial portions of the Software.
18 | 
19 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 | OTHER DEALINGS IN THE SOFTWARE.
26 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/CountVisualWordsIndex.m:
--------------------------------------------------------------------------------
 1 | function [cb counts] = CountVisualWordsIndex(indexIm, wordIm, numIndex, numWords)
 2 | % [cb counts] = CountVisualWordsIndex(indexIm, wordIm, numIndex, numWords)
 3 | %
 4 | % Counts the number of visual words for the visual words in wordIm.
 5 | % wordIm is an array with visual word identities. Zeros will be ignored.
 6 | % indexIm is an array with regions to which visual words belong.
 7 | %
 8 | % WARNING: VERY FEW CHECKS FOR INTEGRITY. WRONG INPUT WILL CRASH THE SYSTEM
 9 | %
10 | % indexIm:          Array with indices. Range: [1,numIndex]
11 | % wordIm:           Array with visual word identities. Range: [0,numWords]
12 | % numIndex:         Number of indices in indexIm.
13 | % numWords:         Number of visual words.
14 | %
15 | % cb:               numIndex x numWords array with histogram counts
16 | % counts:           numIndex x 1 array with counts per row of cb.
17 | %
18 | %     Jasper Uijlings - 2013
19 | 
20 | % Both images must agree in their first two dimensions
21 | sameRows = size(indexIm,1) == size(wordIm,1);
22 | sameCols = size(indexIm,2) == size(wordIm,2);
23 | if ~(sameRows && sameCols)
24 |     error('First two input arguments should have the same 2D dimension');
25 | end
26 | 
27 | % wordIm is handed to the mex routine as double
28 | wordIm = double(wordIm);
29 | 
30 | [cb counts] = mexCountWordsIndex(indexIm, wordIm, numIndex, numWords);
31 | 


--------------------------------------------------------------------------------
/help/INRIA/VOCcode/VOCinit.m:
--------------------------------------------------------------------------------
 1 | clear VOCopts
 2 | 
 3 | % get current directory with forward slashes
 4 | 
 5 | cwd=cd;
 6 | cwd(cwd=='\')='/';
 7 | 
 8 | % change this path to point to your copy of the PASCAL VOC data
 9 | VOCopts.datadir=[cwd '/'];
10 | 
11 | % change this path to a writable directory for your results
12 | VOCopts.resdir=[cwd '/results/test/'];
13 | 
14 | % change this path to a writable local directory for the example code
15 | VOCopts.localdir=[cwd '/local/'];
16 | 
17 | % initialize the test set
18 | 
19 | VOCopts.testset='test'; % use test set for final challenge
20 | 
21 | % initialize paths
22 | 
23 | VOCopts.imgsetpath=[VOCopts.datadir 'data/ImageSets/%s.txt'];
24 | VOCopts.clsimgsetpath=[VOCopts.datadir 'data/ImageSets/%s_%s.txt'];
25 | VOCopts.annopath=[VOCopts.datadir 'data/Annotations/%s.txt'];
26 | VOCopts.imgpath=[VOCopts.datadir 'data/Images/%s.png'];
27 | VOCopts.clsrespath=[VOCopts.resdir '%s_cls_' VOCopts.testset '_%s.txt'];
28 | VOCopts.detrespath=[VOCopts.resdir '%s_det_' VOCopts.testset '_%s.txt'];
29 | 
30 | % initialize the VOC challenge options
31 | 
32 | VOCopts.classes={'person'};
33 | VOCopts.nclasses=length(VOCopts.classes);	
34 | 
35 | VOCopts.minoverlap=0.5;
36 | 
37 | % initialize example options
38 | 
39 | VOCopts.exfdpath=[VOCopts.localdir '%s_fd.mat'];
40 | 
41 | % datasets
42 | VOCopts.dataset = ['inria'];
43 | 


--------------------------------------------------------------------------------
/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | from .imdb import imdb
 9 | from .pascal_voc import pascal_voc
10 | from .inria import inria
11 | from . import factory
12 | 
13 | import os.path as osp
14 | ROOT_DIR = osp.join(osp.dirname(__file__), '..', '..')
15 | 
16 | # We assume your matlab binary is in your path and called `matlab'.
17 | # If either is not true, just add it to your path and alias it as matlab, or
18 | # you could change this file.
19 | MATLAB = 'matlab'
20 | 
21 | # http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
22 | def _which(program):
23 |     import os
24 |     def is_exe(fpath):
25 |         return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
26 | 
27 |     fpath, fname = os.path.split(program)
28 |     if fpath:
29 |         if is_exe(program):
30 |             return program
31 |     else:
32 |         for path in os.environ["PATH"].split(os.pathsep):
33 |             path = path.strip('"')
34 |             exe_file = os.path.join(path, program)
35 |             if is_exe(exe_file):
36 |                 return exe_file
37 | 
38 |     return None
39 | 
40 | if _which(MATLAB) is None:
41 |     msg = ("MATLAB command '{}' not found. "
42 |            "Please add '{}' to your PATH.").format(MATLAB, MATLAB)
43 |     raise EnvironmentError(msg)
44 | 


--------------------------------------------------------------------------------
/matlab/nms.m:
--------------------------------------------------------------------------------
 1 | function pick = nms(boxes, overlap)
 2 | % pick = nms(boxes, overlap)
 3 | % Non-maximum suppression. (FAST VERSION)
 4 | % Greedily select high-scoring detections and skip detections
 5 | % that are significantly covered by a previously selected
 6 | % detection.
 7 | %
 8 | % NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m),
 9 | % but an inner loop has been eliminated to significantly speed it
10 | % up in the case of a large number of boxes
11 | 
12 | % Copyright (C) 2011-12 by Tomasz Malisiewicz
13 | % All rights reserved.
14 | %
15 | % This file is part of the Exemplar-SVM library and is made
16 | % available under the terms of the MIT license (see COPYING file).
17 | % Project homepage: https://github.com/quantombone/exemplarsvm
18 | 
19 | 
20 | if isempty(boxes)
21 |   pick = [];
22 |   return;
23 | end
24 | % Columns 1-4 are read as x1, y1, x2, y2; the last column is the score
25 | x1 = boxes(:,1);
26 | y1 = boxes(:,2);
27 | x2 = boxes(:,3);
28 | y2 = boxes(:,4);
29 | s = boxes(:,end);
30 | 
31 | area = (x2-x1+1) .* (y2-y1+1);
32 | [vals, I] = sort(s);
33 | % I is in ascending score order; pick is preallocated, one slot per box
34 | pick = s*0;
35 | counter = 1;
36 | while ~isempty(I)
37 |   last = length(I);
38 |   i = I(last);
39 |   pick(counter) = i;
40 |   counter = counter + 1;
41 |   % Intersection of box i with every remaining lower-ranked box
42 |   xx1 = max(x1(i), x1(I(1:last-1)));
43 |   yy1 = max(y1(i), y1(I(1:last-1)));
44 |   xx2 = min(x2(i), x2(I(1:last-1)));
45 |   yy2 = min(y2(i), y2(I(1:last-1)));
46 | 
47 |   w = max(0.0, xx2-xx1+1);
48 |   h = max(0.0, yy2-yy1+1);
49 |   % Overlap = intersection area / union area
50 |   inter = w.*h;
51 |   o = inter ./ (area(i) + area(I(1:last-1)) - inter);
52 |   % Keep boxes overlapping box i by at most the threshold; this drops i too
53 |   I = I(find(o<=overlap));
54 | end
55 | % Cut off the unused preallocated slots
56 | pick = pick(1:(counter-1));
57 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m:
--------------------------------------------------------------------------------
 1 | function res = voc_eval(path, comp_id, test_set, output_dir, rm_res)
 2 | 
 3 | VOCopts = get_voc_opts(path);
 4 | VOCopts.testset = test_set;
 5 | 
 6 | for i = 1:length(VOCopts.classes)
 7 |   cls = VOCopts.classes{i};
 8 |   res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir, rm_res);
 9 | end
10 | 
11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
12 | fprintf('Results:\n');
13 | aps = [res(:).ap]';
14 | fprintf('%.1f\n', aps * 100);
15 | fprintf('%.1f\n', mean(aps) * 100);
16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n');
17 | 
18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir, rm_res)
19 | 
20 | test_set = VOCopts.testset;
21 | year = VOCopts.dataset(4:end);
22 | 
23 | addpath(fullfile(VOCopts.datadir, 'VOCcode'));
24 | 
25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls);
26 | 
27 | recall = [];
28 | prec = [];
29 | ap = 0;
30 | ap_auc = 0;
31 | 
32 | do_eval = 1;
33 | if do_eval
34 |   % Bug in VOCevaldet requires that tic has been called first
35 |   tic;
36 |   [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
37 |   ap_auc = xVOCap(recall, prec);
38 | 
39 |   % force plot limits
40 |   ylim([0 1]);
41 |   xlim([0 1]);
42 | 
43 |   print(gcf, '-djpeg', '-r0', ...
44 |         [output_dir '/' cls '_pr.jpg']);
45 | end
46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);
47 | 
48 | res.recall = recall;
49 | res.prec = prec;
50 | res.ap = ap;
51 | res.ap_auc = ap_auc;
52 | 
53 | save([output_dir '/' cls '_pr.mat'], ...
54 |      'res', 'recall', 'prec', 'ap', 'ap_auc');
55 | 
56 | if rm_res
57 |   delete(res_fn);
58 | end
59 | 
60 | rmpath(fullfile(VOCopts.datadir, 'VOCcode'));
61 | 


--------------------------------------------------------------------------------
/help/INRIA/VOCcode/VOCroc.m:
--------------------------------------------------------------------------------
 1 | function [fp,tp,auc] = VOCroc(VOCopts,id,cls,draw)
 2 | 
 3 | % load test set
 4 | [gtids,gt]=textread(sprintf(VOCopts.clsimgsetpath,cls,VOCopts.testset),'%s %d');
 5 | 
 6 | % load results
 7 | [ids,confidence]=textread(sprintf(VOCopts.clsrespath,id,cls),'%s %f');
 8 | 
 9 | % map results to ground truth images
10 | out=ones(size(gt))*-inf;
11 | tic;
12 | for i=1:length(ids)
13 |     % display progress
14 |     if toc>1
15 |         fprintf('%s: roc: %d/%d\n',cls,i,length(ids));
16 |         drawnow;
17 |         tic;
18 |     end
19 |     
20 |     % find ground truth image
21 |     j=strmatch(ids{i},gtids,'exact');
22 |     if isempty(j)
23 |         error('unrecognized image "%s"',ids{i});
24 |     elseif length(j)>1
25 |         error('multiple image "%s"',ids{i});
26 |     else
27 |         out(j)=confidence(i);
28 |     end
29 | end
30 | 
31 | % compute true and false positive rates
32 | [so,si]=sort(-out);
33 | tp=cumsum(gt(si)>0)/sum(gt>0);
34 | fp=cumsum(gt(si)<0)/sum(gt<0);
35 | [uo,ui]=unique(so);
36 | tp=[0;tp(ui);1];
37 | fp=[0;fp(ui);1];
38 | 
39 | % compute lower envelope and area under curve
40 | di=[true ; tp(2:end-1)~=tp(1:end-2) ; true];
41 | x=fp(di);
42 | y=tp(di);
43 | auc=(x(2:end)-x(1:end-1))'*y(1:end-1);
44 | 
45 | if draw
46 |     % plot lower envelope
47 |     xp=[0 ; reshape([x x]',[],1) ; 1 ; 1];
48 |     yp=[0 ; 0 ; reshape([y y]',[],1) ; 1];
49 | 
50 |     plot(xp,yp,'-');
51 |     grid;
52 |     axis([0 1 0 1]);
53 |     xlabel 'false positive rate'
54 |     ylabel 'true positive rate'
55 |     title(sprintf('class: %s, subset: %s, AUC = %.3f',cls,VOCopts.testset,auc));
56 | end
57 | 


--------------------------------------------------------------------------------
/selective_search/BoxAverageBestOverlap.m:
--------------------------------------------------------------------------------
 1 | function [abo mabo boScores avgNumBoxes] = BoxAverageBestOverlap(gtBoxes, gtNrs, testBoxes)
 2 | % [abo mabo boScores avgNumBoxes] = BoxAverageBestOverlap(gtBoxes, gtNrs, testBoxes)
 3 | %
 4 | % Calculate Average Best Overlap scores
 5 | %
 6 | % gtBoxes:      Cell array of ground truth boxes per class (see
 7 | %               GetAllObjectBoxes)
 8 | % gtNrs:        Cell array with image nrs corresponding to ground truth.
 9 | % testBoxes:    Cell array of testboxes per image.
10 | %
11 | % abo:          Average Best Overlap per class (Pascal Overlap criterion)
12 | % mabo:         Mean Average Best Overlap (mean(abo))
13 | % boScores:     Best Overlap Score per GT box.
14 | % avgNumBoxes:  Average number of boxes per image
15 | %
16 | %     Jasper Uijlings - 2013
17 | 
18 | % Check nr of gt elements
19 | nClasses = length(gtBoxes);
20 | 
21 | boScores = cell(1, nClasses);
22 | for cI = 1:nClasses
23 |     boScores{cI} = zeros(size(gtBoxes{cI}, 1),1);
24 | end
25 | 
26 | % indices per class
27 | classIdx = ones(1, nClasses);
28 | 
29 | for cI = 1:length(gtBoxes)
30 |     for i = 1:size(gtBoxes{cI}, 1)
31 |         boScores{cI}(classIdx(cI)) = ...
32 |             BoxBestOverlap(gtBoxes{cI}(i,:), testBoxes{gtNrs{cI}(i)});
33 |         classIdx(cI) = classIdx(cI) + 1;
34 |     end
35 | end
36 | 
37 | % Calculation abo and mabo measures
38 | abo = zeros(nClasses, 1);
39 | for cI = 1:nClasses
40 |     abo(cI) = mean(boScores{cI});
41 | end
42 | mabo = mean(abo);
43 | 
44 | % Calculation avgNumBoxes
45 | numBoxes = zeros(length(testBoxes), 1);
46 | for i=1:length(testBoxes)
47 |     numBoxes(i) = size(testBoxes{i}, 1);
48 | end
49 | avgNumBoxes = mean(numBoxes);


--------------------------------------------------------------------------------
/selective_search/Dependencies/FelzenSegment/segment.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (C) 2006 Pedro Felzenszwalb
 3 | 
 4 | This program is free software; you can redistribute it and/or modify
 5 | it under the terms of the GNU General Public License as published by
 6 | the Free Software Foundation; either version 2 of the License, or
 7 | (at your option) any later version.
 8 | 
 9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program; if not, write to the Free Software
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
17 | */
18 | 
19 | #include <cstdio>
20 | #include <cstdlib>
21 | #include <image.h>
22 | #include <misc.h>
23 | #include <pnmfile.h>
24 | #include "segment-image.h"
25 | 
26 | int main(int argc, char **argv) {
27 |   if (argc != 6) {
28 |     fprintf(stderr, "usage: %s sigma k min input(ppm) output(ppm)\n", argv[0]);
29 |     return 1;
30 |   }
31 |   
32 |   float sigma = atof(argv[1]);
33 |   float k = atof(argv[2]);
34 |   int min_size = atoi(argv[3]);
35 | 	
36 |   printf("loading input image.\n");
37 |   image<rgb> *input = loadPPM(argv[4]);
38 | 	
39 |   printf("processing\n");
40 |   int num_ccs; 
41 |   image<rgb> *seg = segment_image(input, sigma, k, min_size, &num_ccs); 
42 |   savePPM(seg, argv[5]);
43 | 
44 |   printf("got %d components\n", num_ccs);
45 |   printf("done! uff...thats hard work.\n");
46 | 
47 |   return 0;
48 | }
49 | 
50 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/ShowBlobs.m:
--------------------------------------------------------------------------------
 1 | function ShowBlobs(blobs, numRow, numCol, image, minSize, imNames)
 2 | % PlotBlobs(blobs, numRow, numCol, image, minSize) plots all blobs in numCol columns
 3 | %
 4 | %     Jasper Uijlings - 2013
 5 | 
 6 | if nargin == 4
 7 |     minSize = 0;
 8 | end
 9 | 
10 | doNames = exist('imNames', 'var');
11 | 
12 | if doNames
13 |     if ~iscell(imNames)
14 |         imageNamesC = cell(size(imNames));
15 |         for i=1:length(imNames)
16 |             imageNamesC{i} = sprintf('%g', imNames(i));
17 |         end
18 |         imNames = imageNamesC;
19 |     end    
20 | end
21 |     
22 | % Convert to images
23 | idx = 1;
24 | for i=1:length(blobs)
25 |     if not(isfield(blobs{i}, 'size'))
26 |         blobs{i}.size = sum(sum(blobs{i}.mask));
27 |     end
28 |     if blobs{i}.size > minSize
29 |         images{idx} = Blob2Image(blobs{i}, image);
30 |         if doNames
31 |             iiNames{idx} = imNames{i};
32 |         end
33 |         idx = idx + 1;
34 |     end
35 | end
36 | 
37 | 
38 | if doNames;
39 |     ShowImageCell(images, numRow, numCol, '', iiNames);
40 | else
41 |     ShowImageCell(images, numRow, numCol);
42 | end
43 | 
44 | % totImages = idx - 1;
45 | % 
46 | % numFigures = ceil(totImages / (numCol * numRow))
47 | % 
48 | % n = 1;
49 | % screenSize = get(0, 'ScreenSize');
50 | % 
51 | % for i=1:numFigures
52 | %     figure('Position', [1, 1, screenSize(3)/2, screenSize(4)]);
53 | %     clf;
54 | %     for j = 1:numCol * numRow
55 | %         if(n <= totImages)
56 | %             subplot(numRow, numCol, j);
57 | %             imshow(images{n});
58 | %             n = n + 1;
59 | %         end
60 | %     end
61 | % end
62 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/ShowImageCell.m:
--------------------------------------------------------------------------------
 1 | function ShowImageCell(imageCell, n, m, figurename, imageNames)
 2 | % ShowImageCell(imageCell, n, m, figurename, imageNames)
 3 | %
 4 | % Generate a figure with thumbnails of the images in the imageCell.
 5 | %
 6 | % imageCell:            Cell array with images which can be displayed
 7 | %                       with imshow.
 8 | % n:                    number of thumbnail rows per figure.
 9 | % m:                    number of thumbnail columns per figure.
10 | % figurename:           Name of the figures (optional).
11 | %
12 | %     Jasper Uijlings - 2013
13 | 
14 | totImages = length(imageCell);
15 | numFigures = ceil(totImages / (n * m));
16 | 
17 | if nargin < 4
18 |     figurename = 'untitled';
19 | end
20 | 
21 | if nargin < 5
22 |     imageNames = cell(length(imageCell));
23 | end
24 | 
25 | if ~iscell(imageNames)
26 |     imageNamesC = cell(length(imageNames));
27 |     for i=1:length(imageNames)
28 |         imageNamesC{i} = sprintf('%g', imageNames(i));
29 |     end
30 |     imageNames = imageNamesC;
31 | end
32 | 
33 | idx = 1;
34 | screenSize = get(0, 'ScreenSize');
35 | 
36 | for i=1:numFigures
37 |     if ispc
38 |         figure('Position', [1, 1, screenSize(3), screenSize(4)], 'Name', figurename);
39 | %         figure('Position', [1 49 1920 946] , 'Name', figurename);
40 |     else
41 |         figure('Position', [1, 1, screenSize(3)/2, screenSize(4)], 'Name', figurename);
42 |     end
43 |     clf;
44 |     for j = 1:n * m
45 |         if(idx <= totImages)
46 |             subplot(n, m, j);
47 |             imshow(imageCell{idx});
48 |             xlabel(imageNames{idx});
49 |             idx = idx + 1;
50 |         end
51 |     end
52 | end
53 |     


--------------------------------------------------------------------------------
/lib/utils/blob.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Blob helper functions."""
 9 | 
10 | import numpy as np
11 | import cv2
12 | 
def im_list_to_blob(ims):
    """Convert a list of images into a network input.

    Assumes images are already prepared (means subtracted, BGR order, ...).

    Each image is copied into the top-left corner of a zero-filled slot
    sized to the largest height and width found in ``ims``.

    Parameters
    ----------
    ims : non-empty list of (H, W, 3) arrays; H and W may differ per image.

    Returns
    -------
    float32 ndarray with axis order (batch elem, channel, height, width).
    """
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    num_images = len(ims)
    blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    # enumerate works on both Python 2 and 3 (xrange exists only on 2)
    for i, im in enumerate(ims):
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1
    # Axis order will become: (batch elem, channel, height, width)
    channel_swap = (0, 3, 1, 2)
    return blob.transpose(channel_swap)
30 | 
def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Mean subtract and scale an image for use in a blob.

    The shorter image side is scaled to ``target_size`` unless that would
    make the longer side exceed ``max_size``; in that case the longer side
    is scaled to ``max_size`` instead. The input array is left untouched.

    Parameters
    ----------
    im : (H, W, C) image array.
    pixel_means : values subtracted from ``im`` (broadcast against it).
    target_size : desired length of the shorter side after scaling.
    max_size : upper bound for the longer side after scaling.

    Returns
    -------
    (im, im_scale) : the float32 mean-subtracted, resized image and the
    scale factor that was applied to both axes.
    """
    # Always work on a copy: with copy=False a float32 input would be
    # modified in place by the subtraction below.
    im = im.astype(np.float32, copy=True)
    im -= pixel_means
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than MAX_SIZE
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)

    return im, im_scale
46 | 


--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
 1 | This directory holds (*after you download them*):
 2 | - Pre-computed object proposals
 3 | - Caffe models pre-trained on ImageNet
 4 | - Fast R-CNN models
 5 | - Symlinks to datasets
 6 | 
 7 | To download precomputed Selective Search proposals for PASCAL VOC 2007 and 2012, run:
 8 | 
 9 | ```
10 | ./data/scripts/fetch_selective_search_data.sh
11 | ```
12 | 
13 | This script will populate `data/selective_search_data`.
14 | 
15 | To download Caffe models (CaffeNet, VGG_CNN_M_1024, VGG16) pre-trained on ImageNet, run:
16 | 
17 | ```
18 | ./data/scripts/fetch_imagenet_models.sh
19 | ```
20 | 
21 | This script will populate `data/imagenet_models`.
22 | 
23 | To download Fast R-CNN models trained on VOC 2007, run:
24 | 
25 | ```
26 | ./data/scripts/fetch_fast_rcnn_models.sh
27 | ```
28 | 
29 | This script will populate `data/fast_rcnn_models`.
30 | 
31 | In order to train and test with PASCAL VOC, you will need to establish symlinks.
32 | From the `data` directory (`cd data`):
33 | 
34 | ```
35 | # For VOC 2007
36 | ln -s /your/path/to/VOC2007/VOCdevkit VOCdevkit2007
37 | 
38 | # For VOC 2012
39 | ln -s /your/path/to/VOC2012/VOCdevkit VOCdevkit2012
40 | ```
41 | 
42 | Since you'll likely be experimenting with multiple installs of Fast R-CNN in
43 | parallel, you'll probably want to keep all of this data in a shared place and
44 | use symlinks. On my system I create the following symlinks inside `data`:
45 | 
46 | ```
47 | # data/cache holds various outputs created by the datasets package
48 | ln -s /data/fast_rcnn_shared/cache
49 | 
# move the imagenet_models to a shared location and symlink to them
51 | ln -s /data/fast_rcnn_shared/imagenet_models
52 | 
53 | # move the selective search data to a shared location and symlink to them
54 | ln -s /data/fast_rcnn_shared/selective_search_data
55 | 
56 | ln -s /data/VOC2007/VOCdevkit VOCdevkit2007
57 | ln -s /data/VOC2012/VOCdevkit VOCdevkit2012
58 | ```
59 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/mexCountWordsIndex.cpp:
--------------------------------------------------------------------------------
 1 | #include <cmath>
 2 | #include "mex.h"
 3 | void mexFunction(int nlhs, mxArray *out[], int nrhs, const mxArray *input[])
 4 | {
 5 |     // Checking number of arguments
 6 |     if (nlhs > 2){
 7 |         mexErrMsgTxt("Error: function has only two output parameters");
 8 |         return;
 9 |     }
10 | 
11 |     if (nrhs != 4){
12 |         mexErrMsgTxt("Error: Needs exactly two four input parameters");
13 |         return;
14 |     }
15 | 
16 |     int numWords = (int) mxGetScalar(input[3]);
17 |     int numIndices = (int) mxGetScalar(input[2]);
18 | 
19 |     // Load in arrays
20 |     double* indices = mxGetPr(input[0]);
21 |     double* a = mxGetPr( input[1] );
22 |     int aNum = (int) mxGetNumberOfElements(input[1]);
23 |     int totIndices = (int) mxGetNumberOfElements(input[0]); // number of elements. Not confuse with max
24 |     int numLoops = aNum / totIndices;
25 | 
26 |     // Create output histogram
27 |     out[0] = mxCreateDoubleMatrix(numIndices, numWords, mxREAL);
28 |     double* histogram = mxGetPr(out[0]);
29 |     //histogram = histogram - 1;
30 | 
31 |     out[1] = mxCreateDoubleMatrix(numIndices, 1, mxREAL);
32 |     double* count = mxGetPr(out[1]);
33 | 
34 |     double* aP = a;
35 |     int iPval;
36 |     for(int j=0; j < numLoops; j++){
37 |         double* iP = indices;
38 |         for(int i=0;i < totIndices; i++){
39 |             //mexPrintf("%d\n", i);
40 |             if (*aP){
41 |                 //(*(histogram + (((int) *aP) -1) * numIndices + ((int) *iP - 1)))++;
42 |                 //count++;
43 |                 iPval = ((int) *iP) -1;
44 |                 histogram[(((int) *aP) - 1) * numIndices + iPval]++;
45 |                 count[iPval]++;
46 |             }
47 | 
48 |             //arrayI = (int) *aP;
49 |             //histogram[arrayI]++;
50 |             aP++;
51 |             iP++;
52 |         }
53 |     }
54 | 
55 |     return;
56 | }
57 | 


--------------------------------------------------------------------------------
/lib/utils/bbox.pyx:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Sergey Karayev
 6 | # --------------------------------------------------------
 7 | 
 8 | cimport cython
 9 | import numpy as np
10 | cimport numpy as np
11 | 
# np.float was only an alias for the builtin float (float64) and has been
# removed from NumPy; name the 64-bit type explicitly.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t

def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Intersection-over-union between every box and every query box.

    Columns 0..3 of each row are read as (x1, y1, x2, y2); widths and
    heights are computed as x2 - x1 + 1 and y2 - y1 + 1.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
        (0 where a pair does not intersect)
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # area of the query box, reused for every box below
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # intersection width; non-positive means no horizontal overlap
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                # intersection height
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # union area = area(box) + area(query) - intersection
                    ua = (
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
56 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/FelzenSegment/imutil.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (C) 2006 Pedro Felzenszwalb
 3 | 
 4 | This program is free software; you can redistribute it and/or modify
 5 | it under the terms of the GNU General Public License as published by
 6 | the Free Software Foundation; either version 2 of the License, or
 7 | (at your option) any later version.
 8 | 
 9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program; if not, write to the Free Software
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
17 | */
18 | 
19 | /* some image utilities */
20 | 
21 | #ifndef IMUTIL_H
22 | #define IMUTIL_H
23 | 
24 | #include "image.h"
25 | #include "misc.h"
26 | 
27 | /* compute minimum and maximum value in an image */
28 | template <class T>
29 | void min_max(image<T> *im, T *ret_min, T *ret_max) {
30 |   int width = im->width();
31 |   int height = im->height();
32 |   
33 |   T min = imRef(im, 0, 0);
34 |   T max = imRef(im, 0, 0);
35 |   for (int y = 0; y < height; y++) {
36 |     for (int x = 0; x < width; x++) {
37 |       T val = imRef(im, x, y);
38 |       if (min > val)
39 | 	min = val;
40 |       if (max < val)
41 | 	max = val;
42 |     }
43 |   }
44 | 
45 |   *ret_min = min;
46 |   *ret_max = max;
47 | } 
48 | 
49 | /* threshold image */
50 | template <class T>
51 | image<uchar> *threshold(image<T> *src, int t) {
52 |   int width = src->width();
53 |   int height = src->height();
54 |   image<uchar> *dst = new image<uchar>(width, height);
55 |   
56 |   for (int y = 0; y < height; y++) {
57 |     for (int x = 0; x < width; x++) {
58 |       imRef(dst, x, y) = (imRef(src, x, y) >= t);
59 |     }
60 |   }
61 | 
62 |   return dst;
63 | }
64 | 
65 | #endif
66 | 
67 | 


--------------------------------------------------------------------------------
/matlab/fast_rcnn_demo.m:
--------------------------------------------------------------------------------
 1 | % --------------------------------------------------------
 2 | % Fast R-CNN
 3 | % Copyright (c) 2015 Microsoft
 4 | % Licensed under The MIT License [see LICENSE for details]
 5 | % Written by Ross Girshick
 6 | % --------------------------------------------------------
 7 | 
 8 | function fast_rcnn_demo()
 9 | % Fast R-CNN demo (in matlab).
10 | 
11 | [folder, name, ext] = fileparts(mfilename('fullpath'));
12 | 
13 | caffe_path = fullfile(folder, '..', 'caffe-fast-rcnn', 'matlab', 'caffe');
14 | addpath(caffe_path);
15 | 
16 | use_gpu = true;
17 | % You can try other models here:
18 | def = fullfile(folder, '..', 'models', 'VGG16', 'test.prototxt');;
19 | net = fullfile(folder, '..', 'data', 'fast_rcnn_models', ...
20 |                'vgg16_fast_rcnn_iter_40000.caffemodel');
21 | model = fast_rcnn_load_net(def, net, use_gpu);
22 | 
23 | car_ind = 7;
24 | sofa_ind = 18;
25 | tv_ind = 20;
26 | 
27 | demo(model, '000004', [car_ind], {'car'});
28 | demo(model, '001551', [sofa_ind, tv_ind], {'sofa', 'tvmonitor'});
29 | fprintf('\n');
30 | 
% ------------------------------------------------------------------------
function demo(model, im_id, cls_inds, cls_names)
% Run detection on one demo image and show, class by class, the boxes
% whose score (last column) reaches THRESH. Waits for a key press after
% every class.
%
% model:      network returned by fast_rcnn_load_net
% im_id:      image id; <im_id>.jpg and <im_id>_boxes.mat are read from
%             data/demo
% cls_inds:   class indices into the detection results
% cls_names:  cell array with the class name for each entry of cls_inds
% ------------------------------------------------------------------------
demo_dir = fullfile(fileparts(mfilename('fullpath')), '..', 'data', 'demo');

% Boxes were saved with 0-based indexing
ld = load(fullfile(demo_dir, [im_id '_boxes.mat']));
boxes = single(ld.boxes) + 1;

im = imread(fullfile(demo_dir, [im_id '.jpg']));
dets = fast_rcnn_im_detect(model, im, boxes);

THRESH = 0.8;
for j = 1:length(cls_inds)
  cls_name = cls_names{j};
  cls_dets = dets{cls_inds(j)};
  keep = cls_dets(:, end) >= THRESH;
  showboxes(im, cls_dets(keep, :));
  title(sprintf('%s detections with p(%s | box) >= %.3f', ...
                cls_name, cls_name, THRESH))
  fprintf('\n> Press any key to continue');
  pause;
end
53 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/anigaussm/anigauss_mex.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |    The Matlab mex function.
 3 |    If necessary to recompile, type:
 4 |        mex -v -g anigauss_mex.c anigauss.c
 5 |    from within matlab.
 6 |    For windows platforms, you may want to use the provided "anigauss.dll" file.
 7 | */
 8 | 
 9 | 
10 | #include "mex.h"
11 | 
12 | extern void anigauss(double *input, double *output, int sizex, int sizey,
13 | 	double sigmav, double sigmau, double phi, int orderv, int orderu);
14 | 
15 | void mexFunction(int nlhs,mxArray *plhs[],int nrhs, const mxArray *prhs[])
16 | {
17 |     double *in, *out;
18 |     double sigmav, sigmau, phi = 0.0;
19 |     int    orderv = 0, orderu = 0;
20 |     int    m, n;
21 | 
22 | 	/*
23 | 	 * Check the input arguments and the output argument
24 | 	 */
25 |     if ((nrhs<2) || (nrhs>6) || (nrhs==5) || (nlhs!=1))
26 |         mexErrMsgTxt(
27 |             "use: out = anigauss(in, sigmav, sigmau, phi, orderv, orderu);");
28 | 
29 | 	if ( mxGetNumberOfDimensions(prhs[0]) != 2 ) 
30 | 		{ mexErrMsgTxt("anigauss: input array should be of dimension 2"); }
31 | 
32 |     if (nrhs>=2) {
33 |         in = mxGetPr(prhs[0]); 
34 |         sigmav = mxGetScalar(prhs[1]);
35 |         sigmau = sigmav;
36 |     }
37 |     if (nrhs>=3)
38 |         sigmau = mxGetScalar(prhs[2]);
39 |     if (nrhs>=4)
40 |         phi = mxGetScalar(prhs[3]);
41 |     if (nrhs==6) {
42 |         orderv = (int)(mxGetScalar(prhs[4])+0.5);
43 |         orderu = (int)(mxGetScalar(prhs[5])+0.5);
44 |     }
45 | 
46 |     if ((orderv<0) || (orderu<0))
47 | 		{ mexErrMsgTxt("anigauss: derivative orders should be positive"); }
48 | 
49 |     m = mxGetM(prhs[0]);
50 |     n = mxGetN(prhs[0]);
51 | 
52 | 	/* pointers to output array */
53 | 
54 | 	plhs[0]=mxCreateDoubleMatrix(m, n, mxREAL );	
55 | 	if ( plhs[0] == NULL )
56 |         { mexErrMsgTxt("No more memory for out array"); }
57 | 	out = (double *)mxGetPr( plhs[0] );
58 | 	
59 | 	anigauss(in, out, m, n, sigmav,  sigmau,  phi-90.0, orderv,  orderu);
60 | }
61 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/FelzenSegment/misc.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (C) 2006 Pedro Felzenszwalb
 3 | 
 4 | This program is free software; you can redistribute it and/or modify
 5 | it under the terms of the GNU General Public License as published by
 6 | the Free Software Foundation; either version 2 of the License, or
 7 | (at your option) any later version.
 8 | 
 9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program; if not, write to the Free Software
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
17 | */
18 | 
19 | /* random stuff */
20 | 
21 | #ifndef MISC_H
22 | #define MISC_H
23 | 
24 | #include <cmath>
25 | 
26 | #ifndef M_PI
27 | #define M_PI 3.141592653589793
28 | #endif
29 | 
typedef unsigned char uchar;

/* colour pixel with one uchar per channel */
typedef struct { uchar r, g, b; } rgb;

/* two pixels compare equal when all three channels match */
inline bool operator==(const rgb &a, const rgb &b) {
  if (a.r != b.r)
    return false;
  if (a.g != b.g)
    return false;
  return a.b == b.b;
}
37 | 
/* absolute value: x itself when x > 0, otherwise -x */
template <class T>
inline T abs(const T &x) {
  if (x > 0)
    return x;
  return -x;
}
40 | 
/* sign of x as an int: 1 for x >= 0 (zero included), -1 otherwise */
template <class T>
inline int sign(const T &x) {
  if (x >= 0)
    return 1;
  return -1;
}
43 | 
/* x multiplied by itself */
template <class T>
inline T square(const T &x) {
  return x * x;
}
46 | 
/* clamp x to the closed range [min, max] */
template <class T>
inline T bound(const T &x, const T &min, const T &max) {
  if (x < min)
    return min;
  if (x > max)
    return max;
  return x;
}
51 | 
/* true when x lies OUTSIDE the closed range [min, max], false when inside */
template <class T>
inline bool check_bound(const T &x, const T&min, const T &max) {
  if (x < min)
    return true;
  return x > max;
}
56 | 
/*
 * Round to the nearest integer, halves rounding up. floor() is applied
 * before the cast because a plain (int) cast truncates toward zero, which
 * gives the wrong result for negative input (e.g. -1.6 would become -1).
 */
inline int vlib_round(float x) { return (int)std::floor(x + 0.5F); }

inline int vlib_round(double x) { return (int)std::floor(x + 0.5); }
60 | 
/* zero-mean normal density with standard deviation sigma, evaluated at val */
inline double gaussian(double val, double sigma) {
  const double z = val / sigma;
  const double norm = sqrt(2*M_PI)*sigma;
  return exp(-(z*z)/2)/norm;
}
64 | 
65 | #endif
66 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/FelzenSegment/disjoint-set.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (C) 2006 Pedro Felzenszwalb
 3 | 
 4 | This program is free software; you can redistribute it and/or modify
 5 | it under the terms of the GNU General Public License as published by
 6 | the Free Software Foundation; either version 2 of the License, or
 7 | (at your option) any later version.
 8 | 
 9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program; if not, write to the Free Software
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
17 | */
18 | 
19 | #ifndef DISJOINT_SET
20 | #define DISJOINT_SET
21 | 
22 | // disjoint-set forests using union-by-rank and path compression (sort of).
23 | 
/* one node of the forest */
typedef struct {
  int rank;  /* union-by-rank rank; meaningful for set representatives */
  int p;     /* parent index; a representative is its own parent */
  int size;  /* number of elements in the set; valid for representatives */
} uni_elt;

/*
 * Disjoint-set forest over the integers 0 .. elements-1.
 *
 * Every element starts in its own set. join() expects the representatives
 * of two DIFFERENT sets (as returned by find()); passing anything else
 * corrupts the sizes and the set count.
 */
class universe {
public:
  universe(int elements);
  ~universe();
  /* representative of the set containing x */
  int find(int x);  
  /* merge the sets whose representatives are x and y */
  void join(int x, int y);
  /* size of the set whose representative is x */
  int size(int x) const { return elts[x].size; }
  /* current number of disjoint sets */
  int num_sets() const { return num; }

private:
  /* the object owns elts; copying would free the array twice, so copy
     construction and assignment are declared but never defined */
  universe(const universe &);
  universe &operator=(const universe &);

  uni_elt *elts;
  int num;
};

/* the definitions live in a header, so they are inline to allow inclusion
   from more than one translation unit */
inline universe::universe(int elements) {
  elts = new uni_elt[elements];
  num = elements;
  for (int i = 0; i < elements; i++) {
    elts[i].rank = 0;
    elts[i].size = 1;
    elts[i].p = i;
  }
}
  
inline universe::~universe() {
  delete [] elts;
}

inline int universe::find(int x) {
  /* first pass: walk up to the representative */
  int root = x;
  while (root != elts[root].p)
    root = elts[root].p;
  /* second pass: point every node on the path directly at it */
  while (x != root) {
    int next = elts[x].p;
    elts[x].p = root;
    x = next;
  }
  return root;
}

inline void universe::join(int x, int y) {
  /* attach the lower-rank tree below the higher-rank one */
  if (elts[x].rank > elts[y].rank) {
    elts[y].p = x;
    elts[x].size += elts[y].size;
  } else {
    elts[x].p = y;
    elts[y].size += elts[x].size;
    if (elts[x].rank == elts[y].rank)
      elts[y].rank++;
  }
  num--;
}
78 | 
79 | #endif
80 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/FelzenSegment/convolve.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (C) 2006 Pedro Felzenszwalb
 3 | 
 4 | This program is free software; you can redistribute it and/or modify
 5 | it under the terms of the GNU General Public License as published by
 6 | the Free Software Foundation; either version 2 of the License, or
 7 | (at your option) any later version.
 8 | 
 9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program; if not, write to the Free Software
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
17 | */
18 | 
19 | /* convolution */
20 | 
21 | #ifndef CONVOLVE_H
22 | #define CONVOLVE_H
23 | 
24 | #include <vector>
25 | #include <algorithm>
26 | #include <cmath>
27 | #include "image.h"
28 | 
29 | /* convolve src with mask.  dst is flipped! */
30 | static void convolve_even(image<float> *src, image<float> *dst, 
31 | 			  std::vector<float> &mask) {
32 |   int width = src->width();
33 |   int height = src->height();
34 |   int len = mask.size();
35 | 
36 |   for (int y = 0; y < height; y++) {
37 |     for (int x = 0; x < width; x++) {
38 |       float sum = mask[0] * imRef(src, x, y);
39 |       for (int i = 1; i < len; i++) {
40 | 	sum += mask[i] * 
41 | 	  (imRef(src, std::max(x-i,0), y) + 
42 | 	   imRef(src, std::min(x+i, width-1), y));
43 |       }
44 |       imRef(dst, y, x) = sum;
45 |     }
46 |   }
47 | }
48 | 
49 | /* convolve src with mask.  dst is flipped! */
50 | static void convolve_odd(image<float> *src, image<float> *dst, 
51 | 			 std::vector<float> &mask) {
52 |   int width = src->width();
53 |   int height = src->height();
54 |   int len = mask.size();
55 | 
56 |   for (int y = 0; y < height; y++) {
57 |     for (int x = 0; x < width; x++) {
58 |       float sum = mask[0] * imRef(src, x, y);
59 |       for (int i = 1; i < len; i++) {
60 | 	sum += mask[i] * 
61 | 	  (imRef(src, std::max(x-i,0), y) - 
62 | 	   imRef(src, std::min(x+i, width-1), y));
63 |       }
64 |       imRef(dst, y, x) = sum;
65 |     }
66 |   }
67 | }
68 | 
69 | #endif
70 | 


--------------------------------------------------------------------------------
/lib/datasets/factory.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Factory method for easily getting imdbs by name."""
 9 | 
10 | __sets = {}
11 | 
import os

import datasets.pascal_voc
import datasets.inria
import numpy as np
15 | 
def _selective_search_IJCV_top_k(split, year, top_k):
    """Build a PASCAL VOC imdb backed by the selective search IJCV roidb.

    The imdb for (split, year) gets its roidb handler switched to
    ``selective_search_IJCV_roidb`` and ``top_k`` stored in its config
    under the key 'top_k'.
    """
    voc = datasets.pascal_voc(split, year)
    voc.roidb_handler = voc.selective_search_IJCV_roidb
    voc.config['top_k'] = top_k
    return voc
24 | 
# Every entry of __sets maps a dataset name to a zero-argument factory.
# Loop variables are bound through lambda default arguments so each factory
# keeps the values of its own iteration.

# Set up voc_<year>_<split> using selective search "fast" mode
for year in ['2007', '2012']:
    for split in ['train', 'val', 'trainval', 'test']:
        name = 'voc_{}_{}'.format(year, split)
        __sets[name] = (lambda split=split, year=year:
                datasets.pascal_voc(split, year))

# Set up voc_<year>_<split>_top_<k> using selective search "quality" mode
# but only returning the first k boxes
for top_k in np.arange(1000, 11000, 1000):
    for year in ['2007', '2012']:
        for split in ['train', 'val', 'trainval', 'test']:
            name = 'voc_{}_{}_top_{:d}'.format(year, split, top_k)
            __sets[name] = (lambda split=split, year=year, top_k=top_k:
                    _selective_search_IJCV_top_k(split, year, top_k))

# Set up inria_<split> using selective search "fast" mode.
# The devkit location defaults to the original hard-coded path and can be
# overridden with the INRIA_DEVKIT_PATH environment variable.
inria_devkit_path = os.environ.get('INRIA_DEVKIT_PATH', '/home/szy/INRIA')
for split in ['train', 'test']:
    name = '{}_{}'.format('inria', split)
    __sets[name] = (lambda split=split: datasets.inria(split, inria_devkit_path))

# Set up towncenter_<split>; it reuses the inria imdb class with its own
# devkit, overridable with TOWNCENTER_DEVKIT_PATH.
towncenter_devkit_path = os.environ.get('TOWNCENTER_DEVKIT_PATH',
                                        '/home/szy/TownCenter')
for split in ['test']:
    name = '{}_{}'.format('towncenter', split)
    __sets[name] = (lambda split=split: datasets.inria(split, towncenter_devkit_path))
51 | 
def get_imdb(name):
    """Get an imdb (image database) by name.

    Looks ``name`` up in the module-level ``__sets`` registry and returns the
    result of calling the zero-argument factory stored there.

    Raises:
        KeyError: if no dataset is registered under ``name``.
    """
    # Membership test with `in` works on Python 2 and 3; dict.has_key() was
    # removed in Python 3.
    if name not in __sets:
        raise KeyError('Unknown dataset: {}'.format(name))
    return __sets[name]()
57 | 
def list_imdbs():
    """List the names of all registered imdbs.

    Returns:
        list: a new list holding every key of the module-level ``__sets``
        registry.
    """
    # list(...) gives a real list on Python 3 too, where dict.keys() would
    # return a view object that cannot be indexed.
    return list(__sets)
61 | 


--------------------------------------------------------------------------------
/lib/utils/nms.pyx:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | cimport numpy as np
10 | 
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    # Larger of two float32 values; returns a when a >= b, otherwise b.
    if a >= b:
        return a
    return b
13 | 
cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    # Smaller of two float32 values; returns a when a <= b, otherwise b.
    if a <= b:
        return a
    return b
16 | 
def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
    """Greedy non-maximum suppression over scored boxes.

    Args:
        dets: 2-D float32 array with one detection per row. Columns 0-3 are
            read as x1, y1, x2, y2 and column 4 as the score.
        thresh: overlap (intersection over union) at or above which a
            lower-scoring box is suppressed by a higher-scoring one.

    Returns:
        list of row indices into ``dets`` for the boxes that are kept, in
        order of decreasing score.
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    # The +1 treats box coordinates as inclusive pixel indices.
    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort() yields np.intp indices, so the buffer is typed np.intp_t;
    # np.int_t (C long) does not match np.intp on every platform.
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.intp replaces the np.int alias, which newer NumPy releases removed.
    cdef np.ndarray[np.intp_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.intp)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        # Box i is the highest-scoring box not yet suppressed: keep it and
        # suppress every lower-scoring box that overlaps it too much.
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            # Clamp to zero so disjoint boxes give an empty intersection.
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
69 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/FelzenSegment/segment-graph.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (C) 2006 Pedro Felzenszwalb
 3 | 
 4 | This program is free software; you can redistribute it and/or modify
 5 | it under the terms of the GNU General Public License as published by
 6 | the Free Software Foundation; either version 2 of the License, or
 7 | (at your option) any later version.
 8 | 
 9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program; if not, write to the Free Software
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
17 | */
18 | 
19 | #ifndef SEGMENT_GRAPH
20 | #define SEGMENT_GRAPH
21 | 
22 | #include <algorithm>
23 | #include <cmath>
24 | #include "disjoint-set.h"
25 | 
// threshold function: c divided by the component size.
// Both arguments and the whole expansion are parenthesized so that
// expression arguments (e.g. THRESHOLD(n + 1, c)) keep the intended
// precedence.
#define THRESHOLD(size, c) ((c) / (size))
28 | 
/* A weighted graph edge: weight w between vertex indices a and b. */
typedef struct {
  float w;
  int a, b;
} edge;

/*
 * Order edges by weight, so that std::sort on an edge array arranges it in
 * non-decreasing weight.
 *
 * Marked inline because the function is defined in a header: without it,
 * including this header from more than one translation unit produces
 * duplicate-symbol errors at link time.
 */
inline bool operator<(const edge &a, const edge &b) {
  return a.w < b.w;
}
37 | 
38 | /*
39 |  * Segment a graph
40 |  *
41 |  * Returns a disjoint-set forest representing the segmentation.
42 |  *
43 |  * num_vertices: number of vertices in graph.
44 |  * num_edges: number of edges in graph
45 |  * edges: array of edges.
46 |  * c: constant for treshold function.
47 |  */
48 | universe *segment_graph(int num_vertices, int num_edges, edge *edges, 
49 | 			float c) { 
50 |   // sort edges by weight
51 |   std::sort(edges, edges + num_edges);
52 | 
53 |   // make a disjoint-set forest
54 |   universe *u = new universe(num_vertices);
55 | 
56 |   // init thresholds
57 |   float *threshold = new float[num_vertices];
58 |   for (int i = 0; i < num_vertices; i++)
59 |     threshold[i] = THRESHOLD(1,c);
60 | 
61 |   // for each edge, in non-decreasing weight order...
62 |   for (int i = 0; i < num_edges; i++) {
63 |     edge *pedge = &edges[i];
64 |     
65 |     // components conected by this edge
66 |     int a = u->find(pedge->a);
67 |     int b = u->find(pedge->b);
68 |     if (a != b) {
69 |       if ((pedge->w <= threshold[a]) &&
70 | 	  (pedge->w <= threshold[b])) {
71 | 	u->join(a, b);
72 | 	a = u->find(a);
73 | 	threshold[a] = pedge->w + THRESHOLD(u->size(a), c);
74 |       }
75 |     }
76 |   }
77 | 
78 |   // free up
79 |   delete threshold;
80 |   return u;
81 | }
82 | 
83 | #endif
84 | 


--------------------------------------------------------------------------------
/tools/test_net.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # --------------------------------------------------------
 4 | # Fast R-CNN
 5 | # Copyright (c) 2015 Microsoft
 6 | # Licensed under The MIT License [see LICENSE for details]
 7 | # Written by Ross Girshick
 8 | # --------------------------------------------------------
 9 | 
10 | """Test a Fast R-CNN network on an image database."""
11 | 
12 | import _init_paths
13 | from fast_rcnn.test import test_net
14 | from fast_rcnn.config import cfg, cfg_from_file
15 | from datasets.factory import get_imdb
16 | import caffe
17 | import argparse
18 | import pprint
19 | import time, os, sys
20 | 
def parse_args():
    """
    Parse input arguments

    Prints the help text and exits with status 1 when the script is invoked
    without any argument.

    Returns:
        argparse.Namespace with ``gpu_id``, ``prototxt``, ``caffemodel``,
        ``cfg_file``, ``wait``, ``imdb_name`` and ``comp_mode``.
    """
    def _str2bool(text):
        # argparse's type=bool calls bool() on the raw string, so every
        # non-empty value -- including "False" and "0" -- would parse as
        # True. Treat the usual negative spellings as False; any other value
        # stays True, as it was with bool().
        return text.strip().lower() not in ('', '0', 'false', 'no', 'n',
                                            'f', 'off')

    parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use',
                        default=0, type=int)
    parser.add_argument('--def', dest='prototxt',
                        help='prototxt file defining the network',
                        default=None, type=str)
    parser.add_argument('--net', dest='caffemodel',
                        help='model to test',
                        default=None, type=str)
    parser.add_argument('--cfg', dest='cfg_file',
                        help='optional config file', default=None, type=str)
    parser.add_argument('--wait', dest='wait',
                        help='wait until net file exists',
                        default=True, type=_str2bool)
    parser.add_argument('--imdb', dest='imdb_name',
                        help='dataset to test',
                        default='voc_2007_test', type=str)
    parser.add_argument('--comp', dest='comp_mode', help='competition mode',
                        action='store_true')

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args
51 | 
# Script entry point: parse the options, optionally load a config file, wait
# for the model file, build the network on the GPU and run test_net on the
# chosen image database.
if __name__ == '__main__':
    args = parse_args()

    print('Called with args:')
    print(args)

    # Settings from the optional config file are applied to the shared cfg
    # object before it is printed and used.
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)

    print('Using config:')
    pprint.pprint(cfg)

    # Poll every 10 seconds until the model file appears (only when --wait is
    # set). The existence check is evaluated first on every pass.
    while not os.path.exists(args.caffemodel) and args.wait:
        print('Waiting for {} to exist...'.format(args.caffemodel))
        time.sleep(10)

    caffe.set_mode_gpu()
    caffe.set_device(args.gpu_id)
    net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
    # Name the net after the model file's base name without its extension.
    net.name = os.path.splitext(os.path.basename(args.caffemodel))[0]

    imdb = get_imdb(args.imdb_name)
    imdb.competition_mode(args.comp_mode)

    test_net(net, imdb)
77 | 


--------------------------------------------------------------------------------
/selective_search/BlobAverageBestOverlap.m:
--------------------------------------------------------------------------------
 1 | function [abo mabo boScores avgNumSegments] = BlobAverageBestOverlap(gtBlobs, gtNrs, blobIndIm, blobBoxes, hierarchy, minWidth)
 2 | % [abo mabo boScores avgNumSegments] = BlobAverageBestOverlap(gtBlobs,
 3 | %                   gtNrs, blobIndIm, blobBoxes, hierarchy, minWidth)
 4 | %
 5 | % Calculate Average Best Overlap scores
 6 | %
 7 | % gtBlobs:      Cell array of ground truth segments per class (see
 8 | %               GetAllObjectBoxes)
 9 | % gtNrs:        Cell array with image nrs corresponding to ground truth.
10 | % blobIndIm:    Image with indices per blob (mexFelzenSegmentIndex)
11 | % blobBoxes:    Boxes corresponding to blobs in blobIndIm
12 | % hierarchy:    Hierarchy necessary to reconstruct all blobs in grouping
13 | % minWidth:     (optional) Filter out blobs with a width smaller than minWidth.
14 | %
15 | % abo:          Average Best Overlap per class (Pascal Overlap criterion)
16 | % mabo:         Mean Average Best Overlap (mean(abo))
17 | % boScores:     Best Overlap Score per GT segment.
18 | % avgNumBlobs:  Average number of blobs per image
19 | %
20 | %     Jasper Uijlings - 2013
21 | 
22 | if ~exist('minWidth', 'var')
23 |     minWidth = 0;
24 | end
25 | 
26 | nClasses = length(gtBlobs);
27 | 
28 | % Memory initialization
29 | numSegments = zeros(length(blobIndIm), 1);
30 | boScores = cell(1, nClasses);
31 | for cI = 1:nClasses
32 |     boScores{cI} = length(gtBlobs{cI});
33 | end
34 | 
35 | % indices per class
36 | classIdx = ones(1, nClasses);
37 | 
38 | for cI=1:length(gtBlobs)
39 |     for i=1:length(gtBlobs{cI})
40 |         testImNr = gtNrs{cI}(i);
41 |         
42 |         % the hierarchy here contains possibly multiple groupings with
43 |         % different initial measures
44 |         testBlobsT = cell(length(hierarchy{testImNr}), 1);
45 |         testBlobsT{1} = RecreateBlobHierarchyIndIm(blobIndIm{testImNr}, blobBoxes{testImNr}, hierarchy{testImNr}{1});
46 |         for j=2:length(hierarchy{testImNr}) % Without initial blobs here
47 |             [aa bb testBlobsT{j}] = RecreateBlobHierarchyIndIm(blobIndIm{testImNr}, blobBoxes{testImNr}, hierarchy{testImNr}{j});
48 |         end
49 |         testBlobs = cat(1, testBlobsT{:});
50 |         
51 |         % Get rid of too small blobs
52 |         testBlobs = FilterBlobsWidth(testBlobs, minWidth);
53 |         numSegments(testImNr) = length(testBlobs);        
54 |       
55 |         % Calculate overlap scores
56 |         boScores{cI}(classIdx(cI)) = BlobBestOverlap(testBlobs, gtBlobs{cI}(i));
57 |         
58 |         classIdx(cI) = classIdx(cI) + 1;
59 |     end
60 | end
61 | 
62 | abo = zeros(nClasses, 1);
63 | 
64 | for cI = 1:nClasses
65 |     abo(cI) = mean(boScores{cI});
66 | end
67 | 
68 | mabo = mean(abo);    
69 | 
70 | % Average of numSegments. Make sure that only images for which the
71 | % numSegments are actually calculated are taken into account.
72 | avgNumSegments = mean(numSegments(numSegments > 0));
73 |     


--------------------------------------------------------------------------------
/selective_search/License.txt:
--------------------------------------------------------------------------------
 1 | Copyright University of Amsterdam. All rights reserved.
 2 | 
 3 | Contact persons:
 4 | Jasper Uijlings (jrr <at> disi.unitn.it)
 5 | Koen van de Sande (ksande <at> uva.nl)
 6 | 
 7 | This software is being made available for individual research use only.
 8 | Any commercial use or redistribution of this software requires a license from
 9 | the University of Amsterdam.
10 | 
11 | You may use this work subject to the following conditions:
12 | 
13 | 1. This work is provided "as is" by the copyright holder, with
14 | absolutely no warranties of correctness, fitness, intellectual property
15 | ownership, or anything else whatsoever.  You use the work
16 | entirely at your own risk.  The copyright holder will not be liable for
17 | any legal damages whatsoever connected with the use of this work.
18 | 
19 | 2. The copyright holder retain all copyright to the work. All copies of
20 | the work and all works derived from it must contain (1) this copyright
21 | notice, and (2) additional notices describing the content, dates and
22 | copyright holder of modifications or additions made to the work, if
23 | any, including distribution and use conditions and intellectual property
24 | claims.  Derived works must be clearly distinguished from the original
25 | work, both by name and by the prominent inclusion of explicit
26 | descriptions of overlaps and differences.
27 | 
28 | 3. The names and trademarks of the copyright holder may not be used in
29 | advertising or publicity related to this work without specific prior
30 | written permission. 
31 | 
32 | 4. In return for the free use of this work, you are requested, but not
33 | legally required, to do the following:
34 | 
35 | * If you become aware of factors that may significantly affect other
36 |   users of the work, for example major bugs or
37 |   deficiencies or possible intellectual property issues, you are
38 |   requested to report them to the copyright holder, if possible
39 |   including redistributable fixes or workarounds.
40 | 
41 | * If you use the work in scientific research or as part of a larger
42 |   software system, you are requested to cite the use in any related
43 |   publications or technical documentation. The work is based upon:
44 | 
45 |     J.R.R. Uijlings, K.E.A. van de Sande, T. Gevers, and A.W.M. Smeulders.
46 |     Selective Search for Object Recognition
47 |     IJCV, 2013.
48 | 
49 |   and uses
50 | 
51 |     J. M. Geusebroek, A. W. M. Smeulders, and J. van de Weijer.
52 |     Fast anisotropic gauss filtering. IEEE Trans. Image Processing,
53 |     vol. 12, no. 8, pp. 938-943, 2003.
54 | 
55 |     P. Felzenszwalb and D. Huttenlocher.
56 |     Efficient graph-based image segmentation,
57 |     International Journal of Computer Vision, 2004.
58 |     
59 |  
60 | This copyright notice must be retained with all copies of the software,
61 | including any modified or derived versions.
62 | 
63 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/FelzenSegment/image.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright (C) 2006 Pedro Felzenszwalb
  3 | 
  4 | This program is free software; you can redistribute it and/or modify
  5 | it under the terms of the GNU General Public License as published by
  6 | the Free Software Foundation; either version 2 of the License, or
  7 | (at your option) any later version.
  8 | 
  9 | This program is distributed in the hope that it will be useful,
 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | GNU General Public License for more details.
 13 | 
 14 | You should have received a copy of the GNU General Public License
 15 | along with this program; if not, write to the Free Software
 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 17 | */
 18 | 
 19 | /* a simple image class */
 20 | 
 21 | #ifndef IMAGE_H
 22 | #define IMAGE_H
 23 | 
 24 | #include <cstring>
 25 | 
 26 | template <class T>
 27 | class image {
 28 |  public:
 29 |   /* create an image */
 30 |   image(const int width, const int height, const bool init = true);
 31 | 
 32 |   /* delete an image */
 33 |   ~image();
 34 | 
 35 |   /* init an image */
 36 |   void init(const T &val);
 37 | 
 38 |   /* copy an image */
 39 |   image<T> *copy() const;
 40 |   
 41 |   /* get the width of an image. */
 42 |   int width() const { return w; }
 43 |   
 44 |   /* get the height of an image. */
 45 |   int height() const { return h; }
 46 |   
 47 |   /* image data. */
 48 |   T *data;
 49 |   
 50 |   /* row pointers. */
 51 |   T **access;
 52 |   
 53 |  private:
 54 |   int w, h;
 55 | };
 56 | 
 57 | /* use imRef to access image data. */
 58 | #define imRef(im, x, y) (im->access[y][x])
 59 |   
 60 | /* use imPtr to get pointer to image data. */
 61 | #define imPtr(im, x, y) &(im->access[y][x])
 62 | 
 63 | template <class T>
 64 | image<T>::image(const int width, const int height, const bool init) {
 65 |   w = width;
 66 |   h = height;
 67 |   data = new T[w * h];  // allocate space for image data
 68 |   access = new T*[h];   // allocate space for row pointers
 69 |   
 70 |   // initialize row pointers
 71 |   for (int i = 0; i < h; i++)
 72 |     access[i] = data + (i * w);  
 73 |   
 74 |   if (init)
 75 |     memset(data, 0, w * h * sizeof(T));
 76 | }
 77 | 
 78 | template <class T>
 79 | image<T>::~image() {
 80 |   delete [] data; 
 81 |   delete [] access;
 82 | }
 83 | 
 84 | template <class T>
 85 | void image<T>::init(const T &val) {
 86 |   T *ptr = imPtr(this, 0, 0);
 87 |   T *end = imPtr(this, w-1, h-1);
 88 |   while (ptr <= end)
 89 |     *ptr++ = val;
 90 | }
 91 | 
 92 | 
 93 | template <class T>
 94 | image<T> *image<T>::copy() const {
 95 |   image<T> *im = new image<T>(w, h, false);
 96 |   memcpy(im->data, data, w * h * sizeof(T));
 97 |   return im;
 98 | }
 99 | 
100 | #endif
101 |   
102 | 


--------------------------------------------------------------------------------
/tools/reval.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # --------------------------------------------------------
 4 | # Fast R-CNN
 5 | # Copyright (c) 2015 Microsoft
 6 | # Licensed under The MIT License [see LICENSE for details]
 7 | # Written by Ross Girshick
 8 | # --------------------------------------------------------
 9 | 
10 | """Reval = re-eval. Re-evaluate saved detections."""
11 | 
12 | import _init_paths
13 | from fast_rcnn.test import apply_nms
14 | from fast_rcnn.config import cfg
15 | from datasets.factory import get_imdb
16 | import cPickle
17 | import os, sys, argparse
18 | import numpy as np
19 | 
def parse_args():
    """Parse the command line of the re-evaluation script.

    With no arguments at all, the help text is printed and the process exits
    with status 1. ``output_dir`` is declared with ``nargs=1`` and is
    therefore returned as a one-element list.
    """
    cli = argparse.ArgumentParser(description='Re-evaluate results')
    cli.add_argument('output_dir', nargs=1, type=str,
                     help='results directory')
    rerun_help = ('re-run evaluation code '
                  '(otherwise: results are loaded from file)')
    cli.add_argument('--rerun', dest='rerun', action='store_true',
                     help=rerun_help)
    cli.add_argument('--imdb', dest='imdb_name', type=str,
                     default='voc_2007_test',
                     help='dataset to re-evaluate')
    cli.add_argument('--comp', dest='comp_mode', action='store_true',
                     help='competition mode')

    if len(sys.argv) == 1:
        cli.print_help()
        sys.exit(1)

    return cli.parse_args()
43 | 
44 | 
def from_mats(imdb_name, output_dir):
    """Print per-class AP values stored in ``<class>_pr.mat`` files.

    For every class of the imdb except the first entry, loads
    ``<output_dir>/<class>_pr.mat``, reads its ``ap`` and ``ap_auc`` values
    (scaled by 100) and prints them. Afterwards it prints the ``ap`` values
    again followed by their mean.

    Args:
        imdb_name: name of a registered imdb, resolved with ``get_imdb``.
        output_dir: directory that holds the ``*_pr.mat`` files.
    """
    import scipy.io as sio

    imdb = get_imdb(imdb_name)

    aps = []
    # classes[1:] skips the first class entry (presumably the background
    # class -- TODO confirm against the imdb definition).
    for cls in imdb.classes[1:]:
        mat = sio.loadmat(os.path.join(output_dir, cls + '_pr.mat'))
        ap = mat['ap'][0, 0] * 100
        apAuC = mat['ap_auc'][0, 0] * 100
        # print(...) with a single argument behaves the same on Python 2 and 3
        print('!!! {} : {:.1f} {:.1f}'.format(cls, ap, apAuC))
        aps.append(ap)

    print('~~~~~~~~~~~~~~~~~~~')
    print('Results (from mat files):')
    for ap in aps:
        print('{:.1f}'.format(ap))
    print('{:.1f}'.format(np.array(aps).mean()))
    print('~~~~~~~~~~~~~~~~~~~')
64 | 
65 | 
def from_dets(imdb_name, output_dir, comp_mode):
    """Re-run evaluation from the pickled detections in ``output_dir``.

    Loads ``<output_dir>/detections.pkl``, applies NMS with ``cfg.TEST.NMS``
    and passes the result to the imdb's ``evaluate_detections``.

    Args:
        imdb_name: name of a registered imdb, resolved with ``get_imdb``.
        output_dir: directory holding ``detections.pkl``; also handed to the
            evaluation call.
        comp_mode: value forwarded to ``imdb.competition_mode``.
    """
    imdb = get_imdb(imdb_name)
    imdb.competition_mode(comp_mode)
    # SECURITY NOTE: unpickling can execute arbitrary code. Only point this
    # at detections.pkl files produced by a trusted run.
    with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f:
        dets = cPickle.load(f)

    # print(...) with a single argument behaves the same on Python 2 and 3
    print('Applying NMS to all detections')
    nms_dets = apply_nms(dets, cfg.TEST.NMS)

    print('Evaluating detections')
    imdb.evaluate_detections(nms_dets, output_dir)
77 | 
# Script entry point: either re-run evaluation from the saved detections
# (--rerun) or just report the numbers stored in the .mat result files.
if __name__ == '__main__':
    args = parse_args()

    # output_dir is parsed as a one-element list; take its only entry
    output_dir = os.path.abspath(args.output_dir[0])
    imdb_name = args.imdb_name

    if not args.rerun:
        # competition mode only makes sense when evaluation is re-run
        if args.comp_mode:
            raise ValueError('--rerun must be used with --comp')
        from_mats(imdb_name, output_dir)
    else:
        from_dets(imdb_name, output_dir, args.comp_mode)
91 | 


--------------------------------------------------------------------------------
/selective_search/demo.m:
--------------------------------------------------------------------------------
% This demo shows how to use the software described in our IJCV paper: 
%   Selective Search for Object Recognition,
%   J.R.R. Uijlings, K.E.A. van de Sande, T. Gevers, A.W.M. Smeulders, IJCV 2013
%
% This first section adds the Dependencies folder to the path and compiles
% the three mex functions, each one only when it is not found yet.
%%
addpath('Dependencies');

fprintf('Demo of how to run the code for:\n');
fprintf('   J. Uijlings, K. van de Sande, T. Gevers, A. Smeulders\n');
fprintf('   Segmentation as Selective Search for Object Recognition\n');
fprintf('   IJCV 2013\n\n');

% Compile anisotropic gaussian filter
% (exist() returns 0 when the name cannot be found)
if(~exist('anigauss'))
    fprintf('Compiling the anisotropic gauss filtering of:\n');
    fprintf('   J. Geusebroek, A. Smeulders, and J. van de Weijer\n');
    fprintf('   Fast anisotropic gauss filtering\n');
    fprintf('   IEEE Transactions on Image Processing, 2003\n');
    fprintf('Source code/Project page:\n');
    fprintf('   http://staff.science.uva.nl/~mark/downloads.html#anigauss\n\n');
    mex Dependencies/anigaussm/anigauss_mex.c Dependencies/anigaussm/anigauss.c -output anigauss
end

% Compile the mexCountWordsIndex helper
if(~exist('mexCountWordsIndex'))
    mex Dependencies/mexCountWordsIndex.cpp
end

% Compile the code of Felzenszwalb and Huttenlocher, IJCV 2004.
if(~exist('mexFelzenSegmentIndex'))
    fprintf('Compiling the segmentation algorithm of:\n');
    fprintf('   P. Felzenszwalb and D. Huttenlocher\n');
    fprintf('   Efficient Graph-Based Image Segmentation\n');
    fprintf('   International Journal of Computer Vision, 2004\n');
    fprintf('Source code/Project page:\n');
    fprintf('   http://www.cs.brown.edu/~pff/segment/\n');
    fprintf('Note: A small Matlab wrapper was made.\n');
    mex Dependencies/FelzenSegment/mexFelzenSegmentIndex.cpp -output mexFelzenSegmentIndex;
end
39 | 
%%
% Parameters. Note that this controls the number of hierarchical
% segmentations which are combined.
colorTypes = {'Hsv', 'Lab', 'RGI', 'H', 'Intensity'};
colorType = colorTypes{1}; % Single color space for demo ('Hsv')

% Here you specify which similarity functions to use in merging
simFunctionHandles = {@SSSimColourTextureSizeFillOrig, @SSSimTextureSizeFill, @SSSimBoxFillOrig, @SSSimSize};
simFunctionHandles = simFunctionHandles(1:2); % Two different merging strategies

% Thresholds for the Felzenszwalb and Huttenlocher segmentation algorithm.
% Note that by default, we set minSize = k, and sigma = 0.8.
k = 200; % controls size of segments of initial segmentation. 
minSize = k;
sigma = 0.8;

% As an example, use a single image
images = {'000015.jpg'};
im = imread(images{1});

% Perform Selective Search
% hierarchy is indexed per similarity function below (hierarchy{1} is used
% for the first one).
[boxes blobIndIm blobBoxes hierarchy] = Image2HierarchicalGrouping(im, sigma, k, minSize, colorType, simFunctionHandles);
boxes = BoxRemoveDuplicates(boxes);

% Show boxes
% NOTE(review): the two 5s look like a display grid size -- confirm against
% ShowRectsWithinImage.
ShowRectsWithinImage(boxes, 5, 5, im);

% Show blobs which result from first similarity function
hBlobs = RecreateBlobHierarchyIndIm(blobIndIm, blobBoxes, hierarchy{1});
ShowBlobs(hBlobs, 5, 5, im);


--------------------------------------------------------------------------------
/tools/train_net.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # --------------------------------------------------------
 4 | # Fast R-CNN
 5 | # Copyright (c) 2015 Microsoft
 6 | # Licensed under The MIT License [see LICENSE for details]
 7 | # Written by Ross Girshick
 8 | # --------------------------------------------------------
 9 | 
10 | """Train a Fast R-CNN network on a region of interest database."""
11 | 
12 | import _init_paths
13 | from fast_rcnn.train import get_training_roidb, train_net
14 | from fast_rcnn.config import cfg, cfg_from_file, get_output_dir
15 | from datasets.factory import get_imdb
16 | import caffe
17 | import argparse
18 | import pprint
19 | import numpy as np
20 | import sys
21 | 
def parse_args():
    """Parse the command-line options of the training script.

    With no arguments at all, the help text is printed and the process exits
    with status 1. Otherwise the parsed ``argparse.Namespace`` is returned.
    """
    cli = argparse.ArgumentParser(description='Train a Fast R-CNN network')
    cli.add_argument('--gpu', dest='gpu_id', type=int, default=0,
                     help='GPU device id to use [0]')
    cli.add_argument('--solver', dest='solver', type=str, default=None,
                     help='solver prototxt')
    cli.add_argument('--iters', dest='max_iters', type=int, default=40000,
                     help='number of iterations to train')
    cli.add_argument('--weights', dest='pretrained_model', type=str,
                     default=None,
                     help='initialize with pretrained model weights')
    cli.add_argument('--cfg', dest='cfg_file', type=str, default=None,
                     help='optional config file')
    cli.add_argument('--imdb', dest='imdb_name', type=str,
                     default='voc_2007_trainval',
                     help='dataset to train on')
    cli.add_argument('--rand', dest='randomize', action='store_true',
                     help='randomize (do not use a fixed seed)')

    if len(sys.argv) == 1:
        cli.print_help()
        sys.exit(1)

    return cli.parse_args()
55 | 
# Script entry point: parse the options, optionally load a config file, seed
# the random number generators, select the GPU, load the dataset and train.
if __name__ == '__main__':
    args = parse_args()

    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)

    print('Using config:')
    pprint.pprint(cfg)

    if not args.randomize:
        # fix the random seeds (numpy and caffe) for reproducibility
        np.random.seed(cfg.RNG_SEED)
        caffe.set_random_seed(cfg.RNG_SEED)

    # set up caffe
    caffe.set_mode_gpu()
    if args.gpu_id is not None:
        caffe.set_device(args.gpu_id)

    imdb = get_imdb(args.imdb_name)
    # print(...) with a single argument, like the calls above, so the whole
    # block uses one form that behaves the same on Python 2 and 3
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    roidb = get_training_roidb(imdb)

    output_dir = get_output_dir(imdb, None)
    print('Output will be saved to `{:s}`'.format(output_dir))

    train_net(args.solver, roidb, output_dir,
              pretrained_model=args.pretrained_model,
              max_iters=args.max_iters)
88 | 


--------------------------------------------------------------------------------
/selective_search/selective_search_rcnn.m:
--------------------------------------------------------------------------------
function all_boxes = selective_search_rcnn(image_filenames, output_filename)
% all_boxes = selective_search_rcnn(image_filenames, output_filename)
%
% Compute Selective Search box proposals for a list of images.
%
% image_filenames:  Cell array of image file names; each is read with imread.
% output_filename:  (optional) When given, all_boxes is displayed and then
%                   saved to this file in -v7 format.
%
% all_boxes:        Cell array with one entry per image holding the boxes
%                   that remain after filtering and duplicate removal,
%                   mapped back to the scale of the original image.

% Based on the demo.m file included in the Selective Search
% IJCV code, and on selective_search_boxes.m from R-CNN.

% Load dependencies and compile if needed.

addpath('Dependencies');

% Each mex function is compiled only when it is not found yet
% (exist() returns 0 when the name cannot be found).
if(~exist('anigauss'))
    mex Dependencies/anigaussm/anigauss_mex.c Dependencies/anigaussm/anigauss.c -output anigauss
end

if(~exist('mexCountWordsIndex'))
    mex Dependencies/mexCountWordsIndex.cpp
end

if(~exist('mexFelzenSegmentIndex'))
    mex Dependencies/FelzenSegment/mexFelzenSegmentIndex.cpp -output mexFelzenSegmentIndex;
end

% Configure
% Every image is resized to this width before proposals are computed.
im_width = 500;

% Parameters. Note that this controls the number of hierarchical
% segmentations which are combined.
colorTypes = {'Hsv', 'Lab', 'RGI', 'H', 'Intensity'};

% Here you specify which similarity functions to use in merging
simFunctionHandles = {@SSSimColourTextureSizeFillOrig, ...
                      @SSSimTextureSizeFill, ...
                      @SSSimBoxFillOrig, ...
                      @SSSimSize};

% Thresholds for the Felzenszwalb and Huttenlocher segmentation algorithm.
% Note that by default, we set minSize = k, and sigma = 0.8.
% controls size of segments of initial segmentation.
ks = [50 100 150 300];
sigma = 0.8;

% After segmentation, filter out boxes which have a width/height smaller
% than minBoxWidth (default = 20 pixels).
minBoxWidth = 20;

% Comment the following three lines for the 'quality' version
colorTypes = colorTypes(1:2); % 'Fast' uses HSV and Lab
simFunctionHandles = simFunctionHandles(1:2); % Two different merging strategies
ks = ks(1:2);

% Process all images.
all_boxes = {};
for i=1:length(image_filenames)
    im = imread(image_filenames{i});
    % Resize image to canonical dimensions since proposals aren't scale invariant.
    % scale is the factor that maps resized coordinates back to the original.
    scale = size(im, 2) / im_width;
    im = imresize(im, [NaN im_width]);

    % One grouping per (k, colour space) pair. idx restarts at 1 for every
    % image and the number of pairs is the same each time, so all entries of
    % boxesT / priorityT from the previous image are overwritten.
    idx = 1;
    for j = 1:length(ks)
      k = ks(j); % Segmentation threshold k
      minSize = k; % We set minSize = k
      for n = 1:length(colorTypes)
        colorType = colorTypes{n};
        [boxesT{idx} blobIndIm blobBoxes hierarchy priorityT{idx}] = ...
          Image2HierarchicalGrouping(im, sigma, k, minSize, colorType, simFunctionHandles);
        idx = idx + 1;
      end
    end
    boxes = cat(1, boxesT{:}); % Concatenate boxes from all hierarchies
    priority = cat(1, priorityT{:}); % Concatenate priorities

    % Do pseudo random sorting as in paper
    % (each priority is multiplied by a uniform random number, then the
    % boxes are ordered by the result, so the order differs between runs
    % unless the random generator is seeded)
    priority = priority .* rand(size(priority));
    [priority sortIds] = sort(priority, 'ascend');
    boxes = boxes(sortIds,:);

    boxes = FilterBoxesWidth(boxes, minBoxWidth);
    boxes = BoxRemoveDuplicates(boxes);

    % Adjust boxes to cancel effect of canonical scaling.
    % (shift to 0-based, scale, shift back to 1-based)
    boxes = (boxes - 1) * scale + 1;

    % Filter again, now on the boxes at original image scale.
    boxes = FilterBoxesWidth(boxes, minBoxWidth);
    boxes = BoxRemoveDuplicates(boxes);
    all_boxes{i} = boxes;
end

if nargin > 1
    % NOTE(review): no trailing semicolon, so the whole cell array is echoed
    % to the console before saving -- confirm whether this is intended.
    all_boxes
    save(output_filename, 'all_boxes', '-v7');
end
92 | 


--------------------------------------------------------------------------------
/help/test/README.md:
--------------------------------------------------------------------------------
 1 | # Test Fast-RCNN on Another Dataset
 2 | 
 3 | We will illustrate how to test Fast-RCNN on another dataset in the following steps, and we will take **INRIA Person** as the example dataset.
 4 | 
 5 | ### Format Your Dataset
 6 | 
 7 | First, the dataset must be organized in the required format.
 8 | ```
 9 | INRIA
10 | |-- data
11 |     |-- Annotations
12 |          |-- *.txt (Annotation files)
13 |     |-- Images
14 |          |-- *.png (Image files)
15 |     |-- ImageSets
16 |          |-- test.txt
17 | |-- results
18 |     |-- test (empty before test)
19 | |-- VOCcode (optional)
20 | ```
21 | 
22 | The `test.txt` file contains the names (without extensions) of all image files that will be used for testing. For example, here are a few lines from `test.txt`.
23 | 
24 | ```
25 | crop_000001
26 | crop_000002
27 | crop_000003
28 | crop_000004
29 | crop_000005
30 | ```
31 | 
32 | ### Construct IMDB
33 | 
34 | See it at https://github.com/EdisonResearch/fast-rcnn/tree/master/help/train.
35 | 
36 | Actually, you do not need to implement `_load_inria_annotation` yourself; you can simply use `inria.py` to construct the IMDB for your own dataset. For example, to test on a dataset named **TownCenter**, just add the following to `factory.py`.
37 | 
38 | ```sh
39 | towncenter_devkit_path = '/home/szy/TownCenter'
40 | for split in ['test']:
41 |    name = '{}_{}'.format('towncenter', split)
42 |    __sets[name] = (lambda split=split: datasets.inria(split, towncenter_devkit_path))
43 | ```
44 | 
45 | ### Run Selective Search 
46 | 
47 | See it at https://github.com/EdisonResearch/fast-rcnn/tree/master/help/train.
48 | 
49 | Note that it should be `test.mat` rather than `train.mat`.
50 | 
51 | ### Modify Prototxt
52 | 
53 | For example, if you want to use the model **VGG_CNN_M_1024**, then you should modify `test.prototxt` in `$FRCNN_ROOT/models/VGG_CNN_M_1024`; the changes mainly concern the number of classes. Let's assume that the number of classes is `C` (do not forget to count the `background` class). Then you should 
54 |   - Modify `num_output` in the `cls_score` layer to `C`
55 |   - Modify `num_output` in the `bbox_pred` layer to `4 * C`
56 | 
57 | See https://github.com/rbgirshick/fast-rcnn/issues/11 for more details. 
58 | 
59 | ### Prepare Your Evaluation Code
60 | 
61 | The original **Fast-RCNN** framework uses MATLAB wrappers to evaluate the results. As the evaluation process is not very difficult, you could instead modify the function `evaluate_detections` in `inria.py`.  
62 | 
63 | As **INRIA Person** provides some matlab files in the format of **PASCAL-VOC**, you could modify it a little and use it directly. You could see https://github.com/EdisonResearch/fast-rcnn/tree/master/help/INRIA/VOCcode for the VOCcode.
64 | 
65 | If you do not want to use the evaluation function of the **Fast-RCNN** framework, you can find the results in the directory `results/test` under the root directory of your dataset.
66 | 
67 | ### Test!
68 | 
69 | In the directory **$FRCNN_ROOT**, run the following command in the shell.
70 | 
71 | ```sh
72 | ./tools/test_net.py --gpu 1 --def models/VGG_CNN_M_1024/test.prototxt \
73 |     --net output/default/train/vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel --imdb inria_test
74 | ```
75 | 
76 | Be careful with the **imdb** argument as it specifies the dataset you will test on. 
77 | 
78 | ### References
79 | 
80 | [Fast-RCNN] https://github.com/rbgirshick/fast-rcnn
81 | 
82 | ### Contact
83 | 
84 | Feel free to contact me at <zeyuanxy@gmail.com>.


--------------------------------------------------------------------------------
/help/INRIA/VOCcode/PASreadrecord.m:
--------------------------------------------------------------------------------
 1 | function record=PASreadrecord(filename) % parse one text annotation file into a record struct
 2 |   [fd,syserrmsg]=fopen(filename,'rt');
 3 |   if (fd==-1),
 4 |     PASmsg=sprintf('Could not open %s for reading',filename);
 5 |     PASerrmsg(PASmsg,syserrmsg); 
 6 |   end;
 7 |   % Read line by line; each line is matched against the templates from initstrings.
 8 |   matchstrs=initstrings;
 9 |   record=PASemptyrecord;
10 |   notEOF=1;
11 |   while (notEOF),
12 |     line=fgetl(fd);
13 |     notEOF=ischar(line); % fgetl returns a non-char value at end of file
14 |     if (notEOF),
15 |       matchnum=match(line,matchstrs); % template index, 0 if the line matches none
16 |       switch matchnum,
17 |     case 1, [imgname]=strread(line,matchstrs(matchnum).str); % image file name
18 | 	        record.imgname=char(imgname);
19 | 	case 2, [x,y,c]=strread(line,matchstrs(matchnum).str); % image size X x Y x C
20 | 	        record.imgsize=[x y c];
21 | 	case 3, [database]=strread(line,matchstrs(matchnum).str); % database name
22 | 	        record.database=char(database);
23 | 	case 4, [obj,lbl,xmin,ymin,xmax,ymax]=strread(line,matchstrs(matchnum).str); % bounding box
24 | 	        record.objects(obj).label=char(lbl);
25 | 		record.objects(obj).bbox=[min(xmin,xmax),min(ymin,ymax),max(xmin,xmax),max(ymin,ymax)]; % corners reordered so min comes first
26 | 	case 5, tmp=findstr(line,' : '); % polygon: header before ' : ', points after it
27 | 	        [obj,lbl]=strread(line(1:tmp),matchstrs(matchnum).str);
28 | 	        record.objects(obj).label=char(lbl);
29 |     		record.objects(obj).polygon=sscanf(line(tmp+3:end),'(%d, %d) ')'; % row vector x1 y1 x2 y2 ...
30 | 	case 6, [obj,lbl,mask]=strread(line,matchstrs(matchnum).str); % pixel mask
31 | 	        record.objects(obj).label=char(lbl);
32 |     		record.objects(obj).mask=char(mask);
33 | 	case 7, [obj,lbl,orglbl]=strread(line,matchstrs(matchnum).str); % original label
34 |             lbl=char(lbl);
35 | 	        record.objects(obj).label=lbl;
36 |     		record.objects(obj).orglabel=char(orglbl);
37 |             if strcmp(lbl(max(end-8,1):end),'Difficult') % trailing 9 characters
38 |                 record.objects(obj).difficult=true;
39 |                 lbl(end-8:end)=[]; % strip the suffix before further parsing
40 |             else
41 |                 record.objects(obj).difficult=false;
42 |             end
43 |             if strcmp(lbl(max(end-4,1):end),'Trunc') % trailing 5 characters
44 |                 record.objects(obj).truncated=true;
45 |                 lbl(end-4:end)=[];
46 |             else
47 |                 record.objects(obj).truncated=false;
48 |             end
49 |             t=find(lbl>='A'&lbl<='Z'); % positions of upper-case letters
50 |             t=t(t>=4); % ignore the first three characters
51 |             if ~isempty(t)
52 |                 record.objects(obj).view=lbl(t(1):end); % view = from first such capital to the end
53 |                 lbl(t(1):end)=[];
54 |             else
55 |                 record.objects(obj).view='';                
56 |             end
57 |             record.objects(obj).class=lbl(4:end); % drops the first three characters (presumably a 'PAS' prefix; confirm against the data)
58 |         
59 | 	otherwise, % unrecognised line: skipped silently  %fprintf('Skipping: %s\n',line);
60 |       end;
61 |     end;
62 |   end;
63 |   fclose(fd);
64 | return
65 | 
66 | function matchnum=match(line,matchstrs)
67 |   % Index of the template whose first matchlen characters equal those of line (0 if none).
68 |   hits=arrayfun(@(tpl) strncmp(line,tpl.str,tpl.matchlen),matchstrs);
69 |   matchnum=find(hits);
70 |   if isempty(matchnum)
71 |     matchnum=0;
72 |   elseif numel(matchnum)>1
73 |     PASerrmsg('Multiple matches while parsing','');  % line fits several templates
74 |   end
75 | return
76 | 
77 | function s=initstrings
78 |   % Line templates, in the order used by the switch in PASreadrecord.
79 |   %   matchlen: number of leading characters that match() compares
80 |   %   str:      strread format that extracts the fields of the line
81 |   templates={ ...
82 |     14, 'Image filename : %q'; ...
83 |     10, 'Image size (X x Y x C) : %d x %d x %d'; ...
84 |      8, 'Database : %q'; ...
85 |      8, 'Bounding box for object %d %q (Xmin, Ymin) - (Xmax, Ymax) : (%d, %d) - (%d, %d)'; ...
86 |      7, 'Polygon for object %d %q (X, Y)'; ...
87 |      5, 'Pixel mask for object %d %q : %q'; ...
88 |      8, 'Original label for object %d %q : %q' ...
89 |   };
90 |   % Cell-array arguments make struct() build a 1 x 7 struct array.
91 |   s=struct('matchlen',templates(:,1)','str',templates(:,2)');
92 | return


--------------------------------------------------------------------------------
/selective_search/Dependencies/FelzenSegment/filter.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright (C) 2006 Pedro Felzenszwalb
  3 | 
  4 | This program is free software; you can redistribute it and/or modify
  5 | it under the terms of the GNU General Public License as published by
  6 | the Free Software Foundation; either version 2 of the License, or
  7 | (at your option) any later version.
  8 | 
  9 | This program is distributed in the hope that it will be useful,
 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | GNU General Public License for more details.
 13 | 
 14 | You should have received a copy of the GNU General Public License
 15 | along with this program; if not, write to the Free Software
 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 17 | */
 18 | 
 19 | /* simple filters */
 20 | 
 21 | #ifndef FILTER_H
 22 | #define FILTER_H
 23 | 
 24 | #include <vector>
 25 | #include <cmath>
 26 | #include "image.h"
 27 | #include "misc.h"
 28 | #include "convolve.h"
 29 | #include "imconv.h"
 30 | 
 31 | #define WIDTH 4.0
 32 | 
 33 | /* Scale mask in place by 1 / (|mask[0]| + 2 * sum of |mask[i]| for i >= 1). */
 34 | static void normalize(std::vector<float> &mask) {
 35 |   const int taps = mask.size();
 36 |   float total = 0;
 37 |   int k = 1;
 38 |   while (k < taps) total += fabs(mask[k++]);
 39 |   /* entries past index 0 count twice -- presumably the mask is applied symmetrically (confirm in convolve.h) */
 40 |   total = 2*total + fabs(mask[0]);
 41 |   for (std::vector<float>::iterator it = mask.begin(); it != mask.end(); ++it) {
 42 |     *it /= total;
 43 |   }
 44 | }
 45 | 
 46 | /* make filters: MAKE_FILTER(name, fun) defines make_<name>(sigma), which clamps sigma to at least 0.01 and returns ceil(sigma*WIDTH)+1 samples of the expression fun, evaluated with i and sigma in scope */
 47 | #define MAKE_FILTER(name, fun)                                \
 48 | static std::vector<float> make_ ## name (float sigma) {       \
 49 |   sigma = std::max(sigma, 0.01F);			      \
 50 |   int len = (int)ceil(sigma * WIDTH) + 1;                     \
 51 |   std::vector<float> mask(len);                               \
 52 |   for (int i = 0; i < len; i++) {                             \
 53 |     mask[i] = fun;                                            \
 54 |   }                                                           \
 55 |   return mask;                                                \
 56 | }
 57 | /* make_fgauss: samples exp(-0.5*square(i/sigma)) for i = 0..len-1; not normalised here (assumes square(x) is x*x, see misc.h) */
 58 | MAKE_FILTER(fgauss, exp(-0.5*square(i/sigma)));
 59 | 
 60 | /* Blur src with the normalised fgauss mask for sigma; the result is new'd here and not freed. */
 61 | static image<float> *smooth(image<float> *src, float sigma) {
 62 |   std::vector<float> kernel = make_fgauss(sigma);
 63 |   normalize(kernel);
 64 |   /* intermediate buffer has width and height swapped -- presumably convolve_even transposes; confirm in convolve.h */
 65 |   image<float> *pass1 = new image<float>(src->height(), src->width(), false);
 66 |   convolve_even(src, pass1, kernel);
 67 |   image<float> *result = new image<float>(src->width(), src->height(), false);
 68 |   convolve_even(pass1, result, kernel);
 69 | 
 70 |   delete pass1;
 71 |   return result;
 72 | }
 73 | 
 74 | /* Blur an 8-bit image: convert to float, then smooth. Declared static like the other functions in this header so that including it from several translation units does not produce duplicate definitions. The returned image is new'd and not freed here. */
 75 | static image<float> *smooth(image<uchar> *src, float sigma) {
 76 |   image<float> *tmp = imageUCHARtoFLOAT(src);
 77 |   image<float> *dst = smooth(tmp, sigma);
 78 |   delete tmp;  /* the float copy is only needed for the call above */
 79 |   return dst;
 80 | }
 81 | 
 82 | /* Sum of second differences in x and y at every interior pixel; the result is new'd here. */
 83 | static image<float> *laplacian(image<float> *src) {
 84 |   const int w = src->width();
 85 |   const int h = src->height();
 86 |   image<float> *out = new image<float>(w, h);
 87 |   /* the one-pixel border is never written and keeps whatever the constructor set */
 88 |   for (int row = 1; row < h-1; row++) {
 89 |     for (int col = 1; col < w-1; col++) {
 90 |       const float horiz = imRef(src, col-1, row) + imRef(src, col+1, row) -
 91 |         2*imRef(src, col, row);
 92 |       const float vert = imRef(src, col, row-1) + imRef(src, col, row+1) -
 93 |         2*imRef(src, col, row);
 94 |       imRef(out, col, row) = horiz + vert;
 95 |     }
 96 |   }
 97 |   return out;
 98 | }
 99 | 
100 | #endif
101 | 


--------------------------------------------------------------------------------
/help/INRIA/VOCcode/VOCpr.m:
--------------------------------------------------------------------------------
  1 | function [rec,prec,ap] = VOCpr(VOCopts,id,cls,draw)
  2 | % Precision/recall curve and 11-point average precision for one class.
  3 | %
  4 | %   VOCopts:  struct; this function reads imgsetpath, testset, annopath,
  5 | %             detrespath and minoverlap
  6 | %   id:       identifier substituted into VOCopts.detrespath
  7 | %   cls:      class name, matched exactly against each object's class
  8 | %   draw:     optional, default false; when true the curve is plotted
  9 | %
 10 | %   rec,prec: recall and precision after each detection, with detections
 11 | %             taken in order of decreasing confidence
 12 | %   ap:       mean over t=0:0.1:1 of the highest precision at recall >= t
 13 | 
 14 | % draw is optional: default to not plotting
 15 | if nargin<4
 16 |     draw=false;
 17 | end
 18 | 
 19 | % load test set
 20 | [gtids,t]=textread(sprintf(VOCopts.imgsetpath,VOCopts.testset),'%s %d');
 21 | 
 22 | % load ground truth objects
 23 | tic;
 24 | npos=0;
 25 | for i=1:length(gtids)
 26 |     % display progress
 27 |     if toc>1
 28 |         fprintf('%s: pr: load: %d/%d\n',cls,i,length(gtids));
 29 |         drawnow;
 30 |         tic;
 31 |     end
 32 |     
 33 |     % read annotation
 34 |     rec=PASreadrecord(sprintf(VOCopts.annopath,gtids{i}));
 35 |     
 36 |     % extract objects of class
 37 |     clsinds=strmatch(cls,{rec.objects(:).class},'exact');
 38 |     gt(i).BB=cat(1,rec.objects(clsinds).bbox)';     % 4 x (number of objects)
 39 |     gt(i).diff=[rec.objects(clsinds).difficult];
 40 |     gt(i).det=false(length(clsinds),1);             % not yet matched to a detection
 41 |     npos=npos+sum(~gt(i).diff);                     % difficult objects are not counted
 42 | end
 43 | 
 44 | % load results
 45 | [ids,confidence,b1,b2,b3,b4]=textread(sprintf(VOCopts.detrespath,id,cls),'%s %f %f %f %f %f');
 46 | BB=[b1 b2 b3 b4]';
 47 | 
 48 | % sort detections by decreasing confidence
 49 | [sc,si]=sort(-confidence);
 50 | ids=ids(si);
 51 | BB=BB(:,si);
 52 | 
 53 | % assign detections to ground truth objects
 54 | nd=length(confidence);
 55 | tp=zeros(nd,1);
 56 | fp=zeros(nd,1);
 57 | tic;
 58 | for d=1:nd
 59 |     % display progress
 60 |     if toc>1
 61 |         fprintf('%s: pr: compute: %d/%d\n',cls,d,nd);
 62 |         drawnow;
 63 |         tic;
 64 |     end
 65 |     
 66 |     % find ground truth image
 67 |     i=strmatch(ids{d},gtids,'exact');
 68 |     if isempty(i)
 69 |         error('unrecognized image "%s"',ids{d});
 70 |     elseif length(i)>1
 71 |         error('multiple image "%s"',ids{d});
 72 |     end
 73 | 
 74 |     % assign detection to ground truth object if any
 75 |     bb=BB(:,d);
 76 |     ovmax=-inf;
 77 |     for j=1:size(gt(i).BB,2)
 78 |         bbgt=gt(i).BB(:,j);
 79 |         bi=[max(bb(1),bbgt(1)) ; max(bb(2),bbgt(2)) ; min(bb(3),bbgt(3)) ; min(bb(4),bbgt(4))];
 80 |         iw=bi(3)-bi(1)+1;
 81 |         ih=bi(4)-bi(2)+1;
 82 |         if iw>0 && ih>0
 83 |             % compute overlap as area of intersection / area of union
 84 |             ua=(bb(3)-bb(1)+1)*(bb(4)-bb(2)+1)+...
 85 |                (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1)-...
 86 |                iw*ih;
 87 |             ov=iw*ih/ua;
 88 |             if ov>ovmax
 89 |                 ovmax=ov;
 90 |                 jmax=j;
 91 |             end
 92 |         end
 93 |     end
 94 |     % assign detection as true positive/don't care/false positive
 95 |     if ovmax>=VOCopts.minoverlap
 96 |         if ~gt(i).det(jmax)
 97 |             if ~gt(i).diff(jmax)
 98 |                 tp(d)=1;            % true positive
 99 |             end
100 |             % a first match to a difficult object counts as neither tp nor fp
101 |             gt(i).det(jmax)=true;
102 |         else
103 |             fp(d)=1;                % false positive (multiple detection)
104 |         end
105 |     else
106 |         fp(d)=1;                    % false positive
107 |     end
108 | end
109 | 
110 | % compute precision/recall
111 | fp=cumsum(fp);
112 | tp=cumsum(tp);
113 | rec=tp/npos;
114 | prec=tp./(fp+tp);
115 | 
116 | % compute average precision
117 | 
118 | ap=0;
119 | for t=0:0.1:1
120 |     p=max(prec(rec>=t));
121 |     if isempty(p)
122 |         p=0;                        % no detection reaches this recall level
123 |     end
124 |     ap=ap+p/11;
125 | end
126 | 
127 | if draw
128 |     % plot precision/recall
129 |     plot(rec,prec,'-');
130 |     grid;
131 |     xlabel 'recall'
132 |     ylabel 'precision'
133 |     title(sprintf('class: %s, subset: %s, AP = %.3f',cls,VOCopts.testset,ap));
134 | end
135 | 


--------------------------------------------------------------------------------
/help/INRIA/VOCcode/VOCevaldet.m:
--------------------------------------------------------------------------------
  1 | function [rec,prec,ap] = VOCpr(VOCopts,id,cls,draw)
  2 | % Precision/recall curve and 11-point average precision for one class.
  3 | %
  4 | % NOTE(review): this file is VOCevaldet.m but declares VOCpr. MATLAB
  5 | % dispatches on the file name, so it is callable as VOCevaldet; the
  6 | % declared name is left untouched here.
  7 | %
  8 | %   VOCopts:  struct; this function reads imgsetpath, testset, annopath,
  9 | %             detrespath and minoverlap
 10 | %   id:       identifier substituted into VOCopts.detrespath
 11 | %   cls:      class name, matched exactly against each object's class
 12 | %   draw:     optional, default false; when true the curve is plotted
 13 | %
 14 | %   rec,prec: recall and precision after each detection, with detections
 15 | %             taken in order of decreasing confidence
 16 | %   ap:       mean over t=0:0.1:1 of the highest precision at recall >= t
 17 | 
 18 | % draw is optional: default to not plotting
 19 | if nargin<4
 20 |     draw=false;
 21 | end
 22 | 
 23 | % load test set
 24 | [gtids,t]=textread(sprintf(VOCopts.imgsetpath,VOCopts.testset),'%s %d');
 25 | 
 26 | % load ground truth objects
 27 | tic;
 28 | npos=0;
 29 | for i=1:length(gtids)
 30 |     % display progress
 31 |     if toc>1
 32 |         fprintf('%s: pr: load: %d/%d\n',cls,i,length(gtids));
 33 |         drawnow;
 34 |         tic;
 35 |     end
 36 |     
 37 |     % read annotation
 38 |     rec=PASreadrecord(sprintf(VOCopts.annopath,gtids{i}));
 39 |     
 40 |     % extract objects of class
 41 |     clsinds=strmatch(cls,{rec.objects(:).class},'exact');
 42 |     gt(i).BB=cat(1,rec.objects(clsinds).bbox)';     % 4 x (number of objects)
 43 |     gt(i).diff=[rec.objects(clsinds).difficult];
 44 |     gt(i).det=false(length(clsinds),1);             % not yet matched to a detection
 45 |     npos=npos+sum(~gt(i).diff);                     % difficult objects are not counted
 46 | end
 47 | 
 48 | % load results (the results path is not echoed to the console)
 49 | [ids,confidence,b1,b2,b3,b4]=textread(sprintf(VOCopts.detrespath,id,cls),'%s %f %f %f %f %f');
 50 | BB=[b1 b2 b3 b4]';
 51 | 
 52 | % sort detections by decreasing confidence
 53 | [sc,si]=sort(-confidence);
 54 | ids=ids(si);
 55 | BB=BB(:,si);
 56 | 
 57 | % assign detections to ground truth objects
 58 | nd=length(confidence);
 59 | tp=zeros(nd,1);
 60 | fp=zeros(nd,1);
 61 | tic;
 62 | for d=1:nd
 63 |     % display progress
 64 |     if toc>1
 65 |         fprintf('%s: pr: compute: %d/%d\n',cls,d,nd);
 66 |         drawnow;
 67 |         tic;
 68 |     end
 69 |     
 70 |     % find ground truth image
 71 |     i=strmatch(ids{d},gtids,'exact');
 72 |     if isempty(i)
 73 |         error('unrecognized image "%s"',ids{d});
 74 |     elseif length(i)>1
 75 |         error('multiple image "%s"',ids{d});
 76 |     end
 77 | 
 78 |     % assign detection to ground truth object if any
 79 |     bb=BB(:,d);
 80 |     ovmax=-inf;
 81 |     for j=1:size(gt(i).BB,2)
 82 |         bbgt=gt(i).BB(:,j);
 83 |         bi=[max(bb(1),bbgt(1)) ; max(bb(2),bbgt(2)) ; min(bb(3),bbgt(3)) ; min(bb(4),bbgt(4))];
 84 |         iw=bi(3)-bi(1)+1;
 85 |         ih=bi(4)-bi(2)+1;
 86 |         if iw>0 && ih>0
 87 |             % compute overlap as area of intersection / area of union
 88 |             ua=(bb(3)-bb(1)+1)*(bb(4)-bb(2)+1)+...
 89 |                (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1)-...
 90 |                iw*ih;
 91 |             ov=iw*ih/ua;
 92 |             if ov>ovmax
 93 |                 ovmax=ov;
 94 |                 jmax=j;
 95 |             end
 96 |         end
 97 |     end
 98 |     % assign detection as true positive/don't care/false positive
 99 |     if ovmax>=VOCopts.minoverlap
100 |         if ~gt(i).det(jmax)
101 |             if ~gt(i).diff(jmax)
102 |                 tp(d)=1;            % true positive
103 |             end
104 |             % a first match to a difficult object counts as neither tp nor fp
105 |             gt(i).det(jmax)=true;
106 |         else
107 |             fp(d)=1;                % false positive (multiple detection)
108 |         end
109 |     else
110 |         fp(d)=1;                    % false positive
111 |     end
112 | end
113 | 
114 | % compute precision/recall
115 | fp=cumsum(fp);
116 | tp=cumsum(tp);
117 | rec=tp/npos;
118 | prec=tp./(fp+tp);
119 | 
120 | % compute average precision
121 | 
122 | ap=0;
123 | for t=0:0.1:1
124 |     p=max(prec(rec>=t));
125 |     if isempty(p)
126 |         p=0;                        % no detection reaches this recall level
127 |     end
128 |     ap=ap+p/11;
129 | end
130 | 
131 | if draw
132 |     % plot precision/recall
133 |     plot(rec,prec,'-');
134 |     grid;
135 |     xlabel 'recall'
136 |     ylabel 'precision'
137 |     title(sprintf('class: %s, subset: %s, AP = %.3f',cls,VOCopts.testset,ap));
138 | end
139 | 
119 | 


--------------------------------------------------------------------------------
/selective_search/Image2HierarchicalGrouping.m:
--------------------------------------------------------------------------------
 1 | function [boxes blobIndIm blobBoxes hierarchy priority] = Image2HierarchicalGrouping(im, sigma, k, minSize, colourType, functionHandles)
 2 | % function [boxes blobIndIm blobBoxes hierarchy priority] = Image2HierarchicalGrouping
 3 | %                              (im, sigma, k, minSize, colourType, functionHandles)
 4 | %
 5 | % Creates hierarchical grouping from an image
 6 | %
 7 | % im:                   Image
 8 | % sigma (= 0.8):        Smoothing for initial segmentation (Felzenszwalb 2004)
 9 | % k (= 100):            Threshold for initial segmentation
10 | % minSize (= 100):      Minimum size of segments for initial segmentation
11 | % colourType:           ColourType in which to do grouping (see Image2ColourSpace)
12 | % functionHandles:      Similarity functions which are called. Function
13 | %                       creates as many hierarchies as there are functionHandles
14 | %
15 | % boxes:                N x 4 array with boxes of all hierarchical groupings,
16 | %                       rows sorted by ascending priority
17 | % blobIndIm:            Index image with the initial segmentation
18 | % blobBoxes:            Boxes belonging to the indices in blobIndIm
19 | % hierarchy:            1 x M cell array with hierarchies. M =
20 | %                       length(functionHandles). Empty ([]) when the
21 | %                       initial segmentation yields a single region.
22 | % priority:             N x 1 sort key, one per row of boxes. Per strategy
23 | %                       the merged boxes are numbered count:-1:1 in the
24 | %                       order returned; each initial blob gets (number of
25 | %                       merged boxes of the first strategy) + 1.
26 | %
27 | %     Jasper Uijlings - 2013
28 | 
29 | % Change colour space
30 | [colourIm imageToSegment] = Image2ColourSpace(im, colourType);
31 | 
32 | % Get initial segmentation, boxes, and neighbouring blobs
33 | [blobIndIm blobBoxes neighbours] = mexFelzenSegmentIndex(imageToSegment, sigma, k, minSize);
34 | numBlobs = size(blobBoxes,1);
35 | 
36 | % Skip hierarchical grouping if segmentation results in single region only
37 | if numBlobs == 1
38 |     warning('Oversegmentation results in a single region only');
39 |     boxes = blobBoxes;
40 |     hierarchy = [];
41 |     priority = 1; % priority is legacy
42 |     return;
43 | end
44 | 
45 | %%% Calculate histograms and sizes as prerequisite for grouping procedure
46 | 
47 | % Get colour histogram
48 | [colourHist blobSizes] = BlobStructColourHist(blobIndIm, colourIm);
49 | 
50 | % Get texture histogram
51 | textureHist = BlobStructTextureHist(blobIndIm, colourIm);
52 | % textureHist = BlobStructTextureHistLBP(blobIndIm, colourIm);
53 | 
54 | % Allocate memory for complete hierarchy.
55 | blobStruct.colourHist = zeros(size(colourHist,2), numBlobs * 2 - 1);
56 | blobStruct.textureHist = zeros(size(textureHist,2), numBlobs * 2 - 1);
57 | blobStruct.size = zeros(numBlobs * 2 -1, 1);
58 | blobStruct.boxes = zeros(numBlobs * 2 - 1, 4);
59 | 
60 | % Insert calculated histograms, sizes, and boxes
61 | blobStruct.colourHist(:,1:numBlobs) = colourHist';
62 | blobStruct.textureHist(:,1:numBlobs) = textureHist';
63 | % NOTE(review): the division by 3 presumably undoes a per-channel count in
64 | % blobSizes; confirm against BlobStructColourHist.
65 | blobStruct.size(1:numBlobs) = blobSizes ./ 3;
66 | blobStruct.boxes(1:numBlobs,:) = blobBoxes;
67 | 
68 | blobStruct.imSize = size(im,1) * size(im,2);
69 | 
70 | %%% If you want to use original blobs in similarity functions, uncomment
71 | %%% these lines.
72 | % blobStruct.blobs = cell(numBlobs * 2 - 1, 1);
73 | % initialBlobs = SegmentIndices2Blobs(blobIndIm, blobBoxes);
74 | % blobStruct.blobs(1:numBlobs) = initialBlobs;
75 | 
76 | 
77 | % Loop over all merging strategies. Perform them one by one.
78 | numStrategies = length(functionHandles);
79 | boxes = cell(1, numStrategies + 1);
80 | priority = cell(1, numStrategies + 1);
81 | hierarchy = cell(1, numStrategies);
82 | for i=1:numStrategies
83 |     [boxes{i} hierarchy{i} blobStructT mergeThreshold] = BlobStruct2HierarchicalGrouping(blobStruct, neighbours, numBlobs, functionHandles{i});
84 |     boxes{i} = boxes{i}(numBlobs+1:end,:); % keep only the merged boxes
85 |     priority{i} = (size(boxes{i}, 1):-1:1)';
86 | end
87 | 
88 | % Also save the initial boxes. The slot is addressed explicitly instead of
89 | % through the loop variable, which is empty when functionHandles is empty.
90 | last = numStrategies + 1;
91 | boxes{last} = blobBoxes;
92 | priority{last} = ones(size(blobBoxes, 1), 1) * (size(boxes{1}, 1)+1);
93 | 
94 | % Concatenate boxes and priorities resulting from the different merging
95 | % strategies
96 | boxes = cat(1, boxes{:});
97 | priority = cat(1, priority{:});
98 | [priority ids] = sort(priority, 'ascend');
99 | boxes = boxes(ids,:);


--------------------------------------------------------------------------------
/help/train/README.md:
--------------------------------------------------------------------------------
 1 | # Train Fast-RCNN on Another Dataset
 2 | 
 3 | We will illustrate how to train Fast-RCNN on another dataset in the following steps, and we will take **INRIA Person** as the example dataset.
 4 | 
 5 | ### Format Your Dataset
 6 | 
 7 | First, the dataset must be organized in the required format.
 8 | ```
 9 | INRIA
10 | |-- data
11 |     |-- Annotations
12 |          |-- *.txt (Annotation files)
13 |     |-- Images
14 |          |-- *.png (Image files)
15 |     |-- ImageSets
16 |          |-- train.txt
17 | ```
18 | 
19 | The `train.txt` file contains the names (without extensions) of all image files that will be used for training. For example, here are a few lines from `train.txt`.
20 | 
21 | ```
22 | crop_000011
23 | crop_000603
24 | crop_000606
25 | crop_000607
26 | crop_000608
27 | ```
28 | 
29 | ### Construct IMDB
30 | 
31 | You need to add a new python file describing the dataset we will use to the directory `$FRCNN_ROOT/lib/datasets`, see `inria.py`. Then the following steps should be taken.
32 |   - Modify `self._classes` in the constructor function to fit your dataset.
33 |   - Be careful with the extensions of your image files. See `image_path_from_index` in `inria.py`.
34 |   - Write the function for parsing annotations. See `_load_inria_annotation` in `inria.py`.
35 |   - Do not forget to add the necessary `import` statements to your own python file and to the other python files in the same directory.
36 | 
37 | Then you should modify the `factory.py` in the same directory. For example, to add **INRIA Person**, we should add
38 | 
39 | ```sh
40 | inria_devkit_path = '/home/szy/INRIA'
41 | for split in ['train', 'test']:
42 |     name = '{}_{}'.format('inria', split)
43 |     __sets[name] = (lambda split=split: datasets.inria(split, inria_devkit_path))
44 | ```
45 | 
46 | See the example `inria.py` at https://github.com/EdisonResearch/fast-rcnn/blob/master/lib/datasets/inria.py.
47 | 
48 | ### Run Selective Search 
49 | 
50 | Modify the MATLAB file `selective_search.m` in the directory `$FRCNN_ROOT/selective_search`. If you do not have that directory, you can find it at https://github.com/EdisonResearch/fast-rcnn/tree/master/selective_search. 
51 | 
52 | ```sh
53 | image_db = '/home/szy/INRIA/';
54 | image_filenames = textread([image_db '/data/ImageSets/train.txt'], '%s', 'delimiter', '\n');
55 | for i = 1:length(image_filenames)
56 |     if exist([image_db '/data/Images/' image_filenames{i} '.jpg'], 'file') == 2
57 | 	image_filenames{i} = [image_db '/data/Images/' image_filenames{i} '.jpg'];
58 |     end
59 |     if exist([image_db '/data/Images/' image_filenames{i} '.png'], 'file') == 2
60 |         image_filenames{i} = [image_db '/data/Images/' image_filenames{i} '.png'];
61 |     end
62 | end
63 | selective_search_rcnn(image_filenames, 'train.mat');
64 | ```
65 | 
66 | Run this matlab file and then move the output `train.mat` to the root directory of your dataset, here it should be `/home/szy/INRIA/`. As it is a time consuming process, please be patient.
67 | 
68 | ### Modify Prototxt
69 | 
70 | For example, if you want to use the model **VGG_CNN_M_1024**, then you should modify `train.prototxt` in `$FRCNN_ROOT/models/VGG_CNN_M_1024`; the changes mainly concern the number of classes you want to train. Let's assume that the number of classes is `C` (do not forget to count the `background` class). Then you should 
71 |   - Modify `num_classes` to `C`;
72 |   - Modify `num_output` in the `cls_score` layer to `C`
73 |   - Modify `num_output` in the `bbox_pred` layer to `4 * C`
74 | 
75 | See https://github.com/rbgirshick/fast-rcnn/issues/11 for more details. 
76 | 
77 | ### Train!
78 | 
79 | In the directory **$FRCNN_ROOT**, run the following command in the shell.
80 | 
81 | ```sh
82 | ./tools/train_net.py --gpu 0 --solver models/VGG_CNN_M_1024/solver.prototxt \
83 |     --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel --imdb inria_train
84 | ```
85 | 
86 | Be careful with the **imdb** argument as it specifies the dataset you will train on. Then just drink a cup of coffee and take a break to wait for the training.
87 | 
88 | ### References
89 | 
90 | [Fast-RCNN] https://github.com/rbgirshick/fast-rcnn
91 | 
92 | ### Contact
93 | 
94 | Feel free to contact me at <zeyuanxy@gmail.com>.
95 | 


--------------------------------------------------------------------------------
/tools/compress_net.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # --------------------------------------------------------
  4 | # Fast R-CNN
  5 | # Copyright (c) 2015 Microsoft
  6 | # Licensed under The MIT License [see LICENSE for details]
  7 | # Written by Ross Girshick
  8 | # --------------------------------------------------------
  9 | 
 10 | """Compress a Fast R-CNN network using truncated SVD."""
 11 | 
 12 | import _init_paths
 13 | import caffe
 14 | import argparse
 15 | import numpy as np
 16 | import os, sys
 17 | 
 18 | def parse_args():
 19 |     """Parse input arguments."""
 20 |     parser = argparse.ArgumentParser(description='Compress a Fast R-CNN network')
 21 |     parser.add_argument('--def', dest='prototxt',
 22 |                         help='prototxt file defining the uncompressed network',
 23 |                         default=None, type=str)
 24 |     parser.add_argument('--def-svd', dest='prototxt_svd',
 25 |                         help='prototxt file defining the SVD compressed network',
 26 |                         default=None, type=str)
 27 |     parser.add_argument('--net', dest='caffemodel',
 28 |                         help='model to compress',
 29 |                         default=None, type=str)
 30 | 
 31 |     if len(sys.argv) == 1:
 32 |         parser.print_help()
 33 |         sys.exit(1)
 34 | 
 35 |     args = parser.parse_args()
 36 |     return args
 37 | 
 38 | def compress_weights(W, l):
 39 |     """Compress the weight matrix W of an inner product (fully connected) layer
 40 |     using truncated SVD.
 41 | 
 42 |     Parameters:
 43 |     W: N x M weights matrix
 44 |     l: number of singular values to retain
 45 | 
 46 |     Returns:
 47 |     Ul, L: matrices such that W \approx Ul*L
 48 |     """
 49 | 
 50 |     # numpy doesn't seem to have a fast truncated SVD algorithm...
 51 |     # this could be faster
 52 |     U, s, V = np.linalg.svd(W, full_matrices=False)
 53 | 
 54 |     Ul = U[:, :l]
 55 |     sl = s[:l]
 56 |     Vl = V[:l, :]
 57 | 
 58 |     L = np.dot(np.diag(sl), Vl)
 59 |     return Ul, L
 60 | 
 61 | def main():
 62 |     args = parse_args()
 63 | 
 64 |     # prototxt = 'models/VGG16/test.prototxt'
 65 |     # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel'
 66 |     net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
 67 | 
 68 |     # prototxt_svd = 'models/VGG16/svd/test_fc6_fc7.prototxt'
 69 |     # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel'
 70 |     net_svd = caffe.Net(args.prototxt_svd, args.caffemodel, caffe.TEST)
 71 | 
 72 |     print('Uncompressed network {} : {}'.format(args.prototxt, args.caffemodel))
 73 |     print('Compressed network prototxt {}'.format(args.prototxt_svd))
 74 | 
 75 |     out = os.path.splitext(os.path.basename(args.caffemodel))[0] + '_svd'
 76 |     out_dir = os.path.dirname(args.caffemodel)
 77 | 
 78 |     # Compress fc6
 79 |     if net_svd.params.has_key('fc6_L'):
 80 |         l_fc6 = net_svd.params['fc6_L'][0].data.shape[0]
 81 |         print('  fc6_L bottleneck size: {}'.format(l_fc6))
 82 | 
 83 |         # uncompressed weights and biases
 84 |         W_fc6 = net.params['fc6'][0].data
 85 |         B_fc6 = net.params['fc6'][1].data
 86 | 
 87 |         print('  compressing fc6...')
 88 |         Ul_fc6, L_fc6 = compress_weights(W_fc6, l_fc6)
 89 | 
 90 |         assert(len(net_svd.params['fc6_L']) == 1)
 91 | 
 92 |         # install compressed matrix factors (and original biases)
 93 |         net_svd.params['fc6_L'][0].data[...] = L_fc6
 94 | 
 95 |         net_svd.params['fc6_U'][0].data[...] = Ul_fc6
 96 |         net_svd.params['fc6_U'][1].data[...] = B_fc6
 97 | 
 98 |         out += '_fc6_{}'.format(l_fc6)
 99 | 
100 |     # Compress fc7
101 |     if net_svd.params.has_key('fc7_L'):
102 |         l_fc7 = net_svd.params['fc7_L'][0].data.shape[0]
103 |         print '  fc7_L bottleneck size: {}'.format(l_fc7)
104 | 
105 |         W_fc7 = net.params['fc7'][0].data
106 |         B_fc7 = net.params['fc7'][1].data
107 | 
108 |         print('  compressing fc7...')
109 |         Ul_fc7, L_fc7 = compress_weights(W_fc7, l_fc7)
110 | 
111 |         assert(len(net_svd.params['fc7_L']) == 1)
112 | 
113 |         net_svd.params['fc7_L'][0].data[...] = L_fc7
114 | 
115 |         net_svd.params['fc7_U'][0].data[...] = Ul_fc7
116 |         net_svd.params['fc7_U'][1].data[...] = B_fc7
117 | 
118 |         out += '_fc7_{}'.format(l_fc7)
119 | 
120 |     filename = '{}/{}.caffemodel'.format(out_dir, out)
121 |     net_svd.save(filename)
122 |     print 'Wrote svd model to: {:s}'.format(filename)
123 | 
124 | if __name__ == '__main__':
125 |     main()
126 | 


--------------------------------------------------------------------------------
/selective_search/demoPascal2007.m:
--------------------------------------------------------------------------------
  1 | % This demo shows how to use the software described in our IJCV paper: 
  2 | %   Selective Search for Object Recognition,
  3 | %   J.R.R. Uijlings, K.E.A. van de Sande, T. Gevers, A.W.M. Smeulders, IJCV 2013
  4 | %%
  5 | addpath('Dependencies');
  6 | 
  7 | fprintf('Demo of how to run the code for:\n');
  8 | fprintf('   J. Uijlings, K. van de Sande, T. Gevers, A. Smeulders\n');
  9 | fprintf('   Segmentation as Selective Search for Object Recognition\n');
 10 | fprintf('   IJCV 2013\n\n');
 11 | 
 12 | % Compile anisotropic gaussian filter
 13 | if(~exist('anigauss'))
 14 |     fprintf('Compiling the anisotropic gauss filtering of:\n');
 15 |     fprintf('   J. Geusebroek, A. Smeulders, and J. van de Weijer\n');
 16 |     fprintf('   Fast anisotropic gauss filtering\n');
 17 |     fprintf('   IEEE Transactions on Image Processing, 2003\n');
 18 |     fprintf('Source code/Project page:\n');
 19 |     fprintf('   http://staff.science.uva.nl/~mark/downloads.html#anigauss\n\n');
 20 |     mex Dependencies/anigaussm/anigauss_mex.c Dependencies/anigaussm/anigauss.c -output anigauss
 21 | end
 22 | 
 23 | if(~exist('mexCountWordsIndex'))
 24 |     mex Dependencies/mexCountWordsIndex.cpp
 25 | end
 26 | 
 27 | % Compile the code of Felzenszwalb and Huttenlocher, IJCV 2004.
 28 | if(~exist('mexFelzenSegmentIndex'))
 29 |     fprintf('Compiling the segmentation algorithm of:\n');
 30 |     fprintf('   P. Felzenszwalb and D. Huttenlocher\n');
 31 |     fprintf('   Efficient Graph-Based Image Segmentation\n');
 32 |     fprintf('   International Journal of Computer Vision, 2004\n');
 33 |     fprintf('Source code/Project page:\n');
 34 |     fprintf('   http://www.cs.brown.edu/~pff/segment/\n');
 35 |     fprintf('Note: A small Matlab wrapper was made. See demo.m for usage\n\n');
 36 | %     fprintf('   
 37 |     mex Dependencies/FelzenSegment/mexFelzenSegmentIndex.cpp -output mexFelzenSegmentIndex;
 38 | end
 39 | 
 40 | %%
 41 | % Parameters. Note that this controls the number of hierarchical
 42 | % segmentations which are combined.
 43 | colorTypes = {'Hsv', 'Lab', 'RGI', 'H', 'Intensity'};
 44 | 
 45 | % Here you specify which similarity functions to use in merging
 46 | simFunctionHandles = {@SSSimColourTextureSizeFillOrig, @SSSimTextureSizeFill, @SSSimBoxFillOrig, @SSSimSize};
 47 | 
 48 | % Thresholds for the Felzenszwalb and Huttenlocher segmentation algorithm.
 49 | % Note that by default, we set minSize = k, and sigma = 0.8.
 50 | ks = [50 100 150 300]; % controls size of segments of initial segmentation. 
 51 | sigma = 0.8;
 52 | 
 53 | % After segmentation, filter out boxes which have a width/height smaller
 54 | % than minBoxWidth (default = 20 pixels).
 55 | minBoxWidth = 20;
 56 | 
 57 | % Comment the following three lines for the 'quality' version
 58 | % colorTypes = colorTypes(1:2); % 'Fast' uses HSV and Lab
 59 | % simFunctionHandles = simFunctionHandles(1:2); % Two different merging strategies
 60 | % ks = ks(1:2);
 61 | 
 62 | % Test the boxes
 63 | load('GroundTruthVOC2007test.mat'); % Load ground truth boxes and images and image names
 64 | VOCImgPath = '/media/Data/Databases/VOCdevkit/VOC2007/JPEGImages/%s.jpg'
 65 | fprintf('After box extraction, boxes smaller than %d pixels will be removed\n', minBoxWidth);
 66 | fprintf('Obtaining boxes for Pascal 2007 test set:\n');
 67 | totalTime = 0;
 68 | for i=1:length(testIms)
 69 |     fprintf('%d ', i);
 70 |     
 71 |     % VOCopts.img
 72 |     im = imread(sprintf(VOCImgPath, testIms{i}));
 73 |     idx = 1;
 74 |     for j=1:length(ks)
 75 |         k = ks(j); % Segmentation threshold k
 76 |         minSize = k; % We set minSize = k
 77 |         for n = 1:length(colorTypes)
 78 |             colorType = colorTypes{n};
 79 |             tic;
 80 |             [boxesT{idx} blobIndIm blobBoxes hierarchy priorityT{idx}] = Image2HierarchicalGrouping(im, sigma, k, minSize, colorType, simFunctionHandles);
 81 |             totalTime = totalTime + toc;
 82 |             idx = idx + 1;
 83 |         end
 84 |     end
 85 |     boxes{i} = cat(1, boxesT{:}); % Concatenate boxes from all hierarchies
 86 |     priority = cat(1, priorityT{:}); % Concatenate priorities
 87 |     
 88 |     % Do pseudo random sorting as in paper
 89 |     priority = priority .* rand(size(priority));
 90 |     [priority sortIds] = sort(priority, 'ascend');
 91 |     boxes{i} = boxes{i}(sortIds,:);
 92 | end
 93 | fprintf('\n');
 94 | 
 95 | %%
 96 | tic
 97 | for i=1:length(boxes)
 98 |     boxes{i} = FilterBoxesWidth(boxes{i}, minBoxWidth);
 99 |     boxes{i} = BoxRemoveDuplicates(boxes{i});
100 | end
101 | totalTime = totalTime + toc;
102 | 
103 | fprintf('Time per image: %.2f\nNow evaluating the boxes on Pascal 2007...\n', totalTime ./ length(testIms));
104 | 
105 | %%
106 | [boxAbo boxMabo boScores avgNumBoxes] = BoxAverageBestOverlap(gtBoxes, gtImIds, boxes);
107 | 
108 | fprintf('Mean Average Best Overlap for the box-based locations: %.3f\n', boxMabo);


--------------------------------------------------------------------------------
/matlab/fast_rcnn_im_detect.m:
--------------------------------------------------------------------------------
  1 | % --------------------------------------------------------
  2 | % Fast R-CNN
  3 | % Copyright (c) 2015 Microsoft
  4 | % Licensed under The MIT License [see LICENSE for details]
  5 | % Written by Ross Girshick
  6 | % --------------------------------------------------------
  7 | 
  8 | function dets = fast_rcnn_im_detect(model, im, boxes)
  9 | % Perform detection a Fast R-CNN network given an image and
 10 | % object proposals.
 11 | 
 12 | if model.init_key ~= caffe('get_init_key')
 13 |   error('You probably need call fast_rcnn_load_net() first.');
 14 | end
 15 | 
 16 | [im_batch, scales] = image_pyramid(im, model.pixel_means, false);
 17 | 
 18 | [feat_pyra_boxes, feat_pyra_levels] = project_im_rois(boxes, scales);
 19 | rois = cat(2, feat_pyra_levels, feat_pyra_boxes);
 20 | % Adjust to 0-based indexing and make roi info the fastest dimension
 21 | rois = rois - 1;
 22 | rois = permute(rois, [2 1]);
 23 | 
 24 | input_blobs = cell(2, 1);
 25 | input_blobs{1} = im_batch;
 26 | input_blobs{2} = rois;
 27 | th = tic();
 28 | blobs_out = caffe('forward', input_blobs);
 29 | fprintf('fwd: %.3fs\n', toc(th));
 30 | 
 31 | bbox_deltas = squeeze(blobs_out{1})';
 32 | probs = squeeze(blobs_out{2})';
 33 | 
 34 | num_classes = size(probs, 2);
 35 | dets = cell(num_classes - 1, 1);
 36 | NMS_THRESH = 0.3;
 37 | % class index 1 is __background__, so we don't return it
 38 | for j = 2:num_classes
 39 |   cls_probs = probs(:, j);
 40 |   cls_deltas = bbox_deltas(:, (1 + (j - 1) * 4):(j * 4));
 41 |   pred_boxes = bbox_pred(boxes, cls_deltas);
 42 |   cls_dets = [pred_boxes cls_probs];
 43 |   keep = nms(cls_dets, NMS_THRESH);
 44 |   cls_dets = cls_dets(keep, :);
 45 |   dets{j - 1} = cls_dets;
 46 | end
 47 | 
 48 | % ------------------------------------------------------------------------
 49 | function [batch, scales] = image_pyramid(im, pixel_means, multiscale)
 50 | % ------------------------------------------------------------------------
 51 | % Construct an image pyramid that's ready for feeding directly into caffe
 52 | if ~multiscale
 53 |   SCALES = [600];
 54 |   MAX_SIZE = 1000;
 55 | else
 56 |   SCALES = [1200 864 688 576 480];
 57 |   MAX_SIZE = 2000;
 58 | end
 59 | num_levels = length(SCALES);
 60 | 
 61 | im = single(im);
 62 | % Convert to BGR
 63 | im = im(:, :, [3 2 1]);
 64 | % Subtract mean (mean of the image mean--one mean per channel)
 65 | im = bsxfun(@minus, im, pixel_means);
 66 | 
 67 | im_orig = im;
 68 | im_size = min([size(im_orig, 1) size(im_orig, 2)]);
 69 | im_size_big = max([size(im_orig, 1) size(im_orig, 2)]);
 70 | scale_factors = SCALES ./ im_size;
 71 | 
 72 | max_size = [0 0 0];
 73 | for i = 1:num_levels
 74 |   if round(im_size_big * scale_factors(i)) > MAX_SIZE
 75 |     scale_factors(i) = MAX_SIZE / im_size_big;
 76 |   end
 77 |   ims{i} = imresize(im_orig, scale_factors(i), 'bilinear', ...
 78 |                     'antialiasing', false);
 79 |   max_size = max(cat(1, max_size, size(ims{i})), [], 1);
 80 | end
 81 | 
 82 | batch = zeros(max_size(2), max_size(1), 3, num_levels, 'single');
 83 | for i = 1:num_levels
 84 |   im = ims{i};
 85 |   im_sz = size(im);
 86 |   im_sz = im_sz(1:2);
 87 |   % Make width the fastest dimension (for caffe)
 88 |   im = permute(im, [2 1 3]);
 89 |   batch(1:im_sz(2), 1:im_sz(1), :, i) = im;
 90 | end
 91 | scales = scale_factors';
 92 | 
 93 | % ------------------------------------------------------------------------
 94 | function [boxes, levels] = project_im_rois(boxes, scales)
 95 | % ------------------------------------------------------------------------
 96 | widths = boxes(:,3) - boxes(:,1) + 1;
 97 | heights = boxes(:,4) - boxes(:,2) + 1;
 98 | 
 99 | areas = widths .* heights;
100 | scaled_areas = bsxfun(@times, areas, (scales.^2)');
101 | diff_areas = abs(scaled_areas - (224 * 224));
102 | [~, levels] = min(diff_areas, [], 2);
103 | 
104 | boxes = boxes - 1;
105 | boxes = bsxfun(@times, boxes, scales(levels));
106 | boxes = boxes + 1;
107 | 
108 | % ------------------------------------------------------------------------
function pred_boxes = bbox_pred(boxes, bbox_deltas)
% ------------------------------------------------------------------------
% Apply regression deltas to source boxes.
%
%   boxes        N x 4, columns [x1 y1 x2 y2]
%   bbox_deltas  N x 4, columns [dx dy dw dh]: centre shifts relative to
%                the box size and log-space size factors
%   pred_boxes   N x 4 predicted [x1 y1 x2 y2]; [] when boxes is empty
pred_boxes = [];
if isempty(boxes)
  return;
end

% Source box geometry
src_w = boxes(:, 3) - boxes(:, 1) + eps;
src_h = boxes(:, 4) - boxes(:, 2) + eps;
src_ctr_x = boxes(:, 1) + 0.5 * src_w;
src_ctr_y = boxes(:, 2) + 0.5 * src_h;

% Predicted centre and half extents
ctr_x = (bbox_deltas(:, 1) .* src_w) + src_ctr_x;
ctr_y = (bbox_deltas(:, 2) .* src_h) + src_ctr_y;
half_w = 0.5 * (exp(bbox_deltas(:, 3)) .* src_w);
half_h = 0.5 * (exp(bbox_deltas(:, 4)) .* src_h);

pred_boxes = [ctr_x - half_w, ctr_y - half_h, ...
              ctr_x + half_w, ctr_y + half_h];
135 | 


--------------------------------------------------------------------------------
/lib/fast_rcnn/train.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | """Train a Fast R-CNN network."""
  9 | 
 10 | import caffe
 11 | from fast_rcnn.config import cfg
 12 | import roi_data_layer.roidb as rdl_roidb
 13 | from utils.timer import Timer
 14 | import numpy as np
 15 | import os
 16 | 
 17 | from caffe.proto import caffe_pb2
 18 | import google.protobuf as pb2
 19 | 
 20 | class SolverWrapper(object):
 21 |     """A simple wrapper around Caffe's solver.
 22 |     This wrapper gives us control over he snapshotting process, which we
 23 |     use to unnormalize the learned bounding-box regression weights.
 24 |     """
 25 | 
 26 |     def __init__(self, solver_prototxt, roidb, output_dir,
 27 |                  pretrained_model=None):
 28 |         """Initialize the SolverWrapper."""
 29 |         self.output_dir = output_dir
 30 | 
 31 |         print 'Computing bounding-box regression targets...'
 32 |         self.bbox_means, self.bbox_stds = \
 33 |                 rdl_roidb.add_bbox_regression_targets(roidb)
 34 |         print 'done'
 35 | 
 36 |         self.solver = caffe.SGDSolver(solver_prototxt)
 37 |         if pretrained_model is not None:
 38 |             print ('Loading pretrained model '
 39 |                    'weights from {:s}').format(pretrained_model)
 40 |             self.solver.net.copy_from(pretrained_model)
 41 | 
 42 |         self.solver_param = caffe_pb2.SolverParameter()
 43 |         with open(solver_prototxt, 'rt') as f:
 44 |             pb2.text_format.Merge(f.read(), self.solver_param)
 45 | 
 46 |         self.solver.net.layers[0].set_roidb(roidb)
 47 | 
 48 |     def snapshot(self):
 49 |         """Take a snapshot of the network after unnormalizing the learned
 50 |         bounding-box regression weights. This enables easy use at test-time.
 51 |         """
 52 |         net = self.solver.net
 53 | 
 54 |         if cfg.TRAIN.BBOX_REG:
 55 |             # save original values
 56 |             orig_0 = net.params['bbox_pred'][0].data.copy()
 57 |             orig_1 = net.params['bbox_pred'][1].data.copy()
 58 | 
 59 |             # scale and shift with bbox reg unnormalization; then save snapshot
 60 |             net.params['bbox_pred'][0].data[...] = \
 61 |                     (net.params['bbox_pred'][0].data *
 62 |                      self.bbox_stds[:, np.newaxis])
 63 |             net.params['bbox_pred'][1].data[...] = \
 64 |                     (net.params['bbox_pred'][1].data *
 65 |                      self.bbox_stds + self.bbox_means)
 66 | 
 67 |         if not os.path.exists(self.output_dir):
 68 |             os.makedirs(self.output_dir)
 69 | 
 70 |         infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
 71 |                  if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
 72 |         filename = (self.solver_param.snapshot_prefix + infix +
 73 |                     '_iter_{:d}'.format(self.solver.iter) + '.caffemodel')
 74 |         filename = os.path.join(self.output_dir, filename)
 75 | 
 76 |         net.save(str(filename))
 77 |         print 'Wrote snapshot to: {:s}'.format(filename)
 78 | 
 79 |         if cfg.TRAIN.BBOX_REG:
 80 |             # restore net to original state
 81 |             net.params['bbox_pred'][0].data[...] = orig_0
 82 |             net.params['bbox_pred'][1].data[...] = orig_1
 83 | 
 84 |     def train_model(self, max_iters):
 85 |         """Network training loop."""
 86 |         last_snapshot_iter = -1
 87 |         timer = Timer()
 88 |         while self.solver.iter < max_iters:
 89 |             # Make one SGD update
 90 |             timer.tic()
 91 |             self.solver.step(1)
 92 |             timer.toc()
 93 |             if self.solver.iter % (10 * self.solver_param.display) == 0:
 94 |                 print 'speed: {:.3f}s / iter'.format(timer.average_time)
 95 | 
 96 |             if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
 97 |                 last_snapshot_iter = self.solver.iter
 98 |                 self.snapshot()
 99 | 
100 |         if last_snapshot_iter != self.solver.iter:
101 |             self.snapshot()
102 | 
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    When cfg.TRAIN.USE_FLIPPED is set, horizontally-flipped examples are
    appended to imdb first. The roidb is then prepared through
    rdl_roidb.prepare_roidb and imdb.roidb is returned.
    """
    # print() with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement, which is a SyntaxError on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
115 | 
def train_net(solver_prototxt, roidb, output_dir,
              pretrained_model=None, max_iters=40000):
    """Train a Fast R-CNN network.

    Builds a SolverWrapper from the arguments and runs its training loop
    for at most max_iters iterations.
    """
    sw = SolverWrapper(solver_prototxt, roidb, output_dir,
                       pretrained_model=pretrained_model)

    # print() with a single argument behaves the same on Python 2 and 3.
    print('Solving...')
    sw.train_model(max_iters)
    print('done solving')
125 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/anigaussm/anigauss.m:
--------------------------------------------------------------------------------
  1 | % anigauss - Recursive anisotropic Gauss filtering
  2 | % Usage:
  3 | %   out = anigauss(in, sigma_v, sigma_u, phi,
  4 | %     derivative_order_v, derivative_order_u);
  5 | % 
  6 | %   v-axis = short axis
  7 | %   u-axis = long axis
  8 | %   phi = orientation angle in degrees
  9 | % 
  10 | %   parameters sigma_u, phi, and derivative_order_{v,u} are optional.
 11 | %   sigma_u defaults to the value of sigma_v (isotropic filtering),
 12 | %   phi defaults to zero degrees,
 13 | %   derivative orders default to 0 (no differentiation, only smooth data).
 14 | %
 15 | %   Note that for isotropic filtering a slightly faster algorithm is used than
 16 | %   for anisotropic filtering. Furthermore, execution time depends on the order
  17 | %   of differentiation. Note that the execution time is independent of the
 18 | %   values for sigma.
 19 | %
 20 | % Examples:
 21 | %
 22 | %   isotropic filtering:
 23 | %   a=zeros(512,512);
 24 | %   a(256,256)=1; 
 25 | %   tic;c=anigauss(a,10);toc
 26 | %   elapsed_time =
 27 | %      0.0500
 28 | %
 29 | %   anisotropic filtering:
 30 | %   a=zeros(512,512);
 31 | %   a(256,256)=1; 
 32 | %   tic;c=anigauss(a,10,3,30);toc
 33 | %   elapsed_time =
 34 | %      0.0600
 35 | %
 36 | % Usage:
 37 | %
 38 | %   isotropic data smoothing:
 39 | %     out = anigauss(in, 3.0);
 40 | %
 41 | %   isotropic data differentiation along y-axis:
 42 | %     out = anigauss(in, 3.0, 3.0, 0.0, 0, 1);
 43 | %
 44 | %   anisotropic data smoothing:
 45 | %     out = anigauss(in, 3.0, 7.0, 30.0);
 46 | % 
 47 | %   anisotropic edge detection:
 48 | %     out = anigauss(in, 3.0, 7.0, 30.0, 1, 0);
 49 | % 
 50 | %   anisotropic line detection:
 51 | %     out = anigauss(in, 3.0, 7.0, 30.0, 2, 0);
 52 | % 
 53 | %
 54 | %
 55 | % Copyright University of Amsterdam, 2002-2004. All rights reserved.
 56 | % 
 57 | % Contact person:
 58 | % Jan-Mark Geusebroek (mark@science.uva.nl, http://www.science.uva.nl/~mark)
 59 | % Intelligent Systems Lab Amsterdam
 60 | % Informatics Institute, Faculty of Science, University of Amsterdam
 61 | % Kruislaan 403, 1098 SJ Amsterdam, The Netherlands.
 62 | % 
 63 | % 
 64 | % This software is being made available for individual research use only.
 65 | % Any commercial use or redistribution of this software requires a license from
 66 | % the University of Amsterdam.
 67 | % 
 68 | % You may use this work subject to the following conditions:
 69 | % 
 70 | % 1. This work is provided "as is" by the copyright holder, with
 71 | % absolutely no warranties of correctness, fitness, intellectual property
 72 | % ownership, or anything else whatsoever.  You use the work
 73 | % entirely at your own risk.  The copyright holder will not be liable for
 74 | % any legal damages whatsoever connected with the use of this work.
 75 | % 
 76 | % 2. The copyright holder retain all copyright to the work. All copies of
 77 | % the work and all works derived from it must contain (1) this copyright
 78 | % notice, and (2) additional notices describing the content, dates and
 79 | % copyright holder of modifications or additions made to the work, if
 80 | % any, including distribution and use conditions and intellectual property
 81 | % claims.  Derived works must be clearly distinguished from the original
 82 | % work, both by name and by the prominent inclusion of explicit
 83 | % descriptions of overlaps and differences.
 84 | % 
 85 | % 3. The names and trademarks of the copyright holder may not be used in
 86 | % advertising or publicity related to this work without specific prior
 87 | % written permission. 
 88 | % 
 89 | % 4. In return for the free use of this work, you are requested, but not
 90 | % legally required, to do the following:
 91 | % 
 92 | % - If you become aware of factors that may significantly affect other
 93 | %   users of the work, for example major bugs or
 94 | %   deficiencies or possible intellectual property issues, you are
 95 | %   requested to report them to the copyright holder, if possible
 96 | %   including redistributable fixes or workarounds.
 97 | % 
 98 | % - If you use the work in scientific research or as part of a larger
 99 | %   software system, you are requested to cite the use in any related
100 | %   publications or technical documentation. The work is based upon:
101 | % 
102 | %     J. M. Geusebroek, A. W. M. Smeulders, and J. van de Weijer.
103 | %     Fast anisotropic gauss filtering. IEEE Trans. Image Processing,
104 | %     vol. 12, no. 8, pp. 938-943, 2003.
105 | %
106 | %   related work:
107 | %  
108 | %     I.T. Young and L.J. van Vliet. Recursive implementation
109 | %     of the Gaussian filter. Signal Processing, vol. 44, pp. 139-151, 1995.
110 | %  
111 | %     B. Triggs and M. Sdika. Boundary conditions for Young-van Vliet
112 | %     recursive filtering. IEEE Trans. Signal Processing,
113 | %     vol. 54, pp. 2365-2367, 2006.
114 | %  
115 | % This copyright notice must be retained with all copies of the software,
116 | % including any modified or derived versions.
117 | 


--------------------------------------------------------------------------------
/selective_search/Dependencies/FelzenSegment/segment-image.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright (C) 2006 Pedro Felzenszwalb
  3 | 
  4 | This program is free software; you can redistribute it and/or modify
  5 | it under the terms of the GNU General Public License as published by
  6 | the Free Software Foundation; either version 2 of the License, or
  7 | (at your option) any later version.
  8 | 
  9 | This program is distributed in the hope that it will be useful,
 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | GNU General Public License for more details.
 13 | 
 14 | You should have received a copy of the GNU General Public License
 15 | along with this program; if not, write to the Free Software
 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 17 | */
 18 | 
 19 | #ifndef SEGMENT_IMAGE
 20 | #define SEGMENT_IMAGE
 21 | 
 22 | #include <cstdlib>
 23 | #include "image.h"
 24 | #include "misc.h"
 25 | #include "filter.h"
 26 | #include "segment-graph.h"
 27 | 
 28 | // random color
 29 | rgb random_rgb(){ 
 30 |   rgb c;
 31 |   double r;
 32 |   
 33 |   c.r = (uchar)rand();
 34 |   c.g = (uchar)rand();
 35 |   c.b = (uchar)rand();
 36 | 
 37 |   return c;
 38 | }
 39 | 
 40 | // dissimilarity measure between pixels
 41 | static inline float diff(image<float> *r, image<float> *g, image<float> *b,
 42 | 			 int x1, int y1, int x2, int y2) {
 43 |   return sqrt(square(imRef(r, x1, y1)-imRef(r, x2, y2)) +
 44 | 	      square(imRef(g, x1, y1)-imRef(g, x2, y2)) +
 45 | 	      square(imRef(b, x1, y1)-imRef(b, x2, y2)));
 46 | }
 47 | 
 48 | /*
 49 |  * Segment an image
 50 |  *
 51 |  * Returns a color image representing the segmentation.
 52 |  *
 53 |  * im: image to segment.
 54 |  * sigma: to smooth the image.
 55 |  * c: constant for treshold function.
 56 |  * min_size: minimum component size (enforced by post-processing stage).
 57 |  * num_ccs: number of connected components in the segmentation.
 58 |  */
 59 | image<rgb> *segment_image(image<rgb> *im, float sigma, float c, int min_size,
 60 | 			  int *num_ccs) {
 61 |   int width = im->width();
 62 |   int height = im->height();
 63 | 
 64 |   image<float> *r = new image<float>(width, height);
 65 |   image<float> *g = new image<float>(width, height);
 66 |   image<float> *b = new image<float>(width, height);
 67 | 
 68 |   // smooth each color channel  
 69 |   for (int y = 0; y < height; y++) {
 70 |     for (int x = 0; x < width; x++) {
 71 |       imRef(r, x, y) = imRef(im, x, y).r;
 72 |       imRef(g, x, y) = imRef(im, x, y).g;
 73 |       imRef(b, x, y) = imRef(im, x, y).b;
 74 |     }
 75 |   }
 76 |   image<float> *smooth_r = smooth(r, sigma);
 77 |   image<float> *smooth_g = smooth(g, sigma);
 78 |   image<float> *smooth_b = smooth(b, sigma);
 79 |   delete r;
 80 |   delete g;
 81 |   delete b;
 82 |  
 83 |   // build graph
 84 |   edge *edges = new edge[width*height*4];
 85 |   int num = 0;
 86 |   for (int y = 0; y < height; y++) {
 87 |     for (int x = 0; x < width; x++) {
 88 |       if (x < width-1) {
 89 | 	edges[num].a = y * width + x;
 90 | 	edges[num].b = y * width + (x+1);
 91 | 	edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y);
 92 | 	num++;
 93 |       }
 94 | 
 95 |       if (y < height-1) {
 96 | 	edges[num].a = y * width + x;
 97 | 	edges[num].b = (y+1) * width + x;
 98 | 	edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x, y+1);
 99 | 	num++;
100 |       }
101 | 
102 |       if ((x < width-1) && (y < height-1)) {
103 | 	edges[num].a = y * width + x;
104 | 	edges[num].b = (y+1) * width + (x+1);
105 | 	edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y+1);
106 | 	num++;
107 |       }
108 | 
109 |       if ((x < width-1) && (y > 0)) {
110 | 	edges[num].a = y * width + x;
111 | 	edges[num].b = (y-1) * width + (x+1);
112 | 	edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y-1);
113 | 	num++;
114 |       }
115 |     }
116 |   }
117 |   delete smooth_r;
118 |   delete smooth_g;
119 |   delete smooth_b;
120 | 
121 |   // segment
122 |   universe *u = segment_graph(width*height, num, edges, c);
123 |   
124 |   // post process small components
125 |   for (int i = 0; i < num; i++) {
126 |     int a = u->find(edges[i].a);
127 |     int b = u->find(edges[i].b);
128 |     if ((a != b) && ((u->size(a) < min_size) || (u->size(b) < min_size)))
129 |       u->join(a, b);
130 |   }
131 |   delete [] edges;
132 |   *num_ccs = u->num_sets();
133 | 
134 |   image<rgb> *output = new image<rgb>(width, height);
135 | 
136 |   // pick random colors for each component
137 |   rgb *colors = new rgb[width*height];
138 |   for (int i = 0; i < width*height; i++)
139 |     colors[i] = random_rgb();
140 |   
141 |   for (int y = 0; y < height; y++) {
142 |     for (int x = 0; x < width; x++) {
143 |       int comp = u->find(y * width + x);
144 |       imRef(output, x, y) = colors[comp];
145 |     }
146 |   }  
147 | 
148 |   delete [] colors;  
149 |   delete u;
150 | 
151 |   return output;
152 | }
153 | 
154 | #endif
155 | 


--------------------------------------------------------------------------------
/models/VGG_CNN_M_1024/no_bbox_reg/test.prototxt:
--------------------------------------------------------------------------------
  1 | name: "VGG_CNN_M_1024"
  2 | input: "data"
  3 | input_shape {
  4 |   dim: 1
  5 |   dim: 3
  6 |   dim: 224
  7 |   dim: 224
  8 | }
  9 | input: "rois"
 10 | input_shape {
 11 |   dim: 1 # to be changed on-the-fly to num ROIs
 12 |   dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing
 13 | }
 14 | layer {
 15 |   name: "conv1"
 16 |   type: "Convolution"
 17 |   bottom: "data"
 18 |   top: "conv1"
 19 |   param {
 20 |     lr_mult: 0
 21 |     decay_mult: 0
 22 |   }
 23 |   param {
 24 |     lr_mult: 0
 25 |     decay_mult: 0
 26 |   }
 27 |   convolution_param {
 28 |     num_output: 96
 29 |     kernel_size: 7
 30 |     stride: 2
 31 |   }
 32 | }
 33 | layer {
 34 |   name: "relu1"
 35 |   type: "ReLU"
 36 |   bottom: "conv1"
 37 |   top: "conv1"
 38 | }
 39 | layer {
 40 |   name: "norm1"
 41 |   type: "LRN"
 42 |   bottom: "conv1"
 43 |   top: "norm1"
 44 |   lrn_param {
 45 |     local_size: 5
 46 |     alpha: 0.0005
 47 |     beta: 0.75
 48 |     k: 2
 49 |   }
 50 | }
 51 | layer {
 52 |   name: "pool1"
 53 |   type: "Pooling"
 54 |   bottom: "norm1"
 55 |   top: "pool1"
 56 |   pooling_param {
 57 |     pool: MAX
 58 |     kernel_size: 3
 59 |     stride: 2
 60 |   }
 61 | }
 62 | layer {
 63 |   name: "conv2"
 64 |   type: "Convolution"
 65 |   bottom: "pool1"
 66 |   top: "conv2"
 67 |   param {
 68 |     lr_mult: 1
 69 |     decay_mult: 1
 70 |   }
 71 |   param {
 72 |     lr_mult: 2
 73 |     decay_mult: 0
 74 |   }
 75 |   convolution_param {
 76 |     num_output: 256
 77 |     pad: 1
 78 |     kernel_size: 5
 79 |     stride: 2
 80 |   }
 81 | }
 82 | layer {
 83 |   name: "relu2"
 84 |   type: "ReLU"
 85 |   bottom: "conv2"
 86 |   top: "conv2"
 87 | }
 88 | layer {
 89 |   name: "norm2"
 90 |   type: "LRN"
 91 |   bottom: "conv2"
 92 |   top: "norm2"
 93 |   lrn_param {
 94 |     local_size: 5
 95 |     alpha: 0.0005
 96 |     beta: 0.75
 97 |     k: 2
 98 |   }
 99 | }
100 | layer {
101 |   name: "pool2"
102 |   type: "Pooling"
103 |   bottom: "norm2"
104 |   top: "pool2"
105 |   pooling_param {
106 |     pool: MAX
107 |     kernel_size: 3
108 |     stride: 2
109 |   }
110 | }
111 | layer {
112 |   name: "conv3"
113 |   type: "Convolution"
114 |   bottom: "pool2"
115 |   top: "conv3"
116 |   param {
117 |     lr_mult: 1
118 |     decay_mult: 1
119 |   }
120 |   param {
121 |     lr_mult: 2
122 |     decay_mult: 0
123 |   }
124 |   convolution_param {
125 |     num_output: 512
126 |     pad: 1
127 |     kernel_size: 3
128 |   }
129 | }
130 | layer {
131 |   name: "relu3"
132 |   type: "ReLU"
133 |   bottom: "conv3"
134 |   top: "conv3"
135 | }
136 | layer {
137 |   name: "conv4"
138 |   type: "Convolution"
139 |   bottom: "conv3"
140 |   top: "conv4"
141 |   param {
142 |     lr_mult: 1
143 |     decay_mult: 1
144 |   }
145 |   param {
146 |     lr_mult: 2
147 |     decay_mult: 0
148 |   }
149 |   convolution_param {
150 |     num_output: 512
151 |     pad: 1
152 |     kernel_size: 3
153 |   }
154 | }
155 | layer {
156 |   name: "relu4"
157 |   type: "ReLU"
158 |   bottom: "conv4"
159 |   top: "conv4"
160 | }
161 | layer {
162 |   name: "conv5"
163 |   type: "Convolution"
164 |   bottom: "conv4"
165 |   top: "conv5"
166 |   param {
167 |     lr_mult: 1
168 |     decay_mult: 1
169 |   }
170 |   param {
171 |     lr_mult: 2
172 |     decay_mult: 0
173 |   }
174 |   convolution_param {
175 |     num_output: 512
176 |     pad: 1
177 |     kernel_size: 3
178 |   }
179 | }
180 | layer {
181 |   name: "relu5"
182 |   type: "ReLU"
183 |   bottom: "conv5"
184 |   top: "conv5"
185 | }
186 | layer {
187 |   name: "roi_pool5"
188 |   type: "ROIPooling"
189 |   bottom: "conv5"
190 |   bottom: "rois"
191 |   top: "pool5"
192 |   roi_pooling_param {
193 |     pooled_w: 6
194 |     pooled_h: 6
195 |     spatial_scale: 0.0625 # 1/16
196 |   }
197 | }
198 | layer {
199 |   name: "fc6"
200 |   type: "InnerProduct"
201 |   bottom: "pool5"
202 |   top: "fc6"
203 |   param {
204 |     lr_mult: 1
205 |     decay_mult: 1
206 |   }
207 |   param {
208 |     lr_mult: 2
209 |     decay_mult: 0
210 |   }
211 |   inner_product_param {
212 |     num_output: 4096
213 |   }
214 | }
215 | layer {
216 |   name: "relu6"
217 |   type: "ReLU"
218 |   bottom: "fc6"
219 |   top: "fc6"
220 | }
221 | layer {
222 |   name: "drop6"
223 |   type: "Dropout"
224 |   bottom: "fc6"
225 |   top: "fc6"
226 |   dropout_param {
227 |     dropout_ratio: 0.5
228 |   }
229 | }
230 | layer {
231 |   name: "fc7"
232 |   type: "InnerProduct"
233 |   bottom: "fc6"
234 |   top: "fc7"
235 |   param {
236 |     lr_mult: 1
237 |     decay_mult: 1
238 |   }
239 |   param {
240 |     lr_mult: 2
241 |     decay_mult: 0
242 |   }
243 |   inner_product_param {
244 |     num_output: 1024
245 |   }
246 | }
247 | layer {
248 |   name: "relu7"
249 |   type: "ReLU"
250 |   bottom: "fc7"
251 |   top: "fc7"
252 | }
253 | layer {
254 |   name: "drop7"
255 |   type: "Dropout"
256 |   bottom: "fc7"
257 |   top: "fc7"
258 |   dropout_param {
259 |     dropout_ratio: 0.5
260 |   }
261 | }
262 | layer {
263 |   name: "cls_score"
264 |   type: "InnerProduct"
265 |   bottom: "fc7"
266 |   top: "cls_score"
267 |   param {
268 |     lr_mult: 1
269 |     decay_mult: 1
270 |   }
271 |   param {
272 |     lr_mult: 2
273 |     decay_mult: 0
274 |   }
275 |   inner_product_param {
276 |     num_output: 21
277 |     weight_filler {
278 |       type: "gaussian"
279 |       std: 0.01
280 |     }
281 |     bias_filler {
282 |       type: "constant"
283 |       value: 0
284 |     }
285 |   }
286 | }
287 | layer {
288 |   name: "cls_prob"
289 |   type: "Softmax"
290 |   bottom: "cls_score"
291 |   top: "cls_prob"
292 | }
293 | 


--------------------------------------------------------------------------------
/models/CaffeNet/no_bbox_reg/test.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"
  2 | input: "data"
  3 | input_shape {
  4 |   dim: 1
  5 |   dim: 3
  6 |   dim: 227
  7 |   dim: 227
  8 | }
  9 | input: "rois"
 10 | input_shape {
 11 |   dim: 1 # to be changed on-the-fly to num ROIs
 12 |   dim: 5 # [batch ind, x1, y1, x2, y2] zero-based indexing
 13 | }
 14 | layer {
 15 |   name: "conv1"
 16 |   type: "Convolution"
 17 |   bottom: "data"
 18 |   top: "conv1"
 19 |   param {
 20 |     lr_mult: 0
 21 |     decay_mult: 0
 22 |   }
 23 |   param {
 24 |     lr_mult: 0
 25 |     decay_mult: 0
 26 |   }
 27 |   convolution_param {
 28 |     num_output: 96
 29 |     kernel_size: 11
 30 |     pad: 5
 31 |     stride: 4
 32 |   }
 33 | }
 34 | layer {
 35 |   name: "relu1"
 36 |   type: "ReLU"
 37 |   bottom: "conv1"
 38 |   top: "conv1"
 39 | }
 40 | layer {
 41 |   name: "pool1"
 42 |   type: "Pooling"
 43 |   bottom: "conv1"
 44 |   top: "pool1"
 45 |   pooling_param {
 46 |     pool: MAX
 47 |     kernel_size: 3
 48 |     pad: 1
 49 |     stride: 2
 50 |   }
 51 | }
 52 | layer {
 53 |   name: "norm1"
 54 |   type: "LRN"
 55 |   bottom: "pool1"
 56 |   top: "norm1"
 57 |   lrn_param {
 58 |     local_size: 5
 59 |     alpha: 0.0001
 60 |     beta: 0.75
 61 |   }
 62 | }
 63 | layer {
 64 |   name: "conv2"
 65 |   type: "Convolution"
 66 |   bottom: "norm1"
 67 |   top: "conv2"
 68 |   param {
 69 |     lr_mult: 1
 70 |     decay_mult: 1
 71 |   }
 72 |   param {
 73 |     lr_mult: 2
 74 |     decay_mult: 0
 75 |   }
 76 |   convolution_param {
 77 |     num_output: 256
 78 |     kernel_size: 5
 79 |     pad: 2
 80 |     group: 2
 81 |   }
 82 | }
 83 | layer {
 84 |   name: "relu2"
 85 |   type: "ReLU"
 86 |   bottom: "conv2"
 87 |   top: "conv2"
 88 | }
 89 | layer {
 90 |   name: "pool2"
 91 |   type: "Pooling"
 92 |   bottom: "conv2"
 93 |   top: "pool2"
 94 |   pooling_param {
 95 |     pool: MAX
 96 |     kernel_size: 3
 97 |     pad: 1
 98 |     stride: 2
 99 |   }
100 | }
101 | layer {
102 |   name: "norm2"
103 |   type: "LRN"
104 |   bottom: "pool2"
105 |   top: "norm2"
106 |   lrn_param {
107 |     local_size: 5
108 |     alpha: 0.0001
109 |     beta: 0.75
110 |   }
111 | }
112 | layer {
113 |   name: "conv3"
114 |   type: "Convolution"
115 |   bottom: "norm2"
116 |   top: "conv3"
117 |   param {
118 |     lr_mult: 1
119 |     decay_mult: 1
120 |   }
121 |   param {
122 |     lr_mult: 2
123 |     decay_mult: 0
124 |   }
125 |   convolution_param {
126 |     num_output: 384
127 |     kernel_size: 3
128 |     pad: 1
129 |   }
130 | }
131 | layer {
132 |   name: "relu3"
133 |   type: "ReLU"
134 |   bottom: "conv3"
135 |   top: "conv3"
136 | }
137 | layer {
138 |   name: "conv4"
139 |   type: "Convolution"
140 |   bottom: "conv3"
141 |   top: "conv4"
142 |   param {
143 |     lr_mult: 1
144 |     decay_mult: 1
145 |   }
146 |   param {
147 |     lr_mult: 2
148 |     decay_mult: 0
149 |   }
150 |   convolution_param {
151 |     num_output: 384
152 |     kernel_size: 3
153 |     pad: 1
154 |     group: 2
155 |   }
156 | }
157 | layer {
158 |   name: "relu4"
159 |   type: "ReLU"
160 |   bottom: "conv4"
161 |   top: "conv4"
162 | }
163 | layer {
164 |   name: "conv5"
165 |   type: "Convolution"
166 |   bottom: "conv4"
167 |   top: "conv5"
168 |   param {
169 |     lr_mult: 1
170 |     decay_mult: 1
171 |   }
172 |   param {
173 |     lr_mult: 2
174 |     decay_mult: 0
175 |   }
176 |   convolution_param {
177 |     num_output: 256
178 |     kernel_size: 3
179 |     pad: 1
180 |     group: 2
181 |   }
182 | }
183 | layer {
184 |   name: "relu5"
185 |   type: "ReLU"
186 |   bottom: "conv5"
187 |   top: "conv5"
188 | }
189 | layer {
190 |   name: "roi_pool5"
191 |   type: "ROIPooling"
192 |   bottom: "conv5"
193 |   bottom: "rois"
194 |   top: "pool5"
195 |   roi_pooling_param {
196 |     pooled_w: 6
197 |     pooled_h: 6
198 |     spatial_scale: 0.0625 # 1/16
199 |   }
200 | }
201 | layer {
202 |   name: "fc6"
203 |   type: "InnerProduct"
204 |   bottom: "pool5"
205 |   top: "fc6"
206 |   param {
207 |     lr_mult: 1
208 |     decay_mult: 1
209 |   }
210 |   param {
211 |     lr_mult: 2
212 |     decay_mult: 0
213 |   }
214 |   inner_product_param {
215 |     num_output: 4096
216 |   }
217 | }
218 | layer {
219 |   name: "relu6"
220 |   type: "ReLU"
221 |   bottom: "fc6"
222 |   top: "fc6"
223 | }
224 | layer {
225 |   name: "drop6"
226 |   type: "Dropout"
227 |   bottom: "fc6"
228 |   top: "fc6"
229 |   dropout_param {
230 |     dropout_ratio: 0.5
231 |   }
232 | }
233 | layer {
234 |   name: "fc7"
235 |   type: "InnerProduct"
236 |   bottom: "fc6"
237 |   top: "fc7"
238 |   param {
239 |     lr_mult: 1
240 |     decay_mult: 1
241 |   }
242 |   param {
243 |     lr_mult: 2
244 |     decay_mult: 0
245 |   }
246 |   inner_product_param {
247 |     num_output: 4096
248 |   }
249 | }
250 | layer {
251 |   name: "relu7"
252 |   type: "ReLU"
253 |   bottom: "fc7"
254 |   top: "fc7"
255 | }
256 | layer {
257 |   name: "drop7"
258 |   type: "Dropout"
259 |   bottom: "fc7"
260 |   top: "fc7"
261 |   dropout_param {
262 |     dropout_ratio: 0.5
263 |   }
264 | }
265 | layer {
266 |   name: "cls_score"
267 |   type: "InnerProduct"
268 |   bottom: "fc7"
269 |   top: "cls_score"
270 |   param {
271 |     lr_mult: 1
272 |     decay_mult: 1
273 |   }
274 |   param {
275 |     lr_mult: 2
276 |     decay_mult: 0
277 |   }
278 |   inner_product_param {
279 |     num_output: 21
280 |     weight_filler {
281 |       type: "gaussian"
282 |       std: 0.01
283 |     }
284 |     bias_filler {
285 |       type: "constant"
286 |       value: 0
287 |     }
288 |   }
289 | }
290 | layer {
291 |   name: "cls_prob"
292 |   type: "Softmax"
293 |   bottom: "cls_score"
294 |   top: "cls_prob"
295 | }
296 | 


--------------------------------------------------------------------------------
/models/VGG_CNN_M_1024/no_bbox_reg/train.prototxt:
--------------------------------------------------------------------------------
  1 | name: "VGG_CNN_M_1024"
  2 | layer {
  3 |   name: 'data'
  4 |   type: 'Python'
  5 |   top: 'data'
  6 |   top: 'rois'
  7 |   top: 'labels'
  8 |   python_param {
  9 |     module: 'roi_data_layer.layer'
 10 |     layer: 'RoIDataLayer'
 11 |     param_str: "'num_classes': 21"
 12 |   }
 13 | }
 14 | layer {
 15 |   name: "conv1"
 16 |   type: "Convolution"
 17 |   bottom: "data"
 18 |   top: "conv1"
 19 |   param {
 20 |     lr_mult: 0
 21 |     decay_mult: 0
 22 |   }
 23 |   param {
 24 |     lr_mult: 0
 25 |     decay_mult: 0
 26 |   }
 27 |   convolution_param {
 28 |     num_output: 96
 29 |     kernel_size: 7
 30 |     stride: 2
 31 |   }
 32 | }
 33 | layer {
 34 |   name: "relu1"
 35 |   type: "ReLU"
 36 |   bottom: "conv1"
 37 |   top: "conv1"
 38 | }
 39 | layer {
 40 |   name: "norm1"
 41 |   type: "LRN"
 42 |   bottom: "conv1"
 43 |   top: "norm1"
 44 |   lrn_param {
 45 |     local_size: 5
 46 |     alpha: 0.0005
 47 |     beta: 0.75
 48 |     k: 2
 49 |   }
 50 | }
 51 | layer {
 52 |   name: "pool1"
 53 |   type: "Pooling"
 54 |   bottom: "norm1"
 55 |   top: "pool1"
 56 |   pooling_param {
 57 |     pool: MAX
 58 |     kernel_size: 3
 59 |     stride: 2
 60 |   }
 61 | }
 62 | layer {
 63 |   name: "conv2"
 64 |   type: "Convolution"
 65 |   bottom: "pool1"
 66 |   top: "conv2"
 67 |   param {
 68 |     lr_mult: 1
 69 |     decay_mult: 1
 70 |   }
 71 |   param {
 72 |     lr_mult: 2
 73 |     decay_mult: 0
 74 |   }
 75 |   convolution_param {
 76 |     num_output: 256
 77 |     pad: 1
 78 |     kernel_size: 5
 79 |     stride: 2
 80 |   }
 81 | }
 82 | layer {
 83 |   name: "relu2"
 84 |   type: "ReLU"
 85 |   bottom: "conv2"
 86 |   top: "conv2"
 87 | }
 88 | layer {
 89 |   name: "norm2"
 90 |   type: "LRN"
 91 |   bottom: "conv2"
 92 |   top: "norm2"
 93 |   lrn_param {
 94 |     local_size: 5
 95 |     alpha: 0.0005
 96 |     beta: 0.75
 97 |     k: 2
 98 |   }
 99 | }
100 | layer {
101 |   name: "pool2"
102 |   type: "Pooling"
103 |   bottom: "norm2"
104 |   top: "pool2"
105 |   pooling_param {
106 |     pool: MAX
107 |     kernel_size: 3
108 |     stride: 2
109 |   }
110 | }
111 | layer {
112 |   name: "conv3"
113 |   type: "Convolution"
114 |   bottom: "pool2"
115 |   top: "conv3"
116 |   param {
117 |     lr_mult: 1
118 |     decay_mult: 1
119 |   }
120 |   param {
121 |     lr_mult: 2
122 |     decay_mult: 0
123 |   }
124 |   convolution_param {
125 |     num_output: 512
126 |     pad: 1
127 |     kernel_size: 3
128 |   }
129 | }
130 | layer {
131 |   name: "relu3"
132 |   type: "ReLU"
133 |   bottom: "conv3"
134 |   top: "conv3"
135 | }
136 | layer {
137 |   name: "conv4"
138 |   type: "Convolution"
139 |   bottom: "conv3"
140 |   top: "conv4"
141 |   param {
142 |     lr_mult: 1
143 |     decay_mult: 1
144 |   }
145 |   param {
146 |     lr_mult: 2
147 |     decay_mult: 0
148 |   }
149 |   convolution_param {
150 |     num_output: 512
151 |     pad: 1
152 |     kernel_size: 3
153 |   }
154 | }
155 | layer {
156 |   name: "relu4"
157 |   type: "ReLU"
158 |   bottom: "conv4"
159 |   top: "conv4"
160 | }
161 | layer {
162 |   name: "conv5"
163 |   type: "Convolution"
164 |   bottom: "conv4"
165 |   top: "conv5"
166 |   param {
167 |     lr_mult: 1
168 |     decay_mult: 1
169 |   }
170 |   param {
171 |     lr_mult: 2
172 |     decay_mult: 0
173 |   }
174 |   convolution_param {
175 |     num_output: 512
176 |     pad: 1
177 |     kernel_size: 3
178 |   }
179 | }
180 | layer {
181 |   name: "relu5"
182 |   type: "ReLU"
183 |   bottom: "conv5"
184 |   top: "conv5"
185 | }
186 | layer {
187 |   name: "roi_pool5"
188 |   type: "ROIPooling"
189 |   bottom: "conv5"
190 |   bottom: "rois"
191 |   top: "pool5"
192 |   roi_pooling_param {
193 |     pooled_w: 6
194 |     pooled_h: 6
195 |     spatial_scale: 0.0625 # 1/16
196 |   }
197 | }
198 | layer {
199 |   name: "fc6"
200 |   type: "InnerProduct"
201 |   bottom: "pool5"
202 |   top: "fc6"
203 |   param {
204 |     lr_mult: 1
205 |     decay_mult: 1
206 |   }
207 |   param {
208 |     lr_mult: 2
209 |     decay_mult: 0
210 |   }
211 |   inner_product_param {
212 |     num_output: 4096
213 |   }
214 | }
215 | layer {
216 |   name: "relu6"
217 |   type: "ReLU"
218 |   bottom: "fc6"
219 |   top: "fc6"
220 | }
221 | layer {
222 |   name: "drop6"
223 |   type: "Dropout"
224 |   bottom: "fc6"
225 |   top: "fc6"
226 |   dropout_param {
227 |     dropout_ratio: 0.5
228 |   }
229 | }
230 | layer {
231 |   name: "fc7"
232 |   type: "InnerProduct"
233 |   bottom: "fc6"
234 |   top: "fc7"
235 |   param {
236 |     lr_mult: 1
237 |     decay_mult: 1
238 |   }
239 |   param {
240 |     lr_mult: 2
241 |     decay_mult: 0
242 |   }
243 |   inner_product_param {
244 |     num_output: 1024
245 |   }
246 | }
247 | layer {
248 |   name: "relu7"
249 |   type: "ReLU"
250 |   bottom: "fc7"
251 |   top: "fc7"
252 | }
253 | layer {
254 |   name: "drop7"
255 |   type: "Dropout"
256 |   bottom: "fc7"
257 |   top: "fc7"
258 |   dropout_param {
259 |     dropout_ratio: 0.5
260 |   }
261 | }
262 | layer {
263 |   name: "cls_score"
264 |   type: "InnerProduct"
265 |   bottom: "fc7"
266 |   top: "cls_score"
267 |   param {
268 |     lr_mult: 1
269 |     decay_mult: 1
270 |   }
271 |   param {
272 |     lr_mult: 2
273 |     decay_mult: 0
274 |   }
275 |   inner_product_param {
276 |     num_output: 21
277 |     weight_filler {
278 |       type: "gaussian"
279 |       std: 0.01
280 |     }
281 |     bias_filler {
282 |       type: "constant"
283 |       value: 0
284 |     }
285 |   }
286 | }
287 | layer {
288 |   name: "loss_cls"
289 |   type: "SoftmaxWithLoss"
290 |   bottom: "cls_score"
291 |   bottom: "labels"
292 |   top: "loss_cls"
293 |   loss_weight: 1
294 | }
295 | 


--------------------------------------------------------------------------------
/models/CaffeNet/no_bbox_reg/train.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"
  2 | layer {
  3 |   name: 'data'
  4 |   type: 'Python'
  5 |   top: 'data'
  6 |   top: 'rois'
  7 |   top: 'labels'
  8 |   python_param {
  9 |     module: 'roi_data_layer.layer'
 10 |     layer: 'RoIDataLayer'
 11 |     param_str: "'num_classes': 21"
 12 |   }
 13 | }
 14 | layer {
 15 |   name: "conv1"
 16 |   type: "Convolution"
 17 |   bottom: "data"
 18 |   top: "conv1"
 19 |   param {
 20 |     lr_mult: 0
 21 |     decay_mult: 0
 22 |   }
 23 |   param {
 24 |     lr_mult: 0
 25 |     decay_mult: 0
 26 |   }
 27 |   convolution_param {
 28 |     num_output: 96
 29 |     kernel_size: 11
 30 |     pad: 5
 31 |     stride: 4
 32 |   }
 33 | }
 34 | layer {
 35 |   name: "relu1"
 36 |   type: "ReLU"
 37 |   bottom: "conv1"
 38 |   top: "conv1"
 39 | }
 40 | layer {
 41 |   name: "pool1"
 42 |   type: "Pooling"
 43 |   bottom: "conv1"
 44 |   top: "pool1"
 45 |   pooling_param {
 46 |     pool: MAX
 47 |     kernel_size: 3
 48 |     pad: 1
 49 |     stride: 2
 50 |   }
 51 | }
 52 | layer {
 53 |   name: "norm1"
 54 |   type: "LRN"
 55 |   bottom: "pool1"
 56 |   top: "norm1"
 57 |   lrn_param {
 58 |     local_size: 5
 59 |     alpha: 0.0001
 60 |     beta: 0.75
 61 |   }
 62 | }
 63 | layer {
 64 |   name: "conv2"
 65 |   type: "Convolution"
 66 |   bottom: "norm1"
 67 |   top: "conv2"
 68 |   param {
 69 |     lr_mult: 1
 70 |     decay_mult: 1
 71 |   }
 72 |   param {
 73 |     lr_mult: 2
 74 |     decay_mult: 0
 75 |   }
 76 |   convolution_param {
 77 |     num_output: 256
 78 |     kernel_size: 5
 79 |     pad: 2
 80 |     group: 2
 81 |   }
 82 | }
 83 | layer {
 84 |   name: "relu2"
 85 |   type: "ReLU"
 86 |   bottom: "conv2"
 87 |   top: "conv2"
 88 | }
 89 | layer {
 90 |   name: "pool2"
 91 |   type: "Pooling"
 92 |   bottom: "conv2"
 93 |   top: "pool2"
 94 |   pooling_param {
 95 |     pool: MAX
 96 |     kernel_size: 3
 97 |     pad: 1
 98 |     stride: 2
 99 |   }
100 | }
101 | layer {
102 |   name: "norm2"
103 |   type: "LRN"
104 |   bottom: "pool2"
105 |   top: "norm2"
106 |   lrn_param {
107 |     local_size: 5
108 |     alpha: 0.0001
109 |     beta: 0.75
110 |   }
111 | }
112 | layer {
113 |   name: "conv3"
114 |   type: "Convolution"
115 |   bottom: "norm2"
116 |   top: "conv3"
117 |   param {
118 |     lr_mult: 1
119 |     decay_mult: 1
120 |   }
121 |   param {
122 |     lr_mult: 2
123 |     decay_mult: 0
124 |   }
125 |   convolution_param {
126 |     num_output: 384
127 |     kernel_size: 3
128 |     pad: 1
129 |   }
130 | }
131 | layer {
132 |   name: "relu3"
133 |   type: "ReLU"
134 |   bottom: "conv3"
135 |   top: "conv3"
136 | }
137 | layer {
138 |   name: "conv4"
139 |   type: "Convolution"
140 |   bottom: "conv3"
141 |   top: "conv4"
142 |   param {
143 |     lr_mult: 1
144 |     decay_mult: 1
145 |   }
146 |   param {
147 |     lr_mult: 2
148 |     decay_mult: 0
149 |   }
150 |   convolution_param {
151 |     num_output: 384
152 |     kernel_size: 3
153 |     pad: 1
154 |     group: 2
155 |   }
156 | }
157 | layer {
158 |   name: "relu4"
159 |   type: "ReLU"
160 |   bottom: "conv4"
161 |   top: "conv4"
162 | }
163 | layer {
164 |   name: "conv5"
165 |   type: "Convolution"
166 |   bottom: "conv4"
167 |   top: "conv5"
168 |   param {
169 |     lr_mult: 1
170 |     decay_mult: 1
171 |   }
172 |   param {
173 |     lr_mult: 2
174 |     decay_mult: 0
175 |   }
176 |   convolution_param {
177 |     num_output: 256
178 |     kernel_size: 3
179 |     pad: 1
180 |     group: 2
181 |   }
182 | }
183 | layer {
184 |   name: "relu5"
185 |   type: "ReLU"
186 |   bottom: "conv5"
187 |   top: "conv5"
188 | }
189 | layer {
190 |   name: "roi_pool5"
191 |   type: "ROIPooling"
192 |   bottom: "conv5"
193 |   bottom: "rois"
194 |   top: "pool5"
195 |   roi_pooling_param {
196 |     pooled_w: 6
197 |     pooled_h: 6
198 |     spatial_scale: 0.0625 # 1/16
199 |   }
200 | }
201 | layer {
202 |   name: "fc6"
203 |   type: "InnerProduct"
204 |   bottom: "pool5"
205 |   top: "fc6"
206 |   param {
207 |     lr_mult: 1
208 |     decay_mult: 1
209 |   }
210 |   param {
211 |     lr_mult: 2
212 |     decay_mult: 0
213 |   }
214 |   inner_product_param {
215 |     num_output: 4096
216 |   }
217 | }
218 | layer {
219 |   name: "relu6"
220 |   type: "ReLU"
221 |   bottom: "fc6"
222 |   top: "fc6"
223 | }
224 | layer {
225 |   name: "drop6"
226 |   type: "Dropout"
227 |   bottom: "fc6"
228 |   top: "fc6"
229 |   dropout_param {
230 |     dropout_ratio: 0.5
231 |   }
232 | }
233 | layer {
234 |   name: "fc7"
235 |   type: "InnerProduct"
236 |   bottom: "fc6"
237 |   top: "fc7"
238 |   param {
239 |     lr_mult: 1
240 |     decay_mult: 1
241 |   }
242 |   param {
243 |     lr_mult: 2
244 |     decay_mult: 0
245 |   }
246 |   inner_product_param {
247 |     num_output: 4096
248 |   }
249 | }
250 | layer {
251 |   name: "relu7"
252 |   type: "ReLU"
253 |   bottom: "fc7"
254 |   top: "fc7"
255 | }
256 | layer {
257 |   name: "drop7"
258 |   type: "Dropout"
259 |   bottom: "fc7"
260 |   top: "fc7"
261 |   dropout_param {
262 |     dropout_ratio: 0.5
263 |   }
264 | }
265 | layer {
266 |   name: "cls_score"
267 |   type: "InnerProduct"
268 |   bottom: "fc7"
269 |   top: "cls_score"
270 |   param {
271 |     lr_mult: 1
272 |     decay_mult: 1
273 |   }
274 |   param {
275 |     lr_mult: 2
276 |     decay_mult: 0
277 |   }
278 |   inner_product_param {
279 |     num_output: 21
280 |     weight_filler {
281 |       type: "gaussian"
282 |       std: 0.01
283 |     }
284 |     bias_filler {
285 |       type: "constant"
286 |       value: 0
287 |     }
288 |   }
289 | }
290 | layer {
291 |   name: "loss_cls"
292 |   type: "SoftmaxWithLoss"
293 |   bottom: "cls_score"
294 |   bottom: "labels"
295 |   top: "loss_cls"
296 |   loss_weight: 1
297 | }
298 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
  9 | 
 10 | import numpy as np
 11 | from fast_rcnn.config import cfg
 12 | import utils.cython_bbox
 13 | 
 14 | def prepare_roidb(imdb):
 15 |     """Enrich the imdb's roidb by adding some derived quantities that
 16 |     are useful for training. This function precomputes the maximum
 17 |     overlap, taken over ground-truth boxes, between each ROI and
 18 |     each ground-truth box. The class with maximum overlap is also
 19 |     recorded.
 20 |     """
 21 |     roidb = imdb.roidb
 22 |     for i in xrange(len(imdb.image_index)):
 23 |         roidb[i]['image'] = imdb.image_path_at(i)
 24 |         # need gt_overlaps as a dense array for argmax
 25 |         gt_overlaps = roidb[i]['gt_overlaps'].toarray()
 26 |         # max overlap with gt over classes (columns)
 27 |         max_overlaps = gt_overlaps.max(axis=1)
 28 |         # gt class that had the max overlap
 29 |         max_classes = gt_overlaps.argmax(axis=1)
 30 |         roidb[i]['max_classes'] = max_classes
 31 |         roidb[i]['max_overlaps'] = max_overlaps
 32 |         # sanity checks
 33 |         # max overlap of 0 => class should be zero (background)
 34 |         zero_inds = np.where(max_overlaps == 0)[0]
 35 |         assert all(max_classes[zero_inds] == 0)
 36 |         # max overlap > 0 => class should not be zero (must be a fg class)
 37 |         nonzero_inds = np.where(max_overlaps > 0)[0]
 38 |         assert all(max_classes[nonzero_inds] != 0)
 39 | 
 40 | def add_bbox_regression_targets(roidb):
 41 |     """Add information needed to train bounding-box regressors."""
 42 |     assert len(roidb) > 0
 43 |     assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'
 44 | 
 45 |     num_images = len(roidb)
 46 |     # Infer number of classes from the number of columns in gt_overlaps
 47 |     num_classes = roidb[0]['gt_overlaps'].shape[1]
 48 |     for im_i in xrange(num_images):
 49 |         rois = roidb[im_i]['boxes']
 50 |         max_overlaps = roidb[im_i]['max_overlaps']
 51 |         max_classes = roidb[im_i]['max_classes']
 52 |         roidb[im_i]['bbox_targets'] = \
 53 |                 _compute_targets(rois, max_overlaps, max_classes)
 54 | 
 55 |     # Compute values needed for means and stds
 56 |     # var(x) = E(x^2) - E(x)^2
 57 |     class_counts = np.zeros((num_classes, 1)) + cfg.EPS
 58 |     sums = np.zeros((num_classes, 4))
 59 |     squared_sums = np.zeros((num_classes, 4))
 60 |     for im_i in xrange(num_images):
 61 |         targets = roidb[im_i]['bbox_targets']
 62 |         for cls in xrange(1, num_classes):
 63 |             cls_inds = np.where(targets[:, 0] == cls)[0]
 64 |             if cls_inds.size > 0:
 65 |                 class_counts[cls] += cls_inds.size
 66 |                 sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
 67 |                 squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0)
 68 | 
 69 |     means = sums / class_counts
 70 |     stds = np.sqrt(squared_sums / class_counts - means ** 2)
 71 | 
 72 |     # Normalize targets
 73 |     for im_i in xrange(num_images):
 74 |         targets = roidb[im_i]['bbox_targets']
 75 |         for cls in xrange(1, num_classes):
 76 |             cls_inds = np.where(targets[:, 0] == cls)[0]
 77 |             roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
 78 |             roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
 79 | 
 80 |     # These values will be needed for making predictions
 81 |     # (the predicts will need to be unnormalized and uncentered)
 82 |     return means.ravel(), stds.ravel()
 83 | 
 84 | def _compute_targets(rois, overlaps, labels):
 85 |     """Compute bounding-box regression targets for an image."""
 86 |     # Ensure ROIs are floats
 87 |     rois = rois.astype(np.float, copy=False)
 88 | 
 89 |     # Indices of ground-truth ROIs
 90 |     gt_inds = np.where(overlaps == 1)[0]
 91 |     # Indices of examples for which we try to make predictions
 92 |     ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
 93 | 
 94 |     # Get IoU overlap between each ex ROI and gt ROI
 95 |     ex_gt_overlaps = utils.cython_bbox.bbox_overlaps(rois[ex_inds, :],
 96 |                                                      rois[gt_inds, :])
 97 | 
 98 |     # Find which gt ROI each ex ROI has max overlap with:
 99 |     # this will be the ex ROI's gt target
100 |     gt_assignment = ex_gt_overlaps.argmax(axis=1)
101 |     gt_rois = rois[gt_inds[gt_assignment], :]
102 |     ex_rois = rois[ex_inds, :]
103 | 
104 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS
105 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS
106 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
107 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
108 | 
109 |     gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS
110 |     gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS
111 |     gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
112 |     gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
113 | 
114 |     targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
115 |     targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
116 |     targets_dw = np.log(gt_widths / ex_widths)
117 |     targets_dh = np.log(gt_heights / ex_heights)
118 | 
119 |     targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
120 |     targets[ex_inds, 0] = labels[ex_inds]
121 |     targets[ex_inds, 1] = targets_dx
122 |     targets[ex_inds, 2] = targets_dy
123 |     targets[ex_inds, 3] = targets_dw
124 |     targets[ex_inds, 4] = targets_dh
125 |     return targets
126 | 


--------------------------------------------------------------------------------