├── lib
    ├── nms
    │   ├── __init__.py
    │   ├── .gitignore
    │   ├── __init__.pyc
    │   ├── gpu_nms.hpp
    │   ├── py_cpu_nms.py
    │   ├── gpu_nms.pyx
    │   ├── cpu_nms.pyx
    │   └── nms_kernel.cu
    ├── utils
    │   ├── .gitignore
    │   ├── blob.pyc
    │   ├── timer.pyc
    │   ├── __init__.pyc
    │   ├── __init__.py
    │   ├── timer.py
    │   ├── blob.py
    │   └── bbox.pyx
    ├── Makefile
    ├── rpn
    │   ├── __init__.pyc
    │   ├── generate.pyc
    │   ├── proposal_layer.pyc
    │   ├── generate_anchors.pyc
    │   ├── anchor_target_layer.pyc
    │   ├── __init__.py
    │   ├── README.md
    │   ├── generate_anchors.py
    │   ├── generate.py
    │   └── proposal_layer.py
    ├── datasets
    │   ├── imdb.pyc
    │   ├── kakou.pyc
    │   ├── __init__.pyc
    │   ├── factory.pyc
    │   ├── pascal_voc.pyc
    │   ├── VOCdevkit-matlab-wrapper
    │   │   ├── xVOCap.m
    │   │   ├── main.m
    │   │   ├── compute_overlap.m
    │   │   └── detection_eval.m
    │   ├── factory.py
    │   ├── __init__.py
    │   └── imdb.py
    ├── fast_rcnn
    │   ├── test.pyc
    │   ├── config.pyc
    │   ├── train.pyc
    │   ├── __init__.pyc
    │   ├── nms_wrapper.pyc
    │   ├── bbox_transform.pyc
    │   ├── __init__.py
    │   ├── nms_wrapper.py
    │   ├── bbox_transform.py
    │   └── train.py
    ├── roi_data_layer
    │   ├── layer.pyc
    │   ├── roidb.pyc
    │   ├── __init__.pyc
    │   ├── minibatch.pyc
    │   ├── __init__.py
    │   └── roidb.py
    ├── roi_data_layer_original
    │   ├── layer.pyc
    │   ├── roidb.pyc
    │   ├── __init__.pyc
    │   ├── minibatch.pyc
    │   ├── __init__.py
    │   ├── roidb.py
    │   └── layer.py
    └── setup.py
├── experiments
    ├── logs
    │   └── .gitignore
    ├── cfgs
    │   ├── faster_rcnn_alt_opt.yml
    │   └── faster_rcnn_end2end.yml
    ├── README.md
    └── scripts
    │   ├── TEST.sh
    │   ├── fast_rcnn.sh
    │   └── faster_rcnn_alt_opt.sh
├── test.sh
├── tools
    ├── README.md
    ├── fcolor.pyc
    ├── _init_paths.pyc
    ├── _init_paths.py
    ├── fcolor.py
    ├── eval_recall.py
    ├── reval.py
    ├── test_net.py
    ├── test_net_debug.py
    ├── rpn_generate.py
    ├── visulization.py
    ├── train_net.py
    ├── compress_net.py
    ├── demo.py
    ├── demo_show.py
    ├── demo_video_for_video.py
    ├── demo_location.py
    ├── demo_for_video.py
    ├── demo_video_for_video_XXX.py
    └── vis_fasterRCNN.py
├── train.sh
├── data
    ├── .gitignore
    ├── scripts
    │   ├── fetch_imagenet_models.sh
    │   ├── fetch_faster_rcnn_models.sh
    │   └── fetch_selective_search_data.sh
    ├── split_data.m
    ├── README.md
    └── convert_kitti.py
├── models
    ├── README.md
    └── VGG16
    │   └── faster_rcnn_alt_opt
    │       ├── stage1_rpn_solver60k80k.pt
    │       ├── stage2_rpn_solver60k80k.pt
    │       ├── stage1_fast_rcnn_ohem_solver30k40k.pt
    │       ├── stage2_fast_rcnn_ohem_solver30k40k.pt
    │       ├── rpn_test.pt
    │       └── faster_rcnn_test.pt
├── loss_accuracy.m
├── LICENSE
└── README.md


/lib/nms/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/experiments/logs/.gitignore:
--------------------------------------------------------------------------------
1 | *.txt*
2 | 


--------------------------------------------------------------------------------
/lib/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.so
3 | 


--------------------------------------------------------------------------------
/lib/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 


--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | rm -f data/cache/*
2 | ./experiments/scripts/TEST.sh 0 VGG16
3 | 


--------------------------------------------------------------------------------
/lib/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	python setup.py build_ext --inplace
3 | 	rm -rf build
4 | 


--------------------------------------------------------------------------------
/tools/README.md:
--------------------------------------------------------------------------------
1 | Tools for training, testing, and compressing Fast R-CNN networks.
2 | 


--------------------------------------------------------------------------------
/train.sh:
--------------------------------------------------------------------------------
1 | #rm -f data/cache/*
2 | ./experiments/scripts/faster_rcnn_alt_opt.sh 0 VGG16
3 | 


--------------------------------------------------------------------------------
/experiments/cfgs/faster_rcnn_alt_opt.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: faster_rcnn_alt_opt
2 | TEST:
3 |   HAS_RPN: True
4 | 


--------------------------------------------------------------------------------
/tools/fcolor.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/tools/fcolor.pyc


--------------------------------------------------------------------------------
/data/.gitignore:
--------------------------------------------------------------------------------
1 | selective_search*
2 | imagenet_models*
3 | fast_rcnn_models*
4 | VOCdevkit*
5 | cache
6 | 


--------------------------------------------------------------------------------
/lib/nms/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/nms/__init__.pyc


--------------------------------------------------------------------------------
/lib/rpn/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/rpn/__init__.pyc


--------------------------------------------------------------------------------
/lib/rpn/generate.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/rpn/generate.pyc


--------------------------------------------------------------------------------
/lib/utils/blob.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/utils/blob.pyc


--------------------------------------------------------------------------------
/lib/utils/timer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/utils/timer.pyc


--------------------------------------------------------------------------------
/lib/datasets/imdb.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/datasets/imdb.pyc


--------------------------------------------------------------------------------
/lib/datasets/kakou.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/datasets/kakou.pyc


--------------------------------------------------------------------------------
/lib/fast_rcnn/test.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/fast_rcnn/test.pyc


--------------------------------------------------------------------------------
/lib/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/utils/__init__.pyc


--------------------------------------------------------------------------------
/tools/_init_paths.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/tools/_init_paths.pyc


--------------------------------------------------------------------------------
/lib/datasets/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/datasets/__init__.pyc


--------------------------------------------------------------------------------
/lib/datasets/factory.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/datasets/factory.pyc


--------------------------------------------------------------------------------
/lib/fast_rcnn/config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/fast_rcnn/config.pyc


--------------------------------------------------------------------------------
/lib/fast_rcnn/train.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/fast_rcnn/train.pyc


--------------------------------------------------------------------------------
/lib/datasets/pascal_voc.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/datasets/pascal_voc.pyc


--------------------------------------------------------------------------------
/lib/fast_rcnn/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/fast_rcnn/__init__.pyc


--------------------------------------------------------------------------------
/lib/rpn/proposal_layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/rpn/proposal_layer.pyc


--------------------------------------------------------------------------------
/lib/fast_rcnn/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/fast_rcnn/nms_wrapper.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer/layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer/layer.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer/roidb.pyc


--------------------------------------------------------------------------------
/lib/rpn/generate_anchors.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/rpn/generate_anchors.pyc


--------------------------------------------------------------------------------
/lib/fast_rcnn/bbox_transform.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/fast_rcnn/bbox_transform.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer/__init__.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer/minibatch.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer/minibatch.pyc


--------------------------------------------------------------------------------
/lib/rpn/anchor_target_layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/rpn/anchor_target_layer.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer_original/layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer_original/layer.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer_original/roidb.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer_original/roidb.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer_original/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer_original/__init__.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer_original/minibatch.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer_original/minibatch.pyc


--------------------------------------------------------------------------------
/lib/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 |           int boxes_dim, float nms_overlap_thresh, int device_id);
3 | 


--------------------------------------------------------------------------------
/experiments/README.md:
--------------------------------------------------------------------------------
1 | Scripts are under `experiments/scripts`.
2 | 
3 | Each script saves a log file under `experiments/logs`.
4 | 
5 | Configuration override files used in the experiments are stored in `experiments/cfgs`.
6 | 


--------------------------------------------------------------------------------
/models/README.md:
--------------------------------------------------------------------------------
1 | Prototxt files that define models and solvers.
2 | 
3 | Three models are defined, with some variations of each to support experiments
4 | in the paper.
5 |  - Caffenet (model **S**)
6 |  - VGG_CNN_M_1024 (model **M**)
7 |  - VGG16 (model **L**)
8 | 


--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/experiments/cfgs/faster_rcnn_end2end.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: faster_rcnn_end2end
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   IMS_PER_BATCH: 1
 5 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 6 |   RPN_POSITIVE_OVERLAP: 0.7
 7 |   RPN_BATCHSIZE: 256
 8 |   PROPOSAL_METHOD: gt
 9 | TEST:
10 |   HAS_RPN: True
11 | 


--------------------------------------------------------------------------------
/lib/fast_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/rpn/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer_original/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m:
--------------------------------------------------------------------------------
 1 | function ap = xVOCap(rec,prec)
 2 | % From the PASCAL VOC 2011 devkit
 3 | 
 4 | mrec=[0 ; rec ; 1];
 5 | mpre=[0 ; prec ; 0];
 6 | for i=numel(mpre)-1:-1:1
 7 |     mpre(i)=max(mpre(i),mpre(i+1));
 8 | end
 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/main.m:
--------------------------------------------------------------------------------
1 | close all;clear all;clc;
2 | path='/home/bsl/KITTI-detection/data';
3 | comp_id='comp4-7629';
4 | test_set='KakouTest';
5 | output_dir='/home/bsl/KITTI-detection/output/faster_rcnn_alt_opt/KakouTest/VGG16_faster_rcnn_final';
6 | img_list='KITTI_val_list.txt';
7 | img_gt='KITTI_gt_val.txt';
8 | res = detection_eval(path, comp_id, test_set,output_dir,img_list,img_gt);
9 | 


--------------------------------------------------------------------------------
/models/VGG16/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt:
--------------------------------------------------------------------------------
 1 | train_net: "models/VGG16/faster_rcnn_alt_opt/stage1_rpn_train.pt"
 2 | 
 3 | base_lr: 0.001
 4 | lr_policy: "step"
 5 | gamma: 0.1
 6 | stepsize: 60000
 7 | display: 20
 8 | average_loss: 100
 9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | 
12 | # We disable standard caffe solver snapshotting and implement our own snapshot
13 | # function
14 | snapshot: 0
15 | # We still use the snapshot prefix, though
16 | snapshot_prefix: "vgg16_rpn"
17 | 


--------------------------------------------------------------------------------
/models/VGG16/faster_rcnn_alt_opt/stage2_rpn_solver60k80k.pt:
--------------------------------------------------------------------------------
 1 | train_net: "models/VGG16/faster_rcnn_alt_opt/stage2_rpn_train.pt"
 2 | 
 3 | base_lr: 0.001
 4 | lr_policy: "step"
 5 | gamma: 0.1
 6 | stepsize: 60000
 7 | display: 20
 8 | average_loss: 100
 9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | 
12 | # We disable standard caffe solver snapshotting and implement our own snapshot
13 | # function
14 | snapshot: 0
15 | # We still use the snapshot prefix, though
16 | snapshot_prefix: "vgg16_rpn"
17 | 


--------------------------------------------------------------------------------
/models/VGG16/faster_rcnn_alt_opt/stage1_fast_rcnn_ohem_solver30k40k.pt:
--------------------------------------------------------------------------------
 1 | train_net: "models/VGG16/faster_rcnn_alt_opt/stage1_fast_rcnn_ohem_train.pt"
 2 | 
 3 | base_lr: 0.001
 4 | lr_policy: "step"
 5 | gamma: 0.1
 6 | stepsize: 30000
 7 | display: 20
 8 | average_loss: 100
 9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | 
12 | # We disable standard caffe solver snapshotting and implement our own snapshot
13 | # function
14 | snapshot: 0
15 | # We still use the snapshot prefix, though
16 | snapshot_prefix: "vgg16_fast_rcnn"
17 | 


--------------------------------------------------------------------------------
/models/VGG16/faster_rcnn_alt_opt/stage2_fast_rcnn_ohem_solver30k40k.pt:
--------------------------------------------------------------------------------
 1 | train_net: "models/VGG16/faster_rcnn_alt_opt/stage2_fast_rcnn_ohem_train.pt"
 2 | 
 3 | base_lr: 0.001
 4 | lr_policy: "step"
 5 | gamma: 0.1
 6 | stepsize: 30000
 7 | display: 20
 8 | average_loss: 100
 9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | 
12 | # We disable standard caffe solver snapshotting and implement our own snapshot
13 | # function
14 | snapshot: 0
15 | # We still use the snapshot prefix, though
16 | snapshot_prefix: "vgg16_fast_rcnn"
17 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/compute_overlap.m:
--------------------------------------------------------------------------------
 1 | function overlap=compute_overlap(bb_pred,bb_target)
 2 | a=(bb_pred(3)-bb_pred(1)+1)*(bb_pred(4)-bb_pred(2)+1);
 3 | b=(bb_target(3)-bb_target(1)+1)*(bb_target(4)-bb_target(2)+1);
 4 | bb_overlap=[max(bb_pred(1),bb_target(1)),max(bb_pred(2),bb_target(2)),min(bb_pred(3),bb_target(3)),min(bb_pred(4),bb_target(4))];
 5 | iw=bb_overlap(3)-bb_overlap(1)+1;
 6 | ih=bb_overlap(4)-bb_overlap(2)+1;
 7 | if iw>0&ih>0
 8 |     overlap=iw*ih/(a+b-iw*ih);
 9 | else
10 |     overlap=-inf;
11 | end
12 | 
13 | 


--------------------------------------------------------------------------------
/lib/fast_rcnn/nms_wrapper.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | from fast_rcnn.config import cfg
 9 | from nms.gpu_nms import gpu_nms
10 | from nms.cpu_nms import cpu_nms
11 | 
12 | def nms(dets, thresh):
13 |     """Dispatch to either CPU or GPU NMS implementations."""
14 | 
15 |     if dets.shape[0] == 0:
16 |         return []
17 |     if cfg.USE_GPU_NMS:
18 |         return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
19 |     else:
20 |         return cpu_nms(dets, thresh)
21 | 


--------------------------------------------------------------------------------
/tools/_init_paths.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Set up paths for Fast R-CNN."""
 9 | 
10 | import os.path as osp
11 | import sys
12 | 
13 | def add_path(path):
14 |     if path not in sys.path:
15 |         sys.path.insert(0, path)
16 | 
17 | this_dir = osp.dirname(__file__)
18 | 
19 | # Add caffe to PYTHONPATH
20 | caffe_path = osp.join(this_dir, '..', 'caffe-fast-rcnn', 'python')
21 | add_path(caffe_path)
22 | 
23 | # Add lib to PYTHONPATH
24 | lib_path = osp.join(this_dir, '..', 'lib')
25 | add_path(lib_path)
26 | 


--------------------------------------------------------------------------------
/lib/rpn/README.md:
--------------------------------------------------------------------------------
 1 | ### `rpn` module overview
 2 | 
 3 | ##### `generate_anchors.py`
 4 | 
 5 | Generates a regular grid of multi-scale, multi-aspect anchor boxes.
 6 | 
 7 | ##### `proposal_layer.py`
 8 | 
 9 | Converts RPN outputs (per-anchor scores and bbox regression estimates) into object proposals.
10 | 
11 | ##### `anchor_target_layer.py` 
12 | 
13 | Generates training targets/labels for each anchor. Classification labels are 1 (object), 0 (not object) or -1 (ignore).
14 | Bbox regression targets are specified when the classification label is > 0.
15 | 
16 | ##### `proposal_target_layer.py`
17 | 
18 | Generates training targets/labels for each object proposal: classification labels 0 - K (bg or object class 1, ... , K)
19 | and bbox regression targets in that case that the label is > 0.
20 | 
21 | ##### `generate.py`
22 | 
23 | Generate object detection proposals from an imdb using an RPN.
24 | 


--------------------------------------------------------------------------------
/data/scripts/fetch_imagenet_models.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
 4 | cd $DIR
 5 | 
 6 | FILE=imagenet_models.tgz
 7 | URL=http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/$FILE
 8 | CHECKSUM=ed34ca912d6782edfb673a8c3a0bda6d
 9 | 
10 | if [ -f $FILE ]; then
11 |   echo "File already exists. Checking md5..."
12 |   os=`uname -s`
13 |   if [ "$os" = "Linux" ]; then
14 |     checksum=`md5sum $FILE | awk '{ print $1 }'`
15 |   elif [ "$os" = "Darwin" ]; then
16 |     checksum=`cat $FILE | md5`
17 |   fi
18 |   if [ "$checksum" = "$CHECKSUM" ]; then
19 |     echo "Checksum is correct. No need to download."
20 |     exit 0
21 |   else
22 |     echo "Checksum is incorrect. Need to download again."
23 |   fi
24 | fi
25 | 
26 | echo "Downloading pretrained ImageNet models (1G)..."
27 | 
28 | wget $URL -O $FILE
29 | 
30 | echo "Unzipping..."
31 | 
32 | tar zxvf $FILE
33 | 
34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
35 | 


--------------------------------------------------------------------------------
/data/scripts/fetch_faster_rcnn_models.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
 4 | cd $DIR
 5 | 
 6 | FILE=faster_rcnn_models.tgz
 7 | URL=http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/$FILE
 8 | CHECKSUM=ac116844f66aefe29587214272054668
 9 | 
10 | if [ -f $FILE ]; then
11 |   echo "File already exists. Checking md5..."
12 |   os=`uname -s`
13 |   if [ "$os" = "Linux" ]; then
14 |     checksum=`md5sum $FILE | awk '{ print $1 }'`
15 |   elif [ "$os" = "Darwin" ]; then
16 |     checksum=`cat $FILE | md5`
17 |   fi
18 |   if [ "$checksum" = "$CHECKSUM" ]; then
19 |     echo "Checksum is correct. No need to download."
20 |     exit 0
21 |   else
22 |     echo "Checksum is incorrect. Need to download again."
23 |   fi
24 | fi
25 | 
26 | echo "Downloading Faster R-CNN demo models (695M)..."
27 | 
28 | wget $URL -O $FILE
29 | 
30 | echo "Unzipping..."
31 | 
32 | tar zxvf $FILE
33 | 
34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
35 | 


--------------------------------------------------------------------------------
/data/scripts/fetch_selective_search_data.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
 4 | cd $DIR
 5 | 
 6 | FILE=selective_search_data.tgz
 7 | URL=http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/$FILE
 8 | CHECKSUM=7078c1db87a7851b31966b96774cd9b9
 9 | 
10 | if [ -f $FILE ]; then
11 |   echo "File already exists. Checking md5..."
12 |   os=`uname -s`
13 |   if [ "$os" = "Linux" ]; then
14 |     checksum=`md5sum $FILE | awk '{ print $1 }'`
15 |   elif [ "$os" = "Darwin" ]; then
16 |     checksum=`cat $FILE | md5`
17 |   fi
18 |   if [ "$checksum" = "$CHECKSUM" ]; then
19 |     echo "Checksum is correct. No need to download."
20 |     exit 0
21 |   else
22 |     echo "Checksum is incorrect. Need to download again."
23 |   fi
24 | fi
25 | 
26 | echo "Downloading precomputed selective search boxes (0.5G)..."
27 | 
28 | wget $URL -O $FILE
29 | 
30 | echo "Unzipping..."
31 | 
32 | tar zxvf $FILE
33 | 
34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
35 | 


--------------------------------------------------------------------------------
/lib/datasets/factory.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Factory method for easily getting imdbs by name."""
 9 | import datasets.kakou
10 | import numpy as np
11 | 
12 | __sets = {}
13 | imageset = 'KakouTrain'
14 | devkit = '/home/bsl/KITTI-detection/data'
15 | 
16 | 
17 | def get_imdb(name):
18 |     """Get an imdb (image database) by name."""
19 |     __sets['KakouTrain'] = (lambda imageset = imageset, devkit = devkit: datasets.kakou(imageset,devkit))
20 |     __sets['KakouTest'] = (lambda imageset = 'KakouTest', devkit = devkit: datasets.kakou(imageset,devkit))
21 |     if not __sets.has_key(name):
22 |         raise KeyError('Unknown dataset: {}'.format(name))
23 |     return __sets[name]()
24 | 
25 | def list_imdbs():
26 |     """List all registered imdbs."""
27 |     return __sets.keys()
28 | 


--------------------------------------------------------------------------------
/experiments/scripts/TEST.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Usage:
 3 | # ./experiments/scripts/default_faster_rcnn_alt_opt.sh GPU NET [--set ...]
 4 | # Example:
 5 | # ./experiments/scripts/default_faster_rcnn_alt_opt.sh 0 ZF \
 6 | #   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400,500,600,700]"
 7 | 
 8 | set -x
 9 | set -e
10 | 
11 | export PYTHONUNBUFFERED="True"
12 | 
13 | GPU_ID=$1
14 | NET=$2
15 | NET_lc=${NET,,}
16 | 
17 | array=( $@ )
18 | len=${#array[@]}
19 | EXTRA_ARGS=${array[@]:2:$len}
20 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
21 | 
22 | LOG="experiments/logs/faster_rcnn_alt_opt_TEST_HUAIJIN_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
23 | exec &> >(tee -a "$LOG")
24 | echo Logging output to "$LOG"
25 | 
26 | 
27 | 
28 | 
29 | 
30 | NET_FINAL=output/faster_rcnn_alt_opt/KakouTrain/VGG16_faster_rcnn_final.caffemodel
31 | 
32 | time ./tools/test_net.py --gpu ${GPU_ID} \
33 |   --def models/${NET}/faster_rcnn_alt_opt/faster_rcnn_test.pt \
34 |   --net ${NET_FINAL} \
35 |   --imdb KakouTest \
36 |   --cfg experiments/cfgs/faster_rcnn_alt_opt.yml \
37 |   ${EXTRA_ARGS}
38 | 


--------------------------------------------------------------------------------
/lib/utils/timer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import time
 9 | 
10 | class Timer(object):
11 |     """A simple timer."""
12 |     def __init__(self):
13 |         self.total_time = 0.
14 |         self.calls = 0
15 |         self.start_time = 0.
16 |         self.diff = 0.
17 |         self.average_time = 0.
18 | 
19 |     def tic(self):
20 |         # using time.time instead of time.clock because time time.clock
21 |         # does not normalize for multithreading
22 |         self.start_time = time.time()
23 | 
24 |     def toc(self, average=True):
25 |         self.diff = time.time() - self.start_time
26 |         self.total_time += self.diff
27 |         self.calls += 1
28 |         self.average_time = self.total_time / self.calls
29 |         if average:
30 |             return self.average_time
31 |         else:
32 |             return self.diff
33 | 


--------------------------------------------------------------------------------
/experiments/scripts/fast_rcnn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Usage:
 3 | # ./experiments/scripts/default.sh GPU NET [options args to {train,test}_net.py]
 4 | # Example:
 5 | # ./experiments/scripts/default.sh 0 CaffeNet \
 6 | #   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]"
 7 | 
 8 | set -x
 9 | set -e
10 | 
11 | export PYTHONUNBUFFERED="True"
12 | 
13 | GPU_ID=$1
14 | NET=$2
15 | NET_lc=${NET,,}
16 | 
17 | array=( $@ )
18 | len=${#array[@]}
19 | EXTRA_ARGS=${array[@]:2:$len}
20 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
21 | 
22 | LOG="experiments/logs/default_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
23 | exec &> >(tee -a "$LOG")
24 | echo Logging output to "$LOG"
25 | 
26 | time ./tools/train_net.py --gpu ${GPU_ID} \
27 |   --solver models/${NET}/fast_rcnn/solver.prototxt \
28 |   --weights data/imagenet_models/${NET}.v2.caffemodel \
29 |   --imdb voc_2007_trainval \
30 |   ${EXTRA_ARGS}
31 | 
32 | set +x
33 | NET_FINAL=`grep -B 1 "done solving" ${LOG} | grep "Wrote snapshot" | awk '{print $4}'`
34 | set -x
35 | 
36 | time ./tools/test_net.py --gpu ${GPU_ID} \
37 |   --def models/${NET}/fast_rcnn/test.prototxt \
38 |   --net ${NET_FINAL} \
39 |   --imdb voc_2007_test \
40 |   ${EXTRA_ARGS}
41 | 


--------------------------------------------------------------------------------
/lib/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | 
10 | def py_cpu_nms(dets, thresh):
11 |     """Pure Python NMS baseline."""
12 |     x1 = dets[:, 0]
13 |     y1 = dets[:, 1]
14 |     x2 = dets[:, 2]
15 |     y2 = dets[:, 3]
16 |     scores = dets[:, 4]
17 | 
18 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 |     order = scores.argsort()[::-1]
20 | 
21 |     keep = []
22 |     while order.size > 0:
23 |         i = order[0]
24 |         keep.append(i)
25 |         xx1 = np.maximum(x1[i], x1[order[1:]])
26 |         yy1 = np.maximum(y1[i], y1[order[1:]])
27 |         xx2 = np.minimum(x2[i], x2[order[1:]])
28 |         yy2 = np.minimum(y2[i], y2[order[1:]])
29 | 
30 |         w = np.maximum(0.0, xx2 - xx1 + 1)
31 |         h = np.maximum(0.0, yy2 - yy1 + 1)
32 |         inter = w * h
33 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 | 
35 |         inds = np.where(ovr <= thresh)[0]
36 |         order = order[inds + 1]
37 | 
38 |     return keep
39 | 


--------------------------------------------------------------------------------
/lib/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Faster R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | cimport numpy as np
10 | 
11 | assert sizeof(int) == sizeof(np.int32_t)
12 | 
13 | cdef extern from "gpu_nms.hpp":
14 |     void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 | 
16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
17 |             np.int32_t device_id=0):
18 |     cdef int boxes_num = dets.shape[0]
19 |     cdef int boxes_dim = dets.shape[1]
20 |     cdef int num_out
21 |     cdef np.ndarray[np.int32_t, ndim=1] \
22 |         keep = np.zeros(boxes_num, dtype=np.int32)
23 |     cdef np.ndarray[np.float32_t, ndim=1] \
24 |         scores = dets[:, 4]
25 |     cdef np.ndarray[np.int_t, ndim=1] \
26 |         order = scores.argsort()[::-1]
27 |     cdef np.ndarray[np.float32_t, ndim=2] \
28 |         sorted_dets = dets[order, :]
29 |     _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 |     keep = keep[:num_out]
31 |     return list(order[keep])
32 | 


--------------------------------------------------------------------------------
/experiments/scripts/faster_rcnn_alt_opt.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Usage:
 3 | # ./experiments/scripts/default_faster_rcnn_alt_opt.sh GPU NET [--set ...]
 4 | # Example:
 5 | # ./experiments/scripts/default_faster_rcnn_alt_opt.sh 0 ZF \
 6 | #   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400,500,600,700]"
 7 | 
 8 | set -x
 9 | set -e
10 | 
11 | export PYTHONUNBUFFERED="True"
12 | 
13 | GPU_ID=$1
14 | NET=$2
15 | NET_lc=${NET,,}
16 | 
17 | array=( $@ )
18 | len=${#array[@]}
19 | EXTRA_ARGS=${array[@]:2:$len}
20 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
21 | 
22 | LOG="experiments/logs/faster_rcnn_alt_opt_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
23 | exec &> >(tee -a "$LOG")
24 | echo Logging output to "$LOG"
25 | 
26 | time ./tools/train_faster_rcnn_alt_opt.py --gpu ${GPU_ID} \
27 |   --net_name ${NET} \
28 |   --weights data/imagenet_models/${NET}.v2.caffemodel \
29 |   --imdb KakouTrain \
30 |   --cfg experiments/cfgs/faster_rcnn_alt_opt.yml \
31 |   ${EXTRA_ARGS}
32 | 
33 | set +x
34 | NET_FINAL=`grep "Final model:" ${LOG} | awk '{print $3}'`
35 | set -x
36 | 
37 | time ./tools/test_net.py --gpu ${GPU_ID} \
38 |   --def models/${NET}/faster_rcnn_alt_opt/faster_rcnn_test.pt \
39 |   --net ${NET_FINAL} \
40 |   --imdb KakouTest \
41 |   --cfg experiments/cfgs/faster_rcnn_alt_opt.yml \
42 |   ${EXTRA_ARGS}
43 | 


--------------------------------------------------------------------------------
/data/split_data.m:
--------------------------------------------------------------------------------
 1 | clear all;close all;clc;
 2 | image_list=importdata('TrainIndex_GT.txt','r');
 3 | rand('seed',2);
 4 | index=randperm(length(image_list));
 5 | fp_train=fopen('ImageList_Version_S_AddData_train.txt','w');
 6 | fp_val=fopen('ImageList_Version_S_AddData_val.txt','w');
 7 | fp_train_gt=fopen('ImageList_Version_S_GT_AddData_train.txt','w');
 8 | fp_val_gt=fopen('ImageList_Version_S_GT_AddData_val.txt','w');
 9 | trainsample=0.7;% 70% as training and 30% validation
10 | train_num=fix(trainsample*length(index));
11 | train_index=index(1:train_num);
12 | val_index=index(train_num+1:end);
13 | 
14 | fidin1=fopen('Train_image_list.txt','r');
15 | fidin2=fopen('TrainIndex_GT.txt','r');
16 | 
17 | ind=1;
18 | while ~feof(fidin1) 
19 |     tline=fgetl(fidin1);
20 |     if length(find(train_index==ind))==1
21 |         fprintf(fp_train,tline);
22 |         fprintf(fp_train,'\n');
23 |     else
24 |         fprintf(fp_val,tline);
25 |         fprintf(fp_val,'\n');
26 |     end
27 |     ind=ind+1;
28 | end
29 | 
30 | ind=1;
31 | while ~feof(fidin2) 
32 |     tline=fgetl(fidin2);
33 |     if length(find(train_index==ind))==1
34 |         fprintf(fp_train_gt,tline);
35 |         fprintf(fp_train_gt,'\n');
36 |     else
37 |         fprintf(fp_val_gt,tline);
38 |         fprintf(fp_val_gt,'\n');
39 |     end
40 |     ind=ind+1;
41 | end
42 | 


--------------------------------------------------------------------------------
/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | from .imdb import imdb
 9 | from . import factory
10 | from .kakou import kakou
11 | 
12 | import os.path as osp
13 | ROOT_DIR = osp.join(osp.dirname(__file__), '..', '..')
14 | 
15 | # We assume your matlab binary is in your path and called `matlab'.
16 | # If either is not true, just add it to your path and alias it as matlab, or
17 | # you could change this file.
18 | MATLAB = 'matlab'
19 | 
20 | # http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
21 | def _which(program):
22 |     import os
23 |     def is_exe(fpath):
24 |         return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
25 | 
26 |     fpath, fname = os.path.split(program)
27 |     if fpath:
28 |         if is_exe(program):
29 |             return program
30 |     else:
31 |         for path in os.environ["PATH"].split(os.pathsep):
32 |             path = path.strip('"')
33 |             exe_file = os.path.join(path, program)
34 |             if is_exe(exe_file):
35 |                 return exe_file
36 | 
37 |     return None
38 | 
39 | if _which(MATLAB) is None:
40 |     msg = ("MATLAB command '{}' not found. "
41 |            "Please add '{}' to your PATH.").format(MATLAB, MATLAB)
42 |     raise EnvironmentError(msg)
43 | 


--------------------------------------------------------------------------------
/tools/fcolor.py:
--------------------------------------------------------------------------------
 1 | import cv
 2 | 
 3 | def Color(image):
 4 |     w = image.width
 5 |     h = image.height
 6 |     size = (w,h)
 7 |     iColor = cv.CreateImage(size,8,3)
 8 |     for i in range(h):
 9 |         for j in range(w):
10 |             r = GetR(image[i,j])
11 |             g = GetG(image[i,j])
12 |             b = GetB(image[i,j])
13 |             iColor[i,j] = (r,g,b)
14 |     return iColor
15 |     
16 | def GetR(gray):
17 |     if gray < 127:
18 |         return 0
19 |     elif gray > 191:
20 |         return 255
21 |     else:
22 |         return (gray-127)*4-1
23 | 
24 |         
25 | def GetG(gray):
26 |     if gray < 64:
27 |         return 4*gray
28 |     elif gray > 191:
29 |         return 256-(gray-191)*4
30 |     else:
31 |         return 255
32 |         
33 | def GetB(gray):
34 |     if gray < 64:
35 |         return 255
36 |     elif gray > 127:
37 |         return 0
38 |     else:
39 |         return 256-(gray-63)*4
40 | 
41 | FCArray = [(0,51,0),(0,51,102),(51,51,102),(51,102,51),\
42 |             (51,51,153),(102,51,102),(153,153,0),(51,102,153),\
43 |             (153,102,51),(153,204,102),(204,153,102),(102,204,102),\
44 |             (153,204,153),(204,204,102),(204,255,204),(255,255,204)]        
45 | def FColor(image,array=FCArray):
46 |     w = image.width
47 |     h = image.height
48 |     size = (w,h)
49 |     iColor = cv.CreateImage(size,8,3)
50 |     for i in range(h):
51 |         for j in range(w):
52 |             iColor[i,j] = array[int(image[i,j]/16)]
53 |     return iColor
54 |         
55 | 
56 | 
57 | 
58 | 
59 | 


--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
 1 | This directory holds (*after you download them*):
 2 | - Caffe models pre-trained on ImageNet
 3 | - Faster R-CNN models
 4 | - Symlinks to datasets
 5 | 
 6 | To download Caffe models (ZF, VGG16) pre-trained on ImageNet, run:
 7 | 
 8 | ```
 9 | ./data/scripts/fetch_imagenet_models.sh
10 | ```
11 | 
12 | This script will populate `data/imagenet_models`.
13 | 
14 | To download Faster R-CNN models trained on VOC 2007, run:
15 | 
16 | ```
17 | ./data/scripts/fetch_faster_rcnn_models.sh
18 | ```
19 | 
20 | This script will populate `data/faster_rcnn_models`.
21 | 
22 | In order to train and test with PASCAL VOC, you will need to establish symlinks.
23 | From the `data` directory (`cd data`):
24 | 
25 | ```
26 | # For VOC 2007
27 | ln -s /your/path/to/VOC2007/VOCdevkit VOCdevkit2007
28 | 
29 | # For VOC 2012
30 | ln -s /your/path/to/VOC2012/VOCdevkit VOCdevkit2012
31 | ```
32 | 
33 | Since you'll likely be experimenting with multiple installs of Fast/er R-CNN in
34 | parallel, you'll probably want to keep all of this data in a shared place and
35 | use symlinks. On my system I create the following symlinks inside `data`:
36 | 
37 | ```
38 | # data/cache holds various outputs created by the datasets package
39 | ln -s /data/fast_rcnn_shared/cache
40 | 
41 | # move the imagenet_models to shared location and symlink to them
42 | ln -s /data/fast_rcnn_shared/imagenet_models
43 | 
44 | # move the selective search data to a shared location and symlink to them
45 | # (only applicable to Fast R-CNN training)
46 | ln -s /data/fast_rcnn_shared/selective_search_data
47 | 
48 | ln -s /data/VOC2007/VOCdevkit VOCdevkit2007
49 | ln -s /data/VOC2012/VOCdevkit VOCdevkit2012
50 | ```
51 | 


--------------------------------------------------------------------------------
/loss_accuracy.m:
--------------------------------------------------------------------------------
 1 | clc;
 2 | clear;
 3 | % log file of caffe model
 4 | logName = 'Kitti.log';
 5 | fid = fopen(logName, 'r');
 6 | fid_accuracy = fopen('output_accuracy.txt', 'w');
 7 | fid_loss = fopen('output_loss.txt', 'w');
 8 | tline = fgetl(fid); 
 9 | while ischar(tline) 
10 |     % First find the accuracy line
11 |     k = strfind(tline, 'Test net output');
12 |     if (k) 
13 |         k = strfind(tline, 'accuracy');
14 |         if (k)
15 |             % If the string contain test and accuracy at the same time
16 |             % The bias from 'accuracy' to the float number
17 |             indexStart = k + 11; indexEnd = size(tline);
18 |             str = tline(indexStart : indexEnd(2));
19 |         end
20 |         % Get the number of index
21 |         k = strfind(tline, '#');
22 |         if (k) indexStart = k + 1;
23 |             indexEnd = strfind(tline, ':');
24 |             str2 = tline(indexStart : indexEnd - 1);
25 |         end
26 |         % Concatenation of two string
27 |         res_str = strcat(str2, '/', str);
28 |         fprintf(fid_accuracy, '%s\r\n', res_str);
29 |     end
30 |     % Then find the loss line
31 |     k1 = strfind(tline, 'Iteration');
32 |     if (k1) k2 = strfind(tline, 'loss');
33 |         if (k2) indexStart = k2 + 7;
34 |             indexEnd = size(tline);
35 |             str1 = tline(indexStart:indexEnd(2));
36 |             indexStart = k1 + 10;
37 |             indexEnd = strfind(tline, ',') - 1;
38 |             str2 = tline(indexStart:indexEnd);
39 |             res_str1 = strcat(str2, '/ ', str1);
40 |             fprintf(fid_loss, '%s\r\n', res_str1);
41 |         end
42 |     end
43 |     tline = fgetl(fid);
44 | end
45 | fclose(fid); fclose(fid_accuracy);
46 | 


--------------------------------------------------------------------------------
/lib/utils/blob.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Blob helper functions."""
 9 | 
10 | import numpy as np
11 | import cv2
12 | 
13 | def im_list_to_blob(ims):
14 |     """Convert a list of images into a network input.
15 | 
16 |     Assumes images are already prepared (means subtracted, BGR order, ...).
17 |     """
18 |     max_shape = np.array([im.shape for im in ims]).max(axis=0)
19 |     num_images = len(ims)
20 |     blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
21 |                     dtype=np.float32)
22 |     for i in xrange(num_images):
23 |         im = ims[i]
24 |         blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
25 |     # Move channels (axis 3) to axis 1
26 |     # Axis order will become: (batch elem, channel, height, width)
27 |     channel_swap = (0, 3, 1, 2)
28 |     blob = blob.transpose(channel_swap)
29 |     return blob
30 | 
31 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
32 |     """Mean subtract and scale an image for use in a blob."""
33 |     im = im.astype(np.float32, copy=False)
34 |     im -= pixel_means
35 |     im_shape = im.shape
36 |     im_size_min = np.min(im_shape[0:2])
37 |     im_size_max = np.max(im_shape[0:2])
38 |     im_scale = float(target_size) / float(im_size_min)
39 |     # Prevent the biggest axis from being more than MAX_SIZE
40 |     if np.round(im_scale * im_size_max) > max_size:
41 |         im_scale = float(max_size) / float(im_size_max)
42 |     im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
43 |                     interpolation=cv2.INTER_LINEAR)
44 | 
45 |     return im, im_scale
46 | 


--------------------------------------------------------------------------------
/lib/utils/bbox.pyx:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Sergey Karayev
 6 | # --------------------------------------------------------
 7 | 
 8 | cimport cython
 9 | import numpy as np
10 | cimport numpy as np
11 | 
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 | 
15 | def bbox_overlaps(
16 |         np.ndarray[DTYPE_t, ndim=2] boxes,
17 |         np.ndarray[DTYPE_t, ndim=2] query_boxes):
18 |     """
19 |     Parameters
20 |     ----------
21 |     boxes: (N, 4) ndarray of float
22 |     query_boxes: (K, 4) ndarray of float
23 |     Returns
24 |     -------
25 |     overlaps: (N, K) ndarray of overlap between boxes and query_boxes
26 |     """
27 |     cdef unsigned int N = boxes.shape[0]
28 |     cdef unsigned int K = query_boxes.shape[0]
29 |     cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
30 |     cdef DTYPE_t iw, ih, box_area
31 |     cdef DTYPE_t ua
32 |     cdef unsigned int k, n
33 |     for k in range(K):
34 |         box_area = (
35 |             (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
36 |             (query_boxes[k, 3] - query_boxes[k, 1] + 1)
37 |         )
38 |         for n in range(N):
39 |             iw = (
40 |                 min(boxes[n, 2], query_boxes[k, 2]) -
41 |                 max(boxes[n, 0], query_boxes[k, 0]) + 1
42 |             )
43 |             if iw > 0:
44 |                 ih = (
45 |                     min(boxes[n, 3], query_boxes[k, 3]) -
46 |                     max(boxes[n, 1], query_boxes[k, 1]) + 1
47 |                 )
48 |                 if ih > 0:
49 |                     ua = float(
50 |                         (boxes[n, 2] - boxes[n, 0] + 1) *
51 |                         (boxes[n, 3] - boxes[n, 1] + 1) +
52 |                         box_area - iw * ih
53 |                     )
54 |                     overlaps[n, k] = iw * ih / ua
55 |     return overlaps
56 | 


--------------------------------------------------------------------------------
/lib/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | cimport numpy as np
10 | 
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 |     return a if a >= b else b
13 | 
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 |     return a if a <= b else b
16 | 
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 |     cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 |     cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 |     cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 |     cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 |     cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 | 
24 |     cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 |     cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 | 
27 |     cdef int ndets = dets.shape[0]
28 |     cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 |             np.zeros((ndets), dtype=np.int)
30 | 
31 |     # nominal indices
32 |     cdef int _i, _j
33 |     # sorted indices
34 |     cdef int i, j
35 |     # temp variables for box i's (the box currently under consideration)
36 |     cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 |     # variables for computing overlap with box j (lower scoring box)
38 |     cdef np.float32_t xx1, yy1, xx2, yy2
39 |     cdef np.float32_t w, h
40 |     cdef np.float32_t inter, ovr
41 | 
42 |     keep = []
43 |     for _i in range(ndets):
44 |         i = order[_i]
45 |         if suppressed[i] == 1:
46 |             continue
47 |         keep.append(i)
48 |         ix1 = x1[i]
49 |         iy1 = y1[i]
50 |         ix2 = x2[i]
51 |         iy2 = y2[i]
52 |         iarea = areas[i]
53 |         for _j in range(_i + 1, ndets):
54 |             j = order[_j]
55 |             if suppressed[j] == 1:
56 |                 continue
57 |             xx1 = max(ix1, x1[j])
58 |             yy1 = max(iy1, y1[j])
59 |             xx2 = min(ix2, x2[j])
60 |             yy2 = min(iy2, y2[j])
61 |             w = max(0.0, xx2 - xx1 + 1)
62 |             h = max(0.0, yy2 - yy1 + 1)
63 |             inter = w * h
64 |             ovr = inter / (iarea + areas[j] - inter)
65 |             if ovr >= thresh:
66 |                 suppressed[j] = 1
67 | 
68 |     return keep
69 | 


--------------------------------------------------------------------------------
/tools/eval_recall.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import _init_paths
 4 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list
 5 | from datasets.factory import get_imdb
 6 | import argparse
 7 | import time, os, sys
 8 | import numpy as np
 9 | 
10 | def parse_args():
11 |     """
12 |     Parse input arguments
13 |     """
14 |     parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
15 |     parser.add_argument('--imdb', dest='imdb_name',
16 |                         help='dataset to test',
17 |                         default='voc_2007_test', type=str)
18 |     parser.add_argument('--method', dest='method',
19 |                         help='proposal method',
20 |                         default='selective_search', type=str)
21 |     parser.add_argument('--rpn-file', dest='rpn_file',
22 |                         default=None, type=str)
23 | 
24 |     if len(sys.argv) == 1:
25 |         parser.print_help()
26 |         sys.exit(1)
27 | 
28 |     args = parser.parse_args()
29 |     return args
30 | 
31 | if __name__ == '__main__':
32 |     args = parse_args()
33 | 
34 |     print('Called with args:')
35 |     print(args)
36 | 
37 |     imdb = get_imdb(args.imdb_name)
38 |     imdb.set_proposal_method(args.method)
39 |     if args.rpn_file is not None:
40 |         imdb.config['rpn_file'] = args.rpn_file
41 | 
42 |     candidate_boxes = None
43 |     if 0:
44 |         import scipy.io as sio
45 |         filename = 'debug/stage1_rpn_voc_2007_test.mat'
46 |         raw_data = sio.loadmat(filename)['aboxes'].ravel()
47 |         candidate_boxes = raw_data
48 | 
49 |     ar, gt_overlaps, recalls, thresholds = \
50 |         imdb.evaluate_recall(candidate_boxes=candidate_boxes)
51 |     print 'Method: {}'.format(args.method)
52 |     print 'AverageRec: {:.3f}'.format(ar)
53 | 
54 |     def recall_at(t):
55 |         ind = np.where(thresholds > t - 1e-5)[0][0]
56 |         assert np.isclose(thresholds[ind], t)
57 |         return recalls[ind]
58 | 
59 |     print 'Recall@0.5: {:.3f}'.format(recall_at(0.5))
60 |     print 'Recall@0.6: {:.3f}'.format(recall_at(0.6))
61 |     print 'Recall@0.7: {:.3f}'.format(recall_at(0.7))
62 |     print 'Recall@0.8: {:.3f}'.format(recall_at(0.8))
63 |     print 'Recall@0.9: {:.3f}'.format(recall_at(0.9))
64 |     # print again for easy spreadsheet copying
65 |     print '{:.3f}'.format(ar)
66 |     print '{:.3f}'.format(recall_at(0.5))
67 |     print '{:.3f}'.format(recall_at(0.6))
68 |     print '{:.3f}'.format(recall_at(0.7))
69 |     print '{:.3f}'.format(recall_at(0.8))
70 |     print '{:.3f}'.format(recall_at(0.9))
71 | 


--------------------------------------------------------------------------------
/lib/fast_rcnn/bbox_transform.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | 
10 | def bbox_transform(ex_rois, gt_rois):
11 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
12 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
13 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
14 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
15 | 
16 |     gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
17 |     gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
18 |     gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
19 |     gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
20 | 
21 |     targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
22 |     targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
23 |     targets_dw = np.log(gt_widths / ex_widths)
24 |     targets_dh = np.log(gt_heights / ex_heights)
25 | 
26 |     targets = np.vstack(
27 |         (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
28 |     return targets
29 | 
30 | def bbox_transform_inv(boxes, deltas):
31 |     if boxes.shape[0] == 0:
32 |         return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
33 | 
34 |     boxes = boxes.astype(deltas.dtype, copy=False)
35 | 
36 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
37 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
38 |     ctr_x = boxes[:, 0] + 0.5 * widths
39 |     ctr_y = boxes[:, 1] + 0.5 * heights
40 | 
41 |     dx = deltas[:, 0::4]
42 |     dy = deltas[:, 1::4]
43 |     dw = deltas[:, 2::4]
44 |     dh = deltas[:, 3::4]
45 | 
46 |     pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
47 |     pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
48 |     pred_w = np.exp(dw) * widths[:, np.newaxis]
49 |     pred_h = np.exp(dh) * heights[:, np.newaxis]
50 | 
51 |     pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
52 |     # x1
53 |     pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
54 |     # y1
55 |     pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
56 |     # x2
57 |     pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
58 |     # y2
59 |     pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
60 | 
61 |     return pred_boxes
62 | 
63 | def clip_boxes(boxes, im_shape):
64 |     """
65 |     Clip boxes to image boundaries.
66 |     """
67 | 
68 |     # x1 >= 0
69 |     boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
70 |     # y1 >= 0
71 |     boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
72 |     # x2 < im_shape[1]
73 |     boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
74 |     # y2 < im_shape[0]
75 |     boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
76 |     return boxes
77 | 


--------------------------------------------------------------------------------
/tools/reval.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # --------------------------------------------------------
 4 | # Fast R-CNN
 5 | # Copyright (c) 2015 Microsoft
 6 | # Licensed under The MIT License [see LICENSE for details]
 7 | # Written by Ross Girshick
 8 | # --------------------------------------------------------
 9 | 
10 | """Reval = re-eval. Re-evaluate saved detections."""
11 | 
12 | import _init_paths
13 | from fast_rcnn.test import apply_nms
14 | from fast_rcnn.config import cfg
15 | from datasets.factory import get_imdb
16 | import cPickle
17 | import os, sys, argparse
18 | import numpy as np
19 | 
20 | def parse_args():
21 |     """
22 |     Parse input arguments
23 |     """
24 |     parser = argparse.ArgumentParser(description='Re-evaluate results')
25 |     parser.add_argument('output_dir', nargs=1, help='results directory',
26 |                         type=str)
27 |     parser.add_argument('--rerun', dest='rerun',
28 |                         help=('re-run evaluation code '
29 |                               '(otherwise: results are loaded from file)'),
30 |                         action='store_true')
31 |     parser.add_argument('--imdb', dest='imdb_name',
32 |                         help='dataset to re-evaluate',
33 |                         default='voc_2007_test', type=str)
34 |     parser.add_argument('--comp', dest='comp_mode', help='competition mode',
35 |                         action='store_true')
36 | 
37 |     if len(sys.argv) == 1:
38 |         parser.print_help()
39 |         sys.exit(1)
40 | 
41 |     args = parser.parse_args()
42 |     return args
43 | 
44 | 
45 | def from_mats(imdb_name, output_dir):
46 |     import scipy.io as sio
47 | 
48 |     imdb = get_imdb(imdb_name)
49 | 
50 |     aps = []
51 |     for i, cls in enumerate(imdb.classes[1:]):
52 |         mat = sio.loadmat(os.path.join(output_dir, cls + '_pr.mat'))
53 |         ap = mat['ap'][0, 0] * 100
54 |         apAuC = mat['ap_auc'][0, 0] * 100
55 |         print '!!! {} : {:.1f} {:.1f}'.format(cls, ap, apAuC)
56 |         aps.append(ap)
57 | 
58 |     print '~~~~~~~~~~~~~~~~~~~'
59 |     print 'Results (from mat files):'
60 |     for ap in aps:
61 |         print '{:.1f}'.format(ap)
62 |     print '{:.1f}'.format(np.array(aps).mean())
63 |     print '~~~~~~~~~~~~~~~~~~~'
64 | 
65 | 
66 | def from_dets(imdb_name, output_dir, comp_mode):
67 |     imdb = get_imdb(imdb_name)
68 |     imdb.competition_mode(comp_mode)
69 |     with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f:
70 |         dets = cPickle.load(f)
71 | 
72 |     print 'Applying NMS to all detections'
73 |     nms_dets = apply_nms(dets, cfg.TEST.NMS)
74 | 
75 |     print 'Evaluating detections'
76 |     imdb.evaluate_detections(nms_dets, output_dir)
77 | 
78 | if __name__ == '__main__':
79 |     args = parse_args()
80 | 
81 |     output_dir = os.path.abspath(args.output_dir[0])
82 |     imdb_name = args.imdb_name
83 | 
84 |     if args.comp_mode and not args.rerun:
85 |         raise ValueError('--rerun must be used with --comp')
86 | 
87 |     if args.rerun:
88 |         from_dets(imdb_name, output_dir, args.comp_mode)
89 |     else:
90 |         from_mats(imdb_name, output_dir)
91 | 


--------------------------------------------------------------------------------
/tools/test_net.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # --------------------------------------------------------
 4 | # Fast R-CNN
 5 | # Copyright (c) 2015 Microsoft
 6 | # Licensed under The MIT License [see LICENSE for details]
 7 | # Written by Ross Girshick
 8 | # --------------------------------------------------------
 9 | 
10 | """Test a Fast R-CNN network on an image database."""
11 | 
12 | import _init_paths
13 | from fast_rcnn.test import test_net
14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list
15 | from datasets.factory import get_imdb
16 | import caffe
17 | import argparse
18 | import pprint
19 | import time, os, sys
20 | 
21 | def parse_args():
22 |     """
23 |     Parse input arguments
24 |     """
25 |     parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
26 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use',
27 |                         default=0, type=int)
28 |     parser.add_argument('--def', dest='prototxt',
29 |                         help='prototxt file defining the network',
30 |                         default=None, type=str)
31 |     parser.add_argument('--net', dest='caffemodel',
32 |                         help='model to test',
33 |                         default=None, type=str)
34 |     parser.add_argument('--cfg', dest='cfg_file',
35 |                         help='optional config file', default=None, type=str)
36 |     parser.add_argument('--wait', dest='wait',
37 |                         help='wait until net file exists',
38 |                         default=True, type=bool)
39 |     parser.add_argument('--imdb', dest='imdb_name',
40 |                         help='dataset to test',
41 |                         default='voc_2007_test', type=str)
42 |     parser.add_argument('--comp', dest='comp_mode', help='competition mode',
43 |                         action='store_true')
44 |     parser.add_argument('--set', dest='set_cfgs',
45 |                         help='set config keys', default=None,
46 |                         nargs=argparse.REMAINDER)
47 | 
48 |     if len(sys.argv) == 1:
49 |         parser.print_help()
50 |         sys.exit(1)
51 | 
52 |     args = parser.parse_args()
53 |     return args
54 | 
55 | if __name__ == '__main__':
56 |     args = parse_args()
57 | 
58 |     print('Called with args:')
59 |     print(args)
60 | 
61 |     if args.cfg_file is not None:
62 |         cfg_from_file(args.cfg_file)
63 |     if args.set_cfgs is not None:
64 |         cfg_from_list(args.set_cfgs)
65 | 
66 |     cfg.GPU_ID = args.gpu_id
67 | 
68 |     print('Using config:')
69 |     pprint.pprint(cfg)
70 | 
71 |     while not os.path.exists(args.caffemodel) and args.wait:
72 |         print('Waiting for {} to exist...'.format(args.caffemodel))
73 |         time.sleep(10)
74 | 
75 |     caffe.set_mode_gpu()
76 |     caffe.set_device(args.gpu_id)
77 |     net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
78 |     net.name = os.path.splitext(os.path.basename(args.caffemodel))[0]
79 | 
80 |     imdb = get_imdb(args.imdb_name)
81 |     imdb.competition_mode(args.comp_mode)
82 |     if not cfg.TEST.HAS_RPN:
83 |         imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)
84 | 
85 |     test_net(net, imdb)
86 | 


--------------------------------------------------------------------------------
/tools/test_net_debug.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # --------------------------------------------------------
 4 | # Fast R-CNN
 5 | # Copyright (c) 2015 Microsoft
 6 | # Licensed under The MIT License [see LICENSE for details]
 7 | # Written by Ross Girshick
 8 | # --------------------------------------------------------
 9 | 
10 | """Test a Fast R-CNN network on an image database."""
11 | 
12 | import _init_paths
13 | from fast_rcnn.test import test_net
14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list
15 | from datasets.factory import get_imdb
16 | import caffe
17 | import argparse
18 | import pprint
19 | import time, os, sys
20 | 
21 | def parse_args():
22 |     """
23 |     Parse input arguments
24 |     """
25 |     parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
26 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use',
27 |                         default=0, type=int)
28 |     parser.add_argument('--def', dest='prototxt',
29 |                         help='prototxt file defining the network',
30 |                         default='/home/bsl/KITTI-detection/models/VGG16/faster_rcnn_alt_opt/faster_rcnn_test.pt', type=str)
31 |     parser.add_argument('--net', dest='caffemodel',
32 |                         help='model to test',
33 |                         default='/home/bsl/KITTI-detection/data/faster_rcnn_models/VGG16_faster_rcnn_final.caffemodel', type=str)
34 |     parser.add_argument('--cfg', dest='cfg_file',
35 |                         help='optional config file', default='/home/bsl/KITTI-detection/experiments/cfgs/faster_rcnn_alt_opt.yml', type=str)
36 |     parser.add_argument('--wait', dest='wait',
37 |                         help='wait until net file exists',
38 |                         default=True, type=bool)
39 |     parser.add_argument('--imdb', dest='imdb_name',
40 |                         help='dataset to test',
41 |                         default='KakouTest', type=str)
42 |     parser.add_argument('--comp', dest='comp_mode', help='competition mode',
43 |                         action='store_true')
44 |     parser.add_argument('--set', dest='set_cfgs',
45 |                         help='set config keys', default=None,
46 |                         nargs=argparse.REMAINDER)
47 | 
48 |     #if len(sys.argv) == 1:
49 |     #    parser.print_help()
50 |     #    sys.exit(1)
51 | 
52 |     args = parser.parse_args()
53 |     return args
54 | 
55 | if __name__ == '__main__':
56 |     args = parse_args()
57 | 
58 |     print('Called with args:')
59 |     print(args)
60 | 
61 |     if args.cfg_file is not None:
62 |         cfg_from_file(args.cfg_file)
63 |     if args.set_cfgs is not None:
64 |         cfg_from_list(args.set_cfgs)
65 | 
66 |     cfg.GPU_ID = args.gpu_id
67 | 
68 |     print('Using config:')
69 |     pprint.pprint(cfg)
70 | 
71 |     while not os.path.exists(args.caffemodel) and args.wait:
72 |         print('Waiting for {} to exist...'.format(args.caffemodel))
73 |         time.sleep(10)
74 | 
75 |     caffe.set_mode_gpu()
76 |     caffe.set_device(args.gpu_id)
77 |     net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
78 |     net.name = os.path.splitext(os.path.basename(args.caffemodel))[0]
79 | 
80 |     imdb = get_imdb(args.imdb_name)
81 |     imdb.competition_mode(args.comp_mode)
82 |     if not cfg.TEST.HAS_RPN:
83 |         imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)
84 | 
85 |     test_net(net, imdb)
86 | 


--------------------------------------------------------------------------------
/tools/rpn_generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # --------------------------------------------------------
 4 | # Fast/er/ R-CNN
 5 | # Copyright (c) 2015 Microsoft
 6 | # Licensed under The MIT License [see LICENSE for details]
 7 | # Written by Ross Girshick
 8 | # --------------------------------------------------------
 9 | 
10 | """Generate RPN proposals."""
11 | 
12 | import _init_paths
13 | import numpy as np
14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
15 | from datasets.factory import get_imdb
16 | from rpn.generate import imdb_proposals
17 | import cPickle
18 | import caffe
19 | import argparse
20 | import pprint
21 | import time, os, sys
22 | 
23 | def parse_args():
24 |     """
25 |     Parse input arguments
26 |     """
27 |     parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
28 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use',
29 |                         default=0, type=int)
30 |     parser.add_argument('--def', dest='prototxt',
31 |                         help='prototxt file defining the network',
32 |                         default=None, type=str)
33 |     parser.add_argument('--net', dest='caffemodel',
34 |                         help='model to test',
35 |                         default=None, type=str)
36 |     parser.add_argument('--cfg', dest='cfg_file',
37 |                         help='optional config file', default=None, type=str)
38 |     parser.add_argument('--wait', dest='wait',
39 |                         help='wait until net file exists',
40 |                         default=True, type=bool)
41 |     parser.add_argument('--imdb', dest='imdb_name',
42 |                         help='dataset to test',
43 |                         default='voc_2007_test', type=str)
44 |     parser.add_argument('--set', dest='set_cfgs',
45 |                         help='set config keys', default=None,
46 |                         nargs=argparse.REMAINDER)
47 | 
48 |     if len(sys.argv) == 1:
49 |         parser.print_help()
50 |         sys.exit(1)
51 | 
52 |     args = parser.parse_args()
53 |     return args
54 | 
55 | if __name__ == '__main__':
56 |     args = parse_args()
57 | 
58 |     print('Called with args:')
59 |     print(args)
60 | 
61 |     if args.cfg_file is not None:
62 |         cfg_from_file(args.cfg_file)
63 |     if args.set_cfgs is not None:
64 |         cfg_from_list(args.set_cfgs)
65 | 
66 |     cfg.GPU_ID = args.gpu_id
67 | 
68 |     # RPN test settings
69 |     cfg.TEST.RPN_PRE_NMS_TOP_N = -1
70 |     cfg.TEST.RPN_POST_NMS_TOP_N = 2000
71 | 
72 |     print('Using config:')
73 |     pprint.pprint(cfg)
74 | 
75 |     while not os.path.exists(args.caffemodel) and args.wait:
76 |         print('Waiting for {} to exist...'.format(args.caffemodel))
77 |         time.sleep(10)
78 | 
79 |     caffe.set_mode_gpu()
80 |     caffe.set_device(args.gpu_id)
81 |     net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
82 |     net.name = os.path.splitext(os.path.basename(args.caffemodel))[0]
83 | 
84 |     imdb = get_imdb(args.imdb_name)
85 |     imdb_boxes = imdb_proposals(net, imdb)
86 | 
87 |     # output_dir = os.path.dirname(args.caffemodel)
88 |     output_dir = get_output_dir(imdb, net)
89 |     if not os.path.exists(output_dir):
90 |         os.makedirs(output_dir)
91 | 
92 |     rpn_file = os.path.join(output_dir, net.name + '_rpn_proposals.pkl')
93 |     with open(rpn_file, 'wb') as f:
94 |         cPickle.dump(imdb_boxes, f, cPickle.HIGHEST_PROTOCOL)
95 |     print 'Wrote RPN proposals to {}'.format(rpn_file)
96 | 


--------------------------------------------------------------------------------
/tools/visulization.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | # Make sure that caffe is on the python path:
 4 | caffe_root = '../'  # this file is expected to be in {caffe_root}/examples
 5 | import sys
 6 | sys.path.insert(0, caffe_root + 'python')
 7 | import caffe
 8 | plt.rcParams['figure.figsize'] = (10, 10)
 9 | plt.rcParams['image.interpolation'] = 'nearest'
10 | plt.rcParams['image.cmap'] = 'gray'
11 | caffe.set_mode_gpu()
12 | net = caffe.Net(caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt',
13 |                 caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel',
14 |                 caffe.TEST)
15 | # input preprocessing: 'data' is the name of the input blob == net.inputs[0]
16 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
17 | transformer.set_transpose('data', (2,0,1))
18 | #transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')) # mean pixel
19 | transformer.set_mean('data', np.load(caffe_root + 'data/ilsvrc12/imagenet_mean.npy').mean(0).mean(1).mean(1)) # mean pixel
20 | transformer.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]
21 | transformer.set_channel_swap('data', (2,1,0))  # the reference model has channels in BGR order instead of RGB
22 | net.blobs['data'].reshape(50,3,227,227)
23 | net.blobs['data'].data[...] = transformer.preprocess('data', caffe.io.load_image(caffe_root + 'examples/images/cat.jpg'))
24 | out = net.forward()
25 | print("Predicted class is #{}.".format(out['prob'][0].argmax()))
26 | plt.imshow(transformer.deprocess('data', net.blobs['data'].data[0]))
27 | plt.show()
28 | #[(k, v.data.shape) for k, v in net.blobs.items()]
29 | #[(k, v[0].data.shape) for k, v in net.params.items()]
30 | def vis_square(data, padsize=1, padval=0):
31 |     data -= data.min()
32 |     data /= data.max()
33 |     
34 |     # force the number of filters to be square
35 |     n = int(np.ceil(np.sqrt(data.shape[0])))
36 |     padding = ((0, n ** 2 - data.shape[0]), (0, padsize), (0, padsize)) + ((0, 0),) * (data.ndim - 3)
37 |     data = np.pad(data, padding, mode='constant', constant_values=(padval, padval))
38 |     
39 |     # tile the filters into an image
40 |     data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
41 |     data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
42 |     
43 |     plt.imshow(data)
44 |     plt.show()
45 | # 96 filters
46 | filters = net.params['conv1'][0].data
47 | vis_square(filters.transpose(0, 2, 3, 1))
48 | 
49 | feat = net.blobs['conv1'].data[0, :9]
50 | vis_square(feat, padval=1)
51 | 
52 | filters = net.params['conv2'][0].data
53 | vis_square(filters[:48].reshape(48**2, 5, 5))
54 | 
55 | feat = net.blobs['conv2'].data[0, :36]
56 | vis_square(feat, padval=1)
57 | 
58 | feat = net.blobs['conv3'].data[0]
59 | vis_square(feat, padval=0.5)
60 | 
61 | feat = net.blobs['conv4'].data[0]
62 | vis_square(feat, padval=0.5)
63 | 
64 | feat = net.blobs['conv5'].data[0]
65 | vis_square(feat, padval=0.5)
66 | 
67 | feat = net.blobs['pool5'].data[0]
68 | vis_square(feat, padval=1)
69 | 
70 | feat = net.blobs['fc6'].data[0]
71 | plt.subplot(2, 1, 1)
72 | plt.plot(feat.flat)
73 | plt.subplot(2, 1, 2)
74 | _ = plt.hist(feat.flat[feat.flat > 0], bins=100)
75 | 
76 | feat = net.blobs['fc7'].data[0]
77 | plt.subplot(2, 1, 1)
78 | plt.plot(feat.flat)
79 | plt.subplot(2, 1, 2)
80 | _ = plt.hist(feat.flat[feat.flat > 0], bins=100)
81 | 
82 | feat = net.blobs['prob'].data[0]
83 | plt.plot(feat.flat)
84 | 
85 | 
86 | 


--------------------------------------------------------------------------------
/lib/rpn/generate_anchors.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Faster R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick and Sean Bell
  6 | # --------------------------------------------------------
  7 | 
  8 | import numpy as np
  9 | 
 10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
 11 | #
 12 | #    >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
 13 | #    >> anchors
 14 | #
 15 | #    anchors =
 16 | #
 17 | #       -83   -39   100    56
 18 | #      -175   -87   192   104
 19 | #      -359  -183   376   200
 20 | #       -55   -55    72    72
 21 | #      -119  -119   136   136
 22 | #      -247  -247   264   264
 23 | #       -35   -79    52    96
 24 | #       -79  -167    96   184
 25 | #      -167  -343   184   360
 26 | 
 27 | #array([[ -83.,  -39.,  100.,   56.],
 28 | #       [-175.,  -87.,  192.,  104.],
 29 | #       [-359., -183.,  376.,  200.],
 30 | #       [ -55.,  -55.,   72.,   72.],
 31 | #       [-119., -119.,  136.,  136.],
 32 | #       [-247., -247.,  264.,  264.],
 33 | #       [ -35.,  -79.,   52.,   96.],
 34 | #       [ -79., -167.,   96.,  184.],
 35 | #       [-167., -343.,  184.,  360.]])
 36 | 
 37 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
 38 |     scales=2**np.arange(3, 6)):
 39 | 
 40 | 
 41 |     """
 42 |     Generate anchor (reference) windows by enumerating aspect ratios X
 43 |     scales wrt a reference (0, 0, 15, 15) window.  
 44 |     """
 45 | 
 46 |     base_anchor = np.array([1, 1, base_size, base_size]) - 1
 47 |     ratio_anchors = _ratio_enum(base_anchor, ratios)
 48 |     anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
 49 |                          for i in xrange(ratio_anchors.shape[0])])
 50 |     return anchors
 51 | 
 52 | def _whctrs(anchor):
 53 |     """
 54 |     Return width, height, x center, and y center for an anchor (window).
 55 |     """
 56 | 
 57 |     w = anchor[2] - anchor[0] + 1
 58 |     h = anchor[3] - anchor[1] + 1
 59 |     x_ctr = anchor[0] + 0.5 * (w - 1)
 60 |     y_ctr = anchor[1] + 0.5 * (h - 1)
 61 |     return w, h, x_ctr, y_ctr
 62 | 
 63 | def _mkanchors(ws, hs, x_ctr, y_ctr):
 64 |     """
 65 |     Given a vector of widths (ws) and heights (hs) around a center
 66 |     (x_ctr, y_ctr), output a set of anchors (windows).
 67 |     """
 68 | 
 69 |     ws = ws[:, np.newaxis]
 70 |     hs = hs[:, np.newaxis]
 71 |     anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
 72 |                          y_ctr - 0.5 * (hs - 1),
 73 |                          x_ctr + 0.5 * (ws - 1),
 74 |                          y_ctr + 0.5 * (hs - 1)))
 75 |     return anchors
 76 | 
 77 | def _ratio_enum(anchor, ratios):
 78 |     """
 79 |     Enumerate a set of anchors for each aspect ratio wrt an anchor.
 80 |     """
 81 | 
 82 |     w, h, x_ctr, y_ctr = _whctrs(anchor)
 83 |     size = w * h
 84 |     size_ratios = size / ratios
 85 |     ws = np.round(np.sqrt(size_ratios))
 86 |     hs = np.round(ws * ratios)
 87 |     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
 88 |     return anchors
 89 | 
 90 | def _scale_enum(anchor, scales):
 91 |     """
 92 |     Enumerate a set of anchors for each scale wrt an anchor.
 93 |     """
 94 | 
 95 |     w, h, x_ctr, y_ctr = _whctrs(anchor)
 96 |     ws = w * scales
 97 |     hs = h * scales
 98 |     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
 99 |     return anchors
100 | 
101 | if __name__ == '__main__':
102 |     import time
103 |     t = time.time()
104 |     a = generate_anchors()
105 |     print time.time() - t
106 |     print a
107 |     from IPython import embed; embed()
108 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Faster R-CNN
 2 | 
 3 | The MIT License (MIT)
 4 | 
 5 | Copyright (c) 2015 Microsoft Corporation
 6 | 
 7 | Permission is hereby granted, free of charge, to any person obtaining a copy
 8 | of this software and associated documentation files (the "Software"), to deal
 9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be included in
15 | all copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | THE SOFTWARE.
24 | 
25 | ************************************************************************
26 | 
27 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION
28 | 
29 | This project, Faster R-CNN, incorporates material from the project(s)
30 | listed below (collectively, "Third Party Code").  Microsoft is not the
31 | original author of the Third Party Code.  The original copyright notice
32 | and license under which Microsoft received such Third Party Code are set
33 | out below. This Third Party Code is licensed to you under their original
34 | license terms set forth below.  Microsoft reserves all other rights not
35 | expressly granted, whether by implication, estoppel or otherwise.
36 | 
37 | 1.	Caffe, (https://github.com/BVLC/caffe/)
38 | 
39 | COPYRIGHT
40 | 
41 | All contributions by the University of California:
42 | Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
43 | All rights reserved.
44 | 
45 | All other contributions:
46 | Copyright (c) 2014, 2015, the respective contributors
47 | All rights reserved.
48 | 
49 | Caffe uses a shared copyright model: each contributor holds copyright
50 | over their contributions to Caffe. The project versioning records all
51 | such contribution and copyright details. If a contributor wants to
52 | further mark their specific copyright on a particular contribution,
53 | they should indicate their copyright solely in the commit message of
54 | the change when it is committed.
55 | 
56 | The BSD 2-Clause License
57 | 
58 | Redistribution and use in source and binary forms, with or without
59 | modification, are permitted provided that the following conditions
60 | are met:
61 | 
62 | 1. Redistributions of source code must retain the above copyright notice,
63 | this list of conditions and the following disclaimer.
64 | 
65 | 2. Redistributions in binary form must reproduce the above copyright
66 | notice, this list of conditions and the following disclaimer in the
67 | documentation and/or other materials provided with the distribution.
68 | 
69 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
70 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
71 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
72 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
73 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
74 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
75 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
76 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
77 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
78 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
79 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
80 | 
81 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********
82 | 


--------------------------------------------------------------------------------
/tools/train_net.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # --------------------------------------------------------
  4 | # Fast R-CNN
  5 | # Copyright (c) 2015 Microsoft
  6 | # Licensed under The MIT License [see LICENSE for details]
  7 | # Written by Ross Girshick
  8 | # --------------------------------------------------------
  9 | 
 10 | """Train a Fast R-CNN network on a region of interest database."""
 11 | 
 12 | import _init_paths
 13 | from fast_rcnn.train import get_training_roidb, train_net
 14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
 15 | from datasets.factory import get_imdb
 16 | import datasets.imdb
 17 | import caffe
 18 | import argparse
 19 | import pprint
 20 | import numpy as np
 21 | import sys
 22 | 
 23 | def parse_args():
 24 |     """
 25 |     Parse input arguments
 26 |     """
 27 |     parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
 28 |     parser.add_argument('--gpu', dest='gpu_id',
 29 |                         help='GPU device id to use [0]',
 30 |                         default=0, type=int)
 31 |     parser.add_argument('--solver', dest='solver',
 32 |                         help='solver prototxt',
 33 |                         default=None, type=str)
 34 |     parser.add_argument('--iters', dest='max_iters',
 35 |                         help='number of iterations to train',
 36 |                         default=40000, type=int)
 37 |     parser.add_argument('--weights', dest='pretrained_model',
 38 |                         help='initialize with pretrained model weights',
 39 |                         default=None, type=str)
 40 |     parser.add_argument('--cfg', dest='cfg_file',
 41 |                         help='optional config file',
 42 |                         default=None, type=str)
 43 |     parser.add_argument('--imdb', dest='imdb_name',
 44 |                         help='dataset to train on',
 45 |                         default='voc_2007_trainval', type=str)
 46 |     parser.add_argument('--rand', dest='randomize',
 47 |                         help='randomize (do not use a fixed seed)',
 48 |                         action='store_true')
 49 |     parser.add_argument('--set', dest='set_cfgs',
 50 |                         help='set config keys', default=None,
 51 |                         nargs=argparse.REMAINDER)
 52 | 
 53 |     if len(sys.argv) == 1:
 54 |         parser.print_help()
 55 |         sys.exit(1)
 56 | 
 57 |     args = parser.parse_args()
 58 |     return args
 59 | 
 60 | def combined_roidb(imdb_names):
 61 |     def get_roidb(imdb_name):
 62 |         imdb = get_imdb(imdb_name)
 63 |         print 'Loaded dataset `{:s}` for training'.format(imdb.name)
 64 |         imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
 65 |         print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)
 66 |         roidb = get_training_roidb(imdb)
 67 |         return roidb
 68 | 
 69 |     roidbs = [get_roidb(s) for s in imdb_names.split('+')]
 70 |     roidb = roidbs[0]
 71 |     if len(roidbs) > 1:
 72 |         for r in roidbs[1:]:
 73 |             roidb.extend(r)
 74 |         imdb = datasets.imdb(imdb_names)
 75 |     else:
 76 |         imdb = get_imdb(imdb_names)
 77 |     return imdb, roidb
 78 | 
 79 | if __name__ == '__main__':
 80 |     args = parse_args()
 81 | 
 82 |     print('Called with args:')
 83 |     print(args)
 84 | 
 85 |     if args.cfg_file is not None:
 86 |         cfg_from_file(args.cfg_file)
 87 |     if args.set_cfgs is not None:
 88 |         cfg_from_list(args.set_cfgs)
 89 | 
 90 |     cfg.GPU_ID = args.gpu_id
 91 | 
 92 |     print('Using config:')
 93 |     pprint.pprint(cfg)
 94 | 
 95 |     if not args.randomize:
 96 |         # fix the random seeds (numpy and caffe) for reproducibility
 97 |         np.random.seed(cfg.RNG_SEED)
 98 |         caffe.set_random_seed(cfg.RNG_SEED)
 99 | 
100 |     # set up caffe
101 |     caffe.set_mode_gpu()
102 |     caffe.set_device(args.gpu_id)
103 | 
104 |     imdb, roidb = combined_roidb(args.imdb_name)
105 |     print '{:d} roidb entries'.format(len(roidb))
106 | 
107 |     output_dir = get_output_dir(imdb, None)
108 |     print 'Output will be saved to `{:s}`'.format(output_dir)
109 | 
110 |     train_net(args.solver, roidb, output_dir,
111 |               pretrained_model=args.pretrained_model,
112 |               max_iters=args.max_iters)
113 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/detection_eval.m:
--------------------------------------------------------------------------------
  1 | function res = detection_eval(path, comp_id, test_set,output_dir,img_list,img_gt)
  2 | classes={'car','person','bike', 'truck', 'van', 'tram', 'misc'}
  3 | minoverlap=0.5;
  4 | class_num=zeros(1,length(classes));
  5 | img_list_path= strcat(path,'/',img_list);
  6 | img_gt_path= strcat(path,'/',img_gt);
  7 | image_list=importdata(img_list_path);
  8 | 
  9 | 
 10 | fidin=fopen(img_gt_path);
 11 | ind=1;
 12 | while ~feof(fidin)
 13 |     tline=fgetl(fidin);
 14 |     image_list_gt_data{ind}=str2num(tline(29:end));
 15 |     ind=ind+1;
 16 | end
 17 | 
 18 | 
 19 | for i=1:length(image_list)
 20 |     image_gt{i}.ids=image_list{i};
 21 |     image_gt{i}.total=image_list_gt_data{i}(1);
 22 |     ind=2;
 23 |     for j=1:length(classes)
 24 |         
 25 |         %image_list_gt_data{i}(ind)
 26 |         
 27 |         image_gt{i}.classes{j}=image_list_gt_data{i}(ind);
 28 |         if image_gt{i}.classes{j}>0
 29 |             
 30 |             %image_list_gt_data{i}(ind+1:ind+image_gt{i}.classes{j}*4)
 31 |             
 32 |             image_gt{i}.bb{j}=reshape(image_list_gt_data{i}(ind+1:ind+image_gt{i}.classes{j}*4),4,image_gt{i}.classes{j})';
 33 |         else
 34 |             image_gt{i}.bb{j}=[];
 35 |         end
 36 |         ind=ind+4*image_gt{i}.classes{j}+1;
 37 |     end
 38 | end
 39 | res_path=strcat(path,'/','results/%s_det_',test_set,'_%s.txt');
 40 | for i=1:length(classes)
 41 |     class_num=0;
 42 |     gt(length(image_list))=struct('BB',[]);
 43 |     for ii=1:length(image_list)
 44 |         if ~isempty(image_gt{ii}.bb{i})
 45 |             gt(ii).BB=image_gt{ii}.bb{i};
 46 |             class_num=class_num+image_gt{ii}.classes{i};
 47 |         end
 48 |     end
 49 |     [ids,confidence,b1,b2,b3,b4]=textread(sprintf(res_path,comp_id,classes{i}),'%s %f %f %f %f %f');
 50 |     BB=[b1 b2 b3 b4];
 51 |     [sc,si]=sort(-confidence);
 52 |     ids=ids(si);
 53 |     BB=BB(si,:);
 54 |     nd=length(confidence);
 55 |     tp=zeros(nd,1);
 56 |     fp=zeros(nd,1);
 57 |     
 58 |     for j=1:nd
 59 |         ovmax=-inf;
 60 |         bb_pred=BB(j,:);
 61 |         id_index=strmatch(ids{j},image_list,'exact');
 62 |         for k=1:size(gt(id_index).BB,1)
 63 |             bb_target=gt(id_index).BB(k,:);
 64 |             overlap=compute_overlap(bb_pred,bb_target);
 65 |             if overlap>ovmax
 66 |                 ovmax=overlap;
 67 |             end
 68 |         end
 69 |         path=strcat('../../../data/',ids{j});
 70 |         %img=imread(path);
 71 |         if ~exist(strcat('../../../data/results/',classes{i},'/tp'))
 72 |             mkdir(strcat('../../../data/results/',classes{i},'/tp'))
 73 |         end
 74 |         if ~exist(strcat('../../../data/results/',classes{i},'/fp'))
 75 |             mkdir(strcat('../../../data/results/',classes{i},'/fp'))
 76 |         end
 77 |         write_path1=strcat('../../../data/results/',classes{i},'/tp','/',num2str(j),'.jpg');
 78 |         write_path2=strcat('../../../data/results/',classes{i},'/fp','/',num2str(j),'.jpg');
 79 |         
 80 |        % img_size1 = size(img);
 81 |         
 82 |         if ovmax>=minoverlap
 83 |             tp(j)=1;
 84 |             %imwrite( img(fix(bb_pred(2))+1:fix(bb_pred(4)),fix(bb_pred(1))+1:fix(bb_pred(3)), :), write_path1,'jpg');
 85 |         else
 86 |             fp(j)=1;
 87 |            % imwrite( img(fix(bb_pred(2))+1:fix(bb_pred(4)),fix(bb_pred(1))+1:fix(bb_pred(3)), :), write_path2,'jpg');
 88 |         end
 89 |     end
 90 |     fp=cumsum(fp);
 91 |     tp=cumsum(tp);
 92 |     rec=tp/class_num;
 93 |     prec=tp./(fp+tp);
 94 |     ap=0;
 95 |     for t=0:0.1:1
 96 |         p=max(prec(rec>=t));
 97 |         if isempty(p)
 98 |             p=0;
 99 |         end
100 |         ap=ap+p/11;
101 |     end
102 |     if 1
103 |         % plot precision/recall
104 |         plot(rec,prec,'-');
105 |         grid;
106 |         xlabel 'recall'
107 |         ylabel 'precision'
108 |         title(sprintf('class: %s, subset: %s, AP = %.3f',classes{i},test_set,ap));
109 |     end
110 |     ap_auc = xVOCap(rec, prec);
111 |     res(i).recall=rec;
112 |     res(i).prec=prec;
113 |     res(i).ap=ap;
114 |     res(i).ap_auc=ap_auc;
115 |     hold on;
116 | end
117 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
118 | fprintf('Results:\n');
119 | aps = [res(:).ap]';
120 | fprintf('APs:\n')
121 | fprintf('%.1f\n', aps * 100);
122 | fprintf('mAP:')
123 | fprintf('%.1f\n', mean(aps) * 100);
124 | fprintf('~~~~~~~~~~~~~~~~~~~~\n');
125 | 
126 | 


--------------------------------------------------------------------------------
/lib/rpn/generate.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Faster R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | from fast_rcnn.config import cfg
  9 | from utils.blob import im_list_to_blob
 10 | from utils.timer import Timer
 11 | import numpy as np
 12 | import cv2
 13 | 
 14 | def _vis_proposals(im, dets, thresh=0.5):
 15 |     """Draw detected bounding boxes."""
 16 |     inds = np.where(dets[:, -1] >= thresh)[0]
 17 |     if len(inds) == 0:
 18 |         return
 19 | 
 20 |     class_name = 'obj'
 21 |     im = im[:, :, (2, 1, 0)]
 22 |     fig, ax = plt.subplots(figsize=(12, 12))
 23 |     ax.imshow(im, aspect='equal')
 24 |     for i in inds:
 25 |         bbox = dets[i, :4]
 26 |         score = dets[i, -1]
 27 | 
 28 |         ax.add_patch(
 29 |             plt.Rectangle((bbox[0], bbox[1]),
 30 |                           bbox[2] - bbox[0],
 31 |                           bbox[3] - bbox[1], fill=False,
 32 |                           edgecolor='red', linewidth=3.5)
 33 |             )
 34 |         ax.text(bbox[0], bbox[1] - 2,
 35 |                 '{:s} {:.3f}'.format(class_name, score),
 36 |                 bbox=dict(facecolor='blue', alpha=0.5),
 37 |                 fontsize=14, color='white')
 38 | 
 39 |     ax.set_title(('{} detections with '
 40 |                   'p({} | box) >= {:.1f}').format(class_name, class_name,
 41 |                                                   thresh),
 42 |                   fontsize=14)
 43 |     plt.axis('off')
 44 |     plt.tight_layout()
 45 |     plt.draw()
 46 | 
 47 | def _get_image_blob(im):
 48 |     """Converts an image into a network input.
 49 | 
 50 |     Arguments:
 51 |         im (ndarray): a color image in BGR order
 52 | 
 53 |     Returns:
 54 |         blob (ndarray): a data blob holding an image pyramid
 55 |         im_scale_factors (list): list of image scales (relative to im) used
 56 |             in the image pyramid
 57 |     """
 58 |     im_orig = im.astype(np.float32, copy=True)
 59 |     im_orig -= cfg.PIXEL_MEANS
 60 | 
 61 |     im_shape = im_orig.shape
 62 |     im_size_min = np.min(im_shape[0:2])
 63 |     im_size_max = np.max(im_shape[0:2])
 64 | 
 65 |     processed_ims = []
 66 | 
 67 |     assert len(cfg.TEST.SCALES) == 1
 68 |     target_size = cfg.TEST.SCALES[0]
 69 | 
 70 |     im_scale = float(target_size) / float(im_size_min)
 71 |     # Prevent the biggest axis from being more than MAX_SIZE
 72 |     if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
 73 |         im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
 74 |     im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
 75 |                     interpolation=cv2.INTER_LINEAR)
 76 |     im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :]
 77 |     processed_ims.append(im)
 78 | 
 79 |     # Create a blob to hold the input images
 80 |     blob = im_list_to_blob(processed_ims)
 81 | 
 82 |     return blob, im_info
 83 | 
 84 | def im_proposals(net, im):
 85 |     """Generate RPN proposals on a single image."""
 86 |     blobs = {}
 87 |     blobs['data'], blobs['im_info'] = _get_image_blob(im)
 88 |     net.blobs['data'].reshape(*(blobs['data'].shape))
 89 |     net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
 90 |     blobs_out = net.forward(
 91 |             data=blobs['data'].astype(np.float32, copy=False),
 92 |             im_info=blobs['im_info'].astype(np.float32, copy=False))
 93 | 
 94 |     scale = blobs['im_info'][0, 2]
 95 |     boxes = blobs_out['rois'][:, 1:].copy() / scale
 96 |     scores = blobs_out['scores'].copy()
 97 |     return boxes, scores
 98 | 
 99 | def imdb_proposals(net, imdb):
100 |     """Generate RPN proposals on all images in an imdb."""
101 | 
102 |     _t = Timer()
103 |     imdb_boxes = [[] for _ in xrange(imdb.num_images)]
104 |     for i in xrange(imdb.num_images):
105 |         im = cv2.imread(imdb.image_path_at(i))
106 |         _t.tic()
107 |         imdb_boxes[i], scores = im_proposals(net, im)
108 |         _t.toc()
109 |         print 'im_proposals: {:d}/{:d} {:.3f}s' \
110 |               .format(i + 1, imdb.num_images, _t.average_time)
111 |         if 0:
112 |             dets = np.hstack((imdb_boxes[i], scores))
113 |             # from IPython import embed; embed()
114 |             _vis_proposals(im, dets[:3, :], thresh=0.9)
115 |             plt.show()
116 | 
117 |     return imdb_boxes
118 | 


--------------------------------------------------------------------------------
/data/convert_kitti.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # --------------------------------------------------------
  3 | # kitti tool
  4 | # Copyright (c) 2016 Chao Chen
  5 | # Licensed under The MIT License [see LICENSE for details]
  6 | # Written by Chao Chen
  7 | # --------------------------------------------------------
  8 | 
  9 | import os
 10 | 
 11 | def getLabelFilename(img_filename):
 12 |      vector_string = img_filename.split('.'); 
 13 |      tmp = vector_string[0] + '.txt'
 14 |      vector_tmp = tmp.split('/')
 15 |      return vector_tmp[-1]
 16 | 
 17 | 
 18 | 
 19 | 
 20 | 
 21 | def parse_line(img_line, label_file):
 22 |     count_p = 0
 23 |     count_c = 0  
 24 |     count_cyc = 0
 25 |     count_van = 0
 26 |     count_truck = 0
 27 |     count_tram = 0
 28 |     count_misc = 0
 29 | 
 30 |     p_flag = 0
 31 |     c_flag = 0
 32 |     cyc_flag = 0
 33 |     van_flag = 0
 34 |     truck_flag = 0
 35 |     misc_flag = 0
 36 | 
 37 |     bbox_car = []
 38 |     bbox_per = []
 39 |     bbox_cyc = []
 40 |     bbox_truck = []
 41 |     bbox_van = []
 42 |     bbox_tram = []
 43 |     bbox_misc = []
 44 | 
 45 |     
 46 |     file = open(label_file, 'r')
 47 |     for line in file.xreadlines():
 48 |         line = line.strip('\n')
 49 |         vector_str = line.split(' ')
 50 |         #print vector_str
 51 |         if ("Car" == vector_str[0] ):
 52 |             count_c += 1
 53 |             for k in range(4, 8):
 54 |                 bbox_car.append(vector_str[k])
 55 |             continue
 56 |         if ('Pedestrian' == vector_str[0]):
 57 |             count_p += 1
 58 |             for k in range(4, 8):
 59 |                 bbox_per.append(vector_str[k])
 60 |             continue
 61 | 
 62 |         if('Cyclist' == vector_str[0]):
 63 |             count_cyc += 1
 64 |             for k in range(4, 8):
 65 |                 bbox_cyc.append(vector_str[k])
 66 |             continue
 67 | 
 68 |         if('Van' == vector_str[0]):
 69 |             count_van += 1
 70 |             for k in range(4, 8):
 71 |                 bbox_van.append(vector_str[k])
 72 |             continue
 73 | 
 74 |         if('Truck' == vector_str[0]):
 75 |             count_truck += 1
 76 |             for k in range(4, 8):
 77 |                 bbox_truck.append(vector_str[k])
 78 |             continue
 79 |         if('Misc' == vector_str[0]):
 80 |             count_misc += 1
 81 |             for k in range(4, 8):
 82 |                 bbox_misc.append(vector_str[k])
 83 |             continue
 84 | 
 85 |         if('Tram' == vector_str[0]):
 86 |             count_tram += 1
 87 |             for k in range(4, 8):
 88 |                 bbox_tram.append(vector_str[k])
 89 |             continue
 90 | 
 91 |     num = count_c + count_p + count_cyc + count_van + count_truck + count_misc + count_tram
 92 | 
 93 |     final_line = img_line + ' '+ str(num)
 94 |     #car 
 95 |     final_line += ' '+ str(count_c)
 96 |     for i in bbox_car:
 97 |         final_line += ' ' + i 
 98 |     #pre
 99 |     final_line += ' '+ str(count_p)
100 |     for i in bbox_per:
101 |         final_line += ' ' + i 
102 |     #cyc
103 |     final_line += ' '+ str(count_cyc)
104 |     for i in bbox_cyc:
105 |         final_line += ' ' + i 
106 |     #truck
107 |     final_line += ' '+ str(count_truck)
108 |     for i in bbox_truck:
109 |         final_line += ' ' + i 
110 |     #van
111 |     final_line += ' '+ str(count_van)
112 |     for i in bbox_van:
113 |         final_line += ' ' + i 
114 |     #tram
115 |     final_line += ' '+ str(count_tram)
116 |     for i in bbox_tram:
117 |         final_line += ' ' + i 
118 |     #misc
119 |     final_line += ' '+ str(count_misc)
120 |     for i in bbox_misc:
121 |         final_line += ' ' + i 
122 |     return final_line + '\n' 
123 | 
124 | def convertKitti(label_file_list, savedFilename):
125 |     if os.path.exists(label_file_list):
126 |         file = open(label_file_list, 'r')
127 |         final_lines_list = []
128 |         for line in file.xreadlines():
129 |             line = line.strip('\n')
130 |             print line
131 |             labelFile = getLabelFilename(line)
132 |             print labelFile
133 |             finalLine = parse_line(line, './training/label_2/'+labelFile)
134 |             final_lines_list.append(finalLine)
135 |     result_file = open('./' + savedFilename, 'w')
136 |     result_file.writelines(final_lines_list)
137 |     result_file.close()
138 | 
139 | if '__main__' == __name__:
140 |     convertKitti('Train_image_list.txt', 'TrainIndex.txt')
141 | 
142 | 
143 | 
144 | 
145 | 


--------------------------------------------------------------------------------
/tools/compress_net.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # --------------------------------------------------------
  4 | # Fast R-CNN
  5 | # Copyright (c) 2015 Microsoft
  6 | # Licensed under The MIT License [see LICENSE for details]
  7 | # Written by Ross Girshick
  8 | # --------------------------------------------------------
  9 | 
 10 | """Compress a Fast R-CNN network using truncated SVD."""
 11 | 
 12 | import _init_paths
 13 | import caffe
 14 | import argparse
 15 | import numpy as np
 16 | import os, sys
 17 | 
 18 | def parse_args():
 19 |     """Parse input arguments."""
 20 |     parser = argparse.ArgumentParser(description='Compress a Fast R-CNN network')
 21 |     parser.add_argument('--def', dest='prototxt',
 22 |                         help='prototxt file defining the uncompressed network',
 23 |                         default=None, type=str)
 24 |     parser.add_argument('--def-svd', dest='prototxt_svd',
 25 |                         help='prototxt file defining the SVD compressed network',
 26 |                         default=None, type=str)
 27 |     parser.add_argument('--net', dest='caffemodel',
 28 |                         help='model to compress',
 29 |                         default=None, type=str)
 30 | 
 31 |     if len(sys.argv) == 1:
 32 |         parser.print_help()
 33 |         sys.exit(1)
 34 | 
 35 |     args = parser.parse_args()
 36 |     return args
 37 | 
 38 | def compress_weights(W, l):
 39 |     """Compress the weight matrix W of an inner product (fully connected) layer
 40 |     using truncated SVD.
 41 | 
 42 |     Parameters:
 43 |     W: N x M weights matrix
 44 |     l: number of singular values to retain
 45 | 
 46 |     Returns:
 47 |     Ul, L: matrices such that W \approx Ul*L
 48 |     """
 49 | 
 50 |     # numpy doesn't seem to have a fast truncated SVD algorithm...
 51 |     # this could be faster
 52 |     U, s, V = np.linalg.svd(W, full_matrices=False)
 53 | 
 54 |     Ul = U[:, :l]
 55 |     sl = s[:l]
 56 |     Vl = V[:l, :]
 57 | 
 58 |     L = np.dot(np.diag(sl), Vl)
 59 |     return Ul, L
 60 | 
 61 | def main():
 62 |     args = parse_args()
 63 | 
 64 |     # prototxt = 'models/VGG16/test.prototxt'
 65 |     # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel'
 66 |     net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
 67 | 
 68 |     # prototxt_svd = 'models/VGG16/svd/test_fc6_fc7.prototxt'
 69 |     # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel'
 70 |     net_svd = caffe.Net(args.prototxt_svd, args.caffemodel, caffe.TEST)
 71 | 
 72 |     print('Uncompressed network {} : {}'.format(args.prototxt, args.caffemodel))
 73 |     print('Compressed network prototxt {}'.format(args.prototxt_svd))
 74 | 
 75 |     out = os.path.splitext(os.path.basename(args.caffemodel))[0] + '_svd'
 76 |     out_dir = os.path.dirname(args.caffemodel)
 77 | 
 78 |     # Compress fc6
 79 |     if net_svd.params.has_key('fc6_L'):
 80 |         l_fc6 = net_svd.params['fc6_L'][0].data.shape[0]
 81 |         print('  fc6_L bottleneck size: {}'.format(l_fc6))
 82 | 
 83 |         # uncompressed weights and biases
 84 |         W_fc6 = net.params['fc6'][0].data
 85 |         B_fc6 = net.params['fc6'][1].data
 86 | 
 87 |         print('  compressing fc6...')
 88 |         Ul_fc6, L_fc6 = compress_weights(W_fc6, l_fc6)
 89 | 
 90 |         assert(len(net_svd.params['fc6_L']) == 1)
 91 | 
 92 |         # install compressed matrix factors (and original biases)
 93 |         net_svd.params['fc6_L'][0].data[...] = L_fc6
 94 | 
 95 |         net_svd.params['fc6_U'][0].data[...] = Ul_fc6
 96 |         net_svd.params['fc6_U'][1].data[...] = B_fc6
 97 | 
 98 |         out += '_fc6_{}'.format(l_fc6)
 99 | 
100 |     # Compress fc7
101 |     if net_svd.params.has_key('fc7_L'):
102 |         l_fc7 = net_svd.params['fc7_L'][0].data.shape[0]
103 |         print '  fc7_L bottleneck size: {}'.format(l_fc7)
104 | 
105 |         W_fc7 = net.params['fc7'][0].data
106 |         B_fc7 = net.params['fc7'][1].data
107 | 
108 |         print('  compressing fc7...')
109 |         Ul_fc7, L_fc7 = compress_weights(W_fc7, l_fc7)
110 | 
111 |         assert(len(net_svd.params['fc7_L']) == 1)
112 | 
113 |         net_svd.params['fc7_L'][0].data[...] = L_fc7
114 | 
115 |         net_svd.params['fc7_U'][0].data[...] = Ul_fc7
116 |         net_svd.params['fc7_U'][1].data[...] = B_fc7
117 | 
118 |         out += '_fc7_{}'.format(l_fc7)
119 | 
120 |     filename = '{}/{}.caffemodel'.format(out_dir, out)
121 |     net_svd.save(filename)
122 |     print 'Wrote svd model to: {:s}'.format(filename)
123 | 
124 | if __name__ == '__main__':
125 |     main()
126 | 


--------------------------------------------------------------------------------
/lib/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
  1 | // ------------------------------------------------------------------
  2 | // Faster R-CNN
  3 | // Copyright (c) 2015 Microsoft
  4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
  5 | // Written by Shaoqing Ren
  6 | // ------------------------------------------------------------------
  7 | 
  8 | #include "gpu_nms.hpp"
  9 | #include <vector>
 10 | #include <iostream>
 11 | 
 12 | #define CUDA_CHECK(condition) \
 13 |   /* Code block avoids redefinition of cudaError_t error */ \
 14 |   do { \
 15 |     cudaError_t error = condition; \
 16 |     if (error != cudaSuccess) { \
 17 |       std::cout << cudaGetErrorString(error) << std::endl; \
 18 |     } \
 19 |   } while (0)
 20 | 
 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
 23 | 
 24 | __device__ inline float devIoU(float const * const a, float const * const b) {
 25 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 26 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 27 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 28 |   float interS = width * height;
 29 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 30 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 31 |   return interS / (Sa + Sb - interS);
 32 | }
 33 | 
 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
 35 |                            const float *dev_boxes, unsigned long long *dev_mask) {
 36 |   const int row_start = blockIdx.y;
 37 |   const int col_start = blockIdx.x;
 38 | 
 39 |   // if (row_start > col_start) return;
 40 | 
 41 |   const int row_size =
 42 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 43 |   const int col_size =
 44 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 45 | 
 46 |   __shared__ float block_boxes[threadsPerBlock * 5];
 47 |   if (threadIdx.x < col_size) {
 48 |     block_boxes[threadIdx.x * 5 + 0] =
 49 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 50 |     block_boxes[threadIdx.x * 5 + 1] =
 51 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 52 |     block_boxes[threadIdx.x * 5 + 2] =
 53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 54 |     block_boxes[threadIdx.x * 5 + 3] =
 55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 56 |     block_boxes[threadIdx.x * 5 + 4] =
 57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 58 |   }
 59 |   __syncthreads();
 60 | 
 61 |   if (threadIdx.x < row_size) {
 62 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 63 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 64 |     int i = 0;
 65 |     unsigned long long t = 0;
 66 |     int start = 0;
 67 |     if (row_start == col_start) {
 68 |       start = threadIdx.x + 1;
 69 |     }
 70 |     for (i = start; i < col_size; i++) {
 71 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 72 |         t |= 1ULL << i;
 73 |       }
 74 |     }
 75 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 76 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 77 |   }
 78 | }
 79 | 
 80 | void _set_device(int device_id) {
 81 |   int current_device;
 82 |   CUDA_CHECK(cudaGetDevice(&current_device));
 83 |   if (current_device == device_id) {
 84 |     return;
 85 |   }
 86 |   // The call to cudaSetDevice must come before any calls to Get, which
 87 |   // may perform initialization using the GPU.
 88 |   CUDA_CHECK(cudaSetDevice(device_id));
 89 | }
 90 | 
 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
 92 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
 93 |   _set_device(device_id);
 94 | 
 95 |   float* boxes_dev = NULL;
 96 |   unsigned long long* mask_dev = NULL;
 97 | 
 98 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
 99 | 
100 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
101 |                         boxes_num * boxes_dim * sizeof(float)));
102 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
103 |                         boxes_host,
104 |                         boxes_num * boxes_dim * sizeof(float),
105 |                         cudaMemcpyHostToDevice));
106 | 
107 |   CUDA_CHECK(cudaMalloc(&mask_dev,
108 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
109 | 
110 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 |               DIVUP(boxes_num, threadsPerBlock));
112 |   dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 |                                   nms_overlap_thresh,
115 |                                   boxes_dev,
116 |                                   mask_dev);
117 | 
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 |                         mask_dev,
121 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
122 |                         cudaMemcpyDeviceToHost));
123 | 
124 |   std::vector<unsigned long long> remv(col_blocks);
125 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 | 
127 |   int num_to_keep = 0;
128 |   for (int i = 0; i < boxes_num; i++) {
129 |     int nblock = i / threadsPerBlock;
130 |     int inblock = i % threadsPerBlock;
131 | 
132 |     if (!(remv[nblock] & (1ULL << inblock))) {
133 |       keep_out[num_to_keep++] = i;
134 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
135 |       for (int j = nblock; j < col_blocks; j++) {
136 |         remv[j] |= p[j];
137 |       }
138 |     }
139 |   }
140 |   *num_out = num_to_keep;
141 | 
142 |   CUDA_CHECK(cudaFree(boxes_dev));
143 |   CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 | 


--------------------------------------------------------------------------------
/lib/fast_rcnn/train.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | """Train a Fast R-CNN network."""
  9 | 
 10 | import caffe
 11 | from fast_rcnn.config import cfg
 12 | import roi_data_layer.roidb as rdl_roidb
 13 | from utils.timer import Timer
 14 | import numpy as np
 15 | import os
 16 | 
 17 | from caffe.proto import caffe_pb2
 18 | import google.protobuf as pb2
 19 | 
 20 | class SolverWrapper(object):
 21 |     """A simple wrapper around Caffe's solver.
 22 |     This wrapper gives us control over he snapshotting process, which we
 23 |     use to unnormalize the learned bounding-box regression weights.
 24 |     """
 25 | 
 26 |     def __init__(self, solver_prototxt, roidb, output_dir,
 27 |                  pretrained_model=None):
 28 |         """Initialize the SolverWrapper."""
 29 |         self.output_dir = output_dir
 30 | 
 31 |         if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
 32 |             cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
 33 |             # RPN can only use precomputed normalization because there are no
 34 |             # fixed statistics to compute a priori
 35 |             assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED
 36 | 
 37 |         if cfg.TRAIN.BBOX_REG:
 38 |             print 'Computing bounding-box regression targets...'
 39 |             self.bbox_means, self.bbox_stds = \
 40 |                     rdl_roidb.add_bbox_regression_targets(roidb)
 41 |             print 'done'
 42 | 
 43 |         self.solver = caffe.SGDSolver(solver_prototxt)
 44 |         if pretrained_model is not None:
 45 |             print ('Loading pretrained model '
 46 |                    'weights from {:s}').format(pretrained_model)
 47 |             self.solver.net.copy_from(pretrained_model)
 48 | 
 49 |         self.solver_param = caffe_pb2.SolverParameter()
 50 |         with open(solver_prototxt, 'rt') as f:
 51 |             pb2.text_format.Merge(f.read(), self.solver_param)
 52 | 
 53 |         self.solver.net.layers[0].set_roidb(roidb)
 54 | 
 55 |     def snapshot(self):
 56 |         """Take a snapshot of the network after unnormalizing the learned
 57 |         bounding-box regression weights. This enables easy use at test-time.
 58 |         """
 59 |         net = self.solver.net
 60 | 
 61 |         scale_bbox_params = (cfg.TRAIN.BBOX_REG and
 62 |                              cfg.TRAIN.BBOX_NORMALIZE_TARGETS and
 63 |                              net.params.has_key('bbox_pred'))
 64 | 
 65 |         if scale_bbox_params:
 66 |             # save original values
 67 |             orig_0 = net.params['bbox_pred'][0].data.copy()
 68 |             orig_1 = net.params['bbox_pred'][1].data.copy()
 69 | 
 70 |             # scale and shift with bbox reg unnormalization; then save snapshot
 71 |             net.params['bbox_pred'][0].data[...] = \
 72 |                     (net.params['bbox_pred'][0].data *
 73 |                      self.bbox_stds[:, np.newaxis])
 74 |             net.params['bbox_pred'][1].data[...] = \
 75 |                     (net.params['bbox_pred'][1].data *
 76 |                      self.bbox_stds + self.bbox_means)
 77 | 
 78 |         if not os.path.exists(self.output_dir):
 79 |             os.makedirs(self.output_dir)
 80 | 
 81 |         infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
 82 |                  if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
 83 |         filename = (self.solver_param.snapshot_prefix + infix +
 84 |                     '_iter_{:d}'.format(self.solver.iter) + '.caffemodel')
 85 |         filename = os.path.join(self.output_dir, filename)
 86 | 
 87 |         net.save(str(filename))
 88 |         print 'Wrote snapshot to: {:s}'.format(filename)
 89 | 
 90 |         if scale_bbox_params:
 91 |             # restore net to original state
 92 |             net.params['bbox_pred'][0].data[...] = orig_0
 93 |             net.params['bbox_pred'][1].data[...] = orig_1
 94 |         return filename
 95 | 
 96 |     def train_model(self, max_iters):
 97 |         """Network training loop."""
 98 |         last_snapshot_iter = -1
 99 |         timer = Timer()
100 |         model_paths = []
101 |         while self.solver.iter < max_iters:
102 |             # Make one SGD update
103 |             timer.tic()
104 |             self.solver.step(1)
105 |             timer.toc()
106 |             if self.solver.iter % (10 * self.solver_param.display) == 0:
107 |                 print 'speed: {:.3f}s / iter'.format(timer.average_time)
108 | 
109 |             if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
110 |                 last_snapshot_iter = self.solver.iter
111 |                 model_paths.append(self.snapshot())
112 | 
113 |         if last_snapshot_iter != self.solver.iter:
114 |             model_paths.append(self.snapshot())
115 |         return model_paths
116 | 
117 | def get_training_roidb(imdb):
118 |     """Returns a roidb (Region of Interest database) for use in training."""
119 |     if cfg.TRAIN.USE_FLIPPED:
120 |         print 'Appending horizontally-flipped training examples...'
121 |         imdb.append_flipped_images()
122 |         print 'done'
123 | 
124 |     print 'Preparing training data...'
125 |     rdl_roidb.prepare_roidb(imdb)
126 |     print 'done'
127 | 
128 |     return imdb.roidb
129 | 
130 | def train_net(solver_prototxt, roidb, output_dir,
131 |               pretrained_model=None, max_iters=40000):
132 |     """Train a Fast R-CNN network."""
133 |     sw = SolverWrapper(solver_prototxt, roidb, output_dir,
134 |                        pretrained_model=pretrained_model)
135 | 
136 |     print 'Solving...'
137 |     model_paths = sw.train_model(max_iters)
138 |     print 'done solving'
139 |     return model_paths
140 | 


--------------------------------------------------------------------------------
/tools/demo.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # --------------------------------------------------------
  4 | # Faster R-CNN
  5 | # Copyright (c) 2015 Microsoft
  6 | # Licensed under The MIT License [see LICENSE for details]
  7 | # Written by Ross Girshick
  8 | # --------------------------------------------------------
  9 | 
 10 | """
 11 | Demo script showing detections in sample images.
 12 | 
 13 | See README.md for installation instructions before running.
 14 | """
 15 | 
 16 | import _init_paths
 17 | from fast_rcnn.config import cfg
 18 | from fast_rcnn.test import im_detect
 19 | from fast_rcnn.nms_wrapper import nms
 20 | from utils.timer import Timer
 21 | import matplotlib.pyplot as plt
 22 | import numpy as np
 23 | import scipy.io as sio
 24 | import caffe, os, sys, cv2
 25 | import argparse
 26 | 
 27 | import matplotlib
 28 | from matplotlib.pyplot import plot,savefig
 29 | 
 30 | CLASSES = ('__background__','car','person')
 31 | 
 32 | NETS = {'vgg16': ('VGG16',
 33 |                   'VGG16_faster_rcnn_final.caffemodel'),
 34 |         'zf': ('ZF',
 35 |                   'ZF_faster_rcnn_final.caffemodel'),
 36 |         'vgg_m': ('VGG_CNN_M_1024',
 37 |                    'VGG_CNN_M_1024_faster_rcnn_final.caffemodel')}
 38 | 
 39 | 
 40 | def vis_detections(im, class_name, dets, image_name, ax, thresh=0.5):
 41 |     """Draw detected bounding boxes."""
 42 |     inds = np.where(dets[:, -1] >= thresh)[0]
 43 |     if len(inds) == 0:
 44 |         plt.axis('off')
 45 |         plt.tight_layout()
 46 |         plt.draw()
 47 |         return
 48 |     for i in inds:
 49 |         bbox = dets[i, :4]
 50 |         score = dets[i, -1]
 51 | 
 52 |         ax.add_patch(
 53 |             plt.Rectangle((bbox[0], bbox[1]),
 54 |                           bbox[2] - bbox[0],
 55 |                           bbox[3] - bbox[1], fill=False,
 56 |                           edgecolor='red', linewidth=3.5)
 57 |             )
 58 |         ax.text(bbox[0], bbox[1] - 2,
 59 |                 '{:s} {:.3f}'.format(class_name, score),
 60 |                 bbox=dict(facecolor='blue', alpha=0.5),
 61 |                 fontsize=14, color='white')
 62 | 
 63 |     ax.set_title(('{} detections with '
 64 |                   'p({} | box) >= {:.1f}').format(class_name, class_name,
 65 |                                                   thresh),
 66 |                   fontsize=14)
 67 |     plt.axis('off')
 68 |     plt.tight_layout()
 69 |     plt.draw()
 70 | 
 71 | 
 72 | def demo(net, image_name):
 73 |     """Detect object classes in an image using pre-computed object proposals."""
 74 | 
 75 |     # Load the demo image
 76 |     im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name)
 77 |     im = cv2.imread(im_file)
 78 | 
 79 |     # Detect all object classes and regress object bounds
 80 |     timer = Timer()
 81 |     timer.tic()
 82 |     scores, boxes = im_detect(net, im)
 83 |     timer.toc()
 84 |     print ('Detection took {:.3f}s for '
 85 |            '{:d} object proposals').format(timer.total_time, boxes.shape[0])
 86 | 
 87 |     # Visualize detections for each class
 88 |     CONF_THRESH = 0.7
 89 |     NMS_THRESH = 0.3
 90 | 
 91 |     im = im[:, :, (2, 1, 0)]
 92 |     fig, ax = plt.subplots(figsize=(12, 12))
 93 |     ax.imshow(im, aspect='equal')
 94 |     for cls_ind, cls in enumerate(CLASSES[1:]):
 95 |         cls_ind += 1 # because we skipped background
 96 |         cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
 97 |         cls_scores = scores[:, cls_ind]
 98 |         dets = np.hstack((cls_boxes,
 99 |                           cls_scores[:, np.newaxis])).astype(np.float32)
100 |         keep = nms(dets, NMS_THRESH)
101 |         dets = dets[keep, :]
102 |         vis_detections(im, cls, dets, image_name, ax,thresh=CONF_THRESH)
103 | 
104 | def parse_args():
105 |     """Parse input arguments."""
106 |     parser = argparse.ArgumentParser(description='Faster R-CNN demo')
107 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
108 |                         default=0, type=int)
109 |     parser.add_argument('--cpu', dest='cpu_mode',
110 |                         help='Use CPU mode (overrides --gpu)',
111 |                         action='store_true')
112 |     parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
113 |                         choices=NETS.keys(), default='vgg_m')
114 | 
115 |     args = parser.parse_args()
116 | 
117 |     return args
118 | 
119 | if __name__ == '__main__':
120 |     cfg.TEST.HAS_RPN = True  # Use RPN for proposals
121 | 
122 |     args = parse_args()
123 | 
124 |     prototxt = os.path.join(cfg.ROOT_DIR, 'models', NETS[args.demo_net][0],
125 |                             'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
126 |     caffemodel = os.path.join(cfg.ROOT_DIR, 'data', 'faster_rcnn_models',
127 |                               NETS[args.demo_net][1])
128 | 
129 |     if not os.path.isfile(caffemodel):
130 |         raise IOError(('{:s} not found.\nDid you run ./data/script/'
131 |                        'fetch_faster_rcnn_models.sh?').format(caffemodel))
132 | 
133 |     if args.cpu_mode:
134 |         caffe.set_mode_cpu()
135 |     else:
136 |         caffe.set_mode_gpu()
137 |         caffe.set_device(args.gpu_id)
138 |         cfg.GPU_ID = args.gpu_id
139 |     net = caffe.Net(prototxt, caffemodel, caffe.TEST)
140 | 
141 |     print '\n\nLoaded network {:s}'.format(caffemodel)
142 | 
143 |     # Warmup on a dummy image
144 |     im = 128 * np.ones((300, 500, 3), dtype=np.uint8)
145 |     #im = 128 * np.ones((1280, 960, 3), dtype=np.uint8)
146 |     for i in xrange(2):
147 |         _, _= im_detect(net, im)
148 | 
149 |     
150 |     for ind in range(0,31):
151 |         str="%06d"%(ind)
152 |         im_name=str+".png"
153 |         print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
154 |         print 'Demo for data/demo/{}'.format(im_name)
155 |         demo(net, im_name)
156 |     plt.show()
157 | 


--------------------------------------------------------------------------------
/tools/demo_show.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # --------------------------------------------------------
  4 | # Faster R-CNN
  5 | # Copyright (c) 2015 Microsoft
  6 | # Licensed under The MIT License [see LICENSE for details]
  7 | # Written by Ross Girshick
  8 | # --------------------------------------------------------
  9 | 
 10 | """
 11 | Demo script showing detections in sample images.
 12 | 
 13 | See README.md for installation instructions before running.
 14 | """
 15 | 
 16 | import _init_paths
 17 | from fast_rcnn.config import cfg
 18 | from fast_rcnn.test import im_detect
 19 | from fast_rcnn.nms_wrapper import nms
 20 | from utils.timer import Timer
 21 | import matplotlib.pyplot as plt
 22 | import numpy as np
 23 | import scipy.io as sio
 24 | import caffe, os, sys, cv2
 25 | import argparse
 26 | 
 27 | CLASSES = ('__background__','car','person','bike')
 28 | 
 29 | NETS = {'vgg16': ('VGG16',
 30 |                   'VGG16_faster_rcnn_final.caffemodel'),
 31 |         'zf': ('ZF',
 32 |                   'ZF_faster_rcnn_final.caffemodel'),
 33 |         'vgg_m': ('VGG_CNN_M_1024',
 34 |                    'VGG_CNN_M_1024_faster_rcnn_final.caffemodel')}
 35 | 
 36 | 
 37 | def demo(net,image_list):
 38 |     """Detect object classes in an image using pre-computed object proposals."""
 39 | 
 40 |     # Load the demo image
 41 |     im_file = os.path.join(cfg.ROOT_DIR, 'data', image_list[0])
 42 |     im = cv2.imread(im_file)
 43 | 
 44 |     # Detect all object classes and regress object bounds
 45 |     timer = Timer()
 46 |     timer.tic()
 47 |     scores, boxes = im_detect(net, im)
 48 |     timer.toc()
 49 |     print ('Detection took {:.3f}s for '
 50 |            '{:d} object proposals').format(timer.total_time, boxes.shape[0])
 51 | 
 52 |     # Visualize detections for each class
 53 | 
 54 |     ind=1
 55 |     color_list=[(255,0,0),(0,255,0),(0,0,255)]
 56 |     color_cls=[(0,255,255),(255,0,255),(255,255,0)]
 57 |     for j in range(1, len(CLASSES)):
 58 |         num_objs = int(image_list[ind+1])
 59 |         for i in xrange(num_objs):
 60 |             x1 = int(float(image_list[ind+2 + i * 4]))
 61 |             y1 = int(float(image_list[ind+3 + i * 4]))
 62 |             x2 = int(float(image_list[ind+4 + i * 4]))
 63 |             y2 = int(float(image_list[ind+5 + i * 4]))
 64 |             rect_start = (x1,y1)
 65 |             rect_end = (x2,y2)
 66 |             #cv2.rectangle(im, rect_start, rect_end, color_list[j-1], 2)
 67 |         ind+=4*num_objs+1
 68 | 
 69 |     thresh= 0.5
 70 |     NMS_THRESH = 0.3
 71 |     path = os.path.join(cfg.ROOT_DIR, 'data', 'results','show',image_list[0][17:])
 72 |     for cls_ind, cls in enumerate(CLASSES[1:]):
 73 |         cls_ind += 1 # because we skipped background
 74 |         cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
 75 |         cls_scores = scores[:, cls_ind]
 76 |         dets = np.hstack((cls_boxes,
 77 |                           cls_scores[:, np.newaxis])).astype(np.float32)
 78 |         keep = nms(dets, NMS_THRESH)
 79 |         dets = dets[keep, :]
 80 |         inds = np.where(dets[:, -1] >= thresh)[0]
 81 | 
 82 |         index=1
 83 |         if len(inds) == 0 and index==len(CLASSES[1:]):
 84 |             cv2.imwrite(path,im)
 85 |             return
 86 |         elif len(inds) == 0 and index<len(CLASSES[1:]):
 87 |             index+=1
 88 |             continue
 89 |         for i in inds:
 90 |             bbox = dets[i, :4]
 91 |             score = dets[i, -1]
 92 |             x = bbox[0]
 93 |             y = bbox[1]
 94 |             rect_start = (x,y)
 95 |             x1 = bbox[2]
 96 |             y1 = bbox[3]
 97 |             rect_end = (x1,y1)
 98 |             color_pred=color_cls[cls_ind-1]
 99 |             cv2.rectangle(im, rect_start, rect_end, color_pred, 2)
100 |     cv2.imwrite(path,im)
101 | 
102 | def parse_args():
103 |     """Parse input arguments."""
104 |     parser = argparse.ArgumentParser(description='Faster R-CNN demo')
105 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
106 |                         default=0, type=int)
107 |     parser.add_argument('--cpu', dest='cpu_mode',
108 |                         help='Use CPU mode (overrides --gpu)',
109 |                         action='store_true')
110 |     parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
111 |                         choices=NETS.keys(), default='vgg_m')
112 | 
113 |     args = parser.parse_args()
114 | 
115 |     return args
116 | 
117 | if __name__ == '__main__':
118 |     cfg.TEST.HAS_RPN = True  # Use RPN for proposals
119 | 
120 |     args = parse_args()
121 | 
122 |     prototxt = os.path.join(cfg.ROOT_DIR, 'models', NETS[args.demo_net][0],
123 |                             'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
124 |     caffemodel = os.path.join(cfg.ROOT_DIR, 'data', 'faster_rcnn_models',
125 |                               NETS[args.demo_net][1])
126 | 
127 |     if not os.path.isfile(caffemodel):
128 |         raise IOError(('{:s} not found.\nDid you run ./data/script/'
129 |                        'fetch_faster_rcnn_models.sh?').format(caffemodel))
130 | 
131 |     if args.cpu_mode:
132 |         caffe.set_mode_cpu()
133 |     else:
134 |         caffe.set_mode_gpu()
135 |         caffe.set_device(args.gpu_id)
136 |         cfg.GPU_ID = args.gpu_id
137 |     net = caffe.Net(prototxt, caffemodel, caffe.TEST)
138 | 
139 | 
140 |     image_list=[]
141 |     annotationfile = os.path.join(cfg.ROOT_DIR, 'data','KITTI_gt_val.txt')
142 |     f = open(annotationfile)
143 |     split_line = f.readline().strip().split()
144 |     while(split_line):
145 |         image_list.append(split_line)
146 |         split_line = f.readline().strip().split()
147 | 
148 |     print '\n\nLoaded network {:s}'.format(caffemodel)
149 | 
150 | 
151 |     for im_list in image_list:
152 |         print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
153 |         demo(net ,im_list)
154 | 
155 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer_original/roidb.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
  9 | 
 10 | import numpy as np
 11 | from fast_rcnn.config import cfg
 12 | from fast_rcnn.bbox_transform import bbox_transform
 13 | from utils.cython_bbox import bbox_overlaps
 14 | import PIL
 15 | 
 16 | def prepare_roidb(imdb):
 17 |     """Enrich the imdb's roidb by adding some derived quantities that
 18 |     are useful for training. This function precomputes the maximum
 19 |     overlap, taken over ground-truth boxes, between each ROI and
 20 |     each ground-truth box. The class with maximum overlap is also
 21 |     recorded.
 22 |     """
 23 |     sizes = [PIL.Image.open(imdb.image_path_at(i)).size
 24 |              for i in xrange(imdb.num_images)]
 25 |     roidb = imdb.roidb
 26 |     for i in xrange(len(imdb.image_index)):
 27 |         roidb[i]['image'] = imdb.image_path_at(i)
 28 |         roidb[i]['width'] = sizes[i][0]
 29 |         roidb[i]['height'] = sizes[i][1]
 30 |         # need gt_overlaps as a dense array for argmax
 31 |         gt_overlaps = roidb[i]['gt_overlaps'].toarray()
 32 |         # max overlap with gt over classes (columns)
 33 |         max_overlaps = gt_overlaps.max(axis=1)
 34 |         # gt class that had the max overlap
 35 |         max_classes = gt_overlaps.argmax(axis=1)
 36 |         roidb[i]['max_classes'] = max_classes
 37 |         roidb[i]['max_overlaps'] = max_overlaps
 38 |         # sanity checks
 39 |         # max overlap of 0 => class should be zero (background)
 40 |         zero_inds = np.where(max_overlaps == 0)[0]
 41 |         assert all(max_classes[zero_inds] == 0)
 42 |         # max overlap > 0 => class should not be zero (must be a fg class)
 43 |         nonzero_inds = np.where(max_overlaps > 0)[0]
 44 |         assert all(max_classes[nonzero_inds] != 0)
 45 | 
 46 | def add_bbox_regression_targets(roidb):
 47 |     """Add information needed to train bounding-box regressors."""
 48 |     assert len(roidb) > 0
 49 |     assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'
 50 | 
 51 |     num_images = len(roidb)
 52 |     # Infer number of classes from the number of columns in gt_overlaps
 53 |     num_classes = roidb[0]['gt_overlaps'].shape[1]
 54 |     for im_i in xrange(num_images):
 55 |         rois = roidb[im_i]['boxes']
 56 |         max_overlaps = roidb[im_i]['max_overlaps']
 57 |         max_classes = roidb[im_i]['max_classes']
 58 |         roidb[im_i]['bbox_targets'] = \
 59 |                 _compute_targets(rois, max_overlaps, max_classes)
 60 | 
 61 |     if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
 62 |         # Use fixed / precomputed "means" and "stds" instead of empirical values
 63 |         means = np.tile(
 64 |                 np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1))
 65 |         stds = np.tile(
 66 |                 np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1))
 67 |     else:
 68 |         # Compute values needed for means and stds
 69 |         # var(x) = E(x^2) - E(x)^2
 70 |         class_counts = np.zeros((num_classes, 1)) + cfg.EPS
 71 |         sums = np.zeros((num_classes, 4))
 72 |         squared_sums = np.zeros((num_classes, 4))
 73 |         for im_i in xrange(num_images):
 74 |             targets = roidb[im_i]['bbox_targets']
 75 |             for cls in xrange(1, num_classes):
 76 |                 cls_inds = np.where(targets[:, 0] == cls)[0]
 77 |                 if cls_inds.size > 0:
 78 |                     class_counts[cls] += cls_inds.size
 79 |                     sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
 80 |                     squared_sums[cls, :] += \
 81 |                             (targets[cls_inds, 1:] ** 2).sum(axis=0)
 82 | 
 83 |         means = sums / class_counts
 84 |         stds = np.sqrt(squared_sums / class_counts - means ** 2)
 85 | 
 86 |     print 'bbox target means:'
 87 |     print means
 88 |     print means[1:, :].mean(axis=0) # ignore bg class
 89 |     print 'bbox target stdevs:'
 90 |     print stds
 91 |     print stds[1:, :].mean(axis=0) # ignore bg class
 92 | 
 93 |     # Normalize targets
 94 |     if cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
 95 |         print "Normalizing targets"
 96 |         for im_i in xrange(num_images):
 97 |             targets = roidb[im_i]['bbox_targets']
 98 |             for cls in xrange(1, num_classes):
 99 |                 cls_inds = np.where(targets[:, 0] == cls)[0]
100 |                 roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
101 |                 roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
102 |     else:
103 |         print "NOT normalizing targets"
104 | 
105 |     # These values will be needed for making predictions
106 |     # (the predicts will need to be unnormalized and uncentered)
107 |     return means.ravel(), stds.ravel()
108 | 
109 | def _compute_targets(rois, overlaps, labels):
110 |     """Compute bounding-box regression targets for an image."""
111 |     # Indices of ground-truth ROIs
112 |     gt_inds = np.where(overlaps == 1)[0]
113 |     # Indices of examples for which we try to make predictions
114 |     ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
115 | 
116 |     # Get IoU overlap between each ex ROI and gt ROI
117 |     ex_gt_overlaps = bbox_overlaps(
118 |         np.ascontiguousarray(rois[ex_inds, :], dtype=np.float),
119 |         np.ascontiguousarray(rois[gt_inds, :], dtype=np.float))
120 | 
121 |     # Find which gt ROI each ex ROI has max overlap with:
122 |     # this will be the ex ROI's gt target
123 |     gt_assignment = ex_gt_overlaps.argmax(axis=1)
124 |     gt_rois = rois[gt_inds[gt_assignment], :]
125 |     ex_rois = rois[ex_inds, :]
126 | 
127 |     targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
128 |     targets[ex_inds, 0] = labels[ex_inds]
129 |     targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
130 |     return targets
131 | 


--------------------------------------------------------------------------------
/lib/setup.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | import os
  9 | from os.path import join as pjoin
 10 | from setuptools import setup
 11 | from distutils.extension import Extension
 12 | from Cython.Distutils import build_ext
 13 | import subprocess
 14 | import numpy as np
 15 | 
 16 | def find_in_path(name, path):
 17 |     "Find a file in a search path"
 18 |     #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
 19 |     for dir in path.split(os.pathsep):
 20 |         binpath = pjoin(dir, name)
 21 |         if os.path.exists(binpath):
 22 |             return os.path.abspath(binpath)
 23 |     return None
 24 | 
 25 | 
 26 | def locate_cuda():
 27 |     """Locate the CUDA environment on the system
 28 | 
 29 |     Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
 30 |     and values giving the absolute path to each directory.
 31 | 
 32 |     Starts by looking for the CUDAHOME env variable. If not found, everything
 33 |     is based on finding 'nvcc' in the PATH.
 34 |     """
 35 | 
 36 |     # first check if the CUDAHOME env variable is in use
 37 |     if 'CUDAHOME' in os.environ:
 38 |         home = os.environ['CUDAHOME']
 39 |         nvcc = pjoin(home, 'bin', 'nvcc')
 40 |     else:
 41 |         # otherwise, search the PATH for NVCC
 42 |         default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
 43 |         nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
 44 |         if nvcc is None:
 45 |             raise EnvironmentError('The nvcc binary could not be '
 46 |                 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
 47 |         home = os.path.dirname(os.path.dirname(nvcc))
 48 | 
 49 |     cudaconfig = {'home':home, 'nvcc':nvcc,
 50 |                   'include': pjoin(home, 'include'),
 51 |                   'lib64': pjoin(home, 'lib64')}
 52 |     for k, v in cudaconfig.iteritems():
 53 |         if not os.path.exists(v):
 54 |             raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
 55 | 
 56 |     return cudaconfig
 57 | CUDA = locate_cuda()
 58 | 
 59 | 
 60 | # Obtain the numpy include directory.  This logic works across numpy versions.
 61 | try:
 62 |     numpy_include = np.get_include()
 63 | except AttributeError:
 64 |     numpy_include = np.get_numpy_include()
 65 | 
 66 | def customize_compiler_for_nvcc(self):
 67 |     """inject deep into distutils to customize how the dispatch
 68 |     to gcc/nvcc works.
 69 | 
 70 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
 71 |     injected in, and still have the right customizations (i.e.
 72 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
 73 |     the OO route, I have this. Note, it's kindof like a wierd functional
 74 |     subclassing going on."""
 75 | 
 76 |     # tell the compiler it can processes .cu
 77 |     self.src_extensions.append('.cu')
 78 | 
 79 |     # save references to the default compiler_so and _comple methods
 80 |     default_compiler_so = self.compiler_so
 81 |     super = self._compile
 82 | 
 83 |     # now redefine the _compile method. This gets executed for each
 84 |     # object but distutils doesn't have the ability to change compilers
 85 |     # based on source extension: we add it.
 86 |     def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
 87 |         if os.path.splitext(src)[1] == '.cu':
 88 |             # use the cuda for .cu files
 89 |             self.set_executable('compiler_so', CUDA['nvcc'])
 90 |             # use only a subset of the extra_postargs, which are 1-1 translated
 91 |             # from the extra_compile_args in the Extension class
 92 |             postargs = extra_postargs['nvcc']
 93 |         else:
 94 |             postargs = extra_postargs['gcc']
 95 | 
 96 |         super(obj, src, ext, cc_args, postargs, pp_opts)
 97 |         # reset the default compiler_so, which we might have changed for cuda
 98 |         self.compiler_so = default_compiler_so
 99 | 
100 |     # inject our redefined _compile method into the class
101 |     self._compile = _compile
102 | 
103 | 
104 | # run the customize_compiler
105 | class custom_build_ext(build_ext):
106 |     def build_extensions(self):
107 |         customize_compiler_for_nvcc(self.compiler)
108 |         build_ext.build_extensions(self)
109 | 
110 | 
111 | ext_modules = [
112 |     Extension(
113 |         "utils.cython_bbox",
114 |         ["utils/bbox.pyx"],
115 |         extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
116 |         include_dirs = [numpy_include]
117 |     ),
118 |     Extension(
119 |         "nms.cpu_nms",
120 |         ["nms/cpu_nms.pyx"],
121 |         extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
122 |         include_dirs = [numpy_include]
123 |     ),
124 |     Extension('nms.gpu_nms',
125 |         ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
126 |         library_dirs=[CUDA['lib64']],
127 |         libraries=['cudart'],
128 |         language='c++',
129 |         runtime_library_dirs=[CUDA['lib64']],
130 |         # this syntax is specific to this build system
131 |         # we're only going to use certain compiler args with nvcc and not with gcc
132 |         # the implementation of this trick is in customize_compiler() below
133 |         extra_compile_args={'gcc': ["-Wno-unused-function"],
134 |                             'nvcc': ['-arch=sm_35',
135 |                                      '--ptxas-options=-v',
136 |                                      '-c',
137 |                                      '--compiler-options',
138 |                                      "'-fPIC'"]},
139 |         include_dirs = [numpy_include, CUDA['include']]
140 |     )
141 | ]
142 | 
143 | setup(
144 |     name='fast_rcnn',
145 |     ext_modules=ext_modules,
146 |     # inject our custom trigger
147 |     cmdclass={'build_ext': custom_build_ext},
148 | )
149 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
  9 | 
 10 | import numpy as np
 11 | from fast_rcnn.config import cfg
 12 | from fast_rcnn.bbox_transform import bbox_transform
 13 | from utils.cython_bbox import bbox_overlaps
 14 | import PIL
 15 | 
 16 | def prepare_roidb(imdb):
 17 |     """Enrich the imdb's roidb by adding some derived quantities that
 18 |     are useful for training. This function precomputes the maximum
 19 |     overlap, taken over ground-truth boxes, between each ROI and
 20 |     each ground-truth box. The class with maximum overlap is also
 21 |     recorded.
 22 |     """
 23 |     sizes = [PIL.Image.open(imdb.image_path_at(i)).size
 24 |              for i in xrange(imdb.num_images)]
 25 |     roidb = imdb.roidb
 26 |     for i in xrange(len(imdb.image_index)):
 27 |         roidb[i]['image'] = imdb.image_path_at(i)
 28 |         roidb[i]['width'] = sizes[i][0]
 29 |         roidb[i]['height'] = sizes[i][1]
 30 |         # need gt_overlaps as a dense array for argmax
 31 |         gt_overlaps = roidb[i]['gt_overlaps'].toarray()
 32 |         # max overlap with gt over classes (columns)
 33 |         max_overlaps = gt_overlaps.max(axis=1)
 34 |         # gt class that had the max overlap
 35 |         max_classes = gt_overlaps.argmax(axis=1)
 36 |         roidb[i]['max_classes'] = max_classes
 37 |         roidb[i]['max_overlaps'] = max_overlaps
 38 |         # sanity checks
 39 |         # max overlap of 0 => class should be zero (background)
 40 |         zero_inds = np.where(max_overlaps == 0)[0]
 41 |         assert all(max_classes[zero_inds] == 0)
 42 |         # max overlap > 0 => class should not be zero (must be a fg class)
 43 |         nonzero_inds = np.where(max_overlaps > 0)[0]
 44 |         assert all(max_classes[nonzero_inds] != 0)
 45 | 
 46 | def add_bbox_regression_targets(roidb):
 47 |     """Add information needed to train bounding-box regressors."""
 48 |     assert len(roidb) > 0
 49 |     assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'
 50 | 
 51 |     num_images = len(roidb)
 52 |     # Infer number of classes from the number of columns in gt_overlaps
 53 |     num_classes = roidb[0]['gt_overlaps'].shape[1]
 54 |     for im_i in xrange(num_images):
 55 |         rois = roidb[im_i]['boxes']
 56 |         max_overlaps = roidb[im_i]['max_overlaps']
 57 |         max_classes = roidb[im_i]['max_classes']
 58 |         roidb[im_i]['bbox_targets'] = \
 59 |                 _compute_targets(rois, max_overlaps, max_classes)
 60 | 
 61 |     if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
 62 |         # Use fixed / precomputed "means" and "stds" instead of empirical values
 63 |         means = np.tile(
 64 |                 np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1))
 65 |         stds = np.tile(
 66 |                 np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1))
 67 |     else:
 68 |         # Compute values needed for means and stds
 69 |         # var(x) = E(x^2) - E(x)^2
 70 |         class_counts = np.zeros((num_classes, 1)) + cfg.EPS
 71 |         sums = np.zeros((num_classes, 4))
 72 |         squared_sums = np.zeros((num_classes, 4))
 73 |         for im_i in xrange(num_images):
 74 |             targets = roidb[im_i]['bbox_targets']
 75 |             for cls in xrange(1, num_classes):
 76 |                 cls_inds = np.where(targets[:, 0] == cls)[0]
 77 |                 if cls_inds.size > 0:
 78 |                     class_counts[cls] += cls_inds.size
 79 |                     sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
 80 |                     squared_sums[cls, :] += \
 81 |                             (targets[cls_inds, 1:] ** 2).sum(axis=0)
 82 | 
 83 |         means = sums / class_counts
 84 |         stds = np.sqrt(squared_sums / class_counts - means ** 2)
 85 | 
 86 |     print 'bbox target means:'
 87 |     print means
 88 |     print means[1:, :].mean(axis=0) # ignore bg class
 89 |     print 'bbox target stdevs:'
 90 |     print stds
 91 |     print stds[1:, :].mean(axis=0) # ignore bg class
 92 | 
 93 |     # Normalize targets
 94 |     if cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
 95 |         print "Normalizing targets"
 96 |         for im_i in xrange(num_images):
 97 |             targets = roidb[im_i]['bbox_targets']
 98 |             for cls in xrange(1, num_classes):
 99 |                 cls_inds = np.where(targets[:, 0] == cls)[0]
100 |                 roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
101 |                 roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
102 |     else:
103 |         print "NOT normalizing targets"
104 | 
105 |     # These values will be needed for making predictions
106 |     # (the predicts will need to be unnormalized and uncentered)
107 |     return means.ravel(), stds.ravel()
108 | 
109 | def _compute_targets(rois, overlaps, labels):
110 |     """Compute bounding-box regression targets for an image."""
111 |     # Indices of ground-truth ROIs
112 |     gt_inds = np.where(overlaps == 1)[0]
113 |     if len(gt_inds) == 0:
114 |         # Bail if the image has no ground-truth ROIs
115 |         return np.zeros((rois.shape[0], 5), dtype=np.float32)
116 |     # Indices of examples for which we try to make predictions
117 |     ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
118 | 
119 |     # Get IoU overlap between each ex ROI and gt ROI
120 |     ex_gt_overlaps = bbox_overlaps(
121 |         np.ascontiguousarray(rois[ex_inds, :], dtype=np.float),
122 |         np.ascontiguousarray(rois[gt_inds, :], dtype=np.float))
123 | 
124 |     # Find which gt ROI each ex ROI has max overlap with:
125 |     # this will be the ex ROI's gt target
126 |     gt_assignment = ex_gt_overlaps.argmax(axis=1)
127 |     gt_rois = rois[gt_inds[gt_assignment], :]
128 |     ex_rois = rois[ex_inds, :]
129 | 
130 |     targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
131 |     targets[ex_inds, 0] = labels[ex_inds]
132 |     targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
133 |     return targets
134 | 


--------------------------------------------------------------------------------
/tools/demo_video_for_video.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # --------------------------------------------------------
  4 | # Faster R-CNN
  5 | # Copyright (c) 2015 Microsoft
  6 | # Licensed under The MIT License [see LICENSE for details]
  7 | # Written by Ross Girshick
  8 | # --------------------------------------------------------
  9 | 
 10 | """
 11 | Demo script showing detections in sample images.
 12 | 
 13 | See README.md for installation instructions before running.
 14 | """
 15 | 
 16 | import _init_paths
 17 | from fast_rcnn.config import cfg
 18 | from fast_rcnn.test import im_detect
 19 | from fast_rcnn.nms_wrapper import nms
 20 | from utils.timer import Timer
 21 | import matplotlib.pyplot as plt
 22 | import numpy as np
 23 | import scipy.io as sio
 24 | import caffe, os, sys, cv2
 25 | import argparse
 26 | 
 27 | 
 28 | import matplotlib
 29 | from matplotlib.pyplot import plot,savefig
 30 | import cv2.cv as cv
 31 | 
 32 | CLASSES = ('__background__','car','person')
 33 | 
 34 | NETS = {'vgg16': ('VGG16',
 35 |                   'VGG16_faster_rcnn_final.caffemodel'),
 36 |         'zf': ('ZF',
 37 |                   'ZF_faster_rcnn_final.caffemodel'),
 38 |         'vgg_m': ('VGG_CNN_M_1024',
 39 |                    'VGG_CNN_M_1024_faster_rcnn_final.caffemodel')}
 40 | 
 41 | 
 42 | def demo(net, im):
 43 |     """Detect object classes in an image using pre-computed object proposals."""
 44 |     # Load the demo image
 45 | 
 46 |     # Detect all object classes and regress object bounds
 47 |     timer = Timer()
 48 |     timer.tic()
 49 |     scores, boxes = im_detect(net, im)
 50 |     timer.toc()
 51 |     print ('Detection took {:.3f}s for '
 52 |            '{:d} object proposals').format(timer.total_time, boxes.shape[0])
 53 | 
 54 |     # Visualize detections for each class
 55 |     CONF_THRESH = 0.6
 56 |     NMS_THRESH = 0.3
 57 |     index=1
 58 |     for cls_ind, cls in enumerate(CLASSES[1:]):
 59 |         cls_ind += 1 # because we skipped background
 60 |         cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
 61 |         cls_scores = scores[:, cls_ind]
 62 |         dets = np.hstack((cls_boxes,
 63 |                           cls_scores[:, np.newaxis])).astype(np.float32)
 64 |         keep = nms(dets, NMS_THRESH)
 65 |         dets = dets[keep, :]
 66 | 
 67 |         #im = im[:, :, (2, 1, 0)]
 68 | 
 69 |         inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
 70 |         if len(inds) == 0 and index==len(CLASSES[1:]):
 71 |             #cv2.imwrite(path,im)
 72 |             video.write(im)
 73 |             return
 74 |         elif len(inds) == 0 and index<len(CLASSES[1:]):
 75 |             index+=1
 76 |             continue
 77 |         for i in inds:
 78 |             bbox = dets[i, :4]
 79 |             score = dets[i, -1]
 80 |             x = bbox[0]
 81 |             y = bbox[1]
 82 |             rect_start = (x,y)
 83 |             x1 = bbox[2]
 84 |             y1 = bbox[3]
 85 |             rect_end = (x1,y1)
 86 |             color0=(100,100,100)
 87 |             color1=(255,0,0)
 88 | 
 89 | 
 90 |             xx1 = bbox[0]
 91 |             yy1= int(bbox[1]-10)
 92 |             point_start = (xx1,yy1)
 93 |             xx2 = bbox[0]+(bbox[2]-bbox[0])*score
 94 |             yy2= int(bbox[1]-2)
 95 |             point_end = (xx2,yy2)
 96 |             color2=(0,0,225)
 97 |             color3=(0,255,0)
 98 |             if cls_ind==1:
 99 |                 cv2.rectangle(im, rect_start, rect_end, color1, 2)
100 |             elif cls_ind==2:
101 |                 cv2.rectangle(im, rect_start, rect_end, color3, 2)
102 |             elif cls_ind==3:
103 |                 cv2.rectangle(im, rect_start, rect_end, color0, 2)
104 |             cv2.rectangle(im, point_start, point_end, color2, -1)
105 |     cv2.namedWindow("Image")
106 |     res=cv2.resize(im,(1080,608),interpolation=cv2.INTER_CUBIC)
107 |     cv2.imshow("Image", res)
108 |     cv2.waitKey (1)
109 |     #cv2.imwrite(path,im)
110 |     #video.write(im)
111 | 
112 | 
113 | 
114 | 
115 | def parse_args():
116 |     """Parse input arguments."""
117 |     parser = argparse.ArgumentParser(description='Faster R-CNN demo')
118 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
119 |                         default=0, type=int)
120 |     parser.add_argument('--cpu', dest='cpu_mode',
121 |                         help='Use CPU mode (overrides --gpu)',
122 |                         action='store_true')
123 |     parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
124 |                         choices=NETS.keys(), default='vgg_m')
125 | 
126 |     args = parser.parse_args()
127 | 
128 |     return args
129 | 
130 | if __name__ == '__main__':
131 |     cfg.TEST.HAS_RPN = True  # Use RPN for proposals
132 | 
133 |     args = parse_args()
134 | 
135 |     prototxt = os.path.join(cfg.ROOT_DIR, 'models', NETS[args.demo_net][0],
136 |                             'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
137 |     caffemodel = os.path.join(cfg.ROOT_DIR, 'data', 'faster_rcnn_models',
138 |                               NETS[args.demo_net][1])
139 | 
140 |     if not os.path.isfile(caffemodel):
141 |         raise IOError(('{:s} not found.\nDid you run ./data/script/'
142 |                        'fetch_faster_rcnn_models.sh?').format(caffemodel))
143 | 
144 |     if args.cpu_mode:
145 |         caffe.set_mode_cpu()
146 |     else:
147 |         caffe.set_mode_gpu()
148 |         caffe.set_device(args.gpu_id)
149 |         cfg.GPU_ID = args.gpu_id
150 |     net = caffe.Net(prototxt, caffemodel, caffe.TEST)
151 | 
152 |     print '\n\nLoaded network {:s}'.format(caffemodel)
153 | 
154 |     # Warmup on a dummy image
155 |     #im = 128 * np.ones((300, 500, 3), dtype=np.uint8)
156 |     #im = 128 * np.ones((1280, 960, 3), dtype=np.uint8)
157 |     #for i in xrange(2):
158 |     #   _, _= im_detect(net, im)
159 | 
160 |     #dictionary=['20121117_142852', '20121117_150315', '20121117_153526', '20121128_092059', '20121130_095032', '20130110_135753', '20130110_140950', '20130110_142518', '20130123_094123', '20130123_112228', '20130123_132342', '20130123_143631', '20130129_133540', '20130311_112935', '20130311_115905', '20130314_102842', '20130314_144414', '20130319_121354']
161 |     dir_name='04041652_2624.MP4'
162 | 
163 |     dir_root=os.path.join(cfg.ROOT_DIR, 'data', 'demo',dir_name)
164 | 
165 |     videoCapture = cv2.VideoCapture(dir_root)
166 |     fps = videoCapture.get(cv2.cv.CV_CAP_PROP_FPS)
167 |    # fps=25
168 |     size = (int(videoCapture.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)),
169 |         int(videoCapture.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)))
170 | 
171 |     success, frame = videoCapture.read()
172 | 
173 | 
174 |     #cv2.cv.CV_FOURCC('I','4','2','0') avi
175 |     #cv2.cv.CV_FOURCC('P','I','M','1') avi
176 |     #cv2.cv.CV_FOURCC('M','J','P','G') avi
177 |     #cv2.cv.CV_FOURCC('T','H','E','O') ogv
178 |     #cv2.cv.CV_FOURCC('F','L','V','1') flv
179 |     video=cv2.VideoWriter(dir_name, cv2.cv.CV_FOURCC('M','J','P','G'), int(fps),size)
180 |     if not video:
181 |         print "Error in creating video writer"
182 |     while success :
183 |         demo(net,frame)
184 |         success, frame = videoCapture.read()
185 |     video.release()
186 | 


--------------------------------------------------------------------------------
/models/VGG16/faster_rcnn_alt_opt/rpn_test.pt:
--------------------------------------------------------------------------------
  1 | name: "VGG_ILSVRC_16_layers"
  2 | 
  3 | input: "data"
  4 | input_shape {
  5 |   dim: 1
  6 |   dim: 3
  7 |   dim: 224
  8 |   dim: 224
  9 | }
 10 | 
 11 | input: "im_info"
 12 | input_shape {
 13 |   dim: 1
 14 |   dim: 3
 15 | }
 16 | 
 17 | layer {
 18 |   name: "conv1_1"
 19 |   type: "Convolution"
 20 |   bottom: "data"
 21 |   top: "conv1_1"
 22 |   convolution_param {
 23 |     num_output: 64
 24 |     pad: 1 kernel_size: 3
 25 |   }
 26 | }
 27 | layer {
 28 |   name: "relu1_1"
 29 |   type: "ReLU"
 30 |   bottom: "conv1_1"
 31 |   top: "conv1_1"
 32 | }
 33 | layer {
 34 |   name: "conv1_2"
 35 |   type: "Convolution"
 36 |   bottom: "conv1_1"
 37 |   top: "conv1_2"
 38 |   convolution_param {
 39 |     num_output: 64
 40 |     pad: 1 kernel_size: 3
 41 |   }
 42 | }
 43 | layer {
 44 |   name: "relu1_2"
 45 |   type: "ReLU"
 46 |   bottom: "conv1_2"
 47 |   top: "conv1_2"
 48 | }
 49 | layer {
 50 |   name: "pool1"
 51 |   type: "Pooling"
 52 |   bottom: "conv1_2"
 53 |   top: "pool1"
 54 |   pooling_param {
 55 |     pool: MAX
 56 |     kernel_size: 2 stride: 2
 57 |   }
 58 | }
 59 | layer {
 60 |   name: "conv2_1"
 61 |   type: "Convolution"
 62 |   bottom: "pool1"
 63 |   top: "conv2_1"
 64 |   convolution_param {
 65 |     num_output: 128
 66 |     pad: 1 kernel_size: 3
 67 |   }
 68 | }
 69 | layer {
 70 |   name: "relu2_1"
 71 |   type: "ReLU"
 72 |   bottom: "conv2_1"
 73 |   top: "conv2_1"
 74 | }
 75 | layer {
 76 |   name: "conv2_2"
 77 |   type: "Convolution"
 78 |   bottom: "conv2_1"
 79 |   top: "conv2_2"
 80 |   convolution_param {
 81 |     num_output: 128
 82 |     pad: 1 kernel_size: 3
 83 |   }
 84 | }
 85 | layer {
 86 |   name: "relu2_2"
 87 |   type: "ReLU"
 88 |   bottom: "conv2_2"
 89 |   top: "conv2_2"
 90 | }
 91 | layer {
 92 |   name: "pool2"
 93 |   type: "Pooling"
 94 |   bottom: "conv2_2"
 95 |   top: "pool2"
 96 |   pooling_param {
 97 |     pool: MAX
 98 |     kernel_size: 2 stride: 2
 99 |   }
100 | }
101 | layer {
102 |   name: "conv3_1"
103 |   type: "Convolution"
104 |   bottom: "pool2"
105 |   top: "conv3_1"
106 |   convolution_param {
107 |     num_output: 256
108 |     pad: 1 kernel_size: 3
109 |   }
110 | }
111 | layer {
112 |   name: "relu3_1"
113 |   type: "ReLU"
114 |   bottom: "conv3_1"
115 |   top: "conv3_1"
116 | }
117 | layer {
118 |   name: "conv3_2"
119 |   type: "Convolution"
120 |   bottom: "conv3_1"
121 |   top: "conv3_2"
122 |   convolution_param {
123 |     num_output: 256
124 |     pad: 1 kernel_size: 3
125 |   }
126 | }
127 | layer {
128 |   name: "relu3_2"
129 |   type: "ReLU"
130 |   bottom: "conv3_2"
131 |   top: "conv3_2"
132 | }
133 | layer {
134 |   name: "conv3_3"
135 |   type: "Convolution"
136 |   bottom: "conv3_2"
137 |   top: "conv3_3"
138 |   convolution_param {
139 |     num_output: 256
140 |     pad: 1 kernel_size: 3
141 |   }
142 | }
143 | layer {
144 |   name: "relu3_3"
145 |   type: "ReLU"
146 |   bottom: "conv3_3"
147 |   top: "conv3_3"
148 | }
149 | layer {
150 |   name: "pool3"
151 |   type: "Pooling"
152 |   bottom: "conv3_3"
153 |   top: "pool3"
154 |   pooling_param {
155 |     pool: MAX
156 |     kernel_size: 2 stride: 2
157 |   }
158 | }
159 | layer {
160 |   name: "conv4_1"
161 |   type: "Convolution"
162 |   bottom: "pool3"
163 |   top: "conv4_1"
164 |   convolution_param {
165 |     num_output: 512
166 |     pad: 1 kernel_size: 3
167 |   }
168 | }
169 | layer {
170 |   name: "relu4_1"
171 |   type: "ReLU"
172 |   bottom: "conv4_1"
173 |   top: "conv4_1"
174 | }
175 | layer {
176 |   name: "conv4_2"
177 |   type: "Convolution"
178 |   bottom: "conv4_1"
179 |   top: "conv4_2"
180 |   convolution_param {
181 |     num_output: 512
182 |     pad: 1 kernel_size: 3
183 |   }
184 | }
185 | layer {
186 |   name: "relu4_2"
187 |   type: "ReLU"
188 |   bottom: "conv4_2"
189 |   top: "conv4_2"
190 | }
191 | layer {
192 |   name: "conv4_3"
193 |   type: "Convolution"
194 |   bottom: "conv4_2"
195 |   top: "conv4_3"
196 |   convolution_param {
197 |     num_output: 512
198 |     pad: 1 kernel_size: 3
199 |   }
200 | }
201 | layer {
202 |   name: "relu4_3"
203 |   type: "ReLU"
204 |   bottom: "conv4_3"
205 |   top: "conv4_3"
206 | }
207 | layer {
208 |   name: "pool4"
209 |   type: "Pooling"
210 |   bottom: "conv4_3"
211 |   top: "pool4"
212 |   pooling_param {
213 |     pool: MAX
214 |     kernel_size: 2 stride: 2
215 |   }
216 | }
217 | layer {
218 |   name: "conv5_1"
219 |   type: "Convolution"
220 |   bottom: "pool4"
221 |   top: "conv5_1"
222 |   convolution_param {
223 |     num_output: 512
224 |     pad: 1 kernel_size: 3
225 |   }
226 | }
227 | layer {
228 |   name: "relu5_1"
229 |   type: "ReLU"
230 |   bottom: "conv5_1"
231 |   top: "conv5_1"
232 | }
233 | layer {
234 |   name: "conv5_2"
235 |   type: "Convolution"
236 |   bottom: "conv5_1"
237 |   top: "conv5_2"
238 |   convolution_param {
239 |     num_output: 512
240 |     pad: 1 kernel_size: 3
241 |   }
242 | }
243 | layer {
244 |   name: "relu5_2"
245 |   type: "ReLU"
246 |   bottom: "conv5_2"
247 |   top: "conv5_2"
248 | }
249 | layer {
250 |   name: "conv5_3"
251 |   type: "Convolution"
252 |   bottom: "conv5_2"
253 |   top: "conv5_3"
254 |   convolution_param {
255 |     num_output: 512
256 |     pad: 1 kernel_size: 3
257 |   }
258 | }
259 | layer {
260 |   name: "relu5_3"
261 |   type: "ReLU"
262 |   bottom: "conv5_3"
263 |   top: "conv5_3"
264 | }
265 | 
266 | #========= RPN ============
267 | 
268 | layer {
269 |   name: "rpn_conv/3x3"
270 |   type: "Convolution"
271 |   bottom: "conv5_3"
272 |   top: "rpn/output"
273 |   convolution_param {
274 |     num_output: 512
275 |     kernel_size: 3 pad: 1 stride: 1
276 |   }
277 | }
278 | layer {
279 |   name: "rpn_relu/3x3"
280 |   type: "ReLU"
281 |   bottom: "rpn/output"
282 |   top: "rpn/output"
283 | }
284 | 
285 | layer {
286 |   name: "rpn_cls_score"
287 |   type: "Convolution"
288 |   bottom: "rpn/output"
289 |   top: "rpn_cls_score"
290 |   convolution_param {
291 |     num_output: 18   # 2(bg/fg) * 9(anchors)
292 |     kernel_size: 1 pad: 0 stride: 1
293 |   }
294 | }
295 | layer {
296 |   name: "rpn_bbox_pred"
297 |   type: "Convolution"
298 |   bottom: "rpn/output"
299 |   top: "rpn_bbox_pred"
300 |   convolution_param {
301 |     num_output: 36   # 4 * 9(anchors)
302 |     kernel_size: 1 pad: 0 stride: 1
303 |   }
304 | }
305 | layer {
306 |    bottom: "rpn_cls_score"
307 |    top: "rpn_cls_score_reshape"
308 |    name: "rpn_cls_score_reshape"
309 |    type: "Reshape"
310 |    reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
311 | }
312 | 
313 | #========= RoI Proposal ============
314 | 
315 | layer {
316 |   name: "rpn_cls_prob"
317 |   type: "Softmax"
318 |   bottom: "rpn_cls_score_reshape"
319 |   top: "rpn_cls_prob"
320 | }
321 | layer {
322 |   name: 'rpn_cls_prob_reshape'
323 |   type: 'Reshape'
324 |   bottom: 'rpn_cls_prob'
325 |   top: 'rpn_cls_prob_reshape'
326 |   reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
327 | }
328 | layer {
329 |   name: 'proposal'
330 |   type: 'Python'
331 |   bottom: 'rpn_cls_prob_reshape'
332 |   bottom: 'rpn_bbox_pred'
333 |   bottom: 'im_info'
334 |   top: 'rois'
335 |   top: 'scores'
336 |   python_param {
337 |     module: 'rpn.proposal_layer'
338 |     layer: 'ProposalLayer'
339 |     param_str: "'feat_stride': 16"
340 |   }
341 | }
342 | 


--------------------------------------------------------------------------------
/tools/demo_location.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # --------------------------------------------------------
  4 | # Faster R-CNN
  5 | # Copyright (c) 2015 Microsoft
  6 | # Licensed under The MIT License [see LICENSE for details]
  7 | # Written by Ross Girshick
  8 | # --------------------------------------------------------
  9 | 
 10 | """
 11 | Demo script showing detections in sample images.
 12 | 
 13 | See README.md for installation instructions before running.
 14 | """
 15 | 
 16 | import _init_paths
 17 | from fast_rcnn.config import cfg
 18 | from fast_rcnn.test import im_detect
 19 | from fast_rcnn.nms_wrapper import nms
 20 | from utils.timer import Timer
 21 | import matplotlib.pyplot as plt
 22 | import numpy as np
 23 | import scipy.io as sio
 24 | import caffe, os, sys, cv2
 25 | import argparse
 26 | import glob
 27 | 
 28 | import matplotlib
 29 | from matplotlib.pyplot import plot,savefig
 30 | 
 31 | CLASSES = ('__background__','car')
 32 | 
 33 | NETS = {'vgg16': ('VGG16',
 34 |                   'VGG16_faster_rcnn_final.caffemodel'),
 35 |         'zf': ('ZF',
 36 |                   'ZF_faster_rcnn_final.caffemodel'),
 37 |         'vgg_m': ('VGG_CNN_M_1024',
 38 |                    'VGG_CNN_M_1024_faster_rcnn_final.caffemodel')}
 39 | 
 40 | 
 41 | def vis_detections(im, class_name, dets, dir_name, image_name, thresh=0.5):
 42 |     """Draw detected bounding boxes."""
 43 |     im = im[:, :, (2, 1, 0)]
 44 |     fig, ax = plt.subplots()
 45 |     ax.imshow(im, aspect='equal')
 46 |     inds = np.where(dets[:, -1] >= thresh)[0]
 47 |     if len(inds) == 0:
 48 |         plt.axis('off')
 49 |         plt.tight_layout()
 50 |         plt.draw()
 51 |         path=dir_name+'/'+image_name
 52 |         savefig(path)
 53 |         plt.close()
 54 |         return
 55 |     ind_str=(int(image_name[0:6])-1)/600
 56 |     ind_frame=(int(image_name[0:6])-1)%600
 57 |     str_txt='***** 00000'+str(ind_str)+'.STR - '+str(ind_frame)+' *****'
 58 |     f.write(str_txt+'\n')
 59 |     f.write(str(len(inds)))
 60 |     for i in inds:
 61 |         bbox = dets[i, :4]
 62 |         f.write(' '+str(int(bbox[0]))+' '+str(int(bbox[1]))+' '+str(int(bbox[2]))+' '+str(int(bbox[3]))+' '+str(6)+' '+str(20000))
 63 |         score = dets[i, -1]
 64 |         ax.add_patch(
 65 |             plt.Rectangle((bbox[0], bbox[1]),
 66 |                           bbox[2] - bbox[0],
 67 |                           bbox[3] - bbox[1], fill=False,
 68 |                           edgecolor='red', linewidth=3.5)
 69 |             )
 70 |         ax.text(bbox[0], bbox[1] - 2,
 71 |                 '{:s} {:.3f}'.format(class_name, score),
 72 |                 bbox=dict(facecolor='blue', alpha=0.5),
 73 |                 fontsize=14, color='white')
 74 | 
 75 |     ax.set_title(('{} detections with '
 76 |                   'p({} | box) >= {:.1f}').format(class_name, class_name,
 77 |                                                   thresh),
 78 |                   fontsize=14)
 79 |     f.write('\n')
 80 |     f.write('\n')
 81 |     f.write('\n')
 82 |     plt.axis('off')
 83 |     plt.tight_layout()
 84 |     plt.draw()
 85 |     path=dir_name+'/'+image_name
 86 |     savefig(path)
 87 |     plt.close()
 88 | 
 89 | def demo(net, dir_name, image_name):
 90 |     """Detect object classes in an image using pre-computed object proposals."""
 91 | 
 92 |     # Load the demo image
 93 |     im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo','test',dir_name, image_name)
 94 |     im = cv2.imread(im_file)
 95 | 
 96 |     # Detect all object classes and regress object bounds
 97 |     timer = Timer()
 98 |     timer.tic()
 99 |     scores, boxes = im_detect(net, im)
100 |     timer.toc()
101 |     print ('Detection took {:.3f}s for '
102 |            '{:d} object proposals').format(timer.total_time, boxes.shape[0])
103 | 
104 |     # Visualize detections for each class
105 |     CONF_THRESH = 0.8
106 |     NMS_THRESH = 0.3
107 |     for cls_ind, cls in enumerate(CLASSES[1:]):
108 |         cls_ind += 1 # because we skipped background
109 |         cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
110 |         cls_scores = scores[:, cls_ind]
111 |         dets = np.hstack((cls_boxes,
112 |                           cls_scores[:, np.newaxis])).astype(np.float32)
113 |         keep = nms(dets, NMS_THRESH)
114 |         dets = dets[keep, :]
115 |         vis_detections(im, cls, dets, dir_name, image_name, thresh=CONF_THRESH)
116 | 
117 | def parse_args():
118 |     """Parse input arguments."""
119 |     parser = argparse.ArgumentParser(description='Faster R-CNN demo')
120 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
121 |                         default=0, type=int)
122 |     parser.add_argument('--cpu', dest='cpu_mode',
123 |                         help='Use CPU mode (overrides --gpu)',
124 |                         action='store_true')
125 |     parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
126 |                         choices=NETS.keys(), default='vgg_m')
127 | 
128 |     args = parser.parse_args()
129 | 
130 |     return args
131 | 
132 | if __name__ == '__main__':
133 |     cfg.TEST.HAS_RPN = True  # Use RPN for proposals
134 | 
135 |     args = parse_args()
136 | 
137 |     prototxt = os.path.join(cfg.ROOT_DIR, 'models', NETS[args.demo_net][0],
138 |                             'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
139 |     caffemodel = os.path.join(cfg.ROOT_DIR, 'data', 'faster_rcnn_models',
140 |                               NETS[args.demo_net][1])
141 | 
142 |     if not os.path.isfile(caffemodel):
143 |         raise IOError(('{:s} not found.\nDid you run ./data/script/'
144 |                        'fetch_faster_rcnn_models.sh?').format(caffemodel))
145 | 
146 |     if args.cpu_mode:
147 |         caffe.set_mode_cpu()
148 |     else:
149 |         caffe.set_mode_gpu()
150 |         caffe.set_device(args.gpu_id)
151 |         cfg.GPU_ID = args.gpu_id
152 |     net = caffe.Net(prototxt, caffemodel, caffe.TEST)
153 | 
154 |     print '\n\nLoaded network {:s}'.format(caffemodel)
155 | 
156 |     # Warmup on a dummy image
157 |     #im = 128 * np.ones((300, 500, 3), dtype=np.uint8)
158 |     im = 128 * np.ones((1280, 960, 3), dtype=np.uint8)
159 |     for i in xrange(2):
160 |         _, _= im_detect(net, im)
161 | 
162 |     #dictionary=['20121117_142852', '20121117_150315', '20121117_153526', '20121128_092059', '20121130_095032', '20130110_135753', '20130110_140950', '20130110_142518', '20130123_094123', '20130123_112228', '20130123_132342', '20130123_143631', '20130129_133540', '20130311_112935', '20130311_115905', '20130314_102842', '20130314_144414', '20130319_121354']
163 | 
164 |     dictionary=['20130618_110313_gray']
165 | 
166 |     for dir_name in dictionary:
167 |         if os.path.isdir(dir_name):
168 |             pass
169 |         else:
170 |             os.mkdir(dir_name)
171 |         txt_path=dir_name+'/'+'fasterRCNNTrackingResult.txt'
172 |         f=open(txt_path,'w')
173 | 
174 |  
175 |         dir_list= os.path.join(cfg.ROOT_DIR, 'data', 'demo', 'test', dir_name)
176 |         dic=os.listdir(dir_list)
177 |         dic.sort() 
178 |         for im_name in dic:
179 |             print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
180 |             print 'Demo for data/demo/{}'.format(im_name)
181 |             demo(net, dir_name, im_name)
182 | 


--------------------------------------------------------------------------------
/tools/demo_for_video.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # --------------------------------------------------------
  4 | # Faster R-CNN
  5 | # Copyright (c) 2015 Microsoft
  6 | # Licensed under The MIT License [see LICENSE for details]
  7 | # Written by Ross Girshick
  8 | # --------------------------------------------------------
  9 | 
 10 | """
 11 | Demo script showing detections in sample images.
 12 | 
 13 | See README.md for installation instructions before running.
 14 | """
 15 | 
 16 | import _init_paths
 17 | from fast_rcnn.config import cfg
 18 | from fast_rcnn.test import im_detect
 19 | from fast_rcnn.nms_wrapper import nms
 20 | from utils.timer import Timer
 21 | import matplotlib.pyplot as plt
 22 | import numpy as np
 23 | import scipy.io as sio
 24 | import caffe, os, sys, cv2
 25 | import argparse
 26 | 
 27 | 
 28 | import matplotlib
 29 | from matplotlib.pyplot import plot,savefig
 30 | import cv2.cv as cv
 31 | 
 32 | CLASSES = ('__background__','car','person')
 33 | 
 34 | NETS = {'vgg16': ('VGG16',
 35 |                   'VGG16_faster_rcnn_final.caffemodel'),
 36 |         'zf': ('ZF',
 37 |                   'ZF_faster_rcnn_final.caffemodel'),
 38 |         'vgg_m': ('VGG_CNN_M_1024',
 39 |                    'VGG_CNN_M_1024_faster_rcnn_final.caffemodel')}
 40 | 
 41 | 
 42 | def demo(net, dir_name, image_name):
 43 |     """Detect object classes in an image using pre-computed object proposals."""
 44 |     # Load the demo image
 45 |     im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo','test',dir_name, image_name)
 46 |     im = cv2.imread(im_file)
 47 | 
 48 |     # Detect all object classes and regress object bounds
 49 |     timer = Timer()
 50 |     timer.tic()
 51 |     scores, boxes = im_detect(net, im)
 52 |     timer.toc()
 53 |     print ('Detection took {:.3f}s for '
 54 |            '{:d} object proposals').format(timer.total_time, boxes.shape[0])
 55 | 
 56 |     # Visualize detections for each class
 57 |     CONF_THRESH = 0.7
 58 |     NMS_THRESH = 0.2
 59 |     index=1
 60 |     for cls_ind, cls in enumerate(CLASSES[1:]):
 61 |         cls_ind += 1 # because we skipped background
 62 |         cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
 63 |         cls_scores = scores[:, cls_ind]
 64 |         dets = np.hstack((cls_boxes,
 65 |                           cls_scores[:, np.newaxis])).astype(np.float32)
 66 |         keep = nms(dets, NMS_THRESH)
 67 |         dets = dets[keep, :]
 68 | 
 69 |         #im = im[:, :, (2, 1, 0)]
 70 | 
 71 |         inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
 72 |         if len(inds) == 0 and index==len(CLASSES[1:]):
 73 |             #cv2.imwrite(path,im)
 74 |             video.write(im)
 75 |             return
 76 |         elif len(inds) == 0 and index<len(CLASSES[1:]):
 77 |             index+=1
 78 |             continue
 79 |         for i in inds:
 80 |             bbox = dets[i, :4]
 81 |             score = dets[i, -1]
 82 |             x = bbox[0]
 83 |             y = bbox[1]
 84 |             rect_start = (x,y)
 85 |             x1 = bbox[2]
 86 |             y1 = bbox[3]
 87 |             rect_end = (x1,y1)
 88 |             color0=(100,100,100)
 89 |             color1=(255,0,0)
 90 | 
 91 | 
 92 |             xx1 = bbox[0]
 93 |             yy1= int(bbox[1]-10)
 94 |             point_start = (xx1,yy1)
 95 |             xx2 = bbox[0]+(bbox[2]-bbox[0])*score
 96 |             yy2= int(bbox[1]-2)
 97 |             point_end = (xx2,yy2)
 98 |             color2=(0,0,225)
 99 |             color3=(0,255,0)
100 |             if cls_ind==1:
101 |                 cv2.rectangle(im, rect_start, rect_end, color1, 2)
102 |             elif cls_ind==2:
103 |                 cv2.rectangle(im, rect_start, rect_end, color3, 2)
104 |             elif cls_ind==3:
105 |                 cv2.rectangle(im, rect_start, rect_end, color0, 2)
106 |             cv2.rectangle(im, point_start, point_end, color2, -1)
107 |     #cv2.namedWindow("Image")
108 |     #cv2.imshow("Image", im)
109 |     #cv2.waitKey (0)
110 |     #cv2.imwrite(path,im)
111 |     video.write(im)
112 | 
113 | 
114 | 
115 | 
116 | def parse_args():
117 |     """Parse input arguments."""
118 |     parser = argparse.ArgumentParser(description='Faster R-CNN demo')
119 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
120 |                         default=0, type=int)
121 |     parser.add_argument('--cpu', dest='cpu_mode',
122 |                         help='Use CPU mode (overrides --gpu)',
123 |                         action='store_true')
124 |     parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
125 |                         choices=NETS.keys(), default='vgg_m')
126 | 
127 |     args = parser.parse_args()
128 | 
129 |     return args
130 | 
131 | if __name__ == '__main__':
132 |     cfg.TEST.HAS_RPN = True  # Use RPN for proposals
133 | 
134 |     args = parse_args()
135 | 
136 |     prototxt = os.path.join(cfg.ROOT_DIR, 'models', NETS[args.demo_net][0],
137 |                             'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
138 |     caffemodel = os.path.join(cfg.ROOT_DIR, 'data', 'faster_rcnn_models',
139 |                               NETS[args.demo_net][1])
140 | 
141 |     if not os.path.isfile(caffemodel):
142 |         raise IOError(('{:s} not found.\nDid you run ./data/script/'
143 |                        'fetch_faster_rcnn_models.sh?').format(caffemodel))
144 | 
145 |     if args.cpu_mode:
146 |         caffe.set_mode_cpu()
147 |     else:
148 |         caffe.set_mode_gpu()
149 |         caffe.set_device(args.gpu_id)
150 |         cfg.GPU_ID = args.gpu_id
151 |     net = caffe.Net(prototxt, caffemodel, caffe.TEST)
152 | 
153 |     print '\n\nLoaded network {:s}'.format(caffemodel)
154 | 
155 |     # Warmup on a dummy image
156 |     #im = 128 * np.ones((300, 500, 3), dtype=np.uint8)
157 |     #im = 128 * np.ones((1280, 960, 3), dtype=np.uint8)
158 |     #for i in xrange(2):
159 |     #   _, _= im_detect(net, im)
160 | 
161 |     #dictionary=['20121117_142852', '20121117_150315', '20121117_153526', '20121128_092059', '20121130_095032', '20130110_135753', '20130110_140950', '20130110_142518', '20130123_094123', '20130123_112228', '20130123_132342', '20130123_143631', '20130129_133540', '20130311_112935', '20130311_115905', '20130314_102842', '20130314_144414', '20130319_121354']
162 |     dictionary=['image_2']
163 | 
164 |     size = (1250,960)
165 |     fps=10
166 |     for dir_name in dictionary:
167 |         if os.path.isdir(dir_name):
168 |             pass
169 |         else:
170 |             os.mkdir(dir_name)
171 |             #pass
172 |         #cv2.cv.CV_FOURCC('I','4','2','0') avi
173 |         #cv2.cv.CV_FOURCC('P','I','M','1') avi
174 |         #cv2.cv.CV_FOURCC('M','J','P','G') avi
175 |         #cv2.cv.CV_FOURCC('T','H','E','O') ogv
176 |         #cv2.cv.CV_FOURCC('F','L','V','1') flv
177 |         video=cv2.VideoWriter(dir_name+'.avi', cv2.cv.CV_FOURCC('M','J','P','G'), fps,size)
178 |         if not video:
179 |             print "Error in creating video writer"
180 |         dir_root=os.path.join(cfg.ROOT_DIR, 'data', 'demo','test',dir_name)
181 |         dic=os.listdir(dir_root)
182 |         dic.sort() 
183 |         for im_name in dic:
184 |             print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
185 |             print 'Demo for data/demo/{}'.format(im_name)
186 |             demo(net, dir_name, im_name)
187 |         video.release()
188 | 


--------------------------------------------------------------------------------
/lib/rpn/proposal_layer.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Faster R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick and Sean Bell
  6 | # --------------------------------------------------------
  7 | 
  8 | import caffe
  9 | import numpy as np
 10 | import yaml
 11 | from fast_rcnn.config import cfg
 12 | from generate_anchors import generate_anchors
 13 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes
 14 | from fast_rcnn.nms_wrapper import nms
 15 | 
 16 | DEBUG = False
 17 | 
 18 | class ProposalLayer(caffe.Layer):
 19 |     """
 20 |     Outputs object detection proposals by applying estimated bounding-box
 21 |     transformations to a set of regular boxes (called "anchors").
 22 |     """
 23 | 
 24 |     def setup(self, bottom, top):
 25 |         # parse the layer parameter string, which must be valid YAML
 26 |         layer_params = yaml.load(self.param_str_)
 27 | 
 28 |         self._feat_stride = layer_params['feat_stride']
 29 |         self._anchors     = generate_anchors()
 30 |         self._num_anchors = self._anchors.shape[0]
 31 | 
 32 |         if DEBUG:
 33 |             print 'feat_stride: {}'.format(self._feat_stride)
 34 |             print 'anchors:'
 35 |             print self._anchors
 36 | 
 37 |         # rois blob: holds R regions of interest, each is a 5-tuple
 38 |         # (n, x1, y1, x2, y2) specifying an image batch index n and a
 39 |         # rectangle (x1, y1, x2, y2)
 40 |         top[0].reshape(1, 5)
 41 | 
 42 |         # scores blob: holds scores for R regions of interest
 43 |         if len(top) > 1:
 44 |             top[1].reshape(1, 1, 1, 1)
 45 | 
 46 |     def forward(self, bottom, top):
 47 |         # Algorithm:
 48 |         #
 49 |         # for each (H, W) location i
 50 |         #   generate A anchor boxes centered on cell i
 51 |         #   apply predicted bbox deltas at cell i to each of the A anchors
 52 |         # clip predicted boxes to image
 53 |         # remove predicted boxes with either height or width < threshold
 54 |         # sort all (proposal, score) pairs by score from highest to lowest
 55 |         # take top pre_nms_topN proposals before NMS
 56 |         # apply NMS with threshold 0.7 to remaining proposals
 57 |         # take after_nms_topN proposals after NMS
 58 |         # return the top proposals (-> RoIs top, scores top)
 59 | 
 60 |         assert bottom[0].data.shape[0] == 1, \
 61 |             'Only single item batches are supported'
 62 | 
 63 |         cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
 64 |         pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
 65 |         post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
 66 |         nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
 67 |         min_size      = cfg[cfg_key].RPN_MIN_SIZE
 68 | 
 69 |         # the first set of _num_anchors channels are bg probs
 70 |         # the second set are the fg probs, which we want
 71 |         scores = bottom[0].data[:, self._num_anchors:, :, :]
 72 |         bbox_deltas = bottom[1].data
 73 |         im_info = bottom[2].data[0, :]
 74 | 
 75 |         if DEBUG:
 76 |             print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
 77 |             print 'scale: {}'.format(im_info[2])
 78 | 
 79 |         # 1. Generate proposals from bbox deltas and shifted anchors
 80 |         height, width = scores.shape[-2:]
 81 | 
 82 |         if DEBUG:
 83 |             print 'score map size: {}'.format(scores.shape)
 84 | 
 85 |         # Enumerate all shifts
 86 |         shift_x = np.arange(0, width) * self._feat_stride
 87 |         shift_y = np.arange(0, height) * self._feat_stride
 88 |         shift_x, shift_y = np.meshgrid(shift_x, shift_y)
 89 |         shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
 90 |                             shift_x.ravel(), shift_y.ravel())).transpose()
 91 | 
 92 |         # Enumerate all shifted anchors:
 93 |         #
 94 |         # add A anchors (1, A, 4) to
 95 |         # cell K shifts (K, 1, 4) to get
 96 |         # shift anchors (K, A, 4)
 97 |         # reshape to (K*A, 4) shifted anchors
 98 |         A = self._num_anchors
 99 |         K = shifts.shape[0]
100 |         anchors = self._anchors.reshape((1, A, 4)) + \
101 |                   shifts.reshape((1, K, 4)).transpose((1, 0, 2))
102 |         anchors = anchors.reshape((K * A, 4))
103 | 
104 |         # Transpose and reshape predicted bbox transformations to get them
105 |         # into the same order as the anchors:
106 |         #
107 |         # bbox deltas will be (1, 4 * A, H, W) format
108 |         # transpose to (1, H, W, 4 * A)
109 |         # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
110 |         # in slowest to fastest order
111 |         bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
112 | 
113 |         # Same story for the scores:
114 |         #
115 |         # scores are (1, A, H, W) format
116 |         # transpose to (1, H, W, A)
117 |         # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
118 |         scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
119 | 
120 |         # Convert anchors into proposals via bbox transformations
121 |         proposals = bbox_transform_inv(anchors, bbox_deltas)
122 | 
123 |         # 2. clip predicted boxes to image
124 |         proposals = clip_boxes(proposals, im_info[:2])
125 | 
126 |         # 3. remove predicted boxes with either height or width < threshold
127 |         # (NOTE: convert min_size to input image scale stored in im_info[2])
128 |         keep = _filter_boxes(proposals, min_size * im_info[2])
129 |         proposals = proposals[keep, :]
130 |         scores = scores[keep]
131 | 
132 |         # 4. sort all (proposal, score) pairs by score from highest to lowest
133 |         # 5. take top pre_nms_topN (e.g. 6000)
134 |         order = scores.ravel().argsort()[::-1]
135 |         if pre_nms_topN > 0:
136 |             order = order[:pre_nms_topN]
137 |         proposals = proposals[order, :]
138 |         scores = scores[order]
139 | 
140 |         # 6. apply nms (e.g. threshold = 0.7)
141 |         # 7. take after_nms_topN (e.g. 300)
142 |         # 8. return the top proposals (-> RoIs top)
143 |         keep = nms(np.hstack((proposals, scores)), nms_thresh)
144 |         if post_nms_topN > 0:
145 |             keep = keep[:post_nms_topN]
146 |         proposals = proposals[keep, :]
147 |         scores = scores[keep]
148 | 
149 |         # Output rois blob
150 |         # Our RPN implementation only supports a single input image, so all
151 |         # batch inds are 0
152 |         batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
153 |         blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
154 |         top[0].reshape(*(blob.shape))
155 |         top[0].data[...] = blob
156 | 
157 |         # [Optional] output scores blob
158 |         if len(top) > 1:
159 |             top[1].reshape(*(scores.shape))
160 |             top[1].data[...] = scores
161 | 
162 |     def backward(self, top, propagate_down, bottom):
163 |         """This layer does not propagate gradients."""
164 |         pass
165 | 
166 |     def reshape(self, bottom, top):
167 |         """Reshaping happens during the call to forward."""
168 |         pass
169 | 
170 | def _filter_boxes(boxes, min_size):
171 |     """Remove all boxes with any side smaller than min_size."""
172 |     ws = boxes[:, 2] - boxes[:, 0] + 1
173 |     hs = boxes[:, 3] - boxes[:, 1] + 1
174 |     keep = np.where((ws >= min_size) & (hs >= min_size))[0]
175 |     return keep
176 | 


--------------------------------------------------------------------------------
/tools/demo_video_for_video_XXX.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # --------------------------------------------------------
  5 | # Faster R-CNN
  6 | # Copyright (c) 2015 Microsoft
  7 | # Licensed under The MIT License [see LICENSE for details]
  8 | # Written by Ross Girshick
  9 | # --------------------------------------------------------
 10 | 
 11 | """
 12 | Demo script showing detections in sample images.
 13 | 
 14 | See README.md for installation instructions before running.
 15 | """
 16 | 
 17 | import _init_paths
 18 | from fast_rcnn.config import cfg
 19 | from fast_rcnn.test import im_detect
 20 | from fast_rcnn.nms_wrapper import nms
 21 | from utils.timer import Timer
 22 | import matplotlib.pyplot as plt
 23 | import numpy as np
 24 | import scipy.io as sio
 25 | import caffe, os, sys, cv2
 26 | import argparse
 27 | 
 28 | 
 29 | import matplotlib
 30 | from matplotlib.pyplot import plot,savefig
 31 | import cv2.cv as cv
 32 | 
 33 | CLASSES = ('__background__', 'car', 'person', 'bike', 'truck', 'van','tram', 'misc')
 34 | 
 35 | NETS = {'vgg16': ('VGG16',
 36 |                   'VGG16_faster_rcnn_final.caffemodel'),
 37 |         'zf': ('ZF',
 38 |                   'ZF_faster_rcnn_final.caffemodel'),
 39 |         'vgg_m': ('VGG_CNN_M_1024',
 40 |                    'VGG_CNN_M_1024_faster_rcnn_final.caffemodel')}
 41 | 
 42 | 
 43 | def demo(net, im):
 44 |     """Detect object classes in an image using pre-computed object proposals."""
 45 |     # Load the demo image
 46 | 
 47 |     # Detect all object classes and regress object bounds
 48 |     timer = Timer()
 49 |     timer.tic()
 50 |     scores, boxes = im_detect(net, im)
 51 |     timer.toc()
 52 |     print ('Detection took {:.3f}s for '
 53 |            '{:d} object proposals').format(timer.total_time, boxes.shape[0])
 54 | 
 55 |     # Visualize detections for each class
 56 |     CONF_THRESH = 0.8
 57 |     NMS_THRESH = 0.3
 58 |     index=1
 59 |     for cls_ind, cls in enumerate(CLASSES[1:]):
 60 |         cls_ind += 1 # because we skipped background
 61 |         cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
 62 |         cls_scores = scores[:, cls_ind]
 63 |         dets = np.hstack((cls_boxes,
 64 |                           cls_scores[:, np.newaxis])).astype(np.float32)
 65 |         keep = nms(dets, NMS_THRESH)
 66 |         dets = dets[keep, :]
 67 | 
 68 |         #im = im[:, :, (2, 1, 0)]
 69 | 
 70 |         inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
 71 |         if len(inds) == 0 and index==len(CLASSES[1:]):
 72 |             #cv2.imwrite(path,im)
 73 |             video.write(im)
 74 |             return
 75 |         elif len(inds) == 0 and index<len(CLASSES[1:]):
 76 |             index+=1
 77 |             continue
 78 |         for i in inds:
 79 |             bbox = dets[i, :4]
 80 |             score = dets[i, -1]
 81 |             x = bbox[0]
 82 |             y = bbox[1]
 83 |             rect_start = (x,y)
 84 |             x1 = bbox[2]
 85 |             y1 = bbox[3]
 86 |             rect_end = (x1,y1)
 87 |             color0=(100,100,100)
 88 |             color1=(255,0,0)
 89 | 
 90 | 
 91 |             xx1 = bbox[0]
 92 |             yy1= int(bbox[1]-10)
 93 |             point_start = (xx1,yy1)
 94 |             xx2 = bbox[0]+(bbox[2]-bbox[0])*score
 95 |             yy2= int(bbox[1]-2)
 96 |             point_end = (xx2,yy2)
 97 |             color2=(0,0,225)
 98 |             color3=(0,255,0)
 99 |             if cls_ind in [1, 4, 5, 6, 7, 8]:
100 |                 cv2.rectangle(im, rect_start, rect_end, color1, 2)
101 |             elif cls_ind==2:
102 |                 cv2.rectangle(im, rect_start, rect_end, color3, 2)
103 |             elif cls_ind==3:from fcolor
104 |                 cv2.rectangle(im, rect_start, rect_end, color0, 2)
105 |             #elif cls_ind==4:
106 |             #    cv2.rectangle(im, rect_start, rect_end, (100, 100, 100), 2)
107 |             #elif cls_ind==5:
108 |             #    cv2.rectangle(im, rect_start, rect_end, (255, 100, 80), 2)
109 |             #elif cls_ind==6:
110 |             #    cv2.rectangle(im, rect_start, rect_end, (100, 255, 30), 2)
111 |             #elif cls_ind==7:
112 |             #    cv2.rectangle(im, rect_start, rect_end, (200, 80, 90), 2)
113 |             #elif cls_ind==8:
114 |             #   cv2.rectangle(im, rect_start, rect_end, (100, 10, 30), 2)
115 |             cv2.rectangle(im, point_start, point_end, color2, -1)
116 |     cv2.namedWindow("Image")
117 |     res=cv2.resize(im,(1080,608),interpolation=cv2.INTER_CUBIC)
118 |     cv2.imshow("Image", res)
119 |     cv2.waitKey (1)
120 |     #cv2.imwrite(path,im)
121 |     video.write(im)
122 | 
123 | 
124 | 
125 | 
126 | def parse_args():
127 |     """Parse input arguments."""
128 |     parser = argparse.ArgumentParser(description='Faster R-CNN demo')
129 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
130 |                         default=0, type=int)
131 |     parser.add_argument('--cpu', dest='cpu_mode',
132 |                         help='Use CPU mode (overrides --gpu)',
133 |                         action='store_true')
134 |     parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
135 |                         choices=NETS.keys(), default='vgg16')
136 | 
137 |     args = parser.parse_args()
138 | 
139 |     return args
140 | 
141 | if __name__ == '__main__':
142 |     cfg.TEST.HAS_RPN = True  # Use RPN for proposals
143 | 
144 |     args = parse_args()
145 | 
146 |     prototxt = os.path.join(cfg.ROOT_DIR, 'models', NETS[args.demo_net][0],
147 |                             'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
148 |     caffemodel = os.path.join(cfg.ROOT_DIR, 'data', 'faster_rcnn_models',
149 |                               NETS[args.demo_net][1])
150 | 
151 |     if not os.path.isfile(caffemodel):
152 |         raise IOError(('{:s} not found.\nDid you run ./data/script/'
153 |                        'fetch_faster_rcnn_models.sh?').format(caffemodel))
154 | 
155 |     if args.cpu_mode:
156 |         caffe.set_mode_cpu()
157 |     else:
158 |         caffe.set_mode_gpu()
159 |         caffe.set_device(args.gpu_id)
160 |         cfg.GPU_ID = args.gpu_id
161 |     net = caffe.Net(prototxt, caffemodel, caffe.TEST)
162 | 
163 |     print '\n\nLoaded network {:s}'.format(caffemodel)
164 | 
165 |     # Warmup on a dummy image
166 |     #im = 128 * np.ones((300, 500, 3), dtype=np.uint8)
167 |     #im = 128 * np.ones((1280, 960, 3), dtype=np.uint8)
168 |     #for i in xrange(2):
169 |     #   _, _= im_detect(net, im)
170 | 
171 |     #dictionary=['20121117_142852', '20121117_150315', '20121117_153526', '20121128_092059', '20121130_095032', '20130110_135753', '20130110_140950', '20130110_142518', '20130123_094123', '20130123_112228', '20130123_132342', '20130123_143631', '20130129_133540', '20130311_112935', '20130311_115905', '20130314_102842', '20130314_144414', '20130319_121354']
172 |     dir_name='2.avi'
173 | 
174 |     dir_root=os.path.join(cfg.ROOT_DIR, 'data',dir_name)
175 | 
176 |     videoCapture = cv2.VideoCapture(dir_root)
177 |     fps = videoCapture.get(cv2.cv.CV_CAP_PROP_FPS)
178 |    # fps=25
179 |     size = (int(videoCapture.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)),
180 |         int(videoCapture.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)))
181 | 
182 |     success, frame = videoCapture.read()
183 | 
184 | 
185 |     #cv2.cv.CV_FOURCC('I','4','2','0') avi
186 |     #cv2.cv.CV_FOURCC('P','I','M','1') avi
187 |     #cv2.cv.CV_FOURCC('M','J','P','G') avi
188 |     #cv2.cv.CV_FOURCC('T','H','E','O') ogv
189 |     #cv2.cv.CV_FOURCC('F','L','V','1') flv
190 |     video=cv2.VideoWriter(dir_name, cv2.cv.CV_FOURCC('M','J','P','G'), int(fps),size)
191 |     if not video:
192 |         print "Error in creating video writer"
193 |     while success :
194 |         demo(net,frame)
195 |         success, frame = videoCapture.read()
196 |     video.release()
197 | 


--------------------------------------------------------------------------------
/tools/vis_fasterRCNN.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | """
  5 | Demo script showing detections in sample images.
  6 | 
  7 | See README.md for installation instructions before running.
  8 | """
  9 | 
 10 | import _init_paths
 11 | from fast_rcnn.config import cfg
 12 | from fast_rcnn.test import im_detect
 13 | from fast_rcnn.nms_wrapper import nms
 14 | from utils.timer import Timer
 15 | import matplotlib.pyplot as plt
 16 | import numpy as np
 17 | import scipy.io as sio
 18 | import caffe, os, sys, cv2
 19 | import argparse
 20 | from fcolor import FColor 
 21 | 
 22 | 
 23 | import matplotlib
 24 | from matplotlib.pyplot import plot,savefig
 25 | import cv2.cv as cv
 26 | 
 27 | def vis_square(data, padsize=1, padval=0):
 28 |     data -= data.min()
 29 |     data /= data.max()
 30 |     
 31 |     # force the number of filters to be square
 32 |     n = int(np.ceil(np.sqrt(data.shape[0])))
 33 |     padding = ((0, n ** 2 - data.shape[0]), (0, padsize), (0, padsize)) + ((0, 0),) * (data.ndim - 3)
 34 |     data = np.pad(data, padding, mode='constant', constant_values=(padval, padval))
 35 |     
 36 |     # tile the filters into an image
 37 |     data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
 38 |     data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
 39 |     
 40 |     plt.imshow(data)
 41 |     plt.show()
 42 | 
 43 | CLASSES = ('__background__', 'car', 'person', 'bike', 'truck', 'van','tram', 'misc')
 44 | 
 45 | NETS = {'vgg16': ('VGG16',
 46 |                   'VGG16_faster_rcnn_final.caffemodel'),
 47 |         'zf': ('ZF',
 48 |                   'ZF_faster_rcnn_final.caffemodel'),
 49 |         'vgg_m': ('VGG_CNN_M_1024',
 50 |                    'VGG_CNN_M_1024_faster_rcnn_final.caffemodel')}
 51 | 
 52 | 
 53 | def demo(net, im):
 54 |     """Detect object classes in an image using pre-computed object proposals."""
 55 |     # Load the demo image
 56 | 
 57 |     # Detect all object classes and regress object bounds
 58 |     timer = Timer()
 59 |     timer.tic()
 60 |     scores, boxes = im_detect(net, im)
 61 |     timer.toc()
 62 |     print ('Detection took {:.3f}s for '
 63 |            '{:d} object proposals').format(timer.total_time, boxes.shape[0])
 64 | 
 65 |     # Visualize detections for each class
 66 |     CONF_THRESH = 0.8
 67 |     NMS_THRESH = 0.3
 68 |     index=1
 69 |     for cls_ind, cls in enumerate(CLASSES[1:]):
 70 |         cls_ind += 1 # because we skipped background
 71 |         cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
 72 |         cls_scores = scores[:, cls_ind]
 73 |         dets = np.hstack((cls_boxes,
 74 |                           cls_scores[:, np.newaxis])).astype(np.float32)
 75 |         keep = nms(dets, NMS_THRESH)
 76 |         dets = dets[keep, :]
 77 | 
 78 |         #im = im[:, :, (2, 1, 0)]
 79 | 
 80 |         inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
 81 |         if len(inds) == 0 and index==len(CLASSES[1:]):
 82 |             #cv2.imwrite(path,im)
 83 |             #video.write(im)
 84 |             return
 85 |         elif len(inds) == 0 and index<len(CLASSES[1:]):
 86 |             index+=1
 87 |             continue
 88 |         for i in inds:
 89 |             bbox = dets[i, :4]
 90 |             score = dets[i, -1]
 91 |             x = bbox[0]
 92 |             y = bbox[1]
 93 |             rect_start = (x,y)
 94 |             x1 = bbox[2]
 95 |             y1 = bbox[3]
 96 |             rect_end = (x1,y1)
 97 |             color0=(100,100,100)
 98 |             color1=(255,0,0)
 99 | 
100 | 
101 |             xx1 = bbox[0]
102 |             yy1= int(bbox[1]-10)
103 |             point_start = (xx1,yy1)
104 |             xx2 = bbox[0]+(bbox[2]-bbox[0])*score
105 |             yy2= int(bbox[1]-2)
106 |             point_end = (xx2,yy2)
107 |             color2=(0,0,225)
108 |             color3=(0,255,0)
109 |             if cls_ind in [1, 4, 5, 6, 7, 8]:
110 |                 cv2.rectangle(im, rect_start, rect_end, color1, 2)
111 |             elif cls_ind==2:
112 |                 cv2.rectangle(im, rect_start, rect_end, color3, 2)
113 |             elif cls_ind==3:
114 |                 cv2.rectangle(im, rect_start, rect_end, color0, 2)
115 |             cv2.rectangle(im, point_start, point_end, color2, -1)
116 |     cv2.namedWindow("Image")
117 |     res=cv2.resize(im,(1080,608),interpolation=cv2.INTER_CUBIC)
118 |     cv2.imshow("Image", res)
119 |     cv2.waitKey (0)
120 |     #cv2.imwrite(path,im)
121 |     #video.write(im)
122 | 
123 | 
124 | 
125 | def visualization(net, layer_name, save_dir):
126 |     save_path = os.path.join(cfg.ROOT_DIR, 'visualization', save_dir, layer_name) 
127 |     if not os.path.exists(save_path):
128 |         os.makedirs(save_path)
129 |     feat = net.blobs[layer_name].data[0]
130 |     print feat.shape
131 |     feat -= feat.min()
132 |     feat /= feat.max()
133 |     feat *=255
134 |     i = 0
135 |     for im in feat:
136 |         #iFColor = FColor(im)
137 |         cv2.imwrite(os.path.join(save_path, '{:d}.png'.format(i)), im)
138 |         i = i + 1
139 |     #vis_square(feat, padval=1)
140 | 
141 | def visualization_plus(net, layer_name, save_dir):
142 |     save_path = os.path.join(cfg.ROOT_DIR, 'visualization_plus', save_dir) 
143 |     if not os.path.exists(save_path):
144 |         os.makedirs(save_path)
145 |     feat = net.blobs[layer_name].data[0]
146 |     fm = feat[0]
147 |     print type(fm)
148 | 
149 |     print fm.shape
150 |     for f in feat:
151 |         fm += f
152 |     fm = fm - feat[0]
153 | 
154 |     print 'fm  max = {}, min = {}'.format(fm.max(), fm.min())
155 |     #fm -= fm.min()
156 |     #fm /= fm.max()
157 | 
158 |     fm *=255
159 |     i = 0
160 |     cv2.imwrite(os.path.join(save_path, '{:s}.png'.format(layer_name)), fm)
161 | 
162 | 
163 | def parse_args():
164 |     """Parse input arguments."""
165 |     parser = argparse.ArgumentParser(description='Faster R-CNN demo')
166 |     parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
167 |                         default=0, type=int)
168 |     parser.add_argument('--cpu', dest='cpu_mode',
169 |                         help='Use CPU mode (overrides --gpu)',
170 |                         action='store_true')
171 |     parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
172 |                         choices=NETS.keys(), default='vgg16')
173 | 
174 |     args = parser.parse_args()
175 | 
176 |     return args
177 | 
178 | if __name__ == '__main__':
179 |     cfg.TEST.HAS_RPN = True  # Use RPN for proposals
180 | 
181 |     args = parse_args()
182 | 
183 |     prototxt = os.path.join(cfg.ROOT_DIR, 'models', NETS[args.demo_net][0],
184 |                             'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
185 |     caffemodel = os.path.join(cfg.ROOT_DIR, 'data', 'faster_rcnn_models',
186 |                               NETS[args.demo_net][1])
187 | 
188 |     if not os.path.isfile(caffemodel):
189 |         raise IOError(('{:s} not found.\nDid you run ./data/script/'
190 |                        'fetch_faster_rcnn_models.sh?').format(caffemodel))
191 | 
192 |     if args.cpu_mode:
193 |         caffe.set_mode_cpu()
194 |     else:
195 |         caffe.set_mode_gpu()
196 |         caffe.set_device(args.gpu_id)
197 |         cfg.GPU_ID = args.gpu_id
198 |     net = caffe.Net(prototxt, caffemodel, caffe.TEST)
199 | 
200 |     print '\n\nLoaded network {:s}'.format(caffemodel)
201 |     file_name = '0000008'
202 |     #img_path = os.path.join('visualization', file_name+'.jpg')
203 |     img_path = os.path.join('data/training/image_2', file_name+'.png')
204 |     im = cv2.imread(img_path)
205 |     demo(net, im)
206 |     #visualization_plus(net,'conv4_3', file_name)
207 |     #visualization_plus(net,'conv5_3', file_name)
208 |     visualization(net,'conv3_3', file_name)
209 |     #visualization(net,'conv2_2', file_name)
210 |     #visualization(net,'conv1_1', file_name)
211 |     #visualization(net,'rpn/output', file_name)
212 | 


--------------------------------------------------------------------------------
/lib/datasets/imdb.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | import os
  9 | import os.path as osp
 10 | import PIL
 11 | from utils.cython_bbox import bbox_overlaps
 12 | import numpy as np
 13 | import scipy.sparse
 14 | import datasets
 15 | 
 16 | class imdb(object):
 17 |     """Image database."""
 18 | 
 19 |     def __init__(self, name):
 20 |         self._name = name
 21 |         self._num_classes = 0
 22 |         self._classes = []
 23 |         self._image_index = []
 24 |         self._obj_proposer = 'selective_search'
 25 |         self._roidb = None
 26 |         self._roidb_handler = self.default_roidb
 27 |         # Use this dict for storing dataset specific config options
 28 |         self.config = {}
 29 | 
 30 |     @property
 31 |     def name(self):
 32 |         return self._name
 33 | 
 34 |     @property
 35 |     def num_classes(self):
 36 |         return len(self._classes)
 37 | 
 38 |     @property
 39 |     def classes(self):
 40 |         return self._classes
 41 | 
 42 |     @property
 43 |     def image_index(self):
 44 |         return self._image_index
 45 | 
 46 |     @property
 47 |     def roidb_handler(self):
 48 |         return self._roidb_handler
 49 | 
 50 |     @roidb_handler.setter
 51 |     def roidb_handler(self, val):
 52 |         self._roidb_handler = val
 53 | 
 54 |     def set_proposal_method(self, method):
 55 |         method = eval('self.' + method + '_roidb')
 56 |         self.roidb_handler = method
 57 | 
 58 |     @property
 59 |     def roidb(self):
 60 |         # A roidb is a list of dictionaries, each with the following keys:
 61 |         #   boxes
 62 |         #   gt_overlaps
 63 |         #   gt_classes
 64 |         #   flipped
 65 |         if self._roidb is not None:
 66 |             return self._roidb
 67 |         self._roidb = self.roidb_handler()
 68 |         return self._roidb
 69 | 
 70 |     @property
 71 |     def cache_path(self):
 72 |         cache_path = osp.abspath(osp.join(datasets.ROOT_DIR, 'data', 'cache'))
 73 |         if not os.path.exists(cache_path):
 74 |             os.makedirs(cache_path)
 75 |         return cache_path
 76 | 
 77 |     @property
 78 |     def num_images(self):
 79 |       return len(self.image_index)
 80 | 
 81 |     def image_path_at(self, i):
 82 |         raise NotImplementedError
 83 | 
 84 |     def default_roidb(self):
 85 |         raise NotImplementedError
 86 | 
 87 |     def evaluate_detections(self, all_boxes, output_dir=None):
 88 |         """
 89 |         all_boxes is a list of length number-of-classes.
 90 |         Each list element is a list of length number-of-images.
 91 |         Each of those list elements is either an empty list []
 92 |         or a numpy array of detection.
 93 | 
 94 |         all_boxes[class][image] = [] or np.array of shape #dets x 5
 95 |         """
 96 |         raise NotImplementedError
 97 | 
 98 |     def append_flipped_images(self):
 99 |         num_images = self.num_images
100 |         widths = [PIL.Image.open(self.image_path_at(i)).size[0]
101 |                   for i in xrange(num_images)]
102 |         for i in xrange(num_images):
103 |             boxes = self.roidb[i]['boxes'].copy()
104 |             oldx1 = boxes[:, 0].copy()
105 |             oldx2 = boxes[:, 2].copy()
106 |             boxes[:, 0] = widths[i] - oldx2 - 1
107 |             boxes[:, 2] = widths[i] - oldx1 - 1
108 |             assert (boxes[:, 2] >= boxes[:, 0]).all()
109 |             entry = {'boxes' : boxes,
110 |                      'gt_overlaps' : self.roidb[i]['gt_overlaps'],
111 |                      'gt_classes' : self.roidb[i]['gt_classes'],
112 |                      'flipped' : True}
113 |             self.roidb.append(entry)
114 |         self._image_index = self._image_index * 2
115 | 
116 |     def evaluate_recall(self, candidate_boxes=None, ar_thresh=0.5):
117 |         # Record max overlap value for each gt box
118 |         # Return vector of overlap values
119 |         gt_overlaps = np.zeros(0)
120 |         for i in xrange(self.num_images):
121 |             gt_inds = np.where(self.roidb[i]['gt_classes'] > 0)[0]
122 |             gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
123 | 
124 |             if candidate_boxes is None:
125 |                 non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
126 |                 boxes = self.roidb[i]['boxes'][non_gt_inds, :]
127 |             else:
128 |                 boxes = candidate_boxes[i]
129 |             if boxes.shape[0] == 0:
130 |                 continue
131 |             overlaps = bbox_overlaps(boxes.astype(np.float),
132 |                                      gt_boxes.astype(np.float))
133 | 
134 |             # gt_overlaps = np.hstack((gt_overlaps, overlaps.max(axis=0)))
135 |             _gt_overlaps = np.zeros((gt_boxes.shape[0]))
136 |             for j in xrange(gt_boxes.shape[0]):
137 |                 argmax_overlaps = overlaps.argmax(axis=0)
138 |                 max_overlaps = overlaps.max(axis=0)
139 |                 gt_ind = max_overlaps.argmax()
140 |                 gt_ovr = max_overlaps.max()
141 |                 assert(gt_ovr >= 0)
142 |                 box_ind = argmax_overlaps[gt_ind]
143 |                 _gt_overlaps[j] = overlaps[box_ind, gt_ind]
144 |                 assert(_gt_overlaps[j] == gt_ovr)
145 |                 overlaps[box_ind, :] = -1
146 |                 overlaps[:, gt_ind] = -1
147 | 
148 |             gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))
149 | 
150 |         num_pos = gt_overlaps.size
151 |         gt_overlaps = np.sort(gt_overlaps)
152 |         step = 0.001
153 |         thresholds = np.minimum(np.arange(0.5, 1.0 + step, step), 1.0)
154 |         recalls = np.zeros_like(thresholds)
155 |         for i, t in enumerate(thresholds):
156 |             recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
157 |         ar = 2 * np.trapz(recalls, thresholds)
158 | 
159 |         return ar, gt_overlaps, recalls, thresholds
160 | 
161 |     def create_roidb_from_box_list(self, box_list, gt_roidb):
162 |         assert len(box_list) == self.num_images, \
163 |                 'Number of boxes must match number of ground-truth images'
164 |         roidb = []
165 |         for i in xrange(self.num_images):
166 |             boxes = box_list[i]
167 |             num_boxes = boxes.shape[0]
168 |             overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)
169 | 
170 |             if gt_roidb is not None:
171 |                 gt_boxes = gt_roidb[i]['boxes']
172 |                 gt_classes = gt_roidb[i]['gt_classes']
173 |                 gt_overlaps = bbox_overlaps(boxes.astype(np.float),
174 |                                             gt_boxes.astype(np.float))
175 |                 argmaxes = gt_overlaps.argmax(axis=1)
176 |                 maxes = gt_overlaps.max(axis=1)
177 |                 I = np.where(maxes > 0)[0]
178 |                 overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
179 | 
180 |             overlaps = scipy.sparse.csr_matrix(overlaps)
181 |             roidb.append({'boxes' : boxes,
182 |                           'gt_classes' : np.zeros((num_boxes,),
183 |                                                   dtype=np.int32),
184 |                           'gt_overlaps' : overlaps,
185 |                           'flipped' : False})
186 |         return roidb
187 | 
188 |     @staticmethod
189 |     def merge_roidbs(a, b):
190 |         assert len(a) == len(b)
191 |         for i in xrange(len(a)):
192 |             a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
193 |             a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
194 |                                             b[i]['gt_classes']))
195 |             a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'],
196 |                                                        b[i]['gt_overlaps']])
197 |         return a
198 | 
199 |     def competition_mode(self, on):
200 |         """Turn competition mode on or off."""
201 |         pass
202 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer_original/layer.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | """The data layer used during training to train a Fast R-CNN network.
  9 | 
 10 | RoIDataLayer implements a Caffe Python layer.
 11 | """
 12 | 
 13 | import caffe
 14 | from fast_rcnn.config import cfg
 15 | from roi_data_layer.minibatch import get_minibatch
 16 | import numpy as np
 17 | import yaml
 18 | from multiprocessing import Process, Queue
 19 | 
 20 | class RoIDataLayer(caffe.Layer):
 21 |     """Fast R-CNN data layer used for training."""
 22 | 
 23 |     def _shuffle_roidb_inds(self):
 24 |         """Randomly permute the training roidb."""
 25 |         if cfg.TRAIN.ASPECT_GROUPING:
 26 |             widths = np.array([r['width'] for r in self._roidb])
 27 |             heights = np.array([r['height'] for r in self._roidb])
 28 |             horz = (widths >= heights)
 29 |             vert = np.logical_not(horz)
 30 |             horz_inds = np.where(horz)[0]
 31 |             vert_inds = np.where(vert)[0]
 32 |             inds = np.hstack((
 33 |                 np.random.permutation(horz_inds),
 34 |                 np.random.permutation(vert_inds)))
 35 |             inds = np.reshape(inds, (-1, 2))
 36 |             row_perm = np.random.permutation(np.arange(inds.shape[0]))
 37 |             inds = np.reshape(inds[row_perm, :], (-1,))
 38 |             self._perm = inds
 39 |         else:
 40 |             self._perm = np.random.permutation(np.arange(len(self._roidb)))
 41 |         self._cur = 0
 42 | 
 43 |     def _get_next_minibatch_inds(self):
 44 |         """Return the roidb indices for the next minibatch."""
 45 |         if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
 46 |             self._shuffle_roidb_inds()
 47 | 
 48 |         db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
 49 |         self._cur += cfg.TRAIN.IMS_PER_BATCH
 50 |         return db_inds
 51 | 
 52 |     def _get_next_minibatch(self):
 53 |         """Return the blobs to be used for the next minibatch.
 54 | 
 55 |         If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a
 56 |         separate process and made available through self._blob_queue.
 57 |         """
 58 |         if cfg.TRAIN.USE_PREFETCH:
 59 |             return self._blob_queue.get()
 60 |         else:
 61 |             db_inds = self._get_next_minibatch_inds()
 62 |             minibatch_db = [self._roidb[i] for i in db_inds]
 63 |             return get_minibatch(minibatch_db, self._num_classes)
 64 | 
 65 |     def set_roidb(self, roidb):
 66 |         """Set the roidb to be used by this layer during training."""
 67 |         self._roidb = roidb
 68 |         self._shuffle_roidb_inds()
 69 |         if cfg.TRAIN.USE_PREFETCH:
 70 |             self._blob_queue = Queue(10)
 71 |             self._prefetch_process = BlobFetcher(self._blob_queue,
 72 |                                                  self._roidb,
 73 |                                                  self._num_classes)
 74 |             self._prefetch_process.start()
 75 |             # Terminate the child process when the parent exists
 76 |             def cleanup():
 77 |                 print 'Terminating BlobFetcher'
 78 |                 self._prefetch_process.terminate()
 79 |                 self._prefetch_process.join()
 80 |             import atexit
 81 |             atexit.register(cleanup)
 82 | 
 83 |     def setup(self, bottom, top):
 84 |         """Setup the RoIDataLayer."""
 85 | 
 86 |         # parse the layer parameter string, which must be valid YAML
 87 |         layer_params = yaml.load(self.param_str_)
 88 | 
 89 |         self._num_classes = layer_params['num_classes']
 90 | 
 91 |         self._name_to_top_map = {}
 92 | 
 93 |         # data blob: holds a batch of N images, each with 3 channels
 94 |         idx = 0
 95 |         top[idx].reshape(cfg.TRAIN.IMS_PER_BATCH, 3,
 96 |             max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE)
 97 |         self._name_to_top_map['data'] = idx
 98 |         idx += 1
 99 | 
100 |         if cfg.TRAIN.HAS_RPN:
101 |             top[idx].reshape(1, 3)
102 |             self._name_to_top_map['im_info'] = idx
103 |             idx += 1
104 | 
105 |             top[idx].reshape(1, 4)
106 |             self._name_to_top_map['gt_boxes'] = idx
107 |             idx += 1
108 |         else: # not using RPN
109 |             # rois blob: holds R regions of interest, each is a 5-tuple
110 |             # (n, x1, y1, x2, y2) specifying an image batch index n and a
111 |             # rectangle (x1, y1, x2, y2)
112 |             top[idx].reshape(1, 5)
113 |             self._name_to_top_map['rois'] = idx
114 |             idx += 1
115 | 
116 |             # labels blob: R categorical labels in [0, ..., K] for K foreground
117 |             # classes plus background
118 |             top[idx].reshape(1)
119 |             self._name_to_top_map['labels'] = idx
120 |             idx += 1
121 | 
122 |             if cfg.TRAIN.BBOX_REG:
123 |                 # bbox_targets blob: R bounding-box regression targets with 4
124 |                 # targets per class
125 |                 top[idx].reshape(1, self._num_classes * 4)
126 |                 self._name_to_top_map['bbox_targets'] = idx
127 |                 idx += 1
128 | 
129 |                 # bbox_inside_weights blob: At most 4 targets per roi are active;
130 |                 # thisbinary vector sepcifies the subset of active targets
131 |                 top[idx].reshape(1, self._num_classes * 4)
132 |                 self._name_to_top_map['bbox_inside_weights'] = idx
133 |                 idx += 1
134 | 
135 |                 top[idx].reshape(1, self._num_classes * 4)
136 |                 self._name_to_top_map['bbox_outside_weights'] = idx
137 |                 idx += 1
138 | 
139 |         print 'RoiDataLayer: name_to_top:', self._name_to_top_map
140 |         assert len(top) == len(self._name_to_top_map)
141 | 
142 |     def forward(self, bottom, top):
143 |         """Get blobs and copy them into this layer's top blob vector."""
144 |         blobs = self._get_next_minibatch()
145 | 
146 |         for blob_name, blob in blobs.iteritems():
147 |             top_ind = self._name_to_top_map[blob_name]
148 |             # Reshape net's input blobs
149 |             top[top_ind].reshape(*(blob.shape))
150 |             # Copy data into net's input blobs
151 |             top[top_ind].data[...] = blob.astype(np.float32, copy=False)
152 | 
153 |     def backward(self, top, propagate_down, bottom):
154 |         """This layer does not propagate gradients."""
155 |         pass
156 | 
157 |     def reshape(self, bottom, top):
158 |         """Reshaping happens during the call to forward."""
159 |         pass
160 | 
161 | class BlobFetcher(Process):
162 |     """Experimental class for prefetching blobs in a separate process."""
163 |     def __init__(self, queue, roidb, num_classes):
164 |         super(BlobFetcher, self).__init__()
165 |         self._queue = queue
166 |         self._roidb = roidb
167 |         self._num_classes = num_classes
168 |         self._perm = None
169 |         self._cur = 0
170 |         self._shuffle_roidb_inds()
171 |         # fix the random seed for reproducibility
172 |         np.random.seed(cfg.RNG_SEED)
173 | 
174 |     def _shuffle_roidb_inds(self):
175 |         """Randomly permute the training roidb."""
176 |         # TODO(rbg): remove duplicated code
177 |         self._perm = np.random.permutation(np.arange(len(self._roidb)))
178 |         self._cur = 0
179 | 
180 |     def _get_next_minibatch_inds(self):
181 |         """Return the roidb indices for the next minibatch."""
182 |         # TODO(rbg): remove duplicated code
183 |         if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
184 |             self._shuffle_roidb_inds()
185 | 
186 |         db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
187 |         self._cur += cfg.TRAIN.IMS_PER_BATCH
188 |         return db_inds
189 | 
190 |     def run(self):
191 |         print 'BlobFetcher started'
192 |         while True:
193 |             db_inds = self._get_next_minibatch_inds()
194 |             minibatch_db = [self._roidb[i] for i in db_inds]
195 |             blobs = get_minibatch(minibatch_db, self._num_classes)
196 |             self._queue.put(blobs)
197 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ### Disclaimer
  2 | 
  3 | The official Faster R-CNN code (written in MATLAB) is available [here](https://github.com/ShaoqingRen/faster_rcnn).
  4 | If your goal is to reproduce the results in our NIPS 2015 paper, please use the [official code](https://github.com/ShaoqingRen/faster_rcnn).
  5 | 
  6 | This repository contains a Python *reimplementation* of the MATLAB code.
  7 | This Python implementation is built on a fork of [Fast R-CNN](https://github.com/rbgirshick/fast-rcnn).
  8 | There are slight differences between the two implementations.
  9 | In particular, this Python port
 10 |  - is ~10% slower at test-time, because some operations execute on the CPU in Python layers (e.g., 220ms / image vs. 200ms / image for VGG16)
 11 |  - gives similar, but not exactly the same, mAP as the MATLAB version
 12 |  - is *not compatible* with models trained using the MATLAB code due to the minor implementation differences
 13 | 
 14 | # *Faster* R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
 15 | 
 16 | By Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun (Microsoft Research)
 17 | 
 18 | This Python implementation contains contributions from Sean Bell (Cornell) written during an MSR internship.
 19 | 
 20 | Please see the official [README.md](https://github.com/ShaoqingRen/faster_rcnn/blob/master/README.md) for more details.
 21 | 
 22 | Faster R-CNN was initially described in an [arXiv tech report](http://arxiv.org/abs/1506.01497) and was subsequently published in NIPS 2015.
 23 | 
 24 | ### License
 25 | 
 26 | Faster R-CNN is released under the MIT License (refer to the LICENSE file for details).
 27 | 
 28 | ### Citing Faster R-CNN
 29 | 
 30 | If you find Faster R-CNN useful in your research, please consider citing:
 31 | 
 32 |     @inproceedings{renNIPS15fasterrcnn,
 33 |         Author = {Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun},
 34 |         Title = {Faster {R-CNN}: Towards Real-Time Object Detection
 35 |                  with Region Proposal Networks},
 36 |         Booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
 37 |         Year = {2015}
 38 |     }
 39 | 
 40 | ### Contents
 41 | 1. [Requirements: software](#requirements-software)
 42 | 2. [Requirements: hardware](#requirements-hardware)
 43 | 3. [Basic installation](#installation-sufficient-for-the-demo)
 44 | 4. [Demo](#demo)
 45 | 5. [Beyond the demo: training and testing](#beyond-the-demo-installation-for-training-and-testing-models)
 46 | 6. [Usage](#usage)
 47 | 
 48 | ### Requirements: software
 49 | 
 50 | 1. Requirements for `Caffe` and `pycaffe` (see: [Caffe installation instructions](http://caffe.berkeleyvision.org/installation.html))
 51 | 
 52 |   **Note:** Caffe *must* be built with support for Python layers!
 53 | 
 54 |   ```make
 55 |   # In your Makefile.config, make sure to have this line uncommented
 56 |   WITH_PYTHON_LAYER := 1
 57 |   ```
 58 | 
 59 |   You can download my [Makefile.config](http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/Makefile.config) for reference.
 60 | 2. Python packages you might not have: `cython`, `python-opencv`, `easydict`
 61 | 3. [optional] MATLAB (required for PASCAL VOC evaluation only)
 62 | 
 63 | ### Requirements: hardware
 64 | 
 65 | 1. For training smaller networks (ZF, VGG_CNN_M_1024) a good GPU (e.g., Titan, K20, K40, ...) with at least 3G of memory suffices
 66 | 2. For training with VGG16, you'll need a K40 (~11G of memory)
 67 | 
 68 | ### Installation (sufficient for the demo)
 69 | 
 70 | 1. Clone the Faster R-CNN repository
 71 |   ```Shell
 72 |   # Make sure to clone with --recursive
 73 |   git clone --recursive https://github.com/rbgirshick/py-faster-rcnn.git
 74 |   ```
 75 | 
 76 | 2. We'll call the directory that you cloned Faster R-CNN into `FRCN_ROOT`
 77 | 
 78 |    *Ignore notes 1 and 2 if you followed step 1 above.*
 79 | 
 80 |    **Note 1:** If you didn't clone Faster R-CNN with the `--recursive` flag, then you'll need to manually clone the `caffe-fast-rcnn` submodule:
 81 |     ```Shell
 82 |     git submodule update --init --recursive
 83 |     ```
 84 |     **Note 2:** The `caffe-fast-rcnn` submodule needs to be on the `faster-rcnn` branch (or equivalent detached state). This will happen automatically *if you followed step 1 instructions*.
 85 | 
 86 | 3. Build the Cython modules
 87 |     ```Shell
 88 |     cd $FRCN_ROOT/lib
 89 |     make
 90 |     ```
 91 | 
 92 | 4. Build Caffe and pycaffe
 93 |     ```Shell
 94 |     cd $FRCN_ROOT/caffe-fast-rcnn
 95 |     # Now follow the Caffe installation instructions here:
 96 |     #   http://caffe.berkeleyvision.org/installation.html
 97 | 
 98 |     # If you're experienced with Caffe and have all of the requirements installed
 99 |     # and your Makefile.config in place, then simply do:
100 |     make -j8 && make pycaffe
101 |     ```
102 | 
103 | 5. Download pre-computed Faster R-CNN detectors
104 |     ```Shell
105 |     cd $FRCN_ROOT
106 |     ./data/scripts/fetch_faster_rcnn_models.sh
107 |     ```
108 | 
109 |     This will populate the `$FRCN_ROOT/data` folder with `faster_rcnn_models`. See `data/README.md` for details.
110 |     These models were trained on VOC 2007 trainval.
111 | 
112 | ### Demo
113 | 
114 | *After successfully completing [basic installation](#installation-sufficient-for-the-demo)*, you'll be ready to run the demo.
115 | 
116 | **Python**
117 | 
118 | To run the demo
119 | ```Shell
120 | cd $FRCN_ROOT
121 | ./tools/demo.py
122 | ```
123 | The demo performs detection using a VGG16 network trained for detection on PASCAL VOC 2007.
124 | 
125 | ### Beyond the demo: installation for training and testing models
126 | 1. Download the training, validation, test data and VOCdevkit
127 | 
128 | 	```Shell
129 | 	wget http://pascallin.ecs.soton.ac.uk/challenges/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
130 | 	wget http://pascallin.ecs.soton.ac.uk/challenges/VOC/voc2007/VOCtest_06-Nov-2007.tar
131 | 	wget http://pascallin.ecs.soton.ac.uk/challenges/VOC/voc2007/VOCdevkit_08-Jun-2007.tar
132 | 	```
133 | 
134 | 2. Extract all of these tars into one directory named `VOCdevkit`
135 | 
136 | 	```Shell
137 | 	tar xvf VOCtrainval_06-Nov-2007.tar
138 | 	tar xvf VOCtest_06-Nov-2007.tar
139 | 	tar xvf VOCdevkit_08-Jun-2007.tar
140 | 	```
141 | 
142 | 3. It should have this basic structure
143 | 
144 | 	```Shell
145 |   	$VOCdevkit/                           # development kit
146 |   	$VOCdevkit/VOCcode/                   # VOC utility code
147 |   	$VOCdevkit/VOC2007                    # image sets, annotations, etc.
148 |   	# ... and several other directories ...
149 |   	```
150 | 
151 | 4. Create symlinks for the PASCAL VOC dataset
152 | 
153 | 	```Shell
154 |     cd $FRCN_ROOT/data
155 |     ln -s $VOCdevkit VOCdevkit2007
156 |     ```
157 |     Using symlinks is a good idea because you will likely want to share the same PASCAL dataset installation between multiple projects.
158 | 5. [Optional] follow similar steps to get PASCAL VOC 2010 and 2012
159 | 6. Follow the next sections to download pre-trained ImageNet models
160 | 
161 | ### Download pre-trained ImageNet models
162 | 
163 | Pre-trained ImageNet models can be downloaded for the three networks described in the paper: ZF and VGG16.
164 | 
165 | ```Shell
166 | cd $FRCN_ROOT
167 | ./data/scripts/fetch_imagenet_models.sh
168 | ```
169 | VGG16 comes from the [Caffe Model Zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo), but is provided here for your convenience.
170 | ZF was trained at MSRA.
171 | 
172 | ### Usage
173 | 
174 | To train and test a Faster R-CNN detector use `experiments/scripts/faster_rcnn_alt_opt.sh`.
175 | Output is written underneath `$FRCN_ROOT/output`.
176 | 
177 | ```Shell
178 | cd $FRCN_ROOT
179 | ./experiments/scripts/faster_rcnn_alt_opt.sh [GPU_ID] [NET] [--set ...]
180 | # GPU_ID is the GPU you want to train on
181 | # NET in {ZF, VGG_CNN_M_1024, VGG16} is the network arch to use
182 | # --set ... allows you to specify fast_rcnn.config options, e.g.
183 | #   --set EXP_DIR seed_rng1701 RNG_SEED 1701
184 | ```
185 | 
186 | ("alt opt" refers to the alternating optimization training algorithm described in the NIPS paper.)
187 | 
188 | 
189 | download the KITTI in data/training/ and use the train.sh to train the model
190 | the OHEM result is :
191 | 
192 | Results:
193 | APs:
194 | 78.4
195 | 64.7
196 | 75.1
197 | 86.5
198 | 89.6
199 | 83.5
200 | 74.0
201 | mAP:78.8
202 | 
203 | and the standard result based on faster rcnn is :
204 | 
205 | Results:
206 | APs:
207 | 78.4
208 | 65.4
209 | 69.2
210 | 88.1
211 | 86.6
212 | 82.6
213 | 67.8
214 | mAP:76.9
215 | 


--------------------------------------------------------------------------------
/models/VGG16/faster_rcnn_alt_opt/faster_rcnn_test.pt:
--------------------------------------------------------------------------------
  1 | name: "VGG_ILSVRC_16_layers"
  2 | input: "data"
  3 | input_shape {
  4 |   dim: 1
  5 |   dim: 3
  6 |   dim: 224
  7 |   dim: 224
  8 | }
  9 | 
 10 | input: "im_info"
 11 | input_shape {
 12 |   dim: 1
 13 |   dim: 3
 14 | }
 15 | 
 16 | layer {
 17 |   name: "conv1_1"
 18 |   type: "Convolution"
 19 |   bottom: "data"
 20 |   top: "conv1_1"
 21 |   convolution_param {
 22 |     num_output: 64
 23 |     pad: 1 kernel_size: 3
 24 |   }
 25 | }
 26 | layer {
 27 |   name: "relu1_1"
 28 |   type: "ReLU"
 29 |   bottom: "conv1_1"
 30 |   top: "conv1_1"
 31 | }
 32 | layer {
 33 |   name: "conv1_2"
 34 |   type: "Convolution"
 35 |   bottom: "conv1_1"
 36 |   top: "conv1_2"
 37 |   convolution_param {
 38 |     num_output: 64
 39 |     pad: 1 kernel_size: 3
 40 |   }
 41 | }
 42 | layer {
 43 |   name: "relu1_2"
 44 |   type: "ReLU"
 45 |   bottom: "conv1_2"
 46 |   top: "conv1_2"
 47 | }
 48 | layer {
 49 |   name: "pool1"
 50 |   type: "Pooling"
 51 |   bottom: "conv1_2"
 52 |   top: "pool1"
 53 |   pooling_param {
 54 |     pool: MAX
 55 |     kernel_size: 2 stride: 2
 56 |   }
 57 | }
 58 | layer {
 59 |   name: "conv2_1"
 60 |   type: "Convolution"
 61 |   bottom: "pool1"
 62 |   top: "conv2_1"
 63 |   convolution_param {
 64 |     num_output: 128
 65 |     pad: 1 kernel_size: 3
 66 |   }
 67 | }
 68 | layer {
 69 |   name: "relu2_1"
 70 |   type: "ReLU"
 71 |   bottom: "conv2_1"
 72 |   top: "conv2_1"
 73 | }
 74 | layer {
 75 |   name: "conv2_2"
 76 |   type: "Convolution"
 77 |   bottom: "conv2_1"
 78 |   top: "conv2_2"
 79 |   convolution_param {
 80 |     num_output: 128
 81 |     pad: 1 kernel_size: 3
 82 |   }
 83 | }
 84 | layer {
 85 |   name: "relu2_2"
 86 |   type: "ReLU"
 87 |   bottom: "conv2_2"
 88 |   top: "conv2_2"
 89 | }
 90 | layer {
 91 |   name: "pool2"
 92 |   type: "Pooling"
 93 |   bottom: "conv2_2"
 94 |   top: "pool2"
 95 |   pooling_param {
 96 |     pool: MAX
 97 |     kernel_size: 2 stride: 2
 98 |   }
 99 | }
100 | layer {
101 |   name: "conv3_1"
102 |   type: "Convolution"
103 |   bottom: "pool2"
104 |   top: "conv3_1"
105 |   convolution_param {
106 |     num_output: 256
107 |     pad: 1 kernel_size: 3
108 |   }
109 | }
110 | layer {
111 |   name: "relu3_1"
112 |   type: "ReLU"
113 |   bottom: "conv3_1"
114 |   top: "conv3_1"
115 | }
116 | layer {
117 |   name: "conv3_2"
118 |   type: "Convolution"
119 |   bottom: "conv3_1"
120 |   top: "conv3_2"
121 |   convolution_param {
122 |     num_output: 256
123 |     pad: 1 kernel_size: 3
124 |   }
125 | }
126 | layer {
127 |   name: "relu3_2"
128 |   type: "ReLU"
129 |   bottom: "conv3_2"
130 |   top: "conv3_2"
131 | }
132 | layer {
133 |   name: "conv3_3"
134 |   type: "Convolution"
135 |   bottom: "conv3_2"
136 |   top: "conv3_3"
137 |   convolution_param {
138 |     num_output: 256
139 |     pad: 1 kernel_size: 3
140 |   }
141 | }
142 | layer {
143 |   name: "relu3_3"
144 |   type: "ReLU"
145 |   bottom: "conv3_3"
146 |   top: "conv3_3"
147 | }
148 | layer {
149 |   name: "pool3"
150 |   type: "Pooling"
151 |   bottom: "conv3_3"
152 |   top: "pool3"
153 |   pooling_param {
154 |     pool: MAX
155 |     kernel_size: 2 stride: 2
156 |   }
157 | }
158 | layer {
159 |   name: "conv4_1"
160 |   type: "Convolution"
161 |   bottom: "pool3"
162 |   top: "conv4_1"
163 |   convolution_param {
164 |     num_output: 512
165 |     pad: 1 kernel_size: 3
166 |   }
167 | }
168 | layer {
169 |   name: "relu4_1"
170 |   type: "ReLU"
171 |   bottom: "conv4_1"
172 |   top: "conv4_1"
173 | }
174 | layer {
175 |   name: "conv4_2"
176 |   type: "Convolution"
177 |   bottom: "conv4_1"
178 |   top: "conv4_2"
179 |   convolution_param {
180 |     num_output: 512
181 |     pad: 1 kernel_size: 3
182 |   }
183 | }
184 | layer {
185 |   name: "relu4_2"
186 |   type: "ReLU"
187 |   bottom: "conv4_2"
188 |   top: "conv4_2"
189 | }
190 | layer {
191 |   name: "conv4_3"
192 |   type: "Convolution"
193 |   bottom: "conv4_2"
194 |   top: "conv4_3"
195 |   convolution_param {
196 |     num_output: 512
197 |     pad: 1 kernel_size: 3
198 |   }
199 | }
200 | layer {
201 |   name: "relu4_3"
202 |   type: "ReLU"
203 |   bottom: "conv4_3"
204 |   top: "conv4_3"
205 | }
206 | layer {
207 |   name: "pool4"
208 |   type: "Pooling"
209 |   bottom: "conv4_3"
210 |   top: "pool4"
211 |   pooling_param {
212 |     pool: MAX
213 |     kernel_size: 2 stride: 2
214 |   }
215 | }
216 | layer {
217 |   name: "conv5_1"
218 |   type: "Convolution"
219 |   bottom: "pool4"
220 |   top: "conv5_1"
221 |   convolution_param {
222 |     num_output: 512
223 |     pad: 1 kernel_size: 3
224 |   }
225 | }
226 | layer {
227 |   name: "relu5_1"
228 |   type: "ReLU"
229 |   bottom: "conv5_1"
230 |   top: "conv5_1"
231 | }
232 | layer {
233 |   name: "conv5_2"
234 |   type: "Convolution"
235 |   bottom: "conv5_1"
236 |   top: "conv5_2"
237 |   convolution_param {
238 |     num_output: 512
239 |     pad: 1 kernel_size: 3
240 |   }
241 | }
242 | layer {
243 |   name: "relu5_2"
244 |   type: "ReLU"
245 |   bottom: "conv5_2"
246 |   top: "conv5_2"
247 | }
248 | layer {
249 |   name: "conv5_3"
250 |   type: "Convolution"
251 |   bottom: "conv5_2"
252 |   top: "conv5_3"
253 |   convolution_param {
254 |     num_output: 512
255 |     pad: 1 kernel_size: 3
256 |   }
257 | }
258 | layer {
259 |   name: "relu5_3"
260 |   type: "ReLU"
261 |   bottom: "conv5_3"
262 |   top: "conv5_3"
263 | }
264 | 
265 | #========= RPN ============
266 | 
267 | layer {
268 |   name: "rpn_conv/3x3"
269 |   type: "Convolution"
270 |   bottom: "conv5_3"
271 |   top: "rpn/output"
272 |   convolution_param {
273 |     num_output: 512
274 |     kernel_size: 3 pad: 1 stride: 1
275 |   }
276 | }
277 | layer {
278 |   name: "rpn_relu/3x3"
279 |   type: "ReLU"
280 |   bottom: "rpn/output"
281 |   top: "rpn/output"
282 | }
283 | 
284 | layer {
285 |   name: "rpn_cls_score"
286 |   type: "Convolution"
287 |   bottom: "rpn/output"
288 |   top: "rpn_cls_score"
289 |   convolution_param {
290 |     num_output: 18   # 2(bg/fg) * 9(anchors)
291 |     kernel_size: 1 pad: 0 stride: 1
292 |   }
293 | }
294 | layer {
295 |   name: "rpn_bbox_pred"
296 |   type: "Convolution"
297 |   bottom: "rpn/output"
298 |   top: "rpn_bbox_pred"
299 |   convolution_param {
300 |     num_output: 36   # 4 * 9(anchors)
301 |     kernel_size: 1 pad: 0 stride: 1
302 |   }
303 | }
304 | layer {
305 |    bottom: "rpn_cls_score"
306 |    top: "rpn_cls_score_reshape"
307 |    name: "rpn_cls_score_reshape"
308 |    type: "Reshape"
309 |    reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
310 | }
311 | 
312 | #========= RoI Proposal ============
313 | 
314 | layer {
315 |   name: "rpn_cls_prob"
316 |   type: "Softmax"
317 |   bottom: "rpn_cls_score_reshape"
318 |   top: "rpn_cls_prob"
319 | }
320 | layer {
321 |   name: 'rpn_cls_prob_reshape'
322 |   type: 'Reshape'
323 |   bottom: 'rpn_cls_prob'
324 |   top: 'rpn_cls_prob_reshape'
325 |   reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
326 | }
327 | layer {
328 |   name: 'proposal'
329 |   type: 'Python'
330 |   bottom: 'rpn_cls_prob_reshape'
331 |   bottom: 'rpn_bbox_pred'
332 |   bottom: 'im_info'
333 |   top: 'rois'
334 |   python_param {
335 |     module: 'rpn.proposal_layer'
336 |     layer: 'ProposalLayer'
337 |     param_str: "'feat_stride': 16"
338 |   }
339 | }
340 | 
341 | #========= RCNN ============
342 | 
343 | layer {
344 |   name: "roi_pool5"
345 |   type: "ROIPooling"
346 |   bottom: "conv5_3"
347 |   bottom: "rois"
348 |   top: "pool5"
349 |   roi_pooling_param {
350 |     pooled_w: 7
351 |     pooled_h: 7
352 |     spatial_scale: 0.0625 # 1/16
353 |   }
354 | }
355 | layer {
356 |   name: "fc6"
357 |   type: "InnerProduct"
358 |   bottom: "pool5"
359 |   top: "fc6"
360 |   inner_product_param {
361 |     num_output: 4096
362 |   }
363 | }
364 | layer {
365 |   name: "relu6"
366 |   type: "ReLU"
367 |   bottom: "fc6"
368 |   top: "fc6"
369 | }
370 | layer {
371 |   name: "fc7"
372 |   type: "InnerProduct"
373 |   bottom: "fc6"
374 |   top: "fc7"
375 |   inner_product_param {
376 |     num_output: 4096
377 |   }
378 | }
379 | layer {
380 |   name: "relu7"
381 |   type: "ReLU"
382 |   bottom: "fc7"
383 |   top: "fc7"
384 | }
385 | layer {
386 |   name: "cls_score"
387 |   type: "InnerProduct"
388 |   bottom: "fc7"
389 |   top: "cls_score"
390 |   inner_product_param {
391 |     num_output: 8
392 |   }
393 | }
394 | layer {
395 |   name: "bbox_pred"
396 |   type: "InnerProduct"
397 |   bottom: "fc7"
398 |   top: "bbox_pred"
399 |   inner_product_param {
400 |     num_output: 32
401 |   }
402 | }
403 | layer {
404 |   name: "cls_prob"
405 |   type: "Softmax"
406 |   bottom: "cls_score"
407 |   top: "cls_prob"
408 | }
409 | 


--------------------------------------------------------------------------------