├── lib
│   ├── nms
│   │   ├── __init__.py
│   │   ├── .gitignore
│   │   ├── __init__.pyc
│   │   ├── gpu_nms.hpp
│   │   ├── py_cpu_nms.py
│   │   ├── gpu_nms.pyx
│   │   ├── cpu_nms.pyx
│   │   └── nms_kernel.cu
│   ├── utils
│   │   ├── .gitignore
│   │   ├── blob.pyc
│   │   ├── timer.pyc
│   │   ├── __init__.pyc
│   │   ├── __init__.py
│   │   ├── timer.py
│   │   ├── blob.py
│   │   └── bbox.pyx
│   ├── Makefile
│   ├── rpn
│   │   ├── __init__.pyc
│   │   ├── generate.pyc
│   │   ├── proposal_layer.pyc
│   │   ├── generate_anchors.pyc
│   │   ├── anchor_target_layer.pyc
│   │   ├── __init__.py
│   │   ├── README.md
│   │   ├── generate_anchors.py
│   │   ├── generate.py
│   │   └── proposal_layer.py
│   ├── datasets
│   │   ├── imdb.pyc
│   │   ├── kakou.pyc
│   │   ├── __init__.pyc
│   │   ├── factory.pyc
│   │   ├── pascal_voc.pyc
│   │   ├── VOCdevkit-matlab-wrapper
│   │   │   ├── xVOCap.m
│   │   │   ├── main.m
│   │   │   ├── compute_overlap.m
│   │   │   └── detection_eval.m
│   │   ├── factory.py
│   │   ├── __init__.py
│   │   └── imdb.py
│   ├── fast_rcnn
│   │   ├── test.pyc
│   │   ├── config.pyc
│   │   ├── train.pyc
│   │   ├── __init__.pyc
│   │   ├── nms_wrapper.pyc
│   │   ├── bbox_transform.pyc
│   │   ├── __init__.py
│   │   ├── nms_wrapper.py
│   │   ├── bbox_transform.py
│   │   └── train.py
│   ├── roi_data_layer
│   │   ├── layer.pyc
│   │   ├── roidb.pyc
│   │   ├── __init__.pyc
│   │   ├── minibatch.pyc
│   │   ├── __init__.py
│   │   └── roidb.py
│   ├── roi_data_layer_original
│   │   ├── layer.pyc
│   │   ├── roidb.pyc
│   │   ├── __init__.pyc
│   │   ├── minibatch.pyc
│   │   ├── __init__.py
│   │   ├── roidb.py
│   │   └── layer.py
│   └── setup.py
├── experiments
│   ├── logs
│   │   └── .gitignore
│   ├── cfgs
│   │   ├── faster_rcnn_alt_opt.yml
│   │   └── faster_rcnn_end2end.yml
│   ├── README.md
│   └── scripts
│       ├── TEST.sh
│       ├── fast_rcnn.sh
│       └── faster_rcnn_alt_opt.sh
├── test.sh
├── tools
│   ├── README.md
│   ├── fcolor.pyc
│   ├── _init_paths.pyc
│   ├── _init_paths.py
│   ├── fcolor.py
│   ├── eval_recall.py
│   ├── reval.py
│   ├── test_net.py
│   ├── test_net_debug.py
│   ├── rpn_generate.py
│   ├── visulization.py
│   ├── train_net.py
│   ├── compress_net.py
│   ├── demo.py
│   ├── demo_show.py
│   ├── demo_video_for_video.py
│   ├── demo_location.py
│   ├── demo_for_video.py
│   ├── demo_video_for_video_XXX.py
│   └── vis_fasterRCNN.py
├── train.sh
├── data
│   ├── .gitignore
│   ├── scripts
│   │   ├── fetch_imagenet_models.sh
│   │   ├── fetch_faster_rcnn_models.sh
│   │   └── fetch_selective_search_data.sh
│   ├── split_data.m
│   ├── README.md
│   └── convert_kitti.py
├── models
│   ├── README.md
│   └── VGG16
│       └── faster_rcnn_alt_opt
│           ├── stage1_rpn_solver60k80k.pt
│           ├── stage2_rpn_solver60k80k.pt
│           ├── stage1_fast_rcnn_ohem_solver30k40k.pt
│           ├── stage2_fast_rcnn_ohem_solver30k40k.pt
│           ├── rpn_test.pt
│           └── faster_rcnn_test.pt
├── loss_accuracy.m
├── LICENSE
└── README.md

--------------------------------------------------------------------------------
/lib/nms/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/experiments/logs/.gitignore:
--------------------------------------------------------------------------------
1 | *.txt*
2 | 
--------------------------------------------------------------------------------
/lib/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.so
3 | 
--------------------------------------------------------------------------------
/lib/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | rm -f data/cache/*
2 | ./experiments/scripts/TEST.sh 0 VGG16
3 | 
--------------------------------------------------------------------------------
/lib/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	python setup.py build_ext --inplace
3 | 	rm -rf build
4 | 
--------------------------------------------------------------------------------
/tools/README.md:
--------------------------------------------------------------------------------
1 | Tools for training, testing, and compressing Fast R-CNN networks.
2 | 
--------------------------------------------------------------------------------
/train.sh:
--------------------------------------------------------------------------------
1 | #rm -f data/cache/*
2 | ./experiments/scripts/faster_rcnn_alt_opt.sh 0 VGG16
3 | 
--------------------------------------------------------------------------------
/experiments/cfgs/faster_rcnn_alt_opt.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: faster_rcnn_alt_opt
2 | TEST:
3 |   HAS_RPN: True
4 | 
--------------------------------------------------------------------------------
/tools/fcolor.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/tools/fcolor.pyc
--------------------------------------------------------------------------------
/data/.gitignore:
--------------------------------------------------------------------------------
1 | selective_search*
2 | imagenet_models*
3 | fast_rcnn_models*
4 | VOCdevkit*
5 | cache
6 | 
--------------------------------------------------------------------------------
/lib/nms/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/nms/__init__.pyc
--------------------------------------------------------------------------------
/lib/rpn/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/rpn/__init__.pyc
--------------------------------------------------------------------------------
/lib/rpn/generate.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/rpn/generate.pyc
--------------------------------------------------------------------------------
/lib/utils/blob.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/utils/blob.pyc
--------------------------------------------------------------------------------
/lib/utils/timer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/utils/timer.pyc
--------------------------------------------------------------------------------
/lib/datasets/imdb.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/datasets/imdb.pyc
--------------------------------------------------------------------------------
/lib/datasets/kakou.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/datasets/kakou.pyc
--------------------------------------------------------------------------------
/lib/fast_rcnn/test.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/fast_rcnn/test.pyc
--------------------------------------------------------------------------------
/lib/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/utils/__init__.pyc
--------------------------------------------------------------------------------
/tools/_init_paths.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/tools/_init_paths.pyc
--------------------------------------------------------------------------------
/lib/datasets/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/datasets/__init__.pyc
--------------------------------------------------------------------------------
/lib/datasets/factory.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/datasets/factory.pyc
--------------------------------------------------------------------------------
/lib/fast_rcnn/config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/fast_rcnn/config.pyc
--------------------------------------------------------------------------------
/lib/fast_rcnn/train.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/fast_rcnn/train.pyc
--------------------------------------------------------------------------------
/lib/datasets/pascal_voc.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/datasets/pascal_voc.pyc
--------------------------------------------------------------------------------
/lib/fast_rcnn/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/fast_rcnn/__init__.pyc
--------------------------------------------------------------------------------
/lib/rpn/proposal_layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/rpn/proposal_layer.pyc
--------------------------------------------------------------------------------
/lib/fast_rcnn/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/fast_rcnn/nms_wrapper.pyc
--------------------------------------------------------------------------------
/lib/roi_data_layer/layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer/layer.pyc
--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer/roidb.pyc
--------------------------------------------------------------------------------
/lib/rpn/generate_anchors.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/rpn/generate_anchors.pyc
--------------------------------------------------------------------------------
/lib/fast_rcnn/bbox_transform.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/fast_rcnn/bbox_transform.pyc
--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer/__init__.pyc
--------------------------------------------------------------------------------
/lib/roi_data_layer/minibatch.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer/minibatch.pyc
--------------------------------------------------------------------------------
/lib/rpn/anchor_target_layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/rpn/anchor_target_layer.pyc
--------------------------------------------------------------------------------
/lib/roi_data_layer_original/layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer_original/layer.pyc
--------------------------------------------------------------------------------
/lib/roi_data_layer_original/roidb.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer_original/roidb.pyc
--------------------------------------------------------------------------------
/lib/roi_data_layer_original/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer_original/__init__.pyc
--------------------------------------------------------------------------------
/lib/roi_data_layer_original/minibatch.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mimoralea/KITTI-detection-OHEM/master/lib/roi_data_layer_original/minibatch.pyc
--------------------------------------------------------------------------------
/lib/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 |           int boxes_dim, float nms_overlap_thresh, int device_id);
3 | 
--------------------------------------------------------------------------------
/experiments/README.md:
--------------------------------------------------------------------------------
1 | Scripts are under `experiments/scripts`.
2 | 
3 | Each script saves a log file under `experiments/logs`.
4 | 
5 | Configuration override files used in the experiments are stored in `experiments/cfgs`.
6 | 
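7 | As a sketch of how these override files are consumed (assuming the
8 | `cfg_from_file` helper that the tools scripts import from
9 | `fast_rcnn.config`), a YAML file is merged onto the default config before
10 | training or testing starts:
11 | 
12 | ```python
13 | from fast_rcnn.config import cfg, cfg_from_file
14 | 
15 | # Merge the experiment overrides onto the defaults in lib/fast_rcnn/config.py.
16 | cfg_from_file('experiments/cfgs/faster_rcnn_alt_opt.yml')
17 | print(cfg.EXP_DIR)       # 'faster_rcnn_alt_opt'
18 | print(cfg.TEST.HAS_RPN)  # True
19 | ```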
--------------------------------------------------------------------------------
/models/README.md:
--------------------------------------------------------------------------------
1 | Prototxt files that define models and solvers.
2 | 
3 | Three models are defined, with some variations of each to support experiments
4 | in the paper.
5 | - CaffeNet (model **S**)
6 | - VGG_CNN_M_1024 (model **M**)
7 | - VGG16 (model **L**)
8 | 
--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
--------------------------------------------------------------------------------
/experiments/cfgs/faster_rcnn_end2end.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: faster_rcnn_end2end
2 | TRAIN:
3 |   HAS_RPN: True
4 |   IMS_PER_BATCH: 1
5 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
6 |   RPN_POSITIVE_OVERLAP: 0.7
7 |   RPN_BATCHSIZE: 256
8 |   PROPOSAL_METHOD: gt
9 | TEST:
10 |   HAS_RPN: True
11 | 
--------------------------------------------------------------------------------
/lib/fast_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
--------------------------------------------------------------------------------
/lib/rpn/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 | 
--------------------------------------------------------------------------------
/lib/roi_data_layer_original/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m:
--------------------------------------------------------------------------------
1 | function ap = xVOCap(rec,prec)
2 | % From the PASCAL VOC 2011 devkit
3 | 
4 | mrec=[0 ; rec ; 1];
5 | mpre=[0 ; prec ; 0];
6 | for i=numel(mpre)-1:-1:1
7 |     mpre(i)=max(mpre(i),mpre(i+1));
8 | end
9 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/main.m:
--------------------------------------------------------------------------------
1 | close all;clear all;clc;
2 | path='/home/bsl/KITTI-detection/data';
3 | comp_id='comp4-7629';
4 | test_set='KakouTest';
5 | output_dir='/home/bsl/KITTI-detection/output/faster_rcnn_alt_opt/KakouTest/VGG16_faster_rcnn_final';
6 | img_list='KITTI_val_list.txt';
7 | img_gt='KITTI_gt_val.txt';
8 | res = detection_eval(path, comp_id, test_set,output_dir,img_list,img_gt);
9 | 
--------------------------------------------------------------------------------
/models/VGG16/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt:
--------------------------------------------------------------------------------
1 | train_net: "models/VGG16/faster_rcnn_alt_opt/stage1_rpn_train.pt"
2 | 
3 | base_lr: 0.001
4 | lr_policy: "step"
5 | gamma: 0.1
6 | stepsize: 60000
7 | display: 20
8 | average_loss: 100
9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | 
12 | # We disable standard caffe solver snapshotting and implement our own snapshot
13 | # function
14 | snapshot: 0
15 | # We still use the snapshot prefix, though
16 | snapshot_prefix: "vgg16_rpn"
17 | 
--------------------------------------------------------------------------------
/models/VGG16/faster_rcnn_alt_opt/stage2_rpn_solver60k80k.pt:
--------------------------------------------------------------------------------
1 | train_net: "models/VGG16/faster_rcnn_alt_opt/stage2_rpn_train.pt"
2 | 
3 | base_lr: 0.001
4 | lr_policy: "step"
5 | gamma: 0.1
6 | stepsize: 60000
7 | display: 20
8 | average_loss: 100
9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | 
12 | # We disable standard caffe solver snapshotting and implement our own snapshot
13 | # function
14 | snapshot: 0
15 | # We still use the snapshot prefix, though
16 | snapshot_prefix: "vgg16_rpn"
17 | 
--------------------------------------------------------------------------------
/models/VGG16/faster_rcnn_alt_opt/stage1_fast_rcnn_ohem_solver30k40k.pt:
--------------------------------------------------------------------------------
1 | train_net: "models/VGG16/faster_rcnn_alt_opt/stage1_fast_rcnn_ohem_train.pt"
2 | 
3 | base_lr: 0.001
4 | lr_policy: "step"
5 | gamma: 0.1
6 | stepsize: 30000
7 | display: 20
8 | average_loss: 100
9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | 
12 | # We disable standard caffe solver snapshotting and implement our own snapshot
13 | # function
14 | snapshot: 0
15 | # We still use the snapshot prefix, though
16 | snapshot_prefix: "vgg16_fast_rcnn"
17 | 
--------------------------------------------------------------------------------
/models/VGG16/faster_rcnn_alt_opt/stage2_fast_rcnn_ohem_solver30k40k.pt:
--------------------------------------------------------------------------------
1 | train_net: "models/VGG16/faster_rcnn_alt_opt/stage2_fast_rcnn_ohem_train.pt"
2 | 
3 | base_lr: 0.001
4 | lr_policy: "step"
5 | gamma: 0.1
6 | stepsize: 30000
7 | display: 20
8 | average_loss: 100
9 | momentum: 0.9
10 | weight_decay: 0.0005
11 | 
12 | # We disable standard caffe solver snapshotting and implement our own snapshot
13 | # function
14 | snapshot: 0
15 | # We still use the snapshot prefix, though
16 | snapshot_prefix: "vgg16_fast_rcnn"
17 | 
--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/compute_overlap.m:
--------------------------------------------------------------------------------
1 | function overlap=compute_overlap(bb_pred,bb_target)
2 | a=(bb_pred(3)-bb_pred(1)+1)*(bb_pred(4)-bb_pred(2)+1);
3 | b=(bb_target(3)-bb_target(1)+1)*(bb_target(4)-bb_target(2)+1);
4 | bb_overlap=[max(bb_pred(1),bb_target(1)),max(bb_pred(2),bb_target(2)),min(bb_pred(3),bb_target(3)),min(bb_pred(4),bb_target(4))];
5 | iw=bb_overlap(3)-bb_overlap(1)+1;
6 | ih=bb_overlap(4)-bb_overlap(2)+1;
7 | if iw>0&&ih>0
8 |     overlap=iw*ih/(a+b-iw*ih);
9 | else
10 |     overlap=-inf;
11 | end
12 | 
13 | 
--------------------------------------------------------------------------------
/lib/fast_rcnn/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | from fast_rcnn.config import cfg
9 | from nms.gpu_nms import gpu_nms
10 | from nms.cpu_nms import cpu_nms
11 | 
12 | def nms(dets, thresh):
13 |     """Dispatch to either CPU or GPU NMS implementations."""
14 | 
15 |     if dets.shape[0] == 0:
16 |         return []
17 |     if cfg.USE_GPU_NMS:
18 |         return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
19 |     else:
20 |         return cpu_nms(dets, thresh)
21 | 
--------------------------------------------------------------------------------
/tools/_init_paths.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | """Set up paths for Fast R-CNN."""
9 | 
10 | import os.path as osp
11 | import sys
12 | 
13 | def add_path(path):
14 |     if path not in sys.path:
15 |         sys.path.insert(0, path)
16 | 
17 | this_dir = osp.dirname(__file__)
18 | 
19 | # Add caffe to PYTHONPATH
20 | caffe_path = osp.join(this_dir, '..', 'caffe-fast-rcnn', 'python')
21 | add_path(caffe_path)
22 | 
23 | # Add lib to PYTHONPATH
24 | lib_path = osp.join(this_dir, '..', 'lib')
25 | add_path(lib_path)
26 | 
--------------------------------------------------------------------------------
/lib/rpn/README.md:
--------------------------------------------------------------------------------
1 | ### `rpn` module overview
2 | 
3 | ##### `generate_anchors.py`
4 | 
5 | Generates a regular grid of multi-scale, multi-aspect anchor boxes.
6 | 
7 | ##### `proposal_layer.py`
8 | 
9 | Converts RPN outputs (per-anchor scores and bbox regression estimates) into object proposals.
10 | 
11 | ##### `anchor_target_layer.py`
12 | 
13 | Generates training targets/labels for each anchor. Classification labels are 1 (object), 0 (not object) or -1 (ignore).
14 | Bbox regression targets are specified when the classification label is > 0.
15 | 
16 | ##### `proposal_target_layer.py`
17 | 
18 | Generates training targets/labels for each object proposal: classification labels 0 - K (bg or object class 1, ... , K)
19 | and bbox regression targets when the label is > 0.
20 | 
21 | ##### `generate.py`
22 | 
23 | Generates object detection proposals from an imdb using an RPN.
24 | 
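25 | As a rough illustration of the anchor machinery (a minimal sketch, not the
26 | repo's own `generate_anchors.py`, which handles rounding and re-centering
27 | more carefully), anchors are a single base box enumerated over aspect
28 | ratios and scales:
29 | 
30 | ```python
31 | import numpy as np
32 | 
33 | def make_anchors(base_size=16, ratios=(0.5, 1, 2), scales=(8, 16, 32)):
34 |     """(x1, y1, x2, y2) anchors centered on one base_size x base_size cell."""
35 |     cx = cy = (base_size - 1) / 2.0
36 |     area = float(base_size * base_size)
37 |     anchors = []
38 |     for r in ratios:
39 |         w = np.round(np.sqrt(area / r))   # fix the area, vary the aspect ratio
40 |         h = np.round(w * r)
41 |         for s in scales:
42 |             ws, hs = w * s, h * s         # then scale each shape up
43 |             anchors.append([cx - 0.5 * (ws - 1), cy - 0.5 * (hs - 1),
44 |                             cx + 0.5 * (ws - 1), cy + 0.5 * (hs - 1)])
45 |     return np.array(anchors)              # (len(ratios) * len(scales), 4)
46 | ```
47 | 
48 | Sliding this 9-anchor set over every feature-map cell (offset by the feature
49 | stride) yields the regular grid that `anchor_target_layer.py` labels and
50 | `proposal_layer.py` decodes.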
--------------------------------------------------------------------------------
/data/scripts/fetch_imagenet_models.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
4 | cd $DIR
5 | 
6 | FILE=imagenet_models.tgz
7 | URL=http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/$FILE
8 | CHECKSUM=ed34ca912d6782edfb673a8c3a0bda6d
9 | 
10 | if [ -f $FILE ]; then
11 |   echo "File already exists. Checking md5..."
12 |   os=`uname -s`
13 |   if [ "$os" = "Linux" ]; then
14 |     checksum=`md5sum $FILE | awk '{ print $1 }'`
15 |   elif [ "$os" = "Darwin" ]; then
16 |     checksum=`cat $FILE | md5`
17 |   fi
18 |   if [ "$checksum" = "$CHECKSUM" ]; then
19 |     echo "Checksum is correct. No need to download."
20 |     exit 0
21 |   else
22 |     echo "Checksum is incorrect. Need to download again."
23 |   fi
24 | fi
25 | 
26 | echo "Downloading pretrained ImageNet models (1G)..."
27 | 
28 | wget $URL -O $FILE
29 | 
30 | echo "Unzipping..."
31 | 
32 | tar zxvf $FILE
33 | 
34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
35 | 
--------------------------------------------------------------------------------
/data/scripts/fetch_faster_rcnn_models.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
4 | cd $DIR
5 | 
6 | FILE=faster_rcnn_models.tgz
7 | URL=http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/$FILE
8 | CHECKSUM=ac116844f66aefe29587214272054668
9 | 
10 | if [ -f $FILE ]; then
11 |   echo "File already exists. Checking md5..."
12 |   os=`uname -s`
13 |   if [ "$os" = "Linux" ]; then
14 |     checksum=`md5sum $FILE | awk '{ print $1 }'`
15 |   elif [ "$os" = "Darwin" ]; then
16 |     checksum=`cat $FILE | md5`
17 |   fi
18 |   if [ "$checksum" = "$CHECKSUM" ]; then
19 |     echo "Checksum is correct. No need to download."
20 |     exit 0
21 |   else
22 |     echo "Checksum is incorrect. Need to download again."
23 |   fi
24 | fi
25 | 
26 | echo "Downloading Faster R-CNN demo models (695M)..."
27 | 
28 | wget $URL -O $FILE
29 | 
30 | echo "Unzipping..."
31 | 
32 | tar zxvf $FILE
33 | 
34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
35 | 
--------------------------------------------------------------------------------
/data/scripts/fetch_selective_search_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
4 | cd $DIR
5 | 
6 | FILE=selective_search_data.tgz
7 | URL=http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/$FILE
8 | CHECKSUM=7078c1db87a7851b31966b96774cd9b9
9 | 
10 | if [ -f $FILE ]; then
11 |   echo "File already exists. Checking md5..."
12 |   os=`uname -s`
13 |   if [ "$os" = "Linux" ]; then
14 |     checksum=`md5sum $FILE | awk '{ print $1 }'`
15 |   elif [ "$os" = "Darwin" ]; then
16 |     checksum=`cat $FILE | md5`
17 |   fi
18 |   if [ "$checksum" = "$CHECKSUM" ]; then
19 |     echo "Checksum is correct. No need to download."
20 |     exit 0
21 |   else
22 |     echo "Checksum is incorrect. Need to download again."
23 |   fi
24 | fi
25 | 
26 | echo "Downloading precomputed selective search boxes (0.5G)..."
27 | 
28 | wget $URL -O $FILE
29 | 
30 | echo "Unzipping..."
31 | 
32 | tar zxvf $FILE
33 | 
34 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
35 | 
--------------------------------------------------------------------------------
/lib/datasets/factory.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | """Factory method for easily getting imdbs by name."""
9 | import datasets.kakou
10 | import numpy as np
11 | 
12 | __sets = {}
13 | imageset = 'KakouTrain'
14 | devkit = '/home/bsl/KITTI-detection/data'
15 | 
16 | 
17 | def get_imdb(name):
18 |     """Get an imdb (image database) by name."""
19 |     __sets['KakouTrain'] = (lambda imageset = imageset, devkit = devkit: datasets.kakou(imageset,devkit))
20 |     __sets['KakouTest'] = (lambda imageset = 'KakouTest', devkit = devkit: datasets.kakou(imageset,devkit))
21 |     if not __sets.has_key(name):
22 |         raise KeyError('Unknown dataset: {}'.format(name))
23 |     return __sets[name]()
24 | 
25 | def list_imdbs():
26 |     """List all registered imdbs."""
27 |     return __sets.keys()
28 | 
--------------------------------------------------------------------------------
/experiments/scripts/TEST.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Usage:
3 | # ./experiments/scripts/TEST.sh GPU NET [--set ...]
4 | # Example:
5 | # ./experiments/scripts/TEST.sh 0 ZF \
6 | #   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400,500,600,700]"
7 | 
8 | set -x
9 | set -e
10 | 
11 | export PYTHONUNBUFFERED="True"
12 | 
13 | GPU_ID=$1
14 | NET=$2
15 | NET_lc=${NET,,}
16 | 
17 | array=( $@ )
18 | len=${#array[@]}
19 | EXTRA_ARGS=${array[@]:2:$len}
20 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
21 | 
22 | LOG="experiments/logs/faster_rcnn_alt_opt_TEST_HUAIJIN_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
23 | exec &> >(tee -a "$LOG")
24 | echo Logging output to "$LOG"
25 | 
26 | 
27 | 
28 | 
29 | 
30 | NET_FINAL=output/faster_rcnn_alt_opt/KakouTrain/VGG16_faster_rcnn_final.caffemodel
31 | 
32 | time ./tools/test_net.py --gpu ${GPU_ID} \
33 |   --def models/${NET}/faster_rcnn_alt_opt/faster_rcnn_test.pt \
34 |   --net ${NET_FINAL} \
35 |   --imdb KakouTest \
36 |   --cfg experiments/cfgs/faster_rcnn_alt_opt.yml \
37 |   ${EXTRA_ARGS}
38 | 
--------------------------------------------------------------------------------
/lib/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import time
9 | 
10 | class Timer(object):
11 |     """A simple timer."""
12 |     def __init__(self):
13 |         self.total_time = 0.
14 |         self.calls = 0
15 |         self.start_time = 0.
16 |         self.diff = 0.
17 |         self.average_time = 0.
18 | 
19 |     def tic(self):
20 |         # using time.time instead of time.clock because time.clock
21 |         # does not normalize for multithreading
22 |         self.start_time = time.time()
23 | 
24 |     def toc(self, average=True):
25 |         self.diff = time.time() - self.start_time
26 |         self.total_time += self.diff
27 |         self.calls += 1
28 |         self.average_time = self.total_time / self.calls
29 |         if average:
30 |             return self.average_time
31 |         else:
32 |             return self.diff
33 | 
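34 | # A hedged usage sketch (not part of the original file):
35 | #
36 | #   timer = Timer()
37 | #   timer.tic()
38 | #   run_detection()    # any workload being profiled (hypothetical helper)
39 | #   print timer.toc()  # seconds, averaged over all tic/toc pairs so far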
--------------------------------------------------------------------------------
/experiments/scripts/fast_rcnn.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Usage:
3 | # ./experiments/scripts/fast_rcnn.sh GPU NET [options args to {train,test}_net.py]
4 | # Example:
5 | # ./experiments/scripts/fast_rcnn.sh 0 CaffeNet \
6 | #   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]"
7 | 
8 | set -x
9 | set -e
10 | 
11 | export PYTHONUNBUFFERED="True"
12 | 
13 | GPU_ID=$1
14 | NET=$2
15 | NET_lc=${NET,,}
16 | 
17 | array=( $@ )
18 | len=${#array[@]}
19 | EXTRA_ARGS=${array[@]:2:$len}
20 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
21 | 
22 | LOG="experiments/logs/default_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
23 | exec &> >(tee -a "$LOG")
24 | echo Logging output to "$LOG"
25 | 
26 | time ./tools/train_net.py --gpu ${GPU_ID} \
27 |   --solver models/${NET}/fast_rcnn/solver.prototxt \
28 |   --weights data/imagenet_models/${NET}.v2.caffemodel \
29 |   --imdb voc_2007_trainval \
30 |   ${EXTRA_ARGS}
31 | 
32 | set +x
33 | NET_FINAL=`grep -B 1 "done solving" ${LOG} | grep "Wrote snapshot" | awk '{print $4}'`
34 | set -x
35 | 
36 | time ./tools/test_net.py --gpu ${GPU_ID} \
37 |   --def models/${NET}/fast_rcnn/test.prototxt \
38 |   --net ${NET_FINAL} \
39 |   --imdb voc_2007_test \
40 |   ${EXTRA_ARGS}
41 | 
--------------------------------------------------------------------------------
/lib/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | 
10 | def py_cpu_nms(dets, thresh):
11 |     """Pure Python NMS baseline."""
12 |     x1 = dets[:, 0]
13 |     y1 = dets[:, 1]
14 |     x2 = dets[:, 2]
15 |     y2 = dets[:, 3]
16 |     scores = dets[:, 4]
17 | 
18 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 |     order = scores.argsort()[::-1]
20 | 
21 |     keep = []
22 |     while order.size > 0:
23 |         i = order[0]
24 |         keep.append(i)
25 |         xx1 = np.maximum(x1[i], x1[order[1:]])
26 |         yy1 = np.maximum(y1[i], y1[order[1:]])
27 |         xx2 = np.minimum(x2[i], x2[order[1:]])
28 |         yy2 = np.minimum(y2[i], y2[order[1:]])
29 | 
30 |         w = np.maximum(0.0, xx2 - xx1 + 1)
31 |         h = np.maximum(0.0, yy2 - yy1 + 1)
32 |         inter = w * h
33 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 | 
35 |         inds = np.where(ovr <= thresh)[0]
36 |         order = order[inds + 1]
37 | 
38 |     return keep
39 | 
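40 | if __name__ == '__main__':
41 |     # Hedged smoke test, not part of the original file: two heavily
42 |     # overlapping boxes plus one disjoint box. NMS at IoU 0.3 keeps the
43 |     # higher-scoring box of the overlapping pair and the disjoint box.
44 |     dets = np.array([[10, 10, 50, 50, 0.9],
45 |                      [12, 12, 48, 48, 0.8],
46 |                      [100, 100, 140, 140, 0.7]], dtype=np.float32)
47 |     print py_cpu_nms(dets, 0.3)  # expected: [0, 2]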
--------------------------------------------------------------------------------
/lib/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | cimport numpy as np
10 | 
11 | assert sizeof(int) == sizeof(np.int32_t)
12 | 
13 | cdef extern from "gpu_nms.hpp":
14 |     void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 | 
16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
17 |             np.int32_t device_id=0):
18 |     cdef int boxes_num = dets.shape[0]
19 |     cdef int boxes_dim = dets.shape[1]
20 |     cdef int num_out
21 |     cdef np.ndarray[np.int32_t, ndim=1] \
22 |         keep = np.zeros(boxes_num, dtype=np.int32)
23 |     cdef np.ndarray[np.float32_t, ndim=1] \
24 |         scores = dets[:, 4]
25 |     cdef np.ndarray[np.int_t, ndim=1] \
26 |         order = scores.argsort()[::-1]
27 |     cdef np.ndarray[np.float32_t, ndim=2] \
28 |         sorted_dets = dets[order, :]
29 |     _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 |     keep = keep[:num_out]
31 |     return list(order[keep])
32 | 
--------------------------------------------------------------------------------
/experiments/scripts/faster_rcnn_alt_opt.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Usage:
3 | # ./experiments/scripts/faster_rcnn_alt_opt.sh GPU NET [--set ...]
4 | # Example:
5 | # ./experiments/scripts/faster_rcnn_alt_opt.sh 0 ZF \
6 | #   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400,500,600,700]"
7 | 
8 | set -x
9 | set -e
10 | 
11 | export PYTHONUNBUFFERED="True"
12 | 
13 | GPU_ID=$1
14 | NET=$2
15 | NET_lc=${NET,,}
16 | 
17 | array=( $@ )
18 | len=${#array[@]}
19 | EXTRA_ARGS=${array[@]:2:$len}
20 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
21 | 
22 | LOG="experiments/logs/faster_rcnn_alt_opt_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
23 | exec &> >(tee -a "$LOG")
24 | echo Logging output to "$LOG"
25 | 
26 | time ./tools/train_faster_rcnn_alt_opt.py --gpu ${GPU_ID} \
27 |   --net_name ${NET} \
28 |   --weights data/imagenet_models/${NET}.v2.caffemodel \
29 |   --imdb KakouTrain \
30 |   --cfg experiments/cfgs/faster_rcnn_alt_opt.yml \
31 |   ${EXTRA_ARGS}
32 | 
33 | set +x
34 | NET_FINAL=`grep "Final model:" ${LOG} | awk '{print $3}'`
35 | set -x
36 | 
37 | time ./tools/test_net.py --gpu ${GPU_ID} \
38 |   --def models/${NET}/faster_rcnn_alt_opt/faster_rcnn_test.pt \
39 |   --net ${NET_FINAL} \
40 |   --imdb KakouTest \
41 |   --cfg experiments/cfgs/faster_rcnn_alt_opt.yml \
42 |   ${EXTRA_ARGS}
43 | 
--------------------------------------------------------------------------------
/data/split_data.m:
--------------------------------------------------------------------------------
1 | clear all;close all;clc;
2 | image_list=importdata('TrainIndex_GT.txt','r');
3 | rand('seed',2);
4 | index=randperm(length(image_list));
5 | fp_train=fopen('ImageList_Version_S_AddData_train.txt','w');
6 | fp_val=fopen('ImageList_Version_S_AddData_val.txt','w');
7 | fp_train_gt=fopen('ImageList_Version_S_GT_AddData_train.txt','w');
8 | fp_val_gt=fopen('ImageList_Version_S_GT_AddData_val.txt','w');
9 | trainsample=0.7;% 70% as training and 30% validation
10 | train_num=fix(trainsample*length(index));
11 | train_index=index(1:train_num);
12 | val_index=index(train_num+1:end);
13 | 
14 | fidin1=fopen('Train_image_list.txt','r');
15 | fidin2=fopen('TrainIndex_GT.txt','r');
16 | 
17 | ind=1;
18 | while ~feof(fidin1)
19 |     tline=fgetl(fidin1);
20 |     if length(find(train_index==ind))==1
21 |         fprintf(fp_train,tline);
22 |         fprintf(fp_train,'\n');
23 |     else
24 |         fprintf(fp_val,tline);
25 |         fprintf(fp_val,'\n');
26 |     end
27 |     ind=ind+1;
28 | end
29 | 
30 | ind=1;
31 | while ~feof(fidin2)
32 |     tline=fgetl(fidin2);
33 |     if length(find(train_index==ind))==1
34 |         fprintf(fp_train_gt,tline);
35 |         fprintf(fp_train_gt,'\n');
36 |     else
37 |         fprintf(fp_val_gt,tline);
38 |         fprintf(fp_val_gt,'\n');
39 |     end
40 |     ind=ind+1;
41 | end
42 | 
--------------------------------------------------------------------------------
/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | from .imdb import imdb
9 | from . import factory
10 | from .kakou import kakou
11 | 
12 | import os.path as osp
13 | ROOT_DIR = osp.join(osp.dirname(__file__), '..', '..')
14 | 
15 | # We assume your matlab binary is in your path and called `matlab'.
16 | # If either is not true, just add it to your path and alias it as matlab, or
17 | # you could change this file.
18 | MATLAB = 'matlab'
19 | 
20 | # http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
21 | def _which(program):
22 |     import os
23 |     def is_exe(fpath):
24 |         return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
25 | 
26 |     fpath, fname = os.path.split(program)
27 |     if fpath:
28 |         if is_exe(program):
29 |             return program
30 |     else:
31 |         for path in os.environ["PATH"].split(os.pathsep):
32 |             path = path.strip('"')
33 |             exe_file = os.path.join(path, program)
34 |             if is_exe(exe_file):
35 |                 return exe_file
36 | 
37 |     return None
38 | 
39 | if _which(MATLAB) is None:
40 |     msg = ("MATLAB command '{}' not found. "
41 |            "Please add '{}' to your PATH.").format(MATLAB, MATLAB)
42 |     raise EnvironmentError(msg)
43 | 
--------------------------------------------------------------------------------
/tools/fcolor.py:
--------------------------------------------------------------------------------
1 | import cv
2 | 
3 | def Color(image):
4 |     w = image.width
5 |     h = image.height
6 |     size = (w,h)
7 |     iColor = cv.CreateImage(size,8,3)
8 |     for i in range(h):
9 |         for j in range(w):
10 |             r = GetR(image[i,j])
11 |             g = GetG(image[i,j])
12 |             b = GetB(image[i,j])
13 |             iColor[i,j] = (r,g,b)
14 |     return iColor
15 | 
16 | def GetR(gray):
17 |     if gray < 127:
18 |         return 0
19 |     elif gray > 191:
20 |         return 255
21 |     else:
22 |         return (gray-127)*4-1
23 | 
24 | 
25 | def GetG(gray):
26 |     if gray < 64:
27 |         return 4*gray
28 |     elif gray > 191:
29 |         return 256-(gray-191)*4
30 |     else:
31 |         return 255
32 | 
33 | def GetB(gray):
34 |     if gray < 64:
35 |         return 255
36 |     elif gray > 127:
37 |         return 0
38 |     else:
39 |         return 256-(gray-63)*4
40 | 
41 | FCArray = [(0,51,0),(0,51,102),(51,51,102),(51,102,51),\
42 |     (51,51,153),(102,51,102),(153,153,0),(51,102,153),\
43 |     (153,102,51),(153,204,102),(204,153,102),(102,204,102),\
44 |     (153,204,153),(204,204,102),(204,255,204),(255,255,204)]
45 | def FColor(image,array=FCArray):
46 |     w = image.width
47 |     h = image.height
48 |     size = (w,h)
49 |     iColor = cv.CreateImage(size,8,3)
50 |     for i in range(h):
51 |         for j in range(w):
52 |             iColor[i,j] = array[int(image[i,j]/16)]
53 |     return iColor
54 | 
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | This directory holds (*after you download them*):
2 | - Caffe models pre-trained on ImageNet
3 | - Faster R-CNN models
4 | - Symlinks to datasets
5 | 
6 | To download Caffe models (ZF, VGG16) pre-trained on ImageNet, run:
7 | 
8 | ```
9 | ./data/scripts/fetch_imagenet_models.sh
10 | ```
11 | 
12 | This script will populate `data/imagenet_models`.
13 | 
14 | To download Faster R-CNN models trained on VOC 2007, run:
15 | 
16 | ```
17 | ./data/scripts/fetch_faster_rcnn_models.sh
18 | ```
19 | 
20 | This script will populate `data/faster_rcnn_models`.
21 | 
22 | In order to train and test with PASCAL VOC, you will need to establish symlinks.
23 | From the `data` directory (`cd data`):
24 | 
25 | ```
26 | # For VOC 2007
27 | ln -s /your/path/to/VOC2007/VOCdevkit VOCdevkit2007
28 | 
29 | # For VOC 2012
30 | ln -s /your/path/to/VOC2012/VOCdevkit VOCdevkit2012
31 | ```
32 | 
33 | Since you'll likely be experimenting with multiple installs of Fast/er R-CNN in
34 | parallel, you'll probably want to keep all of this data in a shared place and
35 | use symlinks. On my system I create the following symlinks inside `data`:
36 | 
37 | ```
38 | # data/cache holds various outputs created by the datasets package
39 | ln -s /data/fast_rcnn_shared/cache
40 | 
41 | # move the imagenet_models to shared location and symlink to them
42 | ln -s /data/fast_rcnn_shared/imagenet_models
43 | 
44 | # move the selective search data to a shared location and symlink to them
45 | # (only applicable to Fast R-CNN training)
46 | ln -s /data/fast_rcnn_shared/selective_search_data
47 | 
48 | ln -s /data/VOC2007/VOCdevkit VOCdevkit2007
49 | ln -s /data/VOC2012/VOCdevkit VOCdevkit2012
50 | ```
51 | 
--------------------------------------------------------------------------------
/loss_accuracy.m:
--------------------------------------------------------------------------------
1 | clc;
2 | clear;
3 | % log file of caffe model
4 | logName = 'Kitti.log';
5 | fid = fopen(logName, 'r');
6 | fid_accuracy = fopen('output_accuracy.txt', 'w');
7 | fid_loss = fopen('output_loss.txt', 'w');
8 | tline = fgetl(fid);
9 | while ischar(tline)
10 |     % First find the accuracy line
11 |     k = strfind(tline, 'Test net output');
12 |     if (k)
13 |         k = strfind(tline, 'accuracy');
14 |         if (k)
15 |             % If the string contain test and accuracy at the same time
16 |             % The bias from 'accuracy' to the float number
17 |             indexStart = k + 11; indexEnd = size(tline);
18 |             str = tline(indexStart : indexEnd(2));
19 |         end
20 |         % Get the number of index
21 |         k = strfind(tline, '#');
22 |         if (k) indexStart = k + 1;
23 |             indexEnd = strfind(tline, ':');
24 |             str2 = tline(indexStart : indexEnd - 1);
25 |         end
26 |         % Concatenation of two string
27 |         res_str = strcat(str2, '/', str);
28 |         fprintf(fid_accuracy, '%s\r\n', res_str);
29 |     end
30 |     % Then find the loss line
31 |     k1 = strfind(tline, 'Iteration');
32 |     if (k1) k2 = strfind(tline, 'loss');
33 |         if (k2) indexStart = k2 + 7;
34 |             indexEnd = size(tline);
35 |             str1 = tline(indexStart:indexEnd(2));
36 |             indexStart = k1 + 10;
37 |             indexEnd = strfind(tline, ',') - 1;
38 |             str2 = tline(indexStart:indexEnd);
39 |             res_str1 = strcat(str2, '/ ', str1);
40 |             fprintf(fid_loss, '%s\r\n', res_str1);
41 |         end
42 |     end
43 |     tline = fgetl(fid);
44 | end
45 | fclose(fid); fclose(fid_accuracy); fclose(fid_loss);
--------------------------------------------------------------------------------
/lib/utils/blob.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | """Blob helper functions."""
9 | 
10 | import numpy as np
11 | import cv2
12 | 
13 | def im_list_to_blob(ims):
14 |     """Convert a list of images into a network input.
15 | 
16 |     Assumes images are already prepared (means subtracted, BGR order, ...).
17 |     """
18 |     max_shape = np.array([im.shape for im in ims]).max(axis=0)
19 |     num_images = len(ims)
20 |     blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
21 |                     dtype=np.float32)
22 |     for i in xrange(num_images):
23 |         im = ims[i]
24 |         blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
25 |     # Move channels (axis 3) to axis 1
26 |     # Axis order will become: (batch elem, channel, height, width)
27 |     channel_swap = (0, 3, 1, 2)
28 |     blob = blob.transpose(channel_swap)
29 |     return blob
30 | 
31 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
32 |     """Mean subtract and scale an image for use in a blob."""
33 |     im = im.astype(np.float32, copy=False)
34 |     im -= pixel_means
35 |     im_shape = im.shape
36 |     im_size_min = np.min(im_shape[0:2])
37 |     im_size_max = np.max(im_shape[0:2])
38 |     im_scale = float(target_size) / float(im_size_min)
39 |     # Prevent the biggest axis from being more than MAX_SIZE
40 |     if np.round(im_scale * im_size_max) > max_size:
41 |         im_scale = float(max_size) / float(im_size_max)
42 |     im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
43 |                     interpolation=cv2.INTER_LINEAR)
44 | 
45 |     return im, im_scale
46 | 
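47 | # Hedged usage sketch (not part of the original file): two differently
48 | # sized images are zero-padded to the common max shape and stacked NCHW.
49 | #
50 | #   ims = [np.zeros((600, 800, 3), np.float32),
51 | #          np.zeros((480, 640, 3), np.float32)]
52 | #   print im_list_to_blob(ims).shape  # (2, 3, 600, 800)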
--------------------------------------------------------------------------------
/lib/utils/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 | 
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 | 
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 | 
15 | def bbox_overlaps(
16 |         np.ndarray[DTYPE_t, ndim=2] boxes,
17 |         np.ndarray[DTYPE_t, ndim=2] query_boxes):
18 |     """
19 |     Parameters
20 |     ----------
21 |     boxes: (N, 4) ndarray of float
22 |     query_boxes: (K, 4) ndarray of float
23 |     Returns
24 |     -------
25 |     overlaps: (N, K) ndarray of overlap between boxes and query_boxes
26 |     """
27 |     cdef unsigned int N = boxes.shape[0]
28 |     cdef unsigned int K = query_boxes.shape[0]
29 |     cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
30 |     cdef DTYPE_t iw, ih, box_area
31 |     cdef DTYPE_t ua
32 |     cdef unsigned int k, n
33 |     for k in range(K):
34 |         box_area = (
35 |             (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
36 |             (query_boxes[k, 3] - query_boxes[k, 1] + 1)
37 |         )
38 |         for n in range(N):
39 |             iw = (
40 |                 min(boxes[n, 2], query_boxes[k, 2]) -
41 |                 max(boxes[n, 0], query_boxes[k, 0]) + 1
42 |             )
43 |             if iw > 0:
44 |                 ih = (
45 |                     min(boxes[n, 3], query_boxes[k, 3]) -
46 |                     max(boxes[n, 1], query_boxes[k, 1]) + 1
47 |                 )
48 |                 if ih > 0:
49 |                     ua = float(
50 |                         (boxes[n, 2] - boxes[n, 0] + 1) *
51 |                         (boxes[n, 3] - boxes[n, 1] + 1) +
52 |                         box_area - iw * ih
53 |                     )
54 |                     overlaps[n, k] = iw * ih / ua
55 |     return overlaps
56 | 
--------------------------------------------------------------------------------
/lib/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | cimport numpy as np
10 | 
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 |     return a if a >= b else b
13 | 
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 |     return a if a <= b else b
16 | 
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 |     cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 |     cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 |     cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 |     cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 |     cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 | 
24 |     cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 |     cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 | 
27 |     cdef int ndets = dets.shape[0]
28 |     cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 |         np.zeros((ndets), dtype=np.int)
30 | 
31 |     # nominal indices
32 |     cdef int _i, _j
33 |     # sorted indices
34 |     cdef int i, j
35 |     # temp variables for box i's (the box currently under consideration)
36 |     cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 |     # variables for computing overlap with box j (lower scoring box)
38 |     cdef np.float32_t xx1, yy1, xx2, yy2
39 |     cdef np.float32_t w, h
40 |     cdef np.float32_t inter, ovr
41 | 
42 |     keep = []
43 |     for _i in range(ndets):
44 |         i = order[_i]
45 |         if suppressed[i] == 1:
46 |             continue
47 |         keep.append(i)
48 |         ix1 = x1[i]
49 |         iy1 = y1[i]
50 |         ix2 = x2[i]
51 |         iy2 = y2[i]
52 |         iarea = areas[i]
53 |         for _j in range(_i + 1, ndets):
54 |             j = order[_j]
55 |             if suppressed[j] == 1:
56 |                 continue
57 |             xx1 = max(ix1, x1[j])
58 |             yy1 = max(iy1, y1[j])
59 |             xx2 = min(ix2, x2[j])
60 |             yy2 = min(iy2, y2[j])
61 |             w = max(0.0, xx2 - xx1 + 1)
62 |             h = max(0.0, yy2 - yy1 + 1)
63 |             inter = w * h
64 |             ovr = inter / (iarea + areas[j] - inter)
65 |             if ovr >= thresh:
66 |                 suppressed[j] = 1
67 | 
68 |     return keep
69 | 
--------------------------------------------------------------------------------
/tools/eval_recall.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import _init_paths
4 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list
5 | from datasets.factory import get_imdb
6 | import argparse
7 | import time, os, sys
8 | import numpy as np
9 | 
10 | def parse_args():
11 |     """
12 |     Parse input arguments
13 |     """
14 |     parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
15 |     parser.add_argument('--imdb', dest='imdb_name',
16 |                         help='dataset to test',
17 |                         default='voc_2007_test', type=str)
18 |     parser.add_argument('--method', dest='method',
19 |                         help='proposal method',
20 |                         default='selective_search', type=str)
21 |     parser.add_argument('--rpn-file', dest='rpn_file',
22 |                         default=None, type=str)
23 | 
24 |     if len(sys.argv) == 1:
25 |         parser.print_help()
26 |         sys.exit(1)
27 | 
28 |     args = parser.parse_args()
29 |     return args
30 | 
31 | if __name__ == '__main__':
32 |     args = parse_args()
33 | 
34 |     print('Called with args:')
35 |     print(args)
36 | 
37 |     imdb = get_imdb(args.imdb_name)
38 |     imdb.set_proposal_method(args.method)
39 |     if args.rpn_file is not None:
40 |         imdb.config['rpn_file'] = args.rpn_file
41 | 
42 |     candidate_boxes = None
43 |     if 0:
44 |         import scipy.io as sio
45 |         filename = 'debug/stage1_rpn_voc_2007_test.mat'
46 |         raw_data = sio.loadmat(filename)['aboxes'].ravel()
47 |         candidate_boxes = raw_data
48 | 
49 |     ar, gt_overlaps, recalls, thresholds = \
50 |         imdb.evaluate_recall(candidate_boxes=candidate_boxes)
51 |     print 'Method: {}'.format(args.method)
52 |     print 'AverageRec: {:.3f}'.format(ar)
53 | 
54 |     def recall_at(t):
55 |         ind = np.where(thresholds > t - 1e-5)[0][0]
56 |         assert np.isclose(thresholds[ind], t)
57 |         return recalls[ind]
58 | 
59 |     print 'Recall@0.5: {:.3f}'.format(recall_at(0.5))
60 |     print 'Recall@0.6: {:.3f}'.format(recall_at(0.6))
61 |     print 'Recall@0.7: {:.3f}'.format(recall_at(0.7))
62 |     print 'Recall@0.8: {:.3f}'.format(recall_at(0.8))
63 |     print 'Recall@0.9: {:.3f}'.format(recall_at(0.9))
64 |     # print again for easy spreadsheet copying
65 |     print '{:.3f}'.format(ar)
66 |     print '{:.3f}'.format(recall_at(0.5))
67 |     print '{:.3f}'.format(recall_at(0.6))
68 |     print '{:.3f}'.format(recall_at(0.7))
69 |     print '{:.3f}'.format(recall_at(0.8))
70 |     print '{:.3f}'.format(recall_at(0.9))
71 | 
--------------------------------------------------------------------------------
/lib/fast_rcnn/bbox_transform.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | 
10 | def bbox_transform(ex_rois, gt_rois):
11 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
12 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
13 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
14 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
15 | 
16 |     gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
17 |     gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
18 |     gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
19 |     gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
20 | 
21 |     targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
22 |     targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
23 |     targets_dw = np.log(gt_widths / ex_widths)
24 |     targets_dh = np.log(gt_heights / ex_heights)
25 | 
26 |     targets = np.vstack(
27 |         (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
28 |     return targets
29 | 
30 | def bbox_transform_inv(boxes, deltas):
31 |     if boxes.shape[0] == 0:
32 |         return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
33 | 
34 |     boxes = boxes.astype(deltas.dtype, copy=False)
35 | 
36 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
37 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
38 |     ctr_x = boxes[:, 0] + 0.5 * widths
39 |     ctr_y = boxes[:, 1] + 0.5 * heights
40 | 
41 |     dx = deltas[:, 0::4]
42 |     dy = deltas[:, 1::4]
43 |     dw = deltas[:, 2::4]
44 |     dh = deltas[:, 3::4]
45 | 
46 |     pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
47 |     pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
48 |     pred_w = np.exp(dw) * widths[:, np.newaxis]
49 |     pred_h = np.exp(dh) * heights[:, np.newaxis]
50 | 
51 |     pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
52 |     # x1
53 |     pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
54 |     # y1
55 |     pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
56 |     # x2
57 |     pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
58 |     # y2
59 |     pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
60 | 
61 |     return pred_boxes
62 | 
63 | def clip_boxes(boxes, im_shape):
64 |     """
65 |     Clip boxes to image boundaries.
66 |     """
67 | 
68 |     # x1 >= 0
69 |     boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
70 |     # y1 >= 0
71 |     boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
72 |     # x2 < im_shape[1]
73 |     boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
74 |     # y2 < im_shape[0]
75 |     boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
76 |     return boxes
77 | 
{} : {:.1f} {:.1f}'.format(cls, ap, apAuC) 56 | aps.append(ap) 57 | 58 | print '~~~~~~~~~~~~~~~~~~~' 59 | print 'Results (from mat files):' 60 | for ap in aps: 61 | print '{:.1f}'.format(ap) 62 | print '{:.1f}'.format(np.array(aps).mean()) 63 | print '~~~~~~~~~~~~~~~~~~~' 64 | 65 | 66 | def from_dets(imdb_name, output_dir, comp_mode): 67 | imdb = get_imdb(imdb_name) 68 | imdb.competition_mode(comp_mode) 69 | with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f: 70 | dets = cPickle.load(f) 71 | 72 | print 'Applying NMS to all detections' 73 | nms_dets = apply_nms(dets, cfg.TEST.NMS) 74 | 75 | print 'Evaluating detections' 76 | imdb.evaluate_detections(nms_dets, output_dir) 77 | 78 | if __name__ == '__main__': 79 | args = parse_args() 80 | 81 | output_dir = os.path.abspath(args.output_dir[0]) 82 | imdb_name = args.imdb_name 83 | 84 | if args.comp_mode and not args.rerun: 85 | raise ValueError('--rerun must be used with --comp') 86 | 87 | if args.rerun: 88 | from_dets(imdb_name, output_dir, args.comp_mode) 89 | else: 90 | from_mats(imdb_name, output_dir) 91 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Test a Fast R-CNN network on an image database.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.test import test_net 14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list 15 | from datasets.factory import get_imdb 16 | import caffe 17 | import argparse 18 | import pprint 19 | import time, os, sys 20 | 21 | def parse_args(): 22 | """ 23 | Parse input arguments 24 | """ 25 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 26 | parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use', 27 | default=0, type=int) 28 | parser.add_argument('--def', dest='prototxt', 29 | help='prototxt file defining the network', 30 | default=None, type=str) 31 | parser.add_argument('--net', dest='caffemodel', 32 | help='model to test', 33 | default=None, type=str) 34 | parser.add_argument('--cfg', dest='cfg_file', 35 | help='optional config file', default=None, type=str) 36 | parser.add_argument('--wait', dest='wait', 37 | help='wait until net file exists', 38 | default=True, type=bool) 39 | parser.add_argument('--imdb', dest='imdb_name', 40 | help='dataset to test', 41 | default='voc_2007_test', type=str) 42 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 43 | action='store_true') 44 | parser.add_argument('--set', dest='set_cfgs', 45 | help='set config keys', default=None, 46 | nargs=argparse.REMAINDER) 47 | 48 | if len(sys.argv) == 1: 49 | parser.print_help() 50 | sys.exit(1) 51 | 52 | args = parser.parse_args() 53 | return args 54 | 55 | if __name__ == '__main__': 56 | args = parse_args() 57 | 58 | print('Called with args:') 59 | print(args) 60 | 61 | if args.cfg_file is not None: 62 | cfg_from_file(args.cfg_file) 63 | if args.set_cfgs is not None: 64 | cfg_from_list(args.set_cfgs) 65 | 66 | cfg.GPU_ID = args.gpu_id 67 | 68 | print('Using config:') 69 | pprint.pprint(cfg) 70 | 71 | while not os.path.exists(args.caffemodel) and args.wait: 72 | print('Waiting for {} to 
exist...'.format(args.caffemodel)) 73 | time.sleep(10) 74 | 75 | caffe.set_mode_gpu() 76 | caffe.set_device(args.gpu_id) 77 | net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST) 78 | net.name = os.path.splitext(os.path.basename(args.caffemodel))[0] 79 | 80 | imdb = get_imdb(args.imdb_name) 81 | imdb.competition_mode(args.comp_mode) 82 | if not cfg.TEST.HAS_RPN: 83 | imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD) 84 | 85 | test_net(net, imdb) 86 | -------------------------------------------------------------------------------- /tools/test_net_debug.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Test a Fast R-CNN network on an image database.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.test import test_net 14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list 15 | from datasets.factory import get_imdb 16 | import caffe 17 | import argparse 18 | import pprint 19 | import time, os, sys 20 | 21 | def parse_args(): 22 | """ 23 | Parse input arguments 24 | """ 25 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 26 | parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use', 27 | default=0, type=int) 28 | parser.add_argument('--def', dest='prototxt', 29 | help='prototxt file defining the network', 30 | default='/home/bsl/KITTI-detection/models/VGG16/faster_rcnn_alt_opt/faster_rcnn_test.pt', type=str) 31 | parser.add_argument('--net', dest='caffemodel', 32 | help='model to test', 33 | default='/home/bsl/KITTI-detection/data/faster_rcnn_models/VGG16_faster_rcnn_final.caffemodel', type=str) 34 | parser.add_argument('--cfg', dest='cfg_file', 35 | help='optional config file', default='/home/bsl/KITTI-detection/experiments/cfgs/faster_rcnn_alt_opt.yml', type=str) 36 | parser.add_argument('--wait', dest='wait', 37 | help='wait until net file exists', 38 | default=True, type=bool) 39 | parser.add_argument('--imdb', dest='imdb_name', 40 | help='dataset to test', 41 | default='KakouTest', type=str) 42 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 43 | action='store_true') 44 | parser.add_argument('--set', dest='set_cfgs', 45 | help='set config keys', default=None, 46 | nargs=argparse.REMAINDER) 47 | 48 | #if len(sys.argv) == 1: 49 | # parser.print_help() 50 | # sys.exit(1) 51 | 52 | args = parser.parse_args() 53 | return args 54 | 55 | if __name__ == '__main__': 56 | args = parse_args() 57 | 58 | print('Called with args:') 59 | print(args) 60 | 61 | if args.cfg_file is not None: 62 | cfg_from_file(args.cfg_file) 63 | if args.set_cfgs is not None: 64 | cfg_from_list(args.set_cfgs) 65 | 66 | cfg.GPU_ID = args.gpu_id 67 | 68 | print('Using config:') 69 | pprint.pprint(cfg) 70 | 71 | while not os.path.exists(args.caffemodel) and args.wait: 72 | print('Waiting for {} to exist...'.format(args.caffemodel)) 73 | time.sleep(10) 74 | 75 | caffe.set_mode_gpu() 76 | caffe.set_device(args.gpu_id) 77 | net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST) 78 | net.name = os.path.splitext(os.path.basename(args.caffemodel))[0] 79 | 80 | imdb = get_imdb(args.imdb_name) 81 | imdb.competition_mode(args.comp_mode) 82 | if not cfg.TEST.HAS_RPN: 83 | 
imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD) 84 | 85 | test_net(net, imdb) 86 | -------------------------------------------------------------------------------- /tools/rpn_generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast/er/ R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Generate RPN proposals.""" 11 | 12 | import _init_paths 13 | import numpy as np 14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir 15 | from datasets.factory import get_imdb 16 | from rpn.generate import imdb_proposals 17 | import cPickle 18 | import caffe 19 | import argparse 20 | import pprint 21 | import time, os, sys 22 | 23 | def parse_args(): 24 | """ 25 | Parse input arguments 26 | """ 27 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 28 | parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use', 29 | default=0, type=int) 30 | parser.add_argument('--def', dest='prototxt', 31 | help='prototxt file defining the network', 32 | default=None, type=str) 33 | parser.add_argument('--net', dest='caffemodel', 34 | help='model to test', 35 | default=None, type=str) 36 | parser.add_argument('--cfg', dest='cfg_file', 37 | help='optional config file', default=None, type=str) 38 | parser.add_argument('--wait', dest='wait', 39 | help='wait until net file exists', 40 | default=True, type=bool) 41 | parser.add_argument('--imdb', dest='imdb_name', 42 | help='dataset to test', 43 | default='voc_2007_test', type=str) 44 | parser.add_argument('--set', dest='set_cfgs', 45 | help='set config keys', default=None, 46 | nargs=argparse.REMAINDER) 47 | 48 | if len(sys.argv) == 1: 49 | parser.print_help() 50 | sys.exit(1) 51 | 52 | args = parser.parse_args() 53 | return args 54 | 55 | if __name__ == '__main__': 56 | args = parse_args() 57 | 58 | print('Called with args:') 59 | print(args) 60 | 61 | if args.cfg_file is not None: 62 | cfg_from_file(args.cfg_file) 63 | if args.set_cfgs is not None: 64 | cfg_from_list(args.set_cfgs) 65 | 66 | cfg.GPU_ID = args.gpu_id 67 | 68 | # RPN test settings 69 | cfg.TEST.RPN_PRE_NMS_TOP_N = -1 70 | cfg.TEST.RPN_POST_NMS_TOP_N = 2000 71 | 72 | print('Using config:') 73 | pprint.pprint(cfg) 74 | 75 | while not os.path.exists(args.caffemodel) and args.wait: 76 | print('Waiting for {} to exist...'.format(args.caffemodel)) 77 | time.sleep(10) 78 | 79 | caffe.set_mode_gpu() 80 | caffe.set_device(args.gpu_id) 81 | net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST) 82 | net.name = os.path.splitext(os.path.basename(args.caffemodel))[0] 83 | 84 | imdb = get_imdb(args.imdb_name) 85 | imdb_boxes = imdb_proposals(net, imdb) 86 | 87 | # output_dir = os.path.dirname(args.caffemodel) 88 | output_dir = get_output_dir(imdb, net) 89 | if not os.path.exists(output_dir): 90 | os.makedirs(output_dir) 91 | 92 | rpn_file = os.path.join(output_dir, net.name + '_rpn_proposals.pkl') 93 | with open(rpn_file, 'wb') as f: 94 | cPickle.dump(imdb_boxes, f, cPickle.HIGHEST_PROTOCOL) 95 | print 'Wrote RPN proposals to {}'.format(rpn_file) 96 | -------------------------------------------------------------------------------- /tools/visulization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 
import matplotlib.pyplot as plt 3 | # Make sure that caffe is on the python path: 4 | caffe_root = '../' # this file is expected to be in {caffe_root}/examples 5 | import sys 6 | sys.path.insert(0, caffe_root + 'python') 7 | import caffe 8 | plt.rcParams['figure.figsize'] = (10, 10) 9 | plt.rcParams['image.interpolation'] = 'nearest' 10 | plt.rcParams['image.cmap'] = 'gray' 11 | caffe.set_mode_gpu() 12 | net = caffe.Net(caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt', 13 | caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel', 14 | caffe.TEST) 15 | # input preprocessing: 'data' is the name of the input blob == net.inputs[0] 16 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) 17 | transformer.set_transpose('data', (2,0,1)) 18 | #transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')) # mean pixel 19 | transformer.set_mean('data', np.load(caffe_root + 'data/ilsvrc12/imagenet_mean.npy').mean(0).mean(1).mean(1)) # mean pixel 20 | transformer.set_raw_scale('data', 255) # the reference model operates on images in [0,255] range instead of [0,1] 21 | transformer.set_channel_swap('data', (2,1,0)) # the reference model has channels in BGR order instead of RGB 22 | net.blobs['data'].reshape(50,3,227,227) 23 | net.blobs['data'].data[...] = transformer.preprocess('data', caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')) 24 | out = net.forward() 25 | print("Predicted class is #{}.".format(out['prob'][0].argmax())) 26 | plt.imshow(transformer.deprocess('data', net.blobs['data'].data[0])) 27 | plt.show() 28 | #[(k, v.data.shape) for k, v in net.blobs.items()] 29 | #[(k, v[0].data.shape) for k, v in net.params.items()] 30 | def vis_square(data, padsize=1, padval=0): 31 | data -= data.min() 32 | data /= data.max() 33 | 34 | # force the number of filters to be square 35 | n = int(np.ceil(np.sqrt(data.shape[0]))) 36 | padding = ((0, n ** 2 - data.shape[0]), (0, padsize), (0, padsize)) + ((0, 0),) * (data.ndim - 3) 37 | data = np.pad(data, padding, mode='constant', constant_values=(padval, padval)) 38 | 39 | # tile the filters into an image 40 | data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1))) 41 | data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:]) 42 | 43 | plt.imshow(data) 44 | plt.show() 45 | # 96 filters 46 | filters = net.params['conv1'][0].data 47 | vis_square(filters.transpose(0, 2, 3, 1)) 48 | 49 | feat = net.blobs['conv1'].data[0, :9] 50 | vis_square(feat, padval=1) 51 | 52 | filters = net.params['conv2'][0].data 53 | vis_square(filters[:48].reshape(48**2, 5, 5)) 54 | 55 | feat = net.blobs['conv2'].data[0, :36] 56 | vis_square(feat, padval=1) 57 | 58 | feat = net.blobs['conv3'].data[0] 59 | vis_square(feat, padval=0.5) 60 | 61 | feat = net.blobs['conv4'].data[0] 62 | vis_square(feat, padval=0.5) 63 | 64 | feat = net.blobs['conv5'].data[0] 65 | vis_square(feat, padval=0.5) 66 | 67 | feat = net.blobs['pool5'].data[0] 68 | vis_square(feat, padval=1) 69 | 70 | feat = net.blobs['fc6'].data[0] 71 | plt.subplot(2, 1, 1) 72 | plt.plot(feat.flat) 73 | plt.subplot(2, 1, 2) 74 | _ = plt.hist(feat.flat[feat.flat > 0], bins=100) 75 | 76 | feat = net.blobs['fc7'].data[0] 77 | plt.subplot(2, 1, 1) 78 | plt.plot(feat.flat) 79 | plt.subplot(2, 1, 2) 80 | _ = plt.hist(feat.flat[feat.flat > 0], bins=100) 81 | 82 | feat = net.blobs['prob'].data[0] 83 | plt.plot(feat.flat) 84 | 85 | 86 | 
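The walkthrough above visualizes the stock CaffeNet model. A minimal sketch of pointing the same vis_square helper at the VGG16 detector shipped with this repo (the model paths mirror tools/test_net_debug.py, and the first-layer name 'conv1_1' is an assumption about faster_rcnn_test.pt):

import caffe
# Paths as in tools/test_net_debug.py; run from the repo root and adjust
# to your checkout. 'conv1_1' is VGG16's usual first conv layer name.
det_net = caffe.Net('models/VGG16/faster_rcnn_alt_opt/faster_rcnn_test.pt',
                    'data/faster_rcnn_models/VGG16_faster_rcnn_final.caffemodel',
                    caffe.TEST)
# conv1_1 holds 64 filters of shape (3, 3, 3); the channels-last transpose
# lets the vis_square helper defined above tile them as an 8x8 RGB grid.
filters = det_net.params['conv1_1'][0].data
vis_square(filters.transpose(0, 2, 3, 1))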
-------------------------------------------------------------------------------- /lib/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 11 | # 12 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 13 | # >> anchors 14 | # 15 | # anchors = 16 | # 17 | # -83 -39 100 56 18 | # -175 -87 192 104 19 | # -359 -183 376 200 20 | # -55 -55 72 72 21 | # -119 -119 136 136 22 | # -247 -247 264 264 23 | # -35 -79 52 96 24 | # -79 -167 96 184 25 | # -167 -343 184 360 26 | 27 | #array([[ -83., -39., 100., 56.], 28 | # [-175., -87., 192., 104.], 29 | # [-359., -183., 376., 200.], 30 | # [ -55., -55., 72., 72.], 31 | # [-119., -119., 136., 136.], 32 | # [-247., -247., 264., 264.], 33 | # [ -35., -79., 52., 96.], 34 | # [ -79., -167., 96., 184.], 35 | # [-167., -343., 184., 360.]]) 36 | 37 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 38 | scales=2**np.arange(3, 6)): 39 | 40 | 41 | """ 42 | Generate anchor (reference) windows by enumerating aspect ratios X 43 | scales wrt a reference (0, 0, 15, 15) window. 44 | """ 45 | 46 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 47 | ratio_anchors = _ratio_enum(base_anchor, ratios) 48 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 49 | for i in xrange(ratio_anchors.shape[0])]) 50 | return anchors 51 | 52 | def _whctrs(anchor): 53 | """ 54 | Return width, height, x center, and y center for an anchor (window). 55 | """ 56 | 57 | w = anchor[2] - anchor[0] + 1 58 | h = anchor[3] - anchor[1] + 1 59 | x_ctr = anchor[0] + 0.5 * (w - 1) 60 | y_ctr = anchor[1] + 0.5 * (h - 1) 61 | return w, h, x_ctr, y_ctr 62 | 63 | def _mkanchors(ws, hs, x_ctr, y_ctr): 64 | """ 65 | Given a vector of widths (ws) and heights (hs) around a center 66 | (x_ctr, y_ctr), output a set of anchors (windows). 67 | """ 68 | 69 | ws = ws[:, np.newaxis] 70 | hs = hs[:, np.newaxis] 71 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 72 | y_ctr - 0.5 * (hs - 1), 73 | x_ctr + 0.5 * (ws - 1), 74 | y_ctr + 0.5 * (hs - 1))) 75 | return anchors 76 | 77 | def _ratio_enum(anchor, ratios): 78 | """ 79 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 80 | """ 81 | 82 | w, h, x_ctr, y_ctr = _whctrs(anchor) 83 | size = w * h 84 | size_ratios = size / ratios 85 | ws = np.round(np.sqrt(size_ratios)) 86 | hs = np.round(ws * ratios) 87 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 88 | return anchors 89 | 90 | def _scale_enum(anchor, scales): 91 | """ 92 | Enumerate a set of anchors for each scale wrt an anchor. 
93 | """ 94 | 95 | w, h, x_ctr, y_ctr = _whctrs(anchor) 96 | ws = w * scales 97 | hs = h * scales 98 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 99 | return anchors 100 | 101 | if __name__ == '__main__': 102 | import time 103 | t = time.time() 104 | a = generate_anchors() 105 | print time.time() - t 106 | print a 107 | from IPython import embed; embed() 108 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Faster R-CNN 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2015 Microsoft Corporation 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | 25 | ************************************************************************ 26 | 27 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION 28 | 29 | This project, Faster R-CNN, incorporates material from the project(s) 30 | listed below (collectively, "Third Party Code"). Microsoft is not the 31 | original author of the Third Party Code. The original copyright notice 32 | and license under which Microsoft received such Third Party Code are set 33 | out below. This Third Party Code is licensed to you under their original 34 | license terms set forth below. Microsoft reserves all other rights not 35 | expressly granted, whether by implication, estoppel or otherwise. 36 | 37 | 1. Caffe, (https://github.com/BVLC/caffe/) 38 | 39 | COPYRIGHT 40 | 41 | All contributions by the University of California: 42 | Copyright (c) 2014, 2015, The Regents of the University of California (Regents) 43 | All rights reserved. 44 | 45 | All other contributions: 46 | Copyright (c) 2014, 2015, the respective contributors 47 | All rights reserved. 48 | 49 | Caffe uses a shared copyright model: each contributor holds copyright 50 | over their contributions to Caffe. The project versioning records all 51 | such contribution and copyright details. If a contributor wants to 52 | further mark their specific copyright on a particular contribution, 53 | they should indicate their copyright solely in the commit message of 54 | the change when it is committed. 55 | 56 | The BSD 2-Clause License 57 | 58 | Redistribution and use in source and binary forms, with or without 59 | modification, are permitted provided that the following conditions 60 | are met: 61 | 62 | 1. Redistributions of source code must retain the above copyright notice, 63 | this list of conditions and the following disclaimer. 64 | 65 | 2. 
Redistributions in binary form must reproduce the above copyright 66 | notice, this list of conditions and the following disclaimer in the 67 | documentation and/or other materials provided with the distribution. 68 | 69 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 70 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 71 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 72 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 73 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 74 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 75 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 76 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 77 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 78 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 79 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 80 | 81 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION********** 82 | -------------------------------------------------------------------------------- /tools/train_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Train a Fast R-CNN network on a region of interest database.""" 11 | 12 | import _init_paths 13 | from fast_rcnn.train import get_training_roidb, train_net 14 | from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir 15 | from datasets.factory import get_imdb 16 | import datasets.imdb 17 | import caffe 18 | import argparse 19 | import pprint 20 | import numpy as np 21 | import sys 22 | 23 | def parse_args(): 24 | """ 25 | Parse input arguments 26 | """ 27 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 28 | parser.add_argument('--gpu', dest='gpu_id', 29 | help='GPU device id to use [0]', 30 | default=0, type=int) 31 | parser.add_argument('--solver', dest='solver', 32 | help='solver prototxt', 33 | default=None, type=str) 34 | parser.add_argument('--iters', dest='max_iters', 35 | help='number of iterations to train', 36 | default=40000, type=int) 37 | parser.add_argument('--weights', dest='pretrained_model', 38 | help='initialize with pretrained model weights', 39 | default=None, type=str) 40 | parser.add_argument('--cfg', dest='cfg_file', 41 | help='optional config file', 42 | default=None, type=str) 43 | parser.add_argument('--imdb', dest='imdb_name', 44 | help='dataset to train on', 45 | default='voc_2007_trainval', type=str) 46 | parser.add_argument('--rand', dest='randomize', 47 | help='randomize (do not use a fixed seed)', 48 | action='store_true') 49 | parser.add_argument('--set', dest='set_cfgs', 50 | help='set config keys', default=None, 51 | nargs=argparse.REMAINDER) 52 | 53 | if len(sys.argv) == 1: 54 | parser.print_help() 55 | sys.exit(1) 56 | 57 | args = parser.parse_args() 58 | return args 59 | 60 | def combined_roidb(imdb_names): 61 | def get_roidb(imdb_name): 62 | imdb = get_imdb(imdb_name) 63 | print 'Loaded dataset `{:s}` for training'.format(imdb.name) 64 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 65 | print 'Set proposal 
method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)
66 |         roidb = get_training_roidb(imdb)
67 |         return roidb
68 | 
69 |     roidbs = [get_roidb(s) for s in imdb_names.split('+')]
70 |     roidb = roidbs[0]
71 |     if len(roidbs) > 1:
72 |         for r in roidbs[1:]:
73 |             roidb.extend(r)
74 |         imdb = datasets.imdb.imdb(imdb_names)
75 |     else:
76 |         imdb = get_imdb(imdb_names)
77 |     return imdb, roidb
78 | 
79 | if __name__ == '__main__':
80 |     args = parse_args()
81 | 
82 |     print('Called with args:')
83 |     print(args)
84 | 
85 |     if args.cfg_file is not None:
86 |         cfg_from_file(args.cfg_file)
87 |     if args.set_cfgs is not None:
88 |         cfg_from_list(args.set_cfgs)
89 | 
90 |     cfg.GPU_ID = args.gpu_id
91 | 
92 |     print('Using config:')
93 |     pprint.pprint(cfg)
94 | 
95 |     if not args.randomize:
96 |         # fix the random seeds (numpy and caffe) for reproducibility
97 |         np.random.seed(cfg.RNG_SEED)
98 |         caffe.set_random_seed(cfg.RNG_SEED)
99 | 
100 |     # set up caffe
101 |     caffe.set_mode_gpu()
102 |     caffe.set_device(args.gpu_id)
103 | 
104 |     imdb, roidb = combined_roidb(args.imdb_name)
105 |     print '{:d} roidb entries'.format(len(roidb))
106 | 
107 |     output_dir = get_output_dir(imdb, None)
108 |     print 'Output will be saved to `{:s}`'.format(output_dir)
109 | 
110 |     train_net(args.solver, roidb, output_dir,
111 |               pretrained_model=args.pretrained_model,
112 |               max_iters=args.max_iters)
113 | 
-------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/detection_eval.m: --------------------------------------------------------------------------------
1 | function res = detection_eval(path, comp_id, test_set,output_dir,img_list,img_gt)
2 | classes={'car','person','bike', 'truck', 'van', 'tram', 'misc'};
3 | minoverlap=0.5;
4 | class_num=zeros(1,length(classes));
5 | img_list_path= strcat(path,'/',img_list);
6 | img_gt_path= strcat(path,'/',img_gt);
7 | image_list=importdata(img_list_path);
8 | 
9 | 
10 | fidin=fopen(img_gt_path);
11 | ind=1;
12 | while ~feof(fidin)
13 |     tline=fgetl(fidin);
14 |     image_list_gt_data{ind}=str2num(tline(29:end));
15 |     ind=ind+1;
16 | end
17 | 
18 | 
19 | for i=1:length(image_list)
20 |     image_gt{i}.ids=image_list{i};
21 |     image_gt{i}.total=image_list_gt_data{i}(1);
22 |     ind=2;
23 |     for j=1:length(classes)
24 | 
25 |         %image_list_gt_data{i}(ind)
26 | 
27 |         image_gt{i}.classes{j}=image_list_gt_data{i}(ind);
28 |         if image_gt{i}.classes{j}>0
29 | 
30 |             %image_list_gt_data{i}(ind+1:ind+image_gt{i}.classes{j}*4)
31 | 
32 |             image_gt{i}.bb{j}=reshape(image_list_gt_data{i}(ind+1:ind+image_gt{i}.classes{j}*4),4,image_gt{i}.classes{j})';
33 |         else
34 |             image_gt{i}.bb{j}=[];
35 |         end
36 |         ind=ind+4*image_gt{i}.classes{j}+1;
37 |     end
38 | end
39 | res_path=strcat(path,'/','results/%s_det_',test_set,'_%s.txt');
40 | for i=1:length(classes)
41 |     class_num=0;
42 |     gt(length(image_list))=struct('BB',[]);
43 |     for ii=1:length(image_list)
44 |         if ~isempty(image_gt{ii}.bb{i})
45 |             gt(ii).BB=image_gt{ii}.bb{i};
46 |             class_num=class_num+image_gt{ii}.classes{i};
47 |         end
48 |     end
49 |     [ids,confidence,b1,b2,b3,b4]=textread(sprintf(res_path,comp_id,classes{i}),'%s %f %f %f %f %f');
50 |     BB=[b1 b2 b3 b4];
51 |     [sc,si]=sort(-confidence);
52 |     ids=ids(si);
53 |     BB=BB(si,:);
54 |     nd=length(confidence);
55 |     tp=zeros(nd,1);
56 |     fp=zeros(nd,1);
57 | 
58 |     for j=1:nd
59 |         ovmax=-inf;
60 |         bb_pred=BB(j,:);
61 |         id_index=strmatch(ids{j},image_list,'exact');
62 |         for k=1:size(gt(id_index).BB,1)
63 |             bb_target=gt(id_index).BB(k,:);
64 |             overlap=compute_overlap(bb_pred,bb_target);
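            % compute_overlap.m (in this wrapper directory) is expected to
            % return the usual PASCAL intersection-over-union,
            %   IoU(a,b) = inter(a,b) / (area(a) + area(b) - inter(a,b)),
            % matching devIoU in lib/nms/nms_kernel.cu; a detection counts as
            % a true positive only when its best IoU against the ground truth
            % reaches minoverlap (0.5, set at the top of this file).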
65 |             if overlap>ovmax
66 |                 ovmax=overlap;
67 |             end
68 |         end
69 |         path=strcat('../../../data/',ids{j}); % note: shadows the 'path' input argument (res_path was already built above, so this is safe here)
70 |         %img=imread(path);
71 |         if ~exist(strcat('../../../data/results/',classes{i},'/tp'))
72 |             mkdir(strcat('../../../data/results/',classes{i},'/tp'))
73 |         end
74 |         if ~exist(strcat('../../../data/results/',classes{i},'/fp'))
75 |             mkdir(strcat('../../../data/results/',classes{i},'/fp'))
76 |         end
77 |         write_path1=strcat('../../../data/results/',classes{i},'/tp','/',num2str(j),'.jpg');
78 |         write_path2=strcat('../../../data/results/',classes{i},'/fp','/',num2str(j),'.jpg');
79 | 
80 |         % img_size1 = size(img);
81 | 
82 |         if ovmax>=minoverlap
83 |             tp(j)=1;
84 |             %imwrite( img(fix(bb_pred(2))+1:fix(bb_pred(4)),fix(bb_pred(1))+1:fix(bb_pred(3)), :), write_path1,'jpg');
85 |         else
86 |             fp(j)=1;
87 |             % imwrite( img(fix(bb_pred(2))+1:fix(bb_pred(4)),fix(bb_pred(1))+1:fix(bb_pred(3)), :), write_path2,'jpg');
88 |         end
89 |     end
90 |     fp=cumsum(fp);
91 |     tp=cumsum(tp);
92 |     rec=tp/class_num;
93 |     prec=tp./(fp+tp);
94 |     ap=0;
95 |     for t=0:0.1:1
96 |         p=max(prec(rec>=t));
97 |         if isempty(p)
98 |             p=0;
99 |         end
100 |         ap=ap+p/11;
101 |     end
102 |     if 1
103 |         % plot precision/recall
104 |         plot(rec,prec,'-');
105 |         grid;
106 |         xlabel 'recall'
107 |         ylabel 'precision'
108 |         title(sprintf('class: %s, subset: %s, AP = %.3f',classes{i},test_set,ap));
109 |     end
110 |     ap_auc = xVOCap(rec, prec);
111 |     res(i).recall=rec;
112 |     res(i).prec=prec;
113 |     res(i).ap=ap;
114 |     res(i).ap_auc=ap_auc;
115 |     hold on;
116 | end
117 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
118 | fprintf('Results:\n');
119 | aps = [res(:).ap]';
120 | fprintf('APs:\n')
121 | fprintf('%.1f\n', aps * 100);
122 | fprintf('mAP:')
123 | fprintf('%.1f\n', mean(aps) * 100);
124 | fprintf('~~~~~~~~~~~~~~~~~~~~\n');
125 | 
126 | 
-------------------------------------------------------------------------------- /lib/rpn/generate.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | from fast_rcnn.config import cfg
9 | from utils.blob import im_list_to_blob
10 | from utils.timer import Timer
11 | import numpy as np
12 | import cv2
13 | import matplotlib.pyplot as plt  # used by _vis_proposals and the debug branch in imdb_proposals
14 | def _vis_proposals(im, dets, thresh=0.5):
15 |     """Draw detected bounding boxes."""
16 |     inds = np.where(dets[:, -1] >= thresh)[0]
17 |     if len(inds) == 0:
18 |         return
19 | 
20 |     class_name = 'obj'
21 |     im = im[:, :, (2, 1, 0)]
22 |     fig, ax = plt.subplots(figsize=(12, 12))
23 |     ax.imshow(im, aspect='equal')
24 |     for i in inds:
25 |         bbox = dets[i, :4]
26 |         score = dets[i, -1]
27 | 
28 |         ax.add_patch(
29 |             plt.Rectangle((bbox[0], bbox[1]),
30 |                           bbox[2] - bbox[0],
31 |                           bbox[3] - bbox[1], fill=False,
32 |                           edgecolor='red', linewidth=3.5)
33 |             )
34 |         ax.text(bbox[0], bbox[1] - 2,
35 |                 '{:s} {:.3f}'.format(class_name, score),
36 |                 bbox=dict(facecolor='blue', alpha=0.5),
37 |                 fontsize=14, color='white')
38 | 
39 |     ax.set_title(('{} detections with '
40 |                   'p({} | box) >= {:.1f}').format(class_name, class_name,
41 |                                                   thresh),
42 |                  fontsize=14)
43 |     plt.axis('off')
44 |     plt.tight_layout()
45 |     plt.draw()
46 | 
47 | def _get_image_blob(im):
48 |     """Converts an image into a network input.
49 | 50 | Arguments: 51 | im (ndarray): a color image in BGR order 52 | 53 | Returns: 54 | blob (ndarray): a data blob holding an image pyramid 55 | im_scale_factors (list): list of image scales (relative to im) used 56 | in the image pyramid 57 | """ 58 | im_orig = im.astype(np.float32, copy=True) 59 | im_orig -= cfg.PIXEL_MEANS 60 | 61 | im_shape = im_orig.shape 62 | im_size_min = np.min(im_shape[0:2]) 63 | im_size_max = np.max(im_shape[0:2]) 64 | 65 | processed_ims = [] 66 | 67 | assert len(cfg.TEST.SCALES) == 1 68 | target_size = cfg.TEST.SCALES[0] 69 | 70 | im_scale = float(target_size) / float(im_size_min) 71 | # Prevent the biggest axis from being more than MAX_SIZE 72 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 73 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 74 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 75 | interpolation=cv2.INTER_LINEAR) 76 | im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :] 77 | processed_ims.append(im) 78 | 79 | # Create a blob to hold the input images 80 | blob = im_list_to_blob(processed_ims) 81 | 82 | return blob, im_info 83 | 84 | def im_proposals(net, im): 85 | """Generate RPN proposals on a single image.""" 86 | blobs = {} 87 | blobs['data'], blobs['im_info'] = _get_image_blob(im) 88 | net.blobs['data'].reshape(*(blobs['data'].shape)) 89 | net.blobs['im_info'].reshape(*(blobs['im_info'].shape)) 90 | blobs_out = net.forward( 91 | data=blobs['data'].astype(np.float32, copy=False), 92 | im_info=blobs['im_info'].astype(np.float32, copy=False)) 93 | 94 | scale = blobs['im_info'][0, 2] 95 | boxes = blobs_out['rois'][:, 1:].copy() / scale 96 | scores = blobs_out['scores'].copy() 97 | return boxes, scores 98 | 99 | def imdb_proposals(net, imdb): 100 | """Generate RPN proposals on all images in an imdb.""" 101 | 102 | _t = Timer() 103 | imdb_boxes = [[] for _ in xrange(imdb.num_images)] 104 | for i in xrange(imdb.num_images): 105 | im = cv2.imread(imdb.image_path_at(i)) 106 | _t.tic() 107 | imdb_boxes[i], scores = im_proposals(net, im) 108 | _t.toc() 109 | print 'im_proposals: {:d}/{:d} {:.3f}s' \ 110 | .format(i + 1, imdb.num_images, _t.average_time) 111 | if 0: 112 | dets = np.hstack((imdb_boxes[i], scores)) 113 | # from IPython import embed; embed() 114 | _vis_proposals(im, dets[:3, :], thresh=0.9) 115 | plt.show() 116 | 117 | return imdb_boxes 118 | -------------------------------------------------------------------------------- /data/convert_kitti.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -------------------------------------------------------- 3 | # kitti tool 4 | # Copyright (c) 2016 Chao Chen 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Chao Chen 7 | # -------------------------------------------------------- 8 | 9 | import os 10 | 11 | def getLabelFilename(img_filename): 12 | vector_string = img_filename.split('.'); 13 | tmp = vector_string[0] + '.txt' 14 | vector_tmp = tmp.split('/') 15 | return vector_tmp[-1] 16 | 17 | 18 | 19 | 20 | 21 | def parse_line(img_line, label_file): 22 | count_p = 0 23 | count_c = 0 24 | count_cyc = 0 25 | count_van = 0 26 | count_truck = 0 27 | count_tram = 0 28 | count_misc = 0 29 | 30 | p_flag = 0 31 | c_flag = 0 32 | cyc_flag = 0 33 | van_flag = 0 34 | truck_flag = 0 35 | misc_flag = 0 36 | 37 | bbox_car = [] 38 | bbox_per = [] 39 | bbox_cyc = [] 40 | bbox_truck = [] 41 | bbox_van = [] 42 | bbox_tram = [] 43 | bbox_misc = [] 44 | 45 | 46 | file = 
open(label_file, 'r') 47 | for line in file.xreadlines(): 48 | line = line.strip('\n') 49 | vector_str = line.split(' ') 50 | #print vector_str 51 | if ("Car" == vector_str[0] ): 52 | count_c += 1 53 | for k in range(4, 8): 54 | bbox_car.append(vector_str[k]) 55 | continue 56 | if ('Pedestrian' == vector_str[0]): 57 | count_p += 1 58 | for k in range(4, 8): 59 | bbox_per.append(vector_str[k]) 60 | continue 61 | 62 | if('Cyclist' == vector_str[0]): 63 | count_cyc += 1 64 | for k in range(4, 8): 65 | bbox_cyc.append(vector_str[k]) 66 | continue 67 | 68 | if('Van' == vector_str[0]): 69 | count_van += 1 70 | for k in range(4, 8): 71 | bbox_van.append(vector_str[k]) 72 | continue 73 | 74 | if('Truck' == vector_str[0]): 75 | count_truck += 1 76 | for k in range(4, 8): 77 | bbox_truck.append(vector_str[k]) 78 | continue 79 | if('Misc' == vector_str[0]): 80 | count_misc += 1 81 | for k in range(4, 8): 82 | bbox_misc.append(vector_str[k]) 83 | continue 84 | 85 | if('Tram' == vector_str[0]): 86 | count_tram += 1 87 | for k in range(4, 8): 88 | bbox_tram.append(vector_str[k]) 89 | continue 90 | 91 | num = count_c + count_p + count_cyc + count_van + count_truck + count_misc + count_tram 92 | 93 | final_line = img_line + ' '+ str(num) 94 | #car 95 | final_line += ' '+ str(count_c) 96 | for i in bbox_car: 97 | final_line += ' ' + i 98 | #pre 99 | final_line += ' '+ str(count_p) 100 | for i in bbox_per: 101 | final_line += ' ' + i 102 | #cyc 103 | final_line += ' '+ str(count_cyc) 104 | for i in bbox_cyc: 105 | final_line += ' ' + i 106 | #truck 107 | final_line += ' '+ str(count_truck) 108 | for i in bbox_truck: 109 | final_line += ' ' + i 110 | #van 111 | final_line += ' '+ str(count_van) 112 | for i in bbox_van: 113 | final_line += ' ' + i 114 | #tram 115 | final_line += ' '+ str(count_tram) 116 | for i in bbox_tram: 117 | final_line += ' ' + i 118 | #misc 119 | final_line += ' '+ str(count_misc) 120 | for i in bbox_misc: 121 | final_line += ' ' + i 122 | return final_line + '\n' 123 | 124 | def convertKitti(label_file_list, savedFilename): 125 | if os.path.exists(label_file_list): 126 | file = open(label_file_list, 'r') 127 | final_lines_list = [] 128 | for line in file.xreadlines(): 129 | line = line.strip('\n') 130 | print line 131 | labelFile = getLabelFilename(line) 132 | print labelFile 133 | finalLine = parse_line(line, './training/label_2/'+labelFile) 134 | final_lines_list.append(finalLine) 135 | result_file = open('./' + savedFilename, 'w') 136 | result_file.writelines(final_lines_list) 137 | result_file.close() 138 | 139 | if '__main__' == __name__: 140 | convertKitti('Train_image_list.txt', 'TrainIndex.txt') 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /tools/compress_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Compress a Fast R-CNN network using truncated SVD.""" 11 | 12 | import _init_paths 13 | import caffe 14 | import argparse 15 | import numpy as np 16 | import os, sys 17 | 18 | def parse_args(): 19 | """Parse input arguments.""" 20 | parser = argparse.ArgumentParser(description='Compress a Fast R-CNN network') 21 | parser.add_argument('--def', dest='prototxt', 22 | 
help='prototxt file defining the uncompressed network', 23 | default=None, type=str) 24 | parser.add_argument('--def-svd', dest='prototxt_svd', 25 | help='prototxt file defining the SVD compressed network', 26 | default=None, type=str) 27 | parser.add_argument('--net', dest='caffemodel', 28 | help='model to compress', 29 | default=None, type=str) 30 | 31 | if len(sys.argv) == 1: 32 | parser.print_help() 33 | sys.exit(1) 34 | 35 | args = parser.parse_args() 36 | return args 37 | 38 | def compress_weights(W, l): 39 | """Compress the weight matrix W of an inner product (fully connected) layer 40 | using truncated SVD. 41 | 42 | Parameters: 43 | W: N x M weights matrix 44 | l: number of singular values to retain 45 | 46 | Returns: 47 | Ul, L: matrices such that W \approx Ul*L 48 | """ 49 | 50 | # numpy doesn't seem to have a fast truncated SVD algorithm... 51 | # this could be faster 52 | U, s, V = np.linalg.svd(W, full_matrices=False) 53 | 54 | Ul = U[:, :l] 55 | sl = s[:l] 56 | Vl = V[:l, :] 57 | 58 | L = np.dot(np.diag(sl), Vl) 59 | return Ul, L 60 | 61 | def main(): 62 | args = parse_args() 63 | 64 | # prototxt = 'models/VGG16/test.prototxt' 65 | # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel' 66 | net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST) 67 | 68 | # prototxt_svd = 'models/VGG16/svd/test_fc6_fc7.prototxt' 69 | # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel' 70 | net_svd = caffe.Net(args.prototxt_svd, args.caffemodel, caffe.TEST) 71 | 72 | print('Uncompressed network {} : {}'.format(args.prototxt, args.caffemodel)) 73 | print('Compressed network prototxt {}'.format(args.prototxt_svd)) 74 | 75 | out = os.path.splitext(os.path.basename(args.caffemodel))[0] + '_svd' 76 | out_dir = os.path.dirname(args.caffemodel) 77 | 78 | # Compress fc6 79 | if net_svd.params.has_key('fc6_L'): 80 | l_fc6 = net_svd.params['fc6_L'][0].data.shape[0] 81 | print(' fc6_L bottleneck size: {}'.format(l_fc6)) 82 | 83 | # uncompressed weights and biases 84 | W_fc6 = net.params['fc6'][0].data 85 | B_fc6 = net.params['fc6'][1].data 86 | 87 | print(' compressing fc6...') 88 | Ul_fc6, L_fc6 = compress_weights(W_fc6, l_fc6) 89 | 90 | assert(len(net_svd.params['fc6_L']) == 1) 91 | 92 | # install compressed matrix factors (and original biases) 93 | net_svd.params['fc6_L'][0].data[...] = L_fc6 94 | 95 | net_svd.params['fc6_U'][0].data[...] = Ul_fc6 96 | net_svd.params['fc6_U'][1].data[...] = B_fc6 97 | 98 | out += '_fc6_{}'.format(l_fc6) 99 | 100 | # Compress fc7 101 | if net_svd.params.has_key('fc7_L'): 102 | l_fc7 = net_svd.params['fc7_L'][0].data.shape[0] 103 | print ' fc7_L bottleneck size: {}'.format(l_fc7) 104 | 105 | W_fc7 = net.params['fc7'][0].data 106 | B_fc7 = net.params['fc7'][1].data 107 | 108 | print(' compressing fc7...') 109 | Ul_fc7, L_fc7 = compress_weights(W_fc7, l_fc7) 110 | 111 | assert(len(net_svd.params['fc7_L']) == 1) 112 | 113 | net_svd.params['fc7_L'][0].data[...] = L_fc7 114 | 115 | net_svd.params['fc7_U'][0].data[...] = Ul_fc7 116 | net_svd.params['fc7_U'][1].data[...] 
= B_fc7
117 | 
118 |         out += '_fc7_{}'.format(l_fc7)
119 | 
120 |     filename = '{}/{}.caffemodel'.format(out_dir, out)
121 |     net_svd.save(filename)
122 |     print 'Wrote svd model to: {:s}'.format(filename)
123 | 
124 | if __name__ == '__main__':
125 |     main()
126 | 
-------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: --------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 | 
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 | 
12 | #define CUDA_CHECK(condition) \
13 |   /* Code block avoids redefinition of cudaError_t error */ \
14 |   do { \
15 |     cudaError_t error = condition; \
16 |     if (error != cudaSuccess) { \
17 |       std::cout << cudaGetErrorString(error) << std::endl; \
18 |     } \
19 |   } while (0)
20 | 
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 | 
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 |   float interS = width * height;
29 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 |   return interS / (Sa + Sb - interS);
32 | }
33 | 
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 |                            const float *dev_boxes, unsigned long long *dev_mask) {
36 |   const int row_start = blockIdx.y;
37 |   const int col_start = blockIdx.x;
38 | 
39 |   // if (row_start > col_start) return;
40 | 
41 |   const int row_size =
42 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 |   const int col_size =
44 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 | 
46 |   __shared__ float block_boxes[threadsPerBlock * 5];
47 |   if (threadIdx.x < col_size) {
48 |     block_boxes[threadIdx.x * 5 + 0] =
49 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 |     block_boxes[threadIdx.x * 5 + 1] =
51 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 |     block_boxes[threadIdx.x * 5 + 2] =
53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 |     block_boxes[threadIdx.x * 5 + 3] =
55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 |     block_boxes[threadIdx.x * 5 + 4] =
57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 |   }
59 |   __syncthreads();
60 | 
61 |   if (threadIdx.x < row_size) {
62 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
64 |     int i = 0;
65 |     unsigned long long t = 0;
66 |     int start = 0;
67 |     if (row_start == col_start) {
68 |       start = threadIdx.x + 1;
69 |     }
70 |     for (i = start; i < col_size; i++) {
71 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 |         t |= 1ULL << i;
73 |       }
74 |     }
75 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 |   }
78 | }
79 | 
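// How the mask is used: boxes are processed in 64-box chunks (threadsPerBlock
// equals the bit width of unsigned long long), so grid block (x, y) compares
// row chunk y against column chunk x, and bit i of
// dev_mask[box * col_blocks + x] records that column box i overlaps this row
// box above the threshold. The host loop in _nms() below then walks boxes in
// score order, keeping a box only if no previously kept box has set its bit.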
80 | void _set_device(int device_id) {
81 |   int current_device;
82 |   CUDA_CHECK(cudaGetDevice(&current_device));
83 |   if (current_device == device_id) {
84 |     return;
85 |   }
86 |   // The call to cudaSetDevice must come before any calls to Get, which
87 |   // may perform initialization using the GPU.
88 |   CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 | 
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
93 |   _set_device(device_id);
94 | 
95 |   float* boxes_dev = NULL;
96 |   unsigned long long* mask_dev = NULL;
97 | 
98 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 | 
100 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
101 |                         boxes_num * boxes_dim * sizeof(float)));
102 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
103 |                         boxes_host,
104 |                         boxes_num * boxes_dim * sizeof(float),
105 |                         cudaMemcpyHostToDevice));
106 | 
107 |   CUDA_CHECK(cudaMalloc(&mask_dev,
108 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
109 | 
110 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 |               DIVUP(boxes_num, threadsPerBlock));
112 |   dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 |                                   nms_overlap_thresh,
115 |                                   boxes_dev,
116 |                                   mask_dev);
117 | 
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 |                         mask_dev,
121 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
122 |                         cudaMemcpyDeviceToHost));
123 | 
124 |   std::vector<unsigned long long> remv(col_blocks);
125 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 | 
127 |   int num_to_keep = 0;
128 |   for (int i = 0; i < boxes_num; i++) {
129 |     int nblock = i / threadsPerBlock;
130 |     int inblock = i % threadsPerBlock;
131 | 
132 |     if (!(remv[nblock] & (1ULL << inblock))) {
133 |       keep_out[num_to_keep++] = i;
134 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
135 |       for (int j = nblock; j < col_blocks; j++) {
136 |         remv[j] |= p[j];
137 |       }
138 |     }
139 |   }
140 |   *num_out = num_to_keep;
141 | 
142 |   CUDA_CHECK(cudaFree(boxes_dev));
143 |   CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 | 
-------------------------------------------------------------------------------- /lib/fast_rcnn/train.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | """Train a Fast R-CNN network."""
9 | 
10 | import caffe
11 | from fast_rcnn.config import cfg
12 | import roi_data_layer.roidb as rdl_roidb
13 | from utils.timer import Timer
14 | import numpy as np
15 | import os
16 | 
17 | from caffe.proto import caffe_pb2
18 | import google.protobuf as pb2
19 | 
20 | class SolverWrapper(object):
21 |     """A simple wrapper around Caffe's solver.
22 |     This wrapper gives us control over the snapshotting process, which we
23 |     use to unnormalize the learned bounding-box regression weights.
24 |     """
25 | 
26 |     def __init__(self, solver_prototxt, roidb, output_dir,
27 |                  pretrained_model=None):
28 |         """Initialize the SolverWrapper."""
29 |         self.output_dir = output_dir
30 | 
31 |         if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
32 |             cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
33 |             # RPN can only use precomputed normalization because there are no
34 |             # fixed statistics to compute a priori
35 |             assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED
36 | 
37 |         if cfg.TRAIN.BBOX_REG:
38 |             print 'Computing bounding-box regression targets...'
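            # add_bbox_regression_targets (lib/roi_data_layer/roidb.py) attaches
            # a 'bbox_targets' array to every roidb entry and returns the
            # flattened per-class target means and stds; snapshot() below uses
            # them to fold the normalization back into the bbox_pred weights.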
39 | self.bbox_means, self.bbox_stds = \ 40 | rdl_roidb.add_bbox_regression_targets(roidb) 41 | print 'done' 42 | 43 | self.solver = caffe.SGDSolver(solver_prototxt) 44 | if pretrained_model is not None: 45 | print ('Loading pretrained model ' 46 | 'weights from {:s}').format(pretrained_model) 47 | self.solver.net.copy_from(pretrained_model) 48 | 49 | self.solver_param = caffe_pb2.SolverParameter() 50 | with open(solver_prototxt, 'rt') as f: 51 | pb2.text_format.Merge(f.read(), self.solver_param) 52 | 53 | self.solver.net.layers[0].set_roidb(roidb) 54 | 55 | def snapshot(self): 56 | """Take a snapshot of the network after unnormalizing the learned 57 | bounding-box regression weights. This enables easy use at test-time. 58 | """ 59 | net = self.solver.net 60 | 61 | scale_bbox_params = (cfg.TRAIN.BBOX_REG and 62 | cfg.TRAIN.BBOX_NORMALIZE_TARGETS and 63 | net.params.has_key('bbox_pred')) 64 | 65 | if scale_bbox_params: 66 | # save original values 67 | orig_0 = net.params['bbox_pred'][0].data.copy() 68 | orig_1 = net.params['bbox_pred'][1].data.copy() 69 | 70 | # scale and shift with bbox reg unnormalization; then save snapshot 71 | net.params['bbox_pred'][0].data[...] = \ 72 | (net.params['bbox_pred'][0].data * 73 | self.bbox_stds[:, np.newaxis]) 74 | net.params['bbox_pred'][1].data[...] = \ 75 | (net.params['bbox_pred'][1].data * 76 | self.bbox_stds + self.bbox_means) 77 | 78 | if not os.path.exists(self.output_dir): 79 | os.makedirs(self.output_dir) 80 | 81 | infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX 82 | if cfg.TRAIN.SNAPSHOT_INFIX != '' else '') 83 | filename = (self.solver_param.snapshot_prefix + infix + 84 | '_iter_{:d}'.format(self.solver.iter) + '.caffemodel') 85 | filename = os.path.join(self.output_dir, filename) 86 | 87 | net.save(str(filename)) 88 | print 'Wrote snapshot to: {:s}'.format(filename) 89 | 90 | if scale_bbox_params: 91 | # restore net to original state 92 | net.params['bbox_pred'][0].data[...] = orig_0 93 | net.params['bbox_pred'][1].data[...] = orig_1 94 | return filename 95 | 96 | def train_model(self, max_iters): 97 | """Network training loop.""" 98 | last_snapshot_iter = -1 99 | timer = Timer() 100 | model_paths = [] 101 | while self.solver.iter < max_iters: 102 | # Make one SGD update 103 | timer.tic() 104 | self.solver.step(1) 105 | timer.toc() 106 | if self.solver.iter % (10 * self.solver_param.display) == 0: 107 | print 'speed: {:.3f}s / iter'.format(timer.average_time) 108 | 109 | if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0: 110 | last_snapshot_iter = self.solver.iter 111 | model_paths.append(self.snapshot()) 112 | 113 | if last_snapshot_iter != self.solver.iter: 114 | model_paths.append(self.snapshot()) 115 | return model_paths 116 | 117 | def get_training_roidb(imdb): 118 | """Returns a roidb (Region of Interest database) for use in training.""" 119 | if cfg.TRAIN.USE_FLIPPED: 120 | print 'Appending horizontally-flipped training examples...' 121 | imdb.append_flipped_images() 122 | print 'done' 123 | 124 | print 'Preparing training data...' 125 | rdl_roidb.prepare_roidb(imdb) 126 | print 'done' 127 | 128 | return imdb.roidb 129 | 130 | def train_net(solver_prototxt, roidb, output_dir, 131 | pretrained_model=None, max_iters=40000): 132 | """Train a Fast R-CNN network.""" 133 | sw = SolverWrapper(solver_prototxt, roidb, output_dir, 134 | pretrained_model=pretrained_model) 135 | 136 | print 'Solving...' 
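    # SolverWrapper.train_model (above) runs one SGD step at a time,
    # snapshotting every cfg.TRAIN.SNAPSHOT_ITERS iterations and once more at
    # the end, and returns the list of snapshot paths it wrote.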
137 | model_paths = sw.train_model(max_iters) 138 | print 'done solving' 139 | return model_paths 140 | -------------------------------------------------------------------------------- /tools/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Faster R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """ 11 | Demo script showing detections in sample images. 12 | 13 | See README.md for installation instructions before running. 14 | """ 15 | 16 | import _init_paths 17 | from fast_rcnn.config import cfg 18 | from fast_rcnn.test import im_detect 19 | from fast_rcnn.nms_wrapper import nms 20 | from utils.timer import Timer 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | import scipy.io as sio 24 | import caffe, os, sys, cv2 25 | import argparse 26 | 27 | import matplotlib 28 | from matplotlib.pyplot import plot,savefig 29 | 30 | CLASSES = ('__background__','car','person') 31 | 32 | NETS = {'vgg16': ('VGG16', 33 | 'VGG16_faster_rcnn_final.caffemodel'), 34 | 'zf': ('ZF', 35 | 'ZF_faster_rcnn_final.caffemodel'), 36 | 'vgg_m': ('VGG_CNN_M_1024', 37 | 'VGG_CNN_M_1024_faster_rcnn_final.caffemodel')} 38 | 39 | 40 | def vis_detections(im, class_name, dets, image_name, ax, thresh=0.5): 41 | """Draw detected bounding boxes.""" 42 | inds = np.where(dets[:, -1] >= thresh)[0] 43 | if len(inds) == 0: 44 | plt.axis('off') 45 | plt.tight_layout() 46 | plt.draw() 47 | return 48 | for i in inds: 49 | bbox = dets[i, :4] 50 | score = dets[i, -1] 51 | 52 | ax.add_patch( 53 | plt.Rectangle((bbox[0], bbox[1]), 54 | bbox[2] - bbox[0], 55 | bbox[3] - bbox[1], fill=False, 56 | edgecolor='red', linewidth=3.5) 57 | ) 58 | ax.text(bbox[0], bbox[1] - 2, 59 | '{:s} {:.3f}'.format(class_name, score), 60 | bbox=dict(facecolor='blue', alpha=0.5), 61 | fontsize=14, color='white') 62 | 63 | ax.set_title(('{} detections with ' 64 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 65 | thresh), 66 | fontsize=14) 67 | plt.axis('off') 68 | plt.tight_layout() 69 | plt.draw() 70 | 71 | 72 | def demo(net, image_name): 73 | """Detect object classes in an image using pre-computed object proposals.""" 74 | 75 | # Load the demo image 76 | im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name) 77 | im = cv2.imread(im_file) 78 | 79 | # Detect all object classes and regress object bounds 80 | timer = Timer() 81 | timer.tic() 82 | scores, boxes = im_detect(net, im) 83 | timer.toc() 84 | print ('Detection took {:.3f}s for ' 85 | '{:d} object proposals').format(timer.total_time, boxes.shape[0]) 86 | 87 | # Visualize detections for each class 88 | CONF_THRESH = 0.7 89 | NMS_THRESH = 0.3 90 | 91 | im = im[:, :, (2, 1, 0)] 92 | fig, ax = plt.subplots(figsize=(12, 12)) 93 | ax.imshow(im, aspect='equal') 94 | for cls_ind, cls in enumerate(CLASSES[1:]): 95 | cls_ind += 1 # because we skipped background 96 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 97 | cls_scores = scores[:, cls_ind] 98 | dets = np.hstack((cls_boxes, 99 | cls_scores[:, np.newaxis])).astype(np.float32) 100 | keep = nms(dets, NMS_THRESH) 101 | dets = dets[keep, :] 102 | vis_detections(im, cls, dets, image_name, ax,thresh=CONF_THRESH) 103 | 104 | def parse_args(): 105 | """Parse input arguments.""" 106 | parser = argparse.ArgumentParser(description='Faster 
R-CNN demo') 107 | parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', 108 | default=0, type=int) 109 | parser.add_argument('--cpu', dest='cpu_mode', 110 | help='Use CPU mode (overrides --gpu)', 111 | action='store_true') 112 | parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]', 113 | choices=NETS.keys(), default='vgg_m') 114 | 115 | args = parser.parse_args() 116 | 117 | return args 118 | 119 | if __name__ == '__main__': 120 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 121 | 122 | args = parse_args() 123 | 124 | prototxt = os.path.join(cfg.ROOT_DIR, 'models', NETS[args.demo_net][0], 125 | 'faster_rcnn_alt_opt', 'faster_rcnn_test.pt') 126 | caffemodel = os.path.join(cfg.ROOT_DIR, 'data', 'faster_rcnn_models', 127 | NETS[args.demo_net][1]) 128 | 129 | if not os.path.isfile(caffemodel): 130 | raise IOError(('{:s} not found.\nDid you run ./data/script/' 131 | 'fetch_faster_rcnn_models.sh?').format(caffemodel)) 132 | 133 | if args.cpu_mode: 134 | caffe.set_mode_cpu() 135 | else: 136 | caffe.set_mode_gpu() 137 | caffe.set_device(args.gpu_id) 138 | cfg.GPU_ID = args.gpu_id 139 | net = caffe.Net(prototxt, caffemodel, caffe.TEST) 140 | 141 | print '\n\nLoaded network {:s}'.format(caffemodel) 142 | 143 | # Warmup on a dummy image 144 | im = 128 * np.ones((300, 500, 3), dtype=np.uint8) 145 | #im = 128 * np.ones((1280, 960, 3), dtype=np.uint8) 146 | for i in xrange(2): 147 | _, _= im_detect(net, im) 148 | 149 | 150 | for ind in range(0,31): 151 | str="%06d"%(ind) 152 | im_name=str+".png" 153 | print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' 154 | print 'Demo for data/demo/{}'.format(im_name) 155 | demo(net, im_name) 156 | plt.show() 157 | -------------------------------------------------------------------------------- /tools/demo_show.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Faster R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """ 11 | Demo script showing detections in sample images. 12 | 13 | See README.md for installation instructions before running. 
14 | """ 15 | 16 | import _init_paths 17 | from fast_rcnn.config import cfg 18 | from fast_rcnn.test import im_detect 19 | from fast_rcnn.nms_wrapper import nms 20 | from utils.timer import Timer 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | import scipy.io as sio 24 | import caffe, os, sys, cv2 25 | import argparse 26 | 27 | CLASSES = ('__background__','car','person','bike') 28 | 29 | NETS = {'vgg16': ('VGG16', 30 | 'VGG16_faster_rcnn_final.caffemodel'), 31 | 'zf': ('ZF', 32 | 'ZF_faster_rcnn_final.caffemodel'), 33 | 'vgg_m': ('VGG_CNN_M_1024', 34 | 'VGG_CNN_M_1024_faster_rcnn_final.caffemodel')} 35 | 36 | 37 | def demo(net,image_list): 38 | """Detect object classes in an image using pre-computed object proposals.""" 39 | 40 | # Load the demo image 41 | im_file = os.path.join(cfg.ROOT_DIR, 'data', image_list[0]) 42 | im = cv2.imread(im_file) 43 | 44 | # Detect all object classes and regress object bounds 45 | timer = Timer() 46 | timer.tic() 47 | scores, boxes = im_detect(net, im) 48 | timer.toc() 49 | print ('Detection took {:.3f}s for ' 50 | '{:d} object proposals').format(timer.total_time, boxes.shape[0]) 51 | 52 | # Visualize detections for each class 53 | 54 | ind=1 55 | color_list=[(255,0,0),(0,255,0),(0,0,255)] 56 | color_cls=[(0,255,255),(255,0,255),(255,255,0)] 57 | for j in range(1, len(CLASSES)): 58 | num_objs = int(image_list[ind+1]) 59 | for i in xrange(num_objs): 60 | x1 = int(float(image_list[ind+2 + i * 4])) 61 | y1 = int(float(image_list[ind+3 + i * 4])) 62 | x2 = int(float(image_list[ind+4 + i * 4])) 63 | y2 = int(float(image_list[ind+5 + i * 4])) 64 | rect_start = (x1,y1) 65 | rect_end = (x2,y2) 66 | #cv2.rectangle(im, rect_start, rect_end, color_list[j-1], 2) 67 | ind+=4*num_objs+1 68 | 69 | thresh= 0.5 70 | NMS_THRESH = 0.3 71 | path = os.path.join(cfg.ROOT_DIR, 'data', 'results','show',image_list[0][17:]) 72 | for cls_ind, cls in enumerate(CLASSES[1:]): 73 | cls_ind += 1 # because we skipped background 74 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 75 | cls_scores = scores[:, cls_ind] 76 | dets = np.hstack((cls_boxes, 77 | cls_scores[:, np.newaxis])).astype(np.float32) 78 | keep = nms(dets, NMS_THRESH) 79 | dets = dets[keep, :] 80 | inds = np.where(dets[:, -1] >= thresh)[0] 81 | 82 | index=1 83 | if len(inds) == 0 and index==len(CLASSES[1:]): 84 | cv2.imwrite(path,im) 85 | return 86 | elif len(inds) == 0 and index class should be zero (background) 40 | zero_inds = np.where(max_overlaps == 0)[0] 41 | assert all(max_classes[zero_inds] == 0) 42 | # max overlap > 0 => class should not be zero (must be a fg class) 43 | nonzero_inds = np.where(max_overlaps > 0)[0] 44 | assert all(max_classes[nonzero_inds] != 0) 45 | 46 | def add_bbox_regression_targets(roidb): 47 | """Add information needed to train bounding-box regressors.""" 48 | assert len(roidb) > 0 49 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 
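    # Layout note: each roidb[i]['bbox_targets'] computed below is an
    # R x 5 array; column 0 holds the assigned class label (0 where the
    # RoI's max overlap falls below cfg.TRAIN.BBOX_THRESH, i.e. no target)
    # and columns 1:5 hold the (dx, dy, dw, dh) deltas from bbox_transform.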
50 | 51 | num_images = len(roidb) 52 | # Infer number of classes from the number of columns in gt_overlaps 53 | num_classes = roidb[0]['gt_overlaps'].shape[1] 54 | for im_i in xrange(num_images): 55 | rois = roidb[im_i]['boxes'] 56 | max_overlaps = roidb[im_i]['max_overlaps'] 57 | max_classes = roidb[im_i]['max_classes'] 58 | roidb[im_i]['bbox_targets'] = \ 59 | _compute_targets(rois, max_overlaps, max_classes) 60 | 61 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 62 | # Use fixed / precomputed "means" and "stds" instead of empirical values 63 | means = np.tile( 64 | np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)) 65 | stds = np.tile( 66 | np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)) 67 | else: 68 | # Compute values needed for means and stds 69 | # var(x) = E(x^2) - E(x)^2 70 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 71 | sums = np.zeros((num_classes, 4)) 72 | squared_sums = np.zeros((num_classes, 4)) 73 | for im_i in xrange(num_images): 74 | targets = roidb[im_i]['bbox_targets'] 75 | for cls in xrange(1, num_classes): 76 | cls_inds = np.where(targets[:, 0] == cls)[0] 77 | if cls_inds.size > 0: 78 | class_counts[cls] += cls_inds.size 79 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) 80 | squared_sums[cls, :] += \ 81 | (targets[cls_inds, 1:] ** 2).sum(axis=0) 82 | 83 | means = sums / class_counts 84 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 85 | 86 | print 'bbox target means:' 87 | print means 88 | print means[1:, :].mean(axis=0) # ignore bg class 89 | print 'bbox target stdevs:' 90 | print stds 91 | print stds[1:, :].mean(axis=0) # ignore bg class 92 | 93 | # Normalize targets 94 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: 95 | print "Normalizing targets" 96 | for im_i in xrange(num_images): 97 | targets = roidb[im_i]['bbox_targets'] 98 | for cls in xrange(1, num_classes): 99 | cls_inds = np.where(targets[:, 0] == cls)[0] 100 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] 101 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] 102 | else: 103 | print "NOT normalizing targets" 104 | 105 | # These values will be needed for making predictions 106 | # (the predicts will need to be unnormalized and uncentered) 107 | return means.ravel(), stds.ravel() 108 | 109 | def _compute_targets(rois, overlaps, labels): 110 | """Compute bounding-box regression targets for an image.""" 111 | # Indices of ground-truth ROIs 112 | gt_inds = np.where(overlaps == 1)[0] 113 | # Indices of examples for which we try to make predictions 114 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] 115 | 116 | # Get IoU overlap between each ex ROI and gt ROI 117 | ex_gt_overlaps = bbox_overlaps( 118 | np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), 119 | np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) 120 | 121 | # Find which gt ROI each ex ROI has max overlap with: 122 | # this will be the ex ROI's gt target 123 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 124 | gt_rois = rois[gt_inds[gt_assignment], :] 125 | ex_rois = rois[ex_inds, :] 126 | 127 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 128 | targets[ex_inds, 0] = labels[ex_inds] 129 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 130 | return targets 131 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 
Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import os
9 | from os.path import join as pjoin
10 | from setuptools import setup
11 | from distutils.extension import Extension
12 | from Cython.Distutils import build_ext
13 | import subprocess
14 | import numpy as np
15 |
16 | def find_in_path(name, path):
17 | "Find a file in a search path"
18 | # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
19 | for dir in path.split(os.pathsep):
20 | binpath = pjoin(dir, name)
21 | if os.path.exists(binpath):
22 | return os.path.abspath(binpath)
23 | return None
24 |
25 |
26 | def locate_cuda():
27 | """Locate the CUDA environment on the system.
28 |
29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
30 | and values giving the absolute path to each directory.
31 |
32 | Starts by looking for the CUDAHOME env variable. If not found, everything
33 | is based on finding 'nvcc' in the PATH.
34 | """
35 |
36 | # first check if the CUDAHOME env variable is in use
37 | if 'CUDAHOME' in os.environ:
38 | home = os.environ['CUDAHOME']
39 | nvcc = pjoin(home, 'bin', 'nvcc')
40 | else:
41 | # otherwise, search the PATH for NVCC
42 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
43 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
44 | if nvcc is None:
45 | raise EnvironmentError('The nvcc binary could not be '
46 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
47 | home = os.path.dirname(os.path.dirname(nvcc))
48 |
49 | cudaconfig = {'home':home, 'nvcc':nvcc,
50 | 'include': pjoin(home, 'include'),
51 | 'lib64': pjoin(home, 'lib64')}
52 | for k, v in cudaconfig.iteritems():
53 | if not os.path.exists(v):
54 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
55 |
56 | return cudaconfig
57 | CUDA = locate_cuda()
58 |
59 |
60 | # Obtain the numpy include directory. This logic works across numpy versions.
61 | try:
62 | numpy_include = np.get_include()
63 | except AttributeError:
64 | numpy_include = np.get_numpy_include()
65 |
66 | def customize_compiler_for_nvcc(self):
67 | """inject deep into distutils to customize how the dispatch
68 | to gcc/nvcc works.
69 |
70 | If you subclass UnixCCompiler, it's not trivial to get your subclass
71 | injected in, and still have the right customizations (i.e.
72 | distutils.sysconfig.customize_compiler) run on it. So instead of going
73 | the OO route, I have this. Note, it's kind of like a weird functional
74 | subclassing going on."""
75 |
76 | # tell the compiler it can process .cu
77 | self.src_extensions.append('.cu')
78 |
79 | # save references to the default compiler_so and _compile methods
80 | default_compiler_so = self.compiler_so
81 | super = self._compile
82 |
83 | # now redefine the _compile method. This gets executed for each
84 | # object but distutils doesn't have the ability to change compilers
85 | # based on source extension: we add it.
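    # Note: extra_postargs arrives here as the dict given to each
    # Extension's extra_compile_args (e.g. {'gcc': [...], 'nvcc': [...]}),
    # so _compile below can pick the flag list that matches the source type.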
86 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 87 | if os.path.splitext(src)[1] == '.cu': 88 | # use the cuda for .cu files 89 | self.set_executable('compiler_so', CUDA['nvcc']) 90 | # use only a subset of the extra_postargs, which are 1-1 translated 91 | # from the extra_compile_args in the Extension class 92 | postargs = extra_postargs['nvcc'] 93 | else: 94 | postargs = extra_postargs['gcc'] 95 | 96 | super(obj, src, ext, cc_args, postargs, pp_opts) 97 | # reset the default compiler_so, which we might have changed for cuda 98 | self.compiler_so = default_compiler_so 99 | 100 | # inject our redefined _compile method into the class 101 | self._compile = _compile 102 | 103 | 104 | # run the customize_compiler 105 | class custom_build_ext(build_ext): 106 | def build_extensions(self): 107 | customize_compiler_for_nvcc(self.compiler) 108 | build_ext.build_extensions(self) 109 | 110 | 111 | ext_modules = [ 112 | Extension( 113 | "utils.cython_bbox", 114 | ["utils/bbox.pyx"], 115 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 116 | include_dirs = [numpy_include] 117 | ), 118 | Extension( 119 | "nms.cpu_nms", 120 | ["nms/cpu_nms.pyx"], 121 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 122 | include_dirs = [numpy_include] 123 | ), 124 | Extension('nms.gpu_nms', 125 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 126 | library_dirs=[CUDA['lib64']], 127 | libraries=['cudart'], 128 | language='c++', 129 | runtime_library_dirs=[CUDA['lib64']], 130 | # this syntax is specific to this build system 131 | # we're only going to use certain compiler args with nvcc and not with gcc 132 | # the implementation of this trick is in customize_compiler() below 133 | extra_compile_args={'gcc': ["-Wno-unused-function"], 134 | 'nvcc': ['-arch=sm_35', 135 | '--ptxas-options=-v', 136 | '-c', 137 | '--compiler-options', 138 | "'-fPIC'"]}, 139 | include_dirs = [numpy_include, CUDA['include']] 140 | ) 141 | ] 142 | 143 | setup( 144 | name='fast_rcnn', 145 | ext_modules=ext_modules, 146 | # inject our custom trigger 147 | cmdclass={'build_ext': custom_build_ext}, 148 | ) 149 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | from fast_rcnn.config import cfg 12 | from fast_rcnn.bbox_transform import bbox_transform 13 | from utils.cython_bbox import bbox_overlaps 14 | import PIL 15 | 16 | def prepare_roidb(imdb): 17 | """Enrich the imdb's roidb by adding some derived quantities that 18 | are useful for training. This function precomputes the maximum 19 | overlap, taken over ground-truth boxes, between each ROI and 20 | each ground-truth box. The class with maximum overlap is also 21 | recorded. 
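
    Adds 'image', 'width', 'height', 'max_classes', and 'max_overlaps'
    entries to each roidb item.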
22 | """ 23 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 24 | for i in xrange(imdb.num_images)] 25 | roidb = imdb.roidb 26 | for i in xrange(len(imdb.image_index)): 27 | roidb[i]['image'] = imdb.image_path_at(i) 28 | roidb[i]['width'] = sizes[i][0] 29 | roidb[i]['height'] = sizes[i][1] 30 | # need gt_overlaps as a dense array for argmax 31 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 32 | # max overlap with gt over classes (columns) 33 | max_overlaps = gt_overlaps.max(axis=1) 34 | # gt class that had the max overlap 35 | max_classes = gt_overlaps.argmax(axis=1) 36 | roidb[i]['max_classes'] = max_classes 37 | roidb[i]['max_overlaps'] = max_overlaps 38 | # sanity checks 39 | # max overlap of 0 => class should be zero (background) 40 | zero_inds = np.where(max_overlaps == 0)[0] 41 | assert all(max_classes[zero_inds] == 0) 42 | # max overlap > 0 => class should not be zero (must be a fg class) 43 | nonzero_inds = np.where(max_overlaps > 0)[0] 44 | assert all(max_classes[nonzero_inds] != 0) 45 | 46 | def add_bbox_regression_targets(roidb): 47 | """Add information needed to train bounding-box regressors.""" 48 | assert len(roidb) > 0 49 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 50 | 51 | num_images = len(roidb) 52 | # Infer number of classes from the number of columns in gt_overlaps 53 | num_classes = roidb[0]['gt_overlaps'].shape[1] 54 | for im_i in xrange(num_images): 55 | rois = roidb[im_i]['boxes'] 56 | max_overlaps = roidb[im_i]['max_overlaps'] 57 | max_classes = roidb[im_i]['max_classes'] 58 | roidb[im_i]['bbox_targets'] = \ 59 | _compute_targets(rois, max_overlaps, max_classes) 60 | 61 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 62 | # Use fixed / precomputed "means" and "stds" instead of empirical values 63 | means = np.tile( 64 | np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)) 65 | stds = np.tile( 66 | np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)) 67 | else: 68 | # Compute values needed for means and stds 69 | # var(x) = E(x^2) - E(x)^2 70 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 71 | sums = np.zeros((num_classes, 4)) 72 | squared_sums = np.zeros((num_classes, 4)) 73 | for im_i in xrange(num_images): 74 | targets = roidb[im_i]['bbox_targets'] 75 | for cls in xrange(1, num_classes): 76 | cls_inds = np.where(targets[:, 0] == cls)[0] 77 | if cls_inds.size > 0: 78 | class_counts[cls] += cls_inds.size 79 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) 80 | squared_sums[cls, :] += \ 81 | (targets[cls_inds, 1:] ** 2).sum(axis=0) 82 | 83 | means = sums / class_counts 84 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 85 | 86 | print 'bbox target means:' 87 | print means 88 | print means[1:, :].mean(axis=0) # ignore bg class 89 | print 'bbox target stdevs:' 90 | print stds 91 | print stds[1:, :].mean(axis=0) # ignore bg class 92 | 93 | # Normalize targets 94 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: 95 | print "Normalizing targets" 96 | for im_i in xrange(num_images): 97 | targets = roidb[im_i]['bbox_targets'] 98 | for cls in xrange(1, num_classes): 99 | cls_inds = np.where(targets[:, 0] == cls)[0] 100 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] 101 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] 102 | else: 103 | print "NOT normalizing targets" 104 | 105 | # These values will be needed for making predictions 106 | # (the predicts will need to be unnormalized and uncentered) 107 | return means.ravel(), stds.ravel() 108 | 109 | def _compute_targets(rois, overlaps, 
labels): 110 | """Compute bounding-box regression targets for an image.""" 111 | # Indices of ground-truth ROIs 112 | gt_inds = np.where(overlaps == 1)[0] 113 | if len(gt_inds) == 0: 114 | # Bail if the image has no ground-truth ROIs 115 | return np.zeros((rois.shape[0], 5), dtype=np.float32) 116 | # Indices of examples for which we try to make predictions 117 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] 118 | 119 | # Get IoU overlap between each ex ROI and gt ROI 120 | ex_gt_overlaps = bbox_overlaps( 121 | np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), 122 | np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) 123 | 124 | # Find which gt ROI each ex ROI has max overlap with: 125 | # this will be the ex ROI's gt target 126 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 127 | gt_rois = rois[gt_inds[gt_assignment], :] 128 | ex_rois = rois[ex_inds, :] 129 | 130 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 131 | targets[ex_inds, 0] = labels[ex_inds] 132 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 133 | return targets 134 | -------------------------------------------------------------------------------- /tools/demo_video_for_video.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Faster R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """ 11 | Demo script showing detections in sample images. 12 | 13 | See README.md for installation instructions before running. 14 | """ 15 | 16 | import _init_paths 17 | from fast_rcnn.config import cfg 18 | from fast_rcnn.test import im_detect 19 | from fast_rcnn.nms_wrapper import nms 20 | from utils.timer import Timer 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | import scipy.io as sio 24 | import caffe, os, sys, cv2 25 | import argparse 26 | 27 | 28 | import matplotlib 29 | from matplotlib.pyplot import plot,savefig 30 | import cv2.cv as cv 31 | 32 | CLASSES = ('__background__','car','person') 33 | 34 | NETS = {'vgg16': ('VGG16', 35 | 'VGG16_faster_rcnn_final.caffemodel'), 36 | 'zf': ('ZF', 37 | 'ZF_faster_rcnn_final.caffemodel'), 38 | 'vgg_m': ('VGG_CNN_M_1024', 39 | 'VGG_CNN_M_1024_faster_rcnn_final.caffemodel')} 40 | 41 | 42 | def demo(net, im): 43 | """Detect object classes in an image using pre-computed object proposals.""" 44 | # Load the demo image 45 | 46 | # Detect all object classes and regress object bounds 47 | timer = Timer() 48 | timer.tic() 49 | scores, boxes = im_detect(net, im) 50 | timer.toc() 51 | print ('Detection took {:.3f}s for ' 52 | '{:d} object proposals').format(timer.total_time, boxes.shape[0]) 53 | 54 | # Visualize detections for each class 55 | CONF_THRESH = 0.6 56 | NMS_THRESH = 0.3 57 | index=1 58 | for cls_ind, cls in enumerate(CLASSES[1:]): 59 | cls_ind += 1 # because we skipped background 60 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 61 | cls_scores = scores[:, cls_ind] 62 | dets = np.hstack((cls_boxes, 63 | cls_scores[:, np.newaxis])).astype(np.float32) 64 | keep = nms(dets, NMS_THRESH) 65 | dets = dets[keep, :] 66 | 67 | #im = im[:, :, (2, 1, 0)] 68 | 69 | inds = np.where(dets[:, -1] >= CONF_THRESH)[0] 70 | if len(inds) == 0 and index==len(CLASSES[1:]): 71 | #cv2.imwrite(path,im) 72 | video.write(im) 73 | return 74 | elif len(inds) == 0 and index= 
thresh)[0] 47 | if len(inds) == 0: 48 | plt.axis('off') 49 | plt.tight_layout() 50 | plt.draw() 51 | path=dir_name+'/'+image_name 52 | savefig(path) 53 | plt.close() 54 | return 55 | ind_str=(int(image_name[0:6])-1)/600 56 | ind_frame=(int(image_name[0:6])-1)%600 57 | str_txt='***** 00000'+str(ind_str)+'.STR - '+str(ind_frame)+' *****' 58 | f.write(str_txt+'\n') 59 | f.write(str(len(inds))) 60 | for i in inds: 61 | bbox = dets[i, :4] 62 | f.write(' '+str(int(bbox[0]))+' '+str(int(bbox[1]))+' '+str(int(bbox[2]))+' '+str(int(bbox[3]))+' '+str(6)+' '+str(20000)) 63 | score = dets[i, -1] 64 | ax.add_patch( 65 | plt.Rectangle((bbox[0], bbox[1]), 66 | bbox[2] - bbox[0], 67 | bbox[3] - bbox[1], fill=False, 68 | edgecolor='red', linewidth=3.5) 69 | ) 70 | ax.text(bbox[0], bbox[1] - 2, 71 | '{:s} {:.3f}'.format(class_name, score), 72 | bbox=dict(facecolor='blue', alpha=0.5), 73 | fontsize=14, color='white') 74 | 75 | ax.set_title(('{} detections with ' 76 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 77 | thresh), 78 | fontsize=14) 79 | f.write('\n') 80 | f.write('\n') 81 | f.write('\n') 82 | plt.axis('off') 83 | plt.tight_layout() 84 | plt.draw() 85 | path=dir_name+'/'+image_name 86 | savefig(path) 87 | plt.close() 88 | 89 | def demo(net, dir_name, image_name): 90 | """Detect object classes in an image using pre-computed object proposals.""" 91 | 92 | # Load the demo image 93 | im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo','test',dir_name, image_name) 94 | im = cv2.imread(im_file) 95 | 96 | # Detect all object classes and regress object bounds 97 | timer = Timer() 98 | timer.tic() 99 | scores, boxes = im_detect(net, im) 100 | timer.toc() 101 | print ('Detection took {:.3f}s for ' 102 | '{:d} object proposals').format(timer.total_time, boxes.shape[0]) 103 | 104 | # Visualize detections for each class 105 | CONF_THRESH = 0.8 106 | NMS_THRESH = 0.3 107 | for cls_ind, cls in enumerate(CLASSES[1:]): 108 | cls_ind += 1 # because we skipped background 109 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 110 | cls_scores = scores[:, cls_ind] 111 | dets = np.hstack((cls_boxes, 112 | cls_scores[:, np.newaxis])).astype(np.float32) 113 | keep = nms(dets, NMS_THRESH) 114 | dets = dets[keep, :] 115 | vis_detections(im, cls, dets, dir_name, image_name, thresh=CONF_THRESH) 116 | 117 | def parse_args(): 118 | """Parse input arguments.""" 119 | parser = argparse.ArgumentParser(description='Faster R-CNN demo') 120 | parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', 121 | default=0, type=int) 122 | parser.add_argument('--cpu', dest='cpu_mode', 123 | help='Use CPU mode (overrides --gpu)', 124 | action='store_true') 125 | parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]', 126 | choices=NETS.keys(), default='vgg_m') 127 | 128 | args = parser.parse_args() 129 | 130 | return args 131 | 132 | if __name__ == '__main__': 133 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 134 | 135 | args = parse_args() 136 | 137 | prototxt = os.path.join(cfg.ROOT_DIR, 'models', NETS[args.demo_net][0], 138 | 'faster_rcnn_alt_opt', 'faster_rcnn_test.pt') 139 | caffemodel = os.path.join(cfg.ROOT_DIR, 'data', 'faster_rcnn_models', 140 | NETS[args.demo_net][1]) 141 | 142 | if not os.path.isfile(caffemodel): 143 | raise IOError(('{:s} not found.\nDid you run ./data/script/' 144 | 'fetch_faster_rcnn_models.sh?').format(caffemodel)) 145 | 146 | if args.cpu_mode: 147 | caffe.set_mode_cpu() 148 | else: 149 | caffe.set_mode_gpu() 150 | caffe.set_device(args.gpu_id) 151 | 
cfg.GPU_ID = args.gpu_id 152 | net = caffe.Net(prototxt, caffemodel, caffe.TEST) 153 | 154 | print '\n\nLoaded network {:s}'.format(caffemodel) 155 | 156 | # Warmup on a dummy image 157 | #im = 128 * np.ones((300, 500, 3), dtype=np.uint8) 158 | im = 128 * np.ones((1280, 960, 3), dtype=np.uint8) 159 | for i in xrange(2): 160 | _, _= im_detect(net, im) 161 | 162 | #dictionary=['20121117_142852', '20121117_150315', '20121117_153526', '20121128_092059', '20121130_095032', '20130110_135753', '20130110_140950', '20130110_142518', '20130123_094123', '20130123_112228', '20130123_132342', '20130123_143631', '20130129_133540', '20130311_112935', '20130311_115905', '20130314_102842', '20130314_144414', '20130319_121354'] 163 | 164 | dictionary=['20130618_110313_gray'] 165 | 166 | for dir_name in dictionary: 167 | if os.path.isdir(dir_name): 168 | pass 169 | else: 170 | os.mkdir(dir_name) 171 | txt_path=dir_name+'/'+'fasterRCNNTrackingResult.txt' 172 | f=open(txt_path,'w') 173 | 174 | 175 | dir_list= os.path.join(cfg.ROOT_DIR, 'data', 'demo', 'test', dir_name) 176 | dic=os.listdir(dir_list) 177 | dic.sort() 178 | for im_name in dic: 179 | print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' 180 | print 'Demo for data/demo/{}'.format(im_name) 181 | demo(net, dir_name, im_name) 182 | -------------------------------------------------------------------------------- /tools/demo_for_video.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Faster R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """ 11 | Demo script showing detections in sample images. 12 | 13 | See README.md for installation instructions before running. 
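
Unlike demo.py, this variant reads frames from directories under
data/demo/test/ and writes annotated frames through `video`, assumed to
be a cv2.VideoWriter created in the __main__ block.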
14 | """ 15 | 16 | import _init_paths 17 | from fast_rcnn.config import cfg 18 | from fast_rcnn.test import im_detect 19 | from fast_rcnn.nms_wrapper import nms 20 | from utils.timer import Timer 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | import scipy.io as sio 24 | import caffe, os, sys, cv2 25 | import argparse 26 | 27 | 28 | import matplotlib 29 | from matplotlib.pyplot import plot,savefig 30 | import cv2.cv as cv 31 | 32 | CLASSES = ('__background__','car','person') 33 | 34 | NETS = {'vgg16': ('VGG16', 35 | 'VGG16_faster_rcnn_final.caffemodel'), 36 | 'zf': ('ZF', 37 | 'ZF_faster_rcnn_final.caffemodel'), 38 | 'vgg_m': ('VGG_CNN_M_1024', 39 | 'VGG_CNN_M_1024_faster_rcnn_final.caffemodel')} 40 | 41 | 42 | def demo(net, dir_name, image_name): 43 | """Detect object classes in an image using pre-computed object proposals.""" 44 | # Load the demo image 45 | im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo','test',dir_name, image_name) 46 | im = cv2.imread(im_file) 47 | 48 | # Detect all object classes and regress object bounds 49 | timer = Timer() 50 | timer.tic() 51 | scores, boxes = im_detect(net, im) 52 | timer.toc() 53 | print ('Detection took {:.3f}s for ' 54 | '{:d} object proposals').format(timer.total_time, boxes.shape[0]) 55 | 56 | # Visualize detections for each class 57 | CONF_THRESH = 0.7 58 | NMS_THRESH = 0.2 59 | index=1 60 | for cls_ind, cls in enumerate(CLASSES[1:]): 61 | cls_ind += 1 # because we skipped background 62 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 63 | cls_scores = scores[:, cls_ind] 64 | dets = np.hstack((cls_boxes, 65 | cls_scores[:, np.newaxis])).astype(np.float32) 66 | keep = nms(dets, NMS_THRESH) 67 | dets = dets[keep, :] 68 | 69 | #im = im[:, :, (2, 1, 0)] 70 | 71 | inds = np.where(dets[:, -1] >= CONF_THRESH)[0] 72 | if len(inds) == 0 and index==len(CLASSES[1:]): 73 | #cv2.imwrite(path,im) 74 | video.write(im) 75 | return 76 | elif len(inds) == 0 and index 1: 44 | top[1].reshape(1, 1, 1, 1) 45 | 46 | def forward(self, bottom, top): 47 | # Algorithm: 48 | # 49 | # for each (H, W) location i 50 | # generate A anchor boxes centered on cell i 51 | # apply predicted bbox deltas at cell i to each of the A anchors 52 | # clip predicted boxes to image 53 | # remove predicted boxes with either height or width < threshold 54 | # sort all (proposal, score) pairs by score from highest to lowest 55 | # take top pre_nms_topN proposals before NMS 56 | # apply NMS with threshold 0.7 to remaining proposals 57 | # take after_nms_topN proposals after NMS 58 | # return the top proposals (-> RoIs top, scores top) 59 | 60 | assert bottom[0].data.shape[0] == 1, \ 61 | 'Only single item batches are supported' 62 | 63 | cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' 64 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 65 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 66 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 67 | min_size = cfg[cfg_key].RPN_MIN_SIZE 68 | 69 | # the first set of _num_anchors channels are bg probs 70 | # the second set are the fg probs, which we want 71 | scores = bottom[0].data[:, self._num_anchors:, :, :] 72 | bbox_deltas = bottom[1].data 73 | im_info = bottom[2].data[0, :] 74 | 75 | if DEBUG: 76 | print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) 77 | print 'scale: {}'.format(im_info[2]) 78 | 79 | # 1. 
Generate proposals from bbox deltas and shifted anchors 80 | height, width = scores.shape[-2:] 81 | 82 | if DEBUG: 83 | print 'score map size: {}'.format(scores.shape) 84 | 85 | # Enumerate all shifts 86 | shift_x = np.arange(0, width) * self._feat_stride 87 | shift_y = np.arange(0, height) * self._feat_stride 88 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 89 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 90 | shift_x.ravel(), shift_y.ravel())).transpose() 91 | 92 | # Enumerate all shifted anchors: 93 | # 94 | # add A anchors (1, A, 4) to 95 | # cell K shifts (K, 1, 4) to get 96 | # shift anchors (K, A, 4) 97 | # reshape to (K*A, 4) shifted anchors 98 | A = self._num_anchors 99 | K = shifts.shape[0] 100 | anchors = self._anchors.reshape((1, A, 4)) + \ 101 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 102 | anchors = anchors.reshape((K * A, 4)) 103 | 104 | # Transpose and reshape predicted bbox transformations to get them 105 | # into the same order as the anchors: 106 | # 107 | # bbox deltas will be (1, 4 * A, H, W) format 108 | # transpose to (1, H, W, 4 * A) 109 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 110 | # in slowest to fastest order 111 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) 112 | 113 | # Same story for the scores: 114 | # 115 | # scores are (1, A, H, W) format 116 | # transpose to (1, H, W, A) 117 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) 118 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 119 | 120 | # Convert anchors into proposals via bbox transformations 121 | proposals = bbox_transform_inv(anchors, bbox_deltas) 122 | 123 | # 2. clip predicted boxes to image 124 | proposals = clip_boxes(proposals, im_info[:2]) 125 | 126 | # 3. remove predicted boxes with either height or width < threshold 127 | # (NOTE: convert min_size to input image scale stored in im_info[2]) 128 | keep = _filter_boxes(proposals, min_size * im_info[2]) 129 | proposals = proposals[keep, :] 130 | scores = scores[keep] 131 | 132 | # 4. sort all (proposal, score) pairs by score from highest to lowest 133 | # 5. take top pre_nms_topN (e.g. 6000) 134 | order = scores.ravel().argsort()[::-1] 135 | if pre_nms_topN > 0: 136 | order = order[:pre_nms_topN] 137 | proposals = proposals[order, :] 138 | scores = scores[order] 139 | 140 | # 6. apply nms (e.g. threshold = 0.7) 141 | # 7. take after_nms_topN (e.g. 300) 142 | # 8. return the top proposals (-> RoIs top) 143 | keep = nms(np.hstack((proposals, scores)), nms_thresh) 144 | if post_nms_topN > 0: 145 | keep = keep[:post_nms_topN] 146 | proposals = proposals[keep, :] 147 | scores = scores[keep] 148 | 149 | # Output rois blob 150 | # Our RPN implementation only supports a single input image, so all 151 | # batch inds are 0 152 | batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) 153 | blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) 154 | top[0].reshape(*(blob.shape)) 155 | top[0].data[...] = blob 156 | 157 | # [Optional] output scores blob 158 | if len(top) > 1: 159 | top[1].reshape(*(scores.shape)) 160 | top[1].data[...] 
= scores 161 | 162 | def backward(self, top, propagate_down, bottom): 163 | """This layer does not propagate gradients.""" 164 | pass 165 | 166 | def reshape(self, bottom, top): 167 | """Reshaping happens during the call to forward.""" 168 | pass 169 | 170 | def _filter_boxes(boxes, min_size): 171 | """Remove all boxes with any side smaller than min_size.""" 172 | ws = boxes[:, 2] - boxes[:, 0] + 1 173 | hs = boxes[:, 3] - boxes[:, 1] + 1 174 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 175 | return keep 176 | -------------------------------------------------------------------------------- /tools/demo_video_for_video_XXX.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # -------------------------------------------------------- 5 | # Faster R-CNN 6 | # Copyright (c) 2015 Microsoft 7 | # Licensed under The MIT License [see LICENSE for details] 8 | # Written by Ross Girshick 9 | # -------------------------------------------------------- 10 | 11 | """ 12 | Demo script showing detections in sample images. 13 | 14 | See README.md for installation instructions before running. 15 | """ 16 | 17 | import _init_paths 18 | from fast_rcnn.config import cfg 19 | from fast_rcnn.test import im_detect 20 | from fast_rcnn.nms_wrapper import nms 21 | from utils.timer import Timer 22 | import matplotlib.pyplot as plt 23 | import numpy as np 24 | import scipy.io as sio 25 | import caffe, os, sys, cv2 26 | import argparse 27 | 28 | 29 | import matplotlib 30 | from matplotlib.pyplot import plot,savefig 31 | import cv2.cv as cv 32 | 33 | CLASSES = ('__background__', 'car', 'person', 'bike', 'truck', 'van','tram', 'misc') 34 | 35 | NETS = {'vgg16': ('VGG16', 36 | 'VGG16_faster_rcnn_final.caffemodel'), 37 | 'zf': ('ZF', 38 | 'ZF_faster_rcnn_final.caffemodel'), 39 | 'vgg_m': ('VGG_CNN_M_1024', 40 | 'VGG_CNN_M_1024_faster_rcnn_final.caffemodel')} 41 | 42 | 43 | def demo(net, im): 44 | """Detect object classes in an image using pre-computed object proposals.""" 45 | # Load the demo image 46 | 47 | # Detect all object classes and regress object bounds 48 | timer = Timer() 49 | timer.tic() 50 | scores, boxes = im_detect(net, im) 51 | timer.toc() 52 | print ('Detection took {:.3f}s for ' 53 | '{:d} object proposals').format(timer.total_time, boxes.shape[0]) 54 | 55 | # Visualize detections for each class 56 | CONF_THRESH = 0.8 57 | NMS_THRESH = 0.3 58 | index=1 59 | for cls_ind, cls in enumerate(CLASSES[1:]): 60 | cls_ind += 1 # because we skipped background 61 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 62 | cls_scores = scores[:, cls_ind] 63 | dets = np.hstack((cls_boxes, 64 | cls_scores[:, np.newaxis])).astype(np.float32) 65 | keep = nms(dets, NMS_THRESH) 66 | dets = dets[keep, :] 67 | 68 | #im = im[:, :, (2, 1, 0)] 69 | 70 | inds = np.where(dets[:, -1] >= CONF_THRESH)[0] 71 | if len(inds) == 0 and index==len(CLASSES[1:]): 72 | #cv2.imwrite(path,im) 73 | video.write(im) 74 | return 75 | elif len(inds) == 0 and index= CONF_THRESH)[0] 81 | if len(inds) == 0 and index==len(CLASSES[1:]): 82 | #cv2.imwrite(path,im) 83 | #video.write(im) 84 | return 85 | elif len(inds) == 0 and index= boxes[:, 0]).all() 109 | entry = {'boxes' : boxes, 110 | 'gt_overlaps' : self.roidb[i]['gt_overlaps'], 111 | 'gt_classes' : self.roidb[i]['gt_classes'], 112 | 'flipped' : True} 113 | self.roidb.append(entry) 114 | self._image_index = self._image_index * 2 115 | 116 | def evaluate_recall(self, candidate_boxes=None, 
ar_thresh=0.5): 117 | # Record max overlap value for each gt box 118 | # Return vector of overlap values 119 | gt_overlaps = np.zeros(0) 120 | for i in xrange(self.num_images): 121 | gt_inds = np.where(self.roidb[i]['gt_classes'] > 0)[0] 122 | gt_boxes = self.roidb[i]['boxes'][gt_inds, :] 123 | 124 | if candidate_boxes is None: 125 | non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0] 126 | boxes = self.roidb[i]['boxes'][non_gt_inds, :] 127 | else: 128 | boxes = candidate_boxes[i] 129 | if boxes.shape[0] == 0: 130 | continue 131 | overlaps = bbox_overlaps(boxes.astype(np.float), 132 | gt_boxes.astype(np.float)) 133 | 134 | # gt_overlaps = np.hstack((gt_overlaps, overlaps.max(axis=0))) 135 | _gt_overlaps = np.zeros((gt_boxes.shape[0])) 136 | for j in xrange(gt_boxes.shape[0]): 137 | argmax_overlaps = overlaps.argmax(axis=0) 138 | max_overlaps = overlaps.max(axis=0) 139 | gt_ind = max_overlaps.argmax() 140 | gt_ovr = max_overlaps.max() 141 | assert(gt_ovr >= 0) 142 | box_ind = argmax_overlaps[gt_ind] 143 | _gt_overlaps[j] = overlaps[box_ind, gt_ind] 144 | assert(_gt_overlaps[j] == gt_ovr) 145 | overlaps[box_ind, :] = -1 146 | overlaps[:, gt_ind] = -1 147 | 148 | gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) 149 | 150 | num_pos = gt_overlaps.size 151 | gt_overlaps = np.sort(gt_overlaps) 152 | step = 0.001 153 | thresholds = np.minimum(np.arange(0.5, 1.0 + step, step), 1.0) 154 | recalls = np.zeros_like(thresholds) 155 | for i, t in enumerate(thresholds): 156 | recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) 157 | ar = 2 * np.trapz(recalls, thresholds) 158 | 159 | return ar, gt_overlaps, recalls, thresholds 160 | 161 | def create_roidb_from_box_list(self, box_list, gt_roidb): 162 | assert len(box_list) == self.num_images, \ 163 | 'Number of boxes must match number of ground-truth images' 164 | roidb = [] 165 | for i in xrange(self.num_images): 166 | boxes = box_list[i] 167 | num_boxes = boxes.shape[0] 168 | overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32) 169 | 170 | if gt_roidb is not None: 171 | gt_boxes = gt_roidb[i]['boxes'] 172 | gt_classes = gt_roidb[i]['gt_classes'] 173 | gt_overlaps = bbox_overlaps(boxes.astype(np.float), 174 | gt_boxes.astype(np.float)) 175 | argmaxes = gt_overlaps.argmax(axis=1) 176 | maxes = gt_overlaps.max(axis=1) 177 | I = np.where(maxes > 0)[0] 178 | overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] 179 | 180 | overlaps = scipy.sparse.csr_matrix(overlaps) 181 | roidb.append({'boxes' : boxes, 182 | 'gt_classes' : np.zeros((num_boxes,), 183 | dtype=np.int32), 184 | 'gt_overlaps' : overlaps, 185 | 'flipped' : False}) 186 | return roidb 187 | 188 | @staticmethod 189 | def merge_roidbs(a, b): 190 | assert len(a) == len(b) 191 | for i in xrange(len(a)): 192 | a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) 193 | a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], 194 | b[i]['gt_classes'])) 195 | a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'], 196 | b[i]['gt_overlaps']]) 197 | return a 198 | 199 | def competition_mode(self, on): 200 | """Turn competition mode on or off.""" 201 | pass 202 | -------------------------------------------------------------------------------- /lib/roi_data_layer_original/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # 
-------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | RoIDataLayer implements a Caffe Python layer. 11 | """ 12 | 13 | import caffe 14 | from fast_rcnn.config import cfg 15 | from roi_data_layer.minibatch import get_minibatch 16 | import numpy as np 17 | import yaml 18 | from multiprocessing import Process, Queue 19 | 20 | class RoIDataLayer(caffe.Layer): 21 | """Fast R-CNN data layer used for training.""" 22 | 23 | def _shuffle_roidb_inds(self): 24 | """Randomly permute the training roidb.""" 25 | if cfg.TRAIN.ASPECT_GROUPING: 26 | widths = np.array([r['width'] for r in self._roidb]) 27 | heights = np.array([r['height'] for r in self._roidb]) 28 | horz = (widths >= heights) 29 | vert = np.logical_not(horz) 30 | horz_inds = np.where(horz)[0] 31 | vert_inds = np.where(vert)[0] 32 | inds = np.hstack(( 33 | np.random.permutation(horz_inds), 34 | np.random.permutation(vert_inds))) 35 | inds = np.reshape(inds, (-1, 2)) 36 | row_perm = np.random.permutation(np.arange(inds.shape[0])) 37 | inds = np.reshape(inds[row_perm, :], (-1,)) 38 | self._perm = inds 39 | else: 40 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 41 | self._cur = 0 42 | 43 | def _get_next_minibatch_inds(self): 44 | """Return the roidb indices for the next minibatch.""" 45 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 46 | self._shuffle_roidb_inds() 47 | 48 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 49 | self._cur += cfg.TRAIN.IMS_PER_BATCH 50 | return db_inds 51 | 52 | def _get_next_minibatch(self): 53 | """Return the blobs to be used for the next minibatch. 54 | 55 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 56 | separate process and made available through self._blob_queue. 
57 | """
58 | if cfg.TRAIN.USE_PREFETCH:
59 | return self._blob_queue.get()
60 | else:
61 | db_inds = self._get_next_minibatch_inds()
62 | minibatch_db = [self._roidb[i] for i in db_inds]
63 | return get_minibatch(minibatch_db, self._num_classes)
64 |
65 | def set_roidb(self, roidb):
66 | """Set the roidb to be used by this layer during training."""
67 | self._roidb = roidb
68 | self._shuffle_roidb_inds()
69 | if cfg.TRAIN.USE_PREFETCH:
70 | self._blob_queue = Queue(10)
71 | self._prefetch_process = BlobFetcher(self._blob_queue,
72 | self._roidb,
73 | self._num_classes)
74 | self._prefetch_process.start()
75 | # Terminate the child process when the parent exits
76 | def cleanup():
77 | print 'Terminating BlobFetcher'
78 | self._prefetch_process.terminate()
79 | self._prefetch_process.join()
80 | import atexit
81 | atexit.register(cleanup)
82 |
83 | def setup(self, bottom, top):
84 | """Setup the RoIDataLayer."""
85 |
86 | # parse the layer parameter string, which must be valid YAML
87 | layer_params = yaml.load(self.param_str_)
88 |
89 | self._num_classes = layer_params['num_classes']
90 |
91 | self._name_to_top_map = {}
92 |
93 | # data blob: holds a batch of N images, each with 3 channels
94 | idx = 0
95 | top[idx].reshape(cfg.TRAIN.IMS_PER_BATCH, 3,
96 | max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE)
97 | self._name_to_top_map['data'] = idx
98 | idx += 1
99 |
100 | if cfg.TRAIN.HAS_RPN:
101 | top[idx].reshape(1, 3)
102 | self._name_to_top_map['im_info'] = idx
103 | idx += 1
104 |
105 | top[idx].reshape(1, 4)
106 | self._name_to_top_map['gt_boxes'] = idx
107 | idx += 1
108 | else: # not using RPN
109 | # rois blob: holds R regions of interest, each is a 5-tuple
110 | # (n, x1, y1, x2, y2) specifying an image batch index n and a
111 | # rectangle (x1, y1, x2, y2)
112 | top[idx].reshape(1, 5)
113 | self._name_to_top_map['rois'] = idx
114 | idx += 1
115 |
116 | # labels blob: R categorical labels in [0, ..., K] for K foreground
117 | # classes plus background
118 | top[idx].reshape(1)
119 | self._name_to_top_map['labels'] = idx
120 | idx += 1
121 |
122 | if cfg.TRAIN.BBOX_REG:
123 | # bbox_targets blob: R bounding-box regression targets with 4
124 | # targets per class
125 | top[idx].reshape(1, self._num_classes * 4)
126 | self._name_to_top_map['bbox_targets'] = idx
127 | idx += 1
128 |
129 | # bbox_inside_weights blob: at most 4 targets per roi are active;
130 | # this binary vector specifies the subset of active targets
131 | top[idx].reshape(1, self._num_classes * 4)
132 | self._name_to_top_map['bbox_inside_weights'] = idx
133 | idx += 1
134 |
135 | top[idx].reshape(1, self._num_classes * 4)
136 | self._name_to_top_map['bbox_outside_weights'] = idx
137 | idx += 1
138 |
139 | print 'RoiDataLayer: name_to_top:', self._name_to_top_map
140 | assert len(top) == len(self._name_to_top_map)
141 |
142 | def forward(self, bottom, top):
143 | """Get blobs and copy them into this layer's top blob vector."""
144 | blobs = self._get_next_minibatch()
145 |
146 | for blob_name, blob in blobs.iteritems():
147 | top_ind = self._name_to_top_map[blob_name]
148 | # Reshape net's input blobs
149 | top[top_ind].reshape(*(blob.shape))
150 | # Copy data into net's input blobs
151 | top[top_ind].data[...]
= blob.astype(np.float32, copy=False) 152 | 153 | def backward(self, top, propagate_down, bottom): 154 | """This layer does not propagate gradients.""" 155 | pass 156 | 157 | def reshape(self, bottom, top): 158 | """Reshaping happens during the call to forward.""" 159 | pass 160 | 161 | class BlobFetcher(Process): 162 | """Experimental class for prefetching blobs in a separate process.""" 163 | def __init__(self, queue, roidb, num_classes): 164 | super(BlobFetcher, self).__init__() 165 | self._queue = queue 166 | self._roidb = roidb 167 | self._num_classes = num_classes 168 | self._perm = None 169 | self._cur = 0 170 | self._shuffle_roidb_inds() 171 | # fix the random seed for reproducibility 172 | np.random.seed(cfg.RNG_SEED) 173 | 174 | def _shuffle_roidb_inds(self): 175 | """Randomly permute the training roidb.""" 176 | # TODO(rbg): remove duplicated code 177 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 178 | self._cur = 0 179 | 180 | def _get_next_minibatch_inds(self): 181 | """Return the roidb indices for the next minibatch.""" 182 | # TODO(rbg): remove duplicated code 183 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 184 | self._shuffle_roidb_inds() 185 | 186 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 187 | self._cur += cfg.TRAIN.IMS_PER_BATCH 188 | return db_inds 189 | 190 | def run(self): 191 | print 'BlobFetcher started' 192 | while True: 193 | db_inds = self._get_next_minibatch_inds() 194 | minibatch_db = [self._roidb[i] for i in db_inds] 195 | blobs = get_minibatch(minibatch_db, self._num_classes) 196 | self._queue.put(blobs) 197 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Disclaimer 2 | 3 | The official Faster R-CNN code (written in MATLAB) is available [here](https://github.com/ShaoqingRen/faster_rcnn). 4 | If your goal is to reproduce the results in our NIPS 2015 paper, please use the [official code](https://github.com/ShaoqingRen/faster_rcnn). 5 | 6 | This repository contains a Python *reimplementation* of the MATLAB code. 7 | This Python implementation is built on a fork of [Fast R-CNN](https://github.com/rbgirshick/fast-rcnn). 8 | There are slight differences between the two implementations. 9 | In particular, this Python port 10 | - is ~10% slower at test-time, because some operations execute on the CPU in Python layers (e.g., 220ms / image vs. 200ms / image for VGG16) 11 | - gives similar, but not exactly the same, mAP as the MATLAB version 12 | - is *not compatible* with models trained using the MATLAB code due to the minor implementation differences 13 | 14 | # *Faster* R-CNN: Towards Real-Time Object Detection with Region Proposal Networks 15 | 16 | By Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun (Microsoft Research) 17 | 18 | This Python implementation contains contributions from Sean Bell (Cornell) written during an MSR internship. 19 | 20 | Please see the official [README.md](https://github.com/ShaoqingRen/faster_rcnn/blob/master/README.md) for more details. 21 | 22 | Faster R-CNN was initially described in an [arXiv tech report](http://arxiv.org/abs/1506.01497) and was subsequently published in NIPS 2015. 23 | 24 | ### License 25 | 26 | Faster R-CNN is released under the MIT License (refer to the LICENSE file for details). 
27 | 28 | ### Citing Faster R-CNN 29 | 30 | If you find Faster R-CNN useful in your research, please consider citing: 31 | 32 | @inproceedings{renNIPS15fasterrcnn, 33 | Author = {Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun}, 34 | Title = {Faster {R-CNN}: Towards Real-Time Object Detection 35 | with Region Proposal Networks}, 36 | Booktitle = {Advances in Neural Information Processing Systems ({NIPS})}, 37 | Year = {2015} 38 | } 39 | 40 | ### Contents 41 | 1. [Requirements: software](#requirements-software) 42 | 2. [Requirements: hardware](#requirements-hardware) 43 | 3. [Basic installation](#installation-sufficient-for-the-demo) 44 | 4. [Demo](#demo) 45 | 5. [Beyond the demo: training and testing](#beyond-the-demo-installation-for-training-and-testing-models) 46 | 6. [Usage](#usage) 47 | 48 | ### Requirements: software 49 | 50 | 1. Requirements for `Caffe` and `pycaffe` (see: [Caffe installation instructions](http://caffe.berkeleyvision.org/installation.html)) 51 | 52 | **Note:** Caffe *must* be built with support for Python layers! 53 | 54 | ```make 55 | # In your Makefile.config, make sure to have this line uncommented 56 | WITH_PYTHON_LAYER := 1 57 | ``` 58 | 59 | You can download my [Makefile.config](http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/Makefile.config) for reference. 60 | 2. Python packages you might not have: `cython`, `python-opencv`, `easydict` 61 | 3. [optional] MATLAB (required for PASCAL VOC evaluation only) 62 | 63 | ### Requirements: hardware 64 | 65 | 1. For training smaller networks (ZF, VGG_CNN_M_1024) a good GPU (e.g., Titan, K20, K40, ...) with at least 3G of memory suffices 66 | 2. For training with VGG16, you'll need a K40 (~11G of memory) 67 | 68 | ### Installation (sufficient for the demo) 69 | 70 | 1. Clone the Faster R-CNN repository 71 | ```Shell 72 | # Make sure to clone with --recursive 73 | git clone --recursive https://github.com/rbgirshick/py-faster-rcnn.git 74 | ``` 75 | 76 | 2. We'll call the directory that you cloned Faster R-CNN into `FRCN_ROOT` 77 | 78 | *Ignore notes 1 and 2 if you followed step 1 above.* 79 | 80 | **Note 1:** If you didn't clone Faster R-CNN with the `--recursive` flag, then you'll need to manually clone the `caffe-fast-rcnn` submodule: 81 | ```Shell 82 | git submodule update --init --recursive 83 | ``` 84 | **Note 2:** The `caffe-fast-rcnn` submodule needs to be on the `faster-rcnn` branch (or equivalent detached state). This will happen automatically *if you followed step 1 instructions*. 85 | 86 | 3. Build the Cython modules 87 | ```Shell 88 | cd $FRCN_ROOT/lib 89 | make 90 | ``` 91 | 92 | 4. Build Caffe and pycaffe 93 | ```Shell 94 | cd $FRCN_ROOT/caffe-fast-rcnn 95 | # Now follow the Caffe installation instructions here: 96 | # http://caffe.berkeleyvision.org/installation.html 97 | 98 | # If you're experienced with Caffe and have all of the requirements installed 99 | # and your Makefile.config in place, then simply do: 100 | make -j8 && make pycaffe 101 | ``` 102 | 103 | 5. Download pre-computed Faster R-CNN detectors 104 | ```Shell 105 | cd $FRCN_ROOT 106 | ./data/scripts/fetch_faster_rcnn_models.sh 107 | ``` 108 | 109 | This will populate the `$FRCN_ROOT/data` folder with `faster_rcnn_models`. See `data/README.md` for details. 110 | These models were trained on VOC 2007 trainval. 111 | 112 | ### Demo 113 | 114 | *After successfully completing [basic installation](#installation-sufficient-for-the-demo)*, you'll be ready to run the demo. 
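As an optional sanity check (a minimal sketch; it only assumes the builds above succeeded), the Cython extensions compiled in step 3 should import cleanly before you try the demo:

```Shell
cd $FRCN_ROOT/lib
# Both imports should return silently; an ImportError means the
# corresponding extension from step 3 did not build correctly.
python -c "from nms.cpu_nms import cpu_nms; from utils.cython_bbox import bbox_overlaps"
```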
115 |
116 | **Python**
117 |
118 | To run the demo
119 | ```Shell
120 | cd $FRCN_ROOT
121 | ./tools/demo.py
122 | ```
123 | The demo performs detection using a VGG16 network trained for detection on PASCAL VOC 2007.
124 |
125 | ### Beyond the demo: installation for training and testing models
126 | 1. Download the training, validation, test data and VOCdevkit
127 |
128 | ```Shell
129 | wget http://pascallin.ecs.soton.ac.uk/challenges/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
130 | wget http://pascallin.ecs.soton.ac.uk/challenges/VOC/voc2007/VOCtest_06-Nov-2007.tar
131 | wget http://pascallin.ecs.soton.ac.uk/challenges/VOC/voc2007/VOCdevkit_08-Jun-2007.tar
132 | ```
133 |
134 | 2. Extract all of these tars into one directory named `VOCdevkit`
135 |
136 | ```Shell
137 | tar xvf VOCtrainval_06-Nov-2007.tar
138 | tar xvf VOCtest_06-Nov-2007.tar
139 | tar xvf VOCdevkit_08-Jun-2007.tar
140 | ```
141 |
142 | 3. It should have this basic structure
143 |
144 | ```Shell
145 | $VOCdevkit/ # development kit
146 | $VOCdevkit/VOCcode/ # VOC utility code
147 | $VOCdevkit/VOC2007 # image sets, annotations, etc.
148 | # ... and several other directories ...
149 | ```
150 |
151 | 4. Create symlinks for the PASCAL VOC dataset
152 |
153 | ```Shell
154 | cd $FRCN_ROOT/data
155 | ln -s $VOCdevkit VOCdevkit2007
156 | ```
157 | Using symlinks is a good idea because you will likely want to share the same PASCAL dataset installation between multiple projects.
158 | 5. [Optional] follow similar steps to get PASCAL VOC 2010 and 2012
159 | 6. Follow the next sections to download pre-trained ImageNet models
160 |
161 | ### Download pre-trained ImageNet models
162 |
163 | Pre-trained ImageNet models can be downloaded for the two networks described in the paper: ZF and VGG16.
164 |
165 | ```Shell
166 | cd $FRCN_ROOT
167 | ./data/scripts/fetch_imagenet_models.sh
168 | ```
169 | VGG16 comes from the [Caffe Model Zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo), but is provided here for your convenience.
170 | ZF was trained at MSRA.
171 |
172 | ### Usage
173 |
174 | To train and test a Faster R-CNN detector use `experiments/scripts/faster_rcnn_alt_opt.sh`.
175 | Output is written underneath `$FRCN_ROOT/output`.
176 |
177 | ```Shell
178 | cd $FRCN_ROOT
179 | ./experiments/scripts/faster_rcnn_alt_opt.sh [GPU_ID] [NET] [--set ...]
180 | # GPU_ID is the GPU you want to train on
181 | # NET in {ZF, VGG_CNN_M_1024, VGG16} is the network arch to use
182 | # --set ... allows you to specify fast_rcnn.config options, e.g.
183 | # --set EXP_DIR seed_rng1701 RNG_SEED 1701
184 | ```
185 |
186 | ("alt opt" refers to the alternating optimization training algorithm described in the NIPS paper.)
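For example, to train and test VGG16 on GPU 0 (the `--set` values below are illustrative, not required):

```Shell
cd $FRCN_ROOT
# GPU 0, VGG16; EXP_DIR and RNG_SEED are example overrides
./experiments/scripts/faster_rcnn_alt_opt.sh 0 VGG16 --set EXP_DIR my_exp RNG_SEED 42
```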
187 |
188 | ### Training on KITTI
189 |
190 | Download the KITTI detection data into `data/training/` and use `train.sh` to train the model.
191 |
192 | Per-class APs and mAP on KITTI, with OHEM and with standard Faster R-CNN (classes listed in the same order for both runs):
193 |
194 | | AP, with OHEM | AP, standard Faster R-CNN |
195 | |:---:|:---:|
196 | | 78.4 | 78.4 |
197 | | 64.7 | 65.4 |
198 | | 75.1 | 69.2 |
199 | | 86.5 | 88.1 |
200 | | 89.6 | 86.6 |
201 | | 83.5 | 82.6 |
202 | | 74.0 | 67.8 |
203 | | **mAP: 78.8** | **mAP: 76.9** |
204 | -------------------------------------------------------------------------------- /models/VGG16/faster_rcnn_alt_opt/faster_rcnn_test.pt: --------------------------------------------------------------------------------
1 | name: "VGG_ILSVRC_16_layers"
2 | input: "data"
3 | input_shape {
4 | dim: 1
5 | dim: 3
6 | dim: 224
7 | dim: 224
8 | }
9 |
10 | input: "im_info"
11 | input_shape {
12 | dim: 1
13 | dim: 3
14 | }
15 |
16 | layer {
17 | name: "conv1_1"
18 | type: "Convolution"
19 | bottom: "data"
20 | top: "conv1_1"
21 | convolution_param {
22 | num_output: 64
23 | pad: 1 kernel_size: 3
24 | }
25 | }
26 | layer {
27 | name: "relu1_1"
28 | type: "ReLU"
29 | bottom: "conv1_1"
30 | top: "conv1_1"
31 | }
32 | layer {
33 | name: "conv1_2"
34 | type: "Convolution"
35 | bottom: "conv1_1"
36 | top: "conv1_2"
37 | convolution_param {
38 | num_output: 64
39 | pad: 1 kernel_size: 3
40 | }
41 | }
42 | layer {
43 | name: "relu1_2"
44 | type: "ReLU"
45 | bottom: "conv1_2"
46 | top: "conv1_2"
47 | }
48 | layer {
49 | name: "pool1"
50 | type: "Pooling"
51 | bottom: "conv1_2"
52 | top: "pool1"
53 | pooling_param {
54 | pool: MAX
55 | kernel_size: 2 stride: 2
56 | }
57 | }
58 | layer {
59 | name: "conv2_1"
60 | type: "Convolution"
61 | bottom: "pool1"
62 | top: "conv2_1"
63 | convolution_param {
64 | num_output: 128
65 | pad: 1 kernel_size: 3
66 | }
67 | }
68 | layer {
69 | name: "relu2_1"
70 | type: "ReLU"
71 | bottom: "conv2_1"
72 | top: "conv2_1"
73 | }
74 | layer {
75 | name: "conv2_2"
76 | type: "Convolution"
77 | bottom: "conv2_1"
78 | top: "conv2_2"
79 | convolution_param {
80 | num_output: 128
81 | pad: 1 kernel_size: 3
82 | }
83 | }
84 | layer {
85 | name: "relu2_2"
86 | type: "ReLU"
87 | bottom: "conv2_2"
88 | top: "conv2_2"
89 | }
90 | layer {
91 | name: "pool2"
92 | type: "Pooling"
93 | bottom: "conv2_2"
94 | top: "pool2"
95 | pooling_param {
96 | pool: MAX
97 | kernel_size: 2 stride: 2
98 | }
99 | }
100 | layer {
101 | name: "conv3_1"
102 | type: "Convolution"
103 | bottom: "pool2"
104 | top: "conv3_1"
105 | convolution_param {
106 | num_output: 256
107 | pad: 1 kernel_size: 3
108 | }
109 | }
110 | layer {
111 | name: "relu3_1"
112 | type: "ReLU"
113 | bottom: "conv3_1"
114 | top: "conv3_1"
115 | }
116 | layer {
117 | name: "conv3_2"
118 | type: "Convolution"
119 | bottom: "conv3_1"
120 | top: "conv3_2"
121 | convolution_param {
122 | num_output: 256
123 | pad: 1 kernel_size: 3
124 | }
125 | }
126 | layer {
127 | name: "relu3_2"
128 | type: "ReLU"
129 | bottom: "conv3_2"
130 | top: "conv3_2"
131 | }
132 | layer {
133 | name: "conv3_3"
134 | type: "Convolution"
135 | bottom: "conv3_2"
136 | top: "conv3_3"
137 | convolution_param {
138 | num_output: 256
139 | pad: 1 kernel_size: 3
140 | }
141 | }
142 | layer {
143 | name: "relu3_3"
144 | type: "ReLU"
145 | bottom: "conv3_3"
146 | top: "conv3_3"
147 | }
148 | layer {
149 | name: "pool3"
150 | type: "Pooling"
151 | bottom: "conv3_3"
152 | top: "pool3"
153 | pooling_param {
154 | pool: MAX
155 | kernel_size: 2 stride: 2
156 | }
157 | }
158 | layer {
159 | name: "conv4_1"
160 | type: "Convolution"
161 | bottom: "pool3"
162
| top: "conv4_1" 163 | convolution_param { 164 | num_output: 512 165 | pad: 1 kernel_size: 3 166 | } 167 | } 168 | layer { 169 | name: "relu4_1" 170 | type: "ReLU" 171 | bottom: "conv4_1" 172 | top: "conv4_1" 173 | } 174 | layer { 175 | name: "conv4_2" 176 | type: "Convolution" 177 | bottom: "conv4_1" 178 | top: "conv4_2" 179 | convolution_param { 180 | num_output: 512 181 | pad: 1 kernel_size: 3 182 | } 183 | } 184 | layer { 185 | name: "relu4_2" 186 | type: "ReLU" 187 | bottom: "conv4_2" 188 | top: "conv4_2" 189 | } 190 | layer { 191 | name: "conv4_3" 192 | type: "Convolution" 193 | bottom: "conv4_2" 194 | top: "conv4_3" 195 | convolution_param { 196 | num_output: 512 197 | pad: 1 kernel_size: 3 198 | } 199 | } 200 | layer { 201 | name: "relu4_3" 202 | type: "ReLU" 203 | bottom: "conv4_3" 204 | top: "conv4_3" 205 | } 206 | layer { 207 | name: "pool4" 208 | type: "Pooling" 209 | bottom: "conv4_3" 210 | top: "pool4" 211 | pooling_param { 212 | pool: MAX 213 | kernel_size: 2 stride: 2 214 | } 215 | } 216 | layer { 217 | name: "conv5_1" 218 | type: "Convolution" 219 | bottom: "pool4" 220 | top: "conv5_1" 221 | convolution_param { 222 | num_output: 512 223 | pad: 1 kernel_size: 3 224 | } 225 | } 226 | layer { 227 | name: "relu5_1" 228 | type: "ReLU" 229 | bottom: "conv5_1" 230 | top: "conv5_1" 231 | } 232 | layer { 233 | name: "conv5_2" 234 | type: "Convolution" 235 | bottom: "conv5_1" 236 | top: "conv5_2" 237 | convolution_param { 238 | num_output: 512 239 | pad: 1 kernel_size: 3 240 | } 241 | } 242 | layer { 243 | name: "relu5_2" 244 | type: "ReLU" 245 | bottom: "conv5_2" 246 | top: "conv5_2" 247 | } 248 | layer { 249 | name: "conv5_3" 250 | type: "Convolution" 251 | bottom: "conv5_2" 252 | top: "conv5_3" 253 | convolution_param { 254 | num_output: 512 255 | pad: 1 kernel_size: 3 256 | } 257 | } 258 | layer { 259 | name: "relu5_3" 260 | type: "ReLU" 261 | bottom: "conv5_3" 262 | top: "conv5_3" 263 | } 264 | 265 | #========= RPN ============ 266 | 267 | layer { 268 | name: "rpn_conv/3x3" 269 | type: "Convolution" 270 | bottom: "conv5_3" 271 | top: "rpn/output" 272 | convolution_param { 273 | num_output: 512 274 | kernel_size: 3 pad: 1 stride: 1 275 | } 276 | } 277 | layer { 278 | name: "rpn_relu/3x3" 279 | type: "ReLU" 280 | bottom: "rpn/output" 281 | top: "rpn/output" 282 | } 283 | 284 | layer { 285 | name: "rpn_cls_score" 286 | type: "Convolution" 287 | bottom: "rpn/output" 288 | top: "rpn_cls_score" 289 | convolution_param { 290 | num_output: 18 # 2(bg/fg) * 9(anchors) 291 | kernel_size: 1 pad: 0 stride: 1 292 | } 293 | } 294 | layer { 295 | name: "rpn_bbox_pred" 296 | type: "Convolution" 297 | bottom: "rpn/output" 298 | top: "rpn_bbox_pred" 299 | convolution_param { 300 | num_output: 36 # 4 * 9(anchors) 301 | kernel_size: 1 pad: 0 stride: 1 302 | } 303 | } 304 | layer { 305 | bottom: "rpn_cls_score" 306 | top: "rpn_cls_score_reshape" 307 | name: "rpn_cls_score_reshape" 308 | type: "Reshape" 309 | reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } 310 | } 311 | 312 | #========= RoI Proposal ============ 313 | 314 | layer { 315 | name: "rpn_cls_prob" 316 | type: "Softmax" 317 | bottom: "rpn_cls_score_reshape" 318 | top: "rpn_cls_prob" 319 | } 320 | layer { 321 | name: 'rpn_cls_prob_reshape' 322 | type: 'Reshape' 323 | bottom: 'rpn_cls_prob' 324 | top: 'rpn_cls_prob_reshape' 325 | reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } 326 | } 327 | layer { 328 | name: 'proposal' 329 | type: 'Python' 330 | bottom: 'rpn_cls_prob_reshape' 331 | bottom: 'rpn_bbox_pred' 332 | bottom: 

#========= RCNN ============

layer {
  name: "roi_pool5"
  type: "ROIPooling"
  bottom: "conv5_3"
  bottom: "rois"
  top: "pool5"
  roi_pooling_param {
    pooled_w: 7
    pooled_h: 7
    spatial_scale: 0.0625 # 1/16, the cumulative stride of the four 2x2 poolings
  }
}
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  inner_product_param {
    num_output: 4096
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  inner_product_param {
    num_output: 4096
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "cls_score"
  type: "InnerProduct"
  bottom: "fc7"
  top: "cls_score"
  inner_product_param {
    num_output: 8 # 7 KITTI object classes + background
  }
}
layer {
  name: "bbox_pred"
  type: "InnerProduct"
  bottom: "fc7"
  top: "bbox_pred"
  inner_product_param {
    num_output: 32 # 4 box coordinates for each of the 8 classes
  }
}
layer {
  name: "cls_prob"
  type: "Softmax"
  bottom: "cls_score"
  top: "cls_prob"
}
--------------------------------------------------------------------------------
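The test network above is driven through the standard py-faster-rcnn detection path. A minimal sketch of typical usage follows; it assumes `lib/` is on `PYTHONPATH`, and the `.caffemodel` and image paths are placeholders, not files shipped with this repository:

```python
import caffe
import cv2
import numpy as np

from fast_rcnn.config import cfg
from fast_rcnn.test import im_detect
from fast_rcnn.nms_wrapper import nms

cfg.TEST.HAS_RPN = True  # this prototxt generates its own proposals via the RPN

prototxt = 'models/VGG16/faster_rcnn_alt_opt/faster_rcnn_test.pt'
caffemodel = 'output/faster_rcnn_alt_opt/vgg16_ohem.caffemodel'  # placeholder path

caffe.set_mode_gpu()
net = caffe.Net(prototxt, caffemodel, caffe.TEST)

im = cv2.imread('data/training/image_2/000008.png')  # any KITTI image
scores, boxes = im_detect(net, im)  # scores: (n_rois, 8), boxes: (n_rois, 32)

# Per-class NMS over the class-specific box regressions (class 0 = background).
for cls_ind in range(1, scores.shape[1]):
    cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
    cls_scores = scores[:, cls_ind]
    dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, 0.3)  # IoU threshold
    dets = dets[keep]
    dets = dets[dets[:, -1] >= 0.8]  # keep only confident detections
    print('class %d: %d detections' % (cls_ind, len(dets)))
```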