├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── README.md
├── data
    └── pascal_voc
    │   └── synset_words.txt
├── faster_rcnn.cpp
├── faster_rcnn.hpp
└── main.cpp


/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | faster_rcnn_cplusplus
3 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "py-faster-rcnn"]
2 | 	path = py-faster-rcnn
3 | 	url = https://github.com/rbgirshick/py-faster-rcnn
4 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.6)
 2 | project(faster_rcnn_cplusplus)
 3 | 
 4 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 5 | 
 6 | set(PYTHON_INCLUDE_DIR "/usr/include/python2.7")
 7 | set(CAFFE_INCLUDE_DIR "py-faster-rcnn/caffe-fast-rcnn/include")
 8 | set(CAFFE_LIBRARY_DIR "py-faster-rcnn/caffe-fast-rcnn/build/lib")
 9 | 
10 | find_package(OpenCV REQUIRED)
11 | find_package(CUDA REQUIRED)
12 | 
13 | include_directories(${PYTHON_INCLUDE_DIR} ${CAFFE_INCLUDE_DIR})
14 | link_directories(${CAFFE_LIBRARY_DIR})
15 | 
16 | set(SOURCE_FILES main.cpp faster_rcnn.cpp py-faster-rcnn/lib/nms/nms_kernel.cu)
17 | cuda_add_executable(faster_rcnn_cplusplus ${SOURCE_FILES})
18 | target_link_libraries(faster_rcnn_cplusplus caffe python2.7 glog boost_system opencv_highgui opencv_core opencv_imgproc)
19 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Faster R-CNN C++ Inference
  2 | 
  3 | Copyright (c) 2016 Centro de Investigación en Tecnoloxías da Información (CITIUS)
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | of this software and associated documentation files (the "Software"), to deal
  7 | in the Software without restriction, including without limitation the rights
  8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | copies of the Software, and to permit persons to whom the Software is
 10 | furnished to do so, subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in
 13 | all copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 21 | THE SOFTWARE.
 22 | 
 23 | ************************************************************************
 24 | 
 25 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION
 26 | 
 27 | This project incorporates material from the project(s) listed below 
 28 | (collectively, "Third Party Code").  I'm not the original author of the 
 29 | Third Party Code.  The original copyright notice and license under which 
 30 | I received such Third Party Code are set out below. This Third Party 
 31 | Code is licensed to you under their original license terms set forth 
 32 | below.  I reserve all other rights not expressly granted, whether by 
 33 | implication, estoppel or otherwise.
 34 | 
 35 | 1.	py-faster-rcnn, (https://github.com/deboc/py-faster-rcnn)
 36 | 
 37 | COPYRIGHT
 38 | 
 39 | The MIT License (MIT)
 40 | 
 41 | Copyright (c) 2015 Microsoft Corporation
 42 | 
 43 | Permission is hereby granted, free of charge, to any person obtaining a copy
 44 | of this software and associated documentation files (the "Software"), to deal
 45 | in the Software without restriction, including without limitation the rights
 46 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 47 | copies of the Software, and to permit persons to whom the Software is
 48 | furnished to do so, subject to the following conditions:
 49 | 
 50 | The above copyright notice and this permission notice shall be included in
 51 | all copies or substantial portions of the Software.
 52 | 
 53 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 54 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 55 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 56 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 57 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 58 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 59 | THE SOFTWARE.
 60 | 
 61 | 2.	Caffe, (https://github.com/BVLC/caffe/)
 62 | 
 63 | COPYRIGHT
 64 | 
 65 | All contributions by the University of California:
 66 | Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
 67 | All rights reserved.
 68 | 
 69 | All other contributions:
 70 | Copyright (c) 2014, 2015, the respective contributors
 71 | All rights reserved.
 72 | 
 73 | Caffe uses a shared copyright model: each contributor holds copyright
 74 | over their contributions to Caffe. The project versioning records all
 75 | such contribution and copyright details. If a contributor wants to
 76 | further mark their specific copyright on a particular contribution,
 77 | they should indicate their copyright solely in the commit message of
 78 | the change when it is committed.
 79 | 
 80 | The BSD 2-Clause License
 81 | 
 82 | Redistribution and use in source and binary forms, with or without
 83 | modification, are permitted provided that the following conditions
 84 | are met:
 85 | 
 86 | 1. Redistributions of source code must retain the above copyright notice,
 87 | this list of conditions and the following disclaimer.
 88 | 
 89 | 2. Redistributions in binary form must reproduce the above copyright
 90 | notice, this list of conditions and the following disclaimer in the
 91 | documentation and/or other materials provided with the distribution.
 92 | 
 93 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 94 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 95 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 96 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 97 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 98 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 99 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
100 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
101 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
102 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
103 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
104 | 
105 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********
106 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Faster R-CNN C++ Inference
  2 | 
  3 | This project provides a C++ inference interface for a trained [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn) network.
  4 | 
  5 | From the code developed by Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun (Microsoft Research) for the project [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn), some modules and files from the `py-faster-rcnn/lib` and `py-faster-rcnn/data/scripts/` folders are used directly:
  6 |  - **RPN module** (`py-faster-rcnn/lib/rpn/`): Needed to deploy the *region proposal network*.
  7 |  - **NMS module** (`py-faster-rcnn/lib/nms/`): Needed to apply *non-maximum suppression* step.
  8 |  - **Fast_rcnn module** (`py-faster-rcnn/lib/fast_rcnn/`): Contains auxiliary functions.
  9 |  - **`py-faster-rcnn/lib/Makefile` and `py-faster-rcnn/lib/setup.py`**: To compile NMS CUDA and Cython libraries.
 10 |  - **`py-faster-rcnn/data/scripts/fetch_faster_rcnn_models.sh`**: To download pre-computed Faster R-CNN detectors.
 11 | 
 12 | This code is added as a submodule to the present project for convenience. It also uses their branch of the framework Caffe ([caffe-fast-rcnn](https://github.com/rbgirshick/caffe-fast-rcnn/tree/0dcd397b29507b8314e252e850518c5695efbb83)).
 13 | 
 14 | The `Faster_RCNN` C++ class adapts (copies, modifies and corrects) some functions from the project [FasterRCNN-Encapsulation-Cplusplus](https://github.com/YihangLou/FasterRCNN-Encapsulation-Cplusplus). Many thanks to its author for the source code.
 15 | 
 16 | ### Steps to use the code
 17 | 
 18 | First of all, clone this project with **`--recursive`** flag:
 19 | ```Shell
 20 | git clone --recursive https://github.com/HermannHesse/faster_rcnn_cplusplus.git
 21 | ```
 22 | 
 23 | #### Requirements inherited from py-faster-rcnn
 24 | 
 25 | 1. Requirements for `Caffe` and `pycaffe` (see: [Caffe installation instructions](http://caffe.berkeleyvision.org/installation.html))
 26 | 
 27 |     **Note:** Caffe *must* be built with support for Python layers!
 28 |     ```make
 29 |     # In your Makefile.config, make sure to have this line uncommented
 30 |     WITH_PYTHON_LAYER := 1
 31 |     # Unrelatedly, it's also recommended that you use CUDNN
 32 |     USE_CUDNN := 1
 33 |     ```
 34 |     You can download Ross Girshick [Makefile.config](http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/Makefile.config) for reference.
 35 |   
 36 | 2. Python packages you might not have: `cython`, `python-opencv`, `easydict`
 37 | 
 38 | 3. Build Caffe and pycaffe
 39 |     ```Shell
 40 |     cd $ROOT_DIR/py-faster-rcnn/caffe-fast-rcnn/
 41 |     # Now follow the Caffe installation instructions here:
 42 |     #   http://caffe.berkeleyvision.org/installation.html
 43 | 
 44 |     # If you're experienced with Caffe and have all of the requirements installed
 45 |     # and your Makefile.config in place, then simply do:
 46 |     make -j8 && make pycaffe
 47 |     ```
 48 |     
 49 | 4. Build the Cython modules
 50 |     ```Shell
 51 |     cd $ROOT_DIR/py-faster-rcnn/lib/
 52 |     make
 53 |     ```
 54 | 
 55 | 5. Download pre-computed Faster R-CNN detectors
 56 |     ```Shell
 57 |     cd $ROOT_DIR/py-faster-rcnn/
 58 |     ./data/scripts/fetch_faster_rcnn_models.sh
 59 |     ```
 60 |     
 61 | #### Own requirements
 62 | 
 63 | 1. Add `$ROOT_DIR/py-faster-rcnn/caffe-fast-rcnn/python/` and `$ROOT_DIR/py-faster-rcnn/lib/` to the environment variable `PYTHONPATH`
 64 |     ```Shell
 65 |     export PYTHONPATH=$ROOT_DIR/py-faster-rcnn/caffe-fast-rcnn/python/:$ROOT_DIR/py-faster-rcnn/lib/:${PYTHONPATH}
 66 |     ```
 67 | 
 68 | 2. Compile the project with CMake
 69 |     ```Shell
 70 |     mkdir $ROOT_DIR/build/
 71 |     cd $ROOT_DIR/build/
 72 |     cmake ..
 73 |     make
 74 |     mv faster_rcnn_cplusplus ..
 75 |     ```
 76 | 
 77 | 3. Run the demo
 78 |     ```Shell
 79 |     cd $ROOT_DIR
 80 |     ./faster_rcnn_cplusplus
 81 |     ```
 82 |     
 83 | ### Popular issues
 84 | 
 85 | #### Issue 1
 86 | 
 87 | ```Shell
 88 | "Unknown layer type: Python"
 89 | ```
 90 | Caffe has probably been compiled without PYTHON_LAYER support. Uncomment the following line
 91 | ```make
 92 | WITH_PYTHON_LAYER := 1
 93 | ```
 94 | in `$ROOT_DIR/py-faster-rcnn/caffe-fast-rcnn/Makefile.config` file and recompile it.
 95 | ```Shell
 96 | cd $ROOT_DIR/py-faster-rcnn/caffe-fast-rcnn/
 97 | make clean
 98 | make -j8 && make pycaffe
 99 | ```
100 | 
101 | #### Issue 2
102 | 
103 | ```Shell
104 | fatal error: caffe/proto/caffe.pb.h: No such file or directory
105 | #include "caffe/proto/caffe.pb.h"
106 |                                   ^
107 | compilation terminated.
108 | ```
109 | `caffe.pb.h` is a header file generated by Google Protocol Buffer and it is missing for some reason. Let's create it.
110 | ```Shell
111 | cd $ROOT_DIR/py-faster-rcnn/caffe-fast-rcnn/
112 | protoc src/caffe/proto/caffe.proto --cpp_out=.
113 | mkdir include/caffe/proto
114 | mv src/caffe/proto/caffe.pb.h include/caffe/proto
115 | ```
116 | Reference full discussion from [here](https://github.com/NVIDIA/DIGITS/issues/105).
117 | 
118 | #### Issue 3
119 | 
120 | When the net is loading Caffe can't find rpn python layer:
121 | ```Shell
122 | ImportError: No module named rpn.proposal_layer
123 | ```
124 | Add `lib/` directory to `PYTHONPATH`:
125 | ```Shell
126 | export PYTHONPATH=$ROOT_DIR/py-faster-rcnn/lib/:${PYTHONPATH} 
127 | ```
128 | 
129 | #### Issue 4
130 | 
131 | Python can't find gpu_nms library:
132 | ```Shell
133 | from nms.gpu_nms import gpu_nms
134 | ImportError: No module named gpu_nms
135 | ```
136 | The libraries `gpu_nms.so` and `cpu_nms.so` must be compiled, which is done by running the fourth step:
137 | ```Shell
138 | cd $ROOT_DIR/py-faster-rcnn/lib/
139 | make
140 | ```
141 | 
142 | #### Issue 5
143 | 
144 | ```Shell
145 | /usr/bin/ld: cannot find -lopencv_dep_cudart 
146 | collect2: error: ld returned 1 exit status
147 | ```
148 | There is a problem with the `CMakeCache.txt` and the CMake variable `CUDA_USE_STATIC_CUDA_RUNTIME`.
149 | To solve this issue, set it to `OFF` in `CMakeLists.txt`: 
150 | ```Shell
151 | # Add the following line to CMakeLists.txt and recompile
152 | set(CUDA_USE_STATIC_CUDA_RUNTIME "OFF")
153 | ```
154 | or generate CMake files with:
155 | ```Shell
156 | cd $ROOT_DIR/build/
157 | cmake .. -D CUDA_USE_STATIC_CUDA_RUNTIME=OFF
158 | ```
159 | Reference full discussion from [here](https://github.com/opencv/opencv/issues/6542).
160 | 


--------------------------------------------------------------------------------
/data/pascal_voc/synset_words.txt:
--------------------------------------------------------------------------------
 1 | bg
 2 | aeroplane
 3 | bicycle
 4 | bird
 5 | boat
 6 | bottle
 7 | bus
 8 | car
 9 | cat
10 | chair
11 | cow
12 | diningtable
13 | dog
14 | horse
15 | motorbike
16 | person
17 | pottedplant
18 | sheep
19 | sofa
20 | train
21 | tvmonitor


--------------------------------------------------------------------------------
/faster_rcnn.cpp:
--------------------------------------------------------------------------------
  1 | #include "faster_rcnn.hpp"
  2 | 
  3 | using namespace caffe;
  4 | using namespace std;
  5 | 
  6 | /*
  7 |  * ===  FUNCTION  ======================================================================
  8 |  *         Name:  Faster_RCNN
  9 |  *  Description:  Load the model file and weights file
 10 |  *       Output:  Constructor
 11 |  * =====================================================================================
 12 |  */
 13 | Faster_RCNN::Faster_RCNN(const string& model_file, const string& weights_file, const string& labels_file, const int GPUID) {
 14 |     Caffe::SetDevice(GPUID);
 15 |     Caffe::set_mode(Caffe::GPU);
 16 |     net_ = std::shared_ptr<Net<float> >(new Net<float>(model_file, caffe::TEST));
 17 |     net_->CopyTrainedLayersFrom(weights_file);
 18 | 
 19 |     CHECK_EQ(net_->num_inputs(), 2) << "Network should have exactly two inputs.";
 20 |     CHECK_EQ(net_->num_outputs(), 2) << "Network should have exactly two outputs.";
 21 | 
 22 |     Blob<float>* input_layer = net_->input_blobs()[0];
 23 |     num_channels_ = input_layer->channels();
 24 |     CHECK_EQ(num_channels_, 3) << "Input layer should have 3 channels.";
 25 | 
 26 |     /* Load labels. */
 27 |     std::ifstream labels(labels_file.c_str());
 28 |     CHECK(labels) << "Unable to open labels file " << labels_file;
 29 |     string line;
 30 |     while (std::getline(labels, line))
 31 |         labels_.push_back(string(line));
 32 | 
 33 |     Blob<float>* cls_layer = net_->output_blobs()[1];
 34 |     Blob<float>* bbox_layer = net_->output_blobs()[0];
 35 |     CHECK_EQ(labels_.size(), cls_layer->channels()) << "Number of labels is different from the output layer dimension.";
 36 |     CHECK_EQ(labels_.size()*4, bbox_layer->channels()) << "Number of labels is different from the output layer dimension.";
 37 |     num_clases_ = labels_.size();
 38 | 
 39 | }
 40 | 
 41 | /*
 42 |  * ===  FUNCTION  ======================================================================
 43 |  *         Name:  Detect
 44 |  *  Description:  Perform detection operation
 45 |  *                Warning the max input size should less than 1000*600
 46 |  *       Output:  vector<Detection> detections
 47 |  * =====================================================================================
 48 |  */
 49 | void Faster_RCNN::detect(cv::Mat cv_image, vector<Detection>& detections) {
 50 | 
 51 |     if(cv_image.empty()) {
 52 |         std::cout<<"Can not reach the image"<<std::endl;
 53 |         return;
 54 |     }
 55 |     
 56 |     int height = cv_image.rows;
 57 |     int width = cv_image.cols;
 58 | 
 59 |     /* It is necessary to pass as pointers in order to keep them in the net */
 60 |     float *im_info = new float[3];
 61 | 
 62 |     preprocess(cv_image, im_info);
 63 | 
 64 |     int height_resized = int(im_info[0]);
 65 |     int width_resized = int(im_info[1]);
 66 |     float img_scale = im_info[2];
 67 | 
 68 |     net_->ForwardFrom(0);
 69 |     const float* bbox_delt = net_->blob_by_name("bbox_pred")->cpu_data();
 70 |     const float* pred_cls = net_->blob_by_name("cls_prob")->cpu_data(); // Array n*5 con las probabilidades de las cinco clases para cada roi
 71 |     const float* rois = net_->blob_by_name("rois")->cpu_data(); // Array n*5 donde en for 0<n:[n*5+0] == 0 y de [1-4] son las cordenadas de la roi
 72 |     const int num_rois = net_->blob_by_name("rois")->num();
 73 | 
 74 |     float *boxes = new float[num_rois*4];
 75 |     float *pred = new float[num_rois*5*num_clases_];
 76 |     int *num_keep = new int[num_clases_];
 77 | 
 78 |     float **pred_per_class = new float*[num_clases_];
 79 |     float **sorted_pred_cls = new float*[num_clases_];
 80 |     int **keep = new int*[num_clases_];
 81 |     for (int i = 0; i < num_clases_; i++) {
 82 |         pred_per_class[i] = new float[num_rois*5];
 83 |         sorted_pred_cls[i] = new float[num_rois*5];
 84 |         keep[i] = new int[num_rois];
 85 |     }
 86 | 
 87 | 
 88 |     for (int n = 0; n < num_rois; n++)
 89 |         for (int c = 0; c < 4; c++)
 90 |             boxes[n*4+c] = rois[n*5+c+1] / img_scale; //rois[n*5] == 0 SIEMPRE
 91 | 
 92 |     bbox_transform_inv(num_rois, bbox_delt, pred_cls, boxes, pred, height, width);
 93 | 
 94 |     /* Background class is ignored hereafter */
 95 |     for (int i = 1; i < num_clases_; i++) {
 96 |         for (int j = 0; j < num_rois; j++)
 97 |             for (int k = 0; k < 5; k++)
 98 |                 pred_per_class[i][j*5+k] = pred[(i*num_rois+j)*5+k];
 99 | 
100 |         bbox_sort(num_rois, pred_per_class[i], sorted_pred_cls[i]);
101 |         _nms(keep[i], &num_keep[i], sorted_pred_cls[i], num_rois, 5, NMS_THRESH, 0);
102 | 
103 |     }
104 | 
105 |     int k = 0;
106 |     for (int i = 1; i < num_clases_; i ++) {
107 |         while (sorted_pred_cls[i][keep[i][k] * 5 + 4] > CONF_THRESH && k < num_keep[i]) {
108 | 
109 |             Detection aux;
110 |             aux.x = sorted_pred_cls[i][keep[i][k] * 5 + 0];
111 |             aux.y = sorted_pred_cls[i][keep[i][k] * 5 + 1];
112 |             aux.width = sorted_pred_cls[i][keep[i][k] * 5 + 2] - aux.x;
113 |             aux.height = sorted_pred_cls[i][keep[i][k] * 5 + 3] - aux.y;
114 |             aux.score = sorted_pred_cls[i][keep[i][k] * 5 + 4];
115 |             aux.categoryIndex = i;
116 |             aux.category = labels_[i];
117 |             detections.push_back(aux);
118 |             k++;
119 |         }
120 | 
121 |         k = 0;
122 |     }
123 | 
124 |     delete []im_info;
125 |     delete []boxes;
126 |     delete []pred;
127 |     for (int i = 0; i < num_clases_; i++) {
128 |         delete []pred_per_class[i];
129 |         delete []sorted_pred_cls[i];
130 |         delete []keep[i];
131 |     }
132 |     delete []pred_per_class;
133 |     delete []sorted_pred_cls;
134 |     delete []keep;
135 | 
136 | }
137 | 
138 | /*
139 |  * ===  FUNCTION  ======================================================================
140 |  *         Name:  Preprocess
141 |  *  Description:  Compute mean substract
142 |  *                Copy input image into the net
143 |  *       Output:  float im_info[height, width, img_scale]
144 |  * 	              It is need to pass "im_info" through the functions to keep the net
145 |  *                blob("im_info") filled. 
146 |  *                "set_cpu_data" makes just a pointer to the memory
147 |  * =====================================================================================
148 |  */
149 | void Faster_RCNN::preprocess(const cv::Mat cv_image, float* im_info) {
150 | 
151 |     cv::Mat cv_new(cv_image.rows, cv_image.cols, CV_32FC3, cv::Scalar(0,0,0));
152 | 
153 |     int height = cv_image.rows;
154 |     int width = cv_image.cols;
155 | 
156 |     /* Mean normalization (in this case it may not be the average of the training) */
157 |     for (int h = 0; h < height; ++h ) {
158 |         for (int w = 0; w < width; ++w) {
159 |             cv_new.at<cv::Vec3f>(cv::Point(w, h))[0] = float(cv_image.at<cv::Vec3b>(cv::Point(w, h))[0])-float(102.9801);
160 |             cv_new.at<cv::Vec3f>(cv::Point(w, h))[1] = float(cv_image.at<cv::Vec3b>(cv::Point(w, h))[1])-float(115.9465);
161 |             cv_new.at<cv::Vec3f>(cv::Point(w, h))[2] = float(cv_image.at<cv::Vec3b>(cv::Point(w, h))[2])-float(122.7717);
162 |         }
163 |     }
164 | 
165 |     /* Max image size comparation to know if resize is needed */
166 |     int max_side = MAX(height, width);
167 |     int min_side = MIN(height, width);
168 | 
169 |     float max_side_scale = float(max_side) / MAX_INPUT_SIDE;
170 |     float min_side_scale = float(min_side) / MIN_INPUT_SIDE;
171 |     float max_scale = MAX(max_side_scale, min_side_scale);
172 |     float img_scale = 1;
173 | 
174 |     if(max_scale > 1)
175 |         img_scale = float(1) / max_scale;
176 | 
177 |     int height_resized = int(height * img_scale);
178 |     int width_resized = int(width * img_scale);
179 | 
180 |     cv::Mat cv_resized;
181 |     cv::resize(cv_new, cv_resized, cv::Size(width_resized, height_resized));
182 | 
183 |     float data_buf[height_resized*width_resized*3];
184 | 
185 |     for (int h = 0; h < height_resized; ++h )
186 |     {
187 |         for (int w = 0; w < width_resized; ++w)
188 |         {
189 |             data_buf[(0*height_resized+h)*width_resized+w] = float(cv_resized.at<cv::Vec3f>(cv::Point(w, h))[0]);
190 |             data_buf[(1*height_resized+h)*width_resized+w] = float(cv_resized.at<cv::Vec3f>(cv::Point(w, h))[1]);
191 |             data_buf[(2*height_resized+h)*width_resized+w] = float(cv_resized.at<cv::Vec3f>(cv::Point(w, h))[2]);
192 |         }
193 |     }
194 | 
195 |     net_->blob_by_name("data")->Reshape(1, num_channels_, height_resized, width_resized);
196 |     //net_->blob_by_name("data")->set_cpu_data(data_buf);
197 |     Blob<float> * input_blobs= net_->input_blobs()[0];
198 |     switch(Caffe::mode()){
199 |         case Caffe::CPU:
200 |             memcpy(input_blobs->mutable_cpu_data(), data_buf, sizeof(float) * input_blobs->count());
201 |             break;
202 |         case Caffe::GPU:
203 |             caffe_gpu_memcpy(sizeof(float)* input_blobs->count(), data_buf, input_blobs->mutable_gpu_data());
204 |             break;
205 |         default:
206 |             LOG(FATAL)<<"Unknow Caffe mode";
207 |     }
208 | 
209 |     im_info[0] = height_resized;
210 |     im_info[1] = width_resized;
211 |     im_info[2] = img_scale;
212 | 
213 |     net_->blob_by_name("im_info")->set_cpu_data(im_info);
214 | }
215 | 
216 | /*
217 |  * ===  FUNCTION  ======================================================================
218 |  *         Name:  vis_detections
219 |  *  Description:  Visuallize the detection result
220 |  *       Output:  None
221 |  * =====================================================================================
222 |  */
223 | void Faster_RCNN::vis_detections(cv::Mat& cv_image, vector<Detection> detections)
224 | {
225 |     int fontFace = cv::FONT_HERSHEY_PLAIN;
226 |     double fontScale = 2;
227 |     double thickness = 1.5;
228 |     int baseline = 0;
229 | 
230 |     for(int i = 0; i < detections.size(); i++) {
231 |         cv::rectangle(cv_image, cv::Point(detections[i].x,detections[i].y),
232 |                       cv::Point(detections[i].x + detections[i].width,detections[i].y + detections[i].height),
233 |                       cv::Scalar(0, 0, 255), 2, 8, 0);
234 |         string text = detections[i].category + " " + std::to_string(detections[i].score);
235 |         cv::Size textSize = cv::getTextSize(text, fontFace, fontScale, thickness, &baseline);
236 |         cv::rectangle(cv_image, cv::Point(detections[i].x,detections[i].y - 2),
237 |                       cv::Point(detections[i].x + textSize.width/1.3, detections[i].y - 2 - textSize.height),
238 |                       cv::Scalar::all(180), CV_FILLED);
239 |         cv::putText(cv_image, text, cv::Point(detections[i].x,detections[i].y - 2),
240 |                     fontFace, thickness, cv::Scalar::all(0), fontScale, 8);
241 |     }
242 | 
243 |     cv::imshow("Detections", cv_image);
244 |     cv::waitKey(0);
245 | }
246 | 
247 | /*
248 |  * ===  FUNCTION  ======================================================================
249 |  *         Name:  boxes_sort
250 |  *  Description:  Sort the bounding box according score
251 |  *       Output:  float* sorted_pred
252 |  *                An ordered pointer derived from "pred" by its score 
253 |  * =====================================================================================
254 |  */
255 | void Faster_RCNN::bbox_sort(const int num_rois, const float* pred, float* sorted_pred)
256 | {
257 |     vector<Info> my;
258 |     Info tmp;
259 |     for (int i = 0; i< num_rois; i++) {
260 |         tmp.score = pred[i*5 + 4];
261 |         tmp.head = pred + i*5;
262 |         my.push_back(tmp);
263 |     }
264 | 
265 |     std::sort(my.begin(), my.end());
266 | 
267 |     for (int i=0; i < num_rois; i++)
268 |         for (int j=0; j<5; j++)
269 |             sorted_pred[i*5+j] = my[i].head[j];
270 | }
271 | 
272 | /*
273 |  * ===  FUNCTION  ======================================================================
274 |  *         Name:  bbox_transform_inv
275 |  *  Description:  Compute bounding box regression value
276 |  *       Output:  float* pred
277 |  *                A pointer "pred" is formed with predictions [x, y, width, height, 
278 |  *                pred_category] ordered in such a way that all predictions of category
279 |  *                '0' go first, those of category '1' after...
280 |  *                So that the predictions derived from roi[0] appear in the pred[0 to 4] 
281 |  *                for category '0' and in the pred[(1*class_num+0)*5 to 
282 |  *                (1*class_num+0)*5+4] for category '1'
283 |  * =====================================================================================
284 |  */
285 | void Faster_RCNN::bbox_transform_inv(int num_rois, const float* box_deltas, const float* pred_cls, float* boxes, float* pred, int img_height, int img_width)  {
286 |     
287 |     float width, height, ctr_x, ctr_y, dx, dy, dw, dh, pred_ctr_x, pred_ctr_y, pred_w, pred_h;
288 | 
289 |     for(int i = 0; i < num_rois; i++) {
290 | 
291 |         width = boxes[i*4+2] - boxes[i*4+0] + 1.0;
292 |         height = boxes[i*4+3] - boxes[i*4+1] + 1.0;
293 |         ctr_x = boxes[i*4+0] + 0.5 * width;
294 |         ctr_y = boxes[i*4+1] + 0.5 * height;
295 | 
296 |         for (int j=0; j< num_clases_; j++) {
297 | 
298 |             dx = box_deltas[(i*num_clases_+j)*4+0];
299 |             dy = box_deltas[(i*num_clases_+j)*4+1];
300 |             dw = box_deltas[(i*num_clases_+j)*4+2];
301 |             dh = box_deltas[(i*num_clases_+j)*4+3];
302 |             pred_ctr_x = ctr_x + width*dx;
303 |             pred_ctr_y = ctr_y + height*dy;
304 |             pred_w = width * exp(dw);
305 |             pred_h = height * exp(dh);
306 |             pred[(j*num_rois+i)*5+0] = MAX(MIN(pred_ctr_x - 0.5* pred_w, img_width -1), 0);
307 |             pred[(j*num_rois+i)*5+1] = MAX(MIN(pred_ctr_y - 0.5* pred_h, img_height -1), 0);
308 |             pred[(j*num_rois+i)*5+2] = MAX(MIN(pred_ctr_x + 0.5* pred_w, img_width -1), 0);
309 |             pred[(j*num_rois+i)*5+3] = MAX(MIN(pred_ctr_y + 0.5* pred_h, img_height -1), 0);
310 |             pred[(j*num_rois+i)*5+4] = pred_cls[i*num_clases_+j];
311 |         }
312 |     }
313 | 
314 | }
315 | 


--------------------------------------------------------------------------------
/faster_rcnn.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef FASTER_RCNN_HPP
 2 | #define FASTER_RCNN_HPP
 3 | 
 4 | #include <string>
 5 | #include <vector>
 6 | #include <math.h>
 7 | #include <fstream>
 8 | #include <boost/python.hpp>
 9 | #include "caffe/caffe.hpp"
10 | #include "py-faster-rcnn/lib/nms/gpu_nms.hpp"
11 | #include <opencv2/core/core.hpp>
12 | #include <opencv2/highgui/highgui.hpp>
13 | #include <opencv2/imgproc/imgproc.hpp>
14 | 
15 | using namespace caffe;
16 | using namespace std;
17 | 
/* Score a detection must exceed to be reported */
#define CONF_THRESH 0.8
/* Threshold handed to the non-maximum suppression step */
#define NMS_THRESH 0.3
/* Side limits used by preprocess() to decide whether an image is downscaled.
 * No trailing ';': the macros must be usable inside any expression. */
#define MAX_INPUT_SIDE 1000
#define MIN_INPUT_SIDE 600
22 | 
/*
 * ===  Struct  ======================================================================
 *         Name:  Detection
 *  Description:  One detected object, as returned by Faster_RCNN::detect
 * =====================================================================================
 */
struct Detection {
    /* Corner of the box with the smallest coordinates */
    float x, y;
    /* Extent of the box, measured from (x, y) */
    float width, height;
    /* Class score of the box */
    float score;
    /* Index of the class in the labels file */
    int categoryIndex;
    /* Label text of that class */
    std::string category;
};
38 | 
/*
 * ===  Struct  ======================================================================
 *         Name:  Info
 *  Description:  Used for bbox sort: the score of a box and a pointer to the
 *                first value of that box
 * =====================================================================================
 */
struct Info {
    float score;
    const float* head;

    /* Inverted on purpose: an entry is "less" when its score is HIGHER, so
     * std::sort yields descending scores. Declared const so it also works on
     * const objects and through const references. */
    bool operator <(const Info& info) const {
        return (info.score < score);
    }
};
53 | 
54 | /*
55 |  * ===  Class  ======================================================================
56 |  *         Name:  Faster_RCNN
57 |  *  Description:  FasterRCNN C++ Detector
58 |  * =====================================================================================
59 |  */
60 | class Faster_RCNN {
61 | public:
62 |     Faster_RCNN(const string& model_file, const string& weights_file,
63 |                 const string& labels_file, const int GPUID);
64 |     void detect(cv::Mat cv_image, vector<Detection>& detections);
65 |     void vis_detections(cv::Mat& cv_image, vector<Detection> detections);
66 | 
67 | private:
68 |     void preprocess(const cv::Mat cv_image, float *im_info);
69 |     void bbox_sort(int num, const float* pred, float *sorted_pred);
70 |     void bbox_transform_inv(const int num_rois, const float* box_deltas, const float* pred_cls,
71 |                             float* boxes, float* pred, int img_height, int img_width);
72 | 
73 | private:
74 |     Faster_RCNN(){}
75 |     std::shared_ptr<Net<float> > net_;
76 |     std::vector<string> labels_;
77 |     int num_channels_;
78 |     int num_clases_;
79 | };
80 | 
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
 1 | #include "faster_rcnn.hpp"
 2 | 
 3 | int main() {
 4 | 
 5 |     string model_file = "py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt";
 6 |     string weights_file = "py-faster-rcnn/data/faster_rcnn_models/ZF_faster_rcnn_final.caffemodel";
 7 |     string labels_file = "data/pascal_voc/synset_words.txt";
 8 |     int GPUID = 0;
 9 | 
10 |     Faster_RCNN detector(model_file, weights_file, labels_file, GPUID);
11 | 
12 |     vector<Detection> detections;
13 |     cv::Mat image = cv::imread("py-faster-rcnn/data/demo/001763.jpg");
14 |     detector.detect(image, detections);
15 | 
16 |     std::cout<<"x\ty\twidth\theight\tcategory\tscore"<<std::endl;
17 |     for(int i = 0; i < detections.size(); i++) {
18 |         std::cout<<detections[i].x<<"\t"<<detections[i].y<<"\t"
19 |                  <<detections[i].width<<"\t"<<detections[i].height<<"\t"
20 |                  <<detections[i].category<<"\t"<<detections[i].score<<std::endl;
21 |     }
22 | 
23 |     detector.vis_detections(image, detections);
24 |     return 0;
25 | }
26 | 


--------------------------------------------------------------------------------