├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── data └── pascal_voc │ └── synset_words.txt ├── faster_rcnn.cpp ├── faster_rcnn.hpp └── main.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | faster_rcnn_cplusplus 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "py-faster-rcnn"] 2 | path = py-faster-rcnn 3 | url = https://github.com/rbgirshick/py-faster-rcnn 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(faster_rcnn_cplusplus) 3 | 4 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 5 | 6 | set(PYTHON_INCLUDE_DIR "/usr/include/python2.7") 7 | set(CAFFE_INCLUDE_DIR "py-faster-rcnn/caffe-fast-rcnn/include") 8 | set(CAFFE_LIBRARY_DIR "py-faster-rcnn/caffe-fast-rcnn/build/lib") 9 | 10 | find_package(OpenCV REQUIRED) 11 | find_package(CUDA REQUIRED) 12 | 13 | include_directories(${PYTHON_INCLUDE_DIR} ${CAFFE_INCLUDE_DIR}) 14 | link_directories(${CAFFE_LIBRARY_DIR}) 15 | 16 | set(SOURCE_FILES main.cpp faster_rcnn.cpp py-faster-rcnn/lib/nms/nms_kernel.cu) 17 | cuda_add_executable(faster_rcnn_cplusplus ${SOURCE_FILES}) 18 | target_link_libraries(faster_rcnn_cplusplus caffe python2.7 glog boost_system opencv_highgui opencv_core opencv_imgproc) 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Faster R-CNN C++ Inference 2 | 3 | Copyright (c) 2016 Centro de Investigación en Tecnoloxías da Información (CITIUS) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and 
associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | 23 | ************************************************************************ 24 | 25 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION 26 | 27 | This project incorporates material from the project(s) listed below 28 | (collectively, "Third Party Code"). I'm not theoriginal author of the 29 | Third Party Code. The original copyright notice and license under which 30 | I received such Third Party Code are set out below. This Third Party 31 | Code is licensed to you under their original license terms set forth 32 | below. I reserve all other rights not expressly granted, whether by 33 | implication, estoppel or otherwise. 34 | 35 | 1. 
py-faster-rcnn, (https://github.com/deboc/py-faster-rcnn) 36 | 37 | COPYRIGHT 38 | 39 | The MIT License (MIT) 40 | 41 | Copyright (c) 2015 Microsoft Corporation 42 | 43 | Permission is hereby granted, free of charge, to any person obtaining a copy 44 | of this software and associated documentation files (the "Software"), to deal 45 | in the Software without restriction, including without limitation the rights 46 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 47 | copies of the Software, and to permit persons to whom the Software is 48 | furnished to do so, subject to the following conditions: 49 | 50 | The above copyright notice and this permission notice shall be included in 51 | all copies or substantial portions of the Software. 52 | 53 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 54 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 55 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 56 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 57 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 58 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 59 | THE SOFTWARE. 60 | 61 | 2. Caffe, (https://github.com/BVLC/caffe/) 62 | 63 | COPYRIGHT 64 | 65 | All contributions by the University of California: 66 | Copyright (c) 2014, 2015, The Regents of the University of California (Regents) 67 | All rights reserved. 68 | 69 | All other contributions: 70 | Copyright (c) 2014, 2015, the respective contributors 71 | All rights reserved. 72 | 73 | Caffe uses a shared copyright model: each contributor holds copyright 74 | over their contributions to Caffe. The project versioning records all 75 | such contribution and copyright details. 
If a contributor wants to 76 | further mark their specific copyright on a particular contribution, 77 | they should indicate their copyright solely in the commit message of 78 | the change when it is committed. 79 | 80 | The BSD 2-Clause License 81 | 82 | Redistribution and use in source and binary forms, with or without 83 | modification, are permitted provided that the following conditions 84 | are met: 85 | 86 | 1. Redistributions of source code must retain the above copyright notice, 87 | this list of conditions and the following disclaimer. 88 | 89 | 2. Redistributions in binary form must reproduce the above copyright 90 | notice, this list of conditions and the following disclaimer in the 91 | documentation and/or other materials provided with the distribution. 92 | 93 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 94 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 95 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 96 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 97 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 98 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 99 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 100 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 101 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 102 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 103 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
104 | 105 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION********** 106 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Faster R-CNN C++ Inference 2 | 3 | This project is a C++ inference interface for a [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn) trained network. 4 | 5 | From the code developed by Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun (Microsoft Research) for the project [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn), some modules and files from the `py-faster-rcnn/lib` and `py-faster-rcnn/data/scripts/` folders are used directly: 6 | - **RPN module** (`py-faster-rcnn/lib/rpn/`): Needed to deploy the *region proposal network*. 7 | - **NMS module** (`py-faster-rcnn/lib/nms/`): Needed to apply the *non-maximum suppression* step. 8 | - **Fast_rcnn module** (`py-faster-rcnn/lib/fast_rcnn/`): Contains auxiliary functions. 9 | - **`py-faster-rcnn/lib/Makefile` and `py-faster-rcnn/lib/setup.py`**: To compile NMS CUDA and Cython libraries. 10 | - **`py-faster-rcnn/data/scripts/fetch_faster_rcnn_models.sh`**: To download pre-computed Faster R-CNN detectors. 11 | 12 | The py-faster-rcnn code is added as a submodule to the present project for convenience. It also uses their branch of the framework Caffe ([caffe-fast-rcnn](https://github.com/rbgirshick/caffe-fast-rcnn/tree/0dcd397b29507b8314e252e850518c5695efbb83)). 13 | 14 | The `Faster_RCNN` C++ class adapts (copies/modifies/corrects) some functions from the project [FasterRCNN-Encapsulation-Cplusplus](https://github.com/YihangLou/FasterRCNN-Encapsulation-Cplusplus). Many thanks to its author for the source code. 
15 | 16 | ### Steps to use the code 17 | 18 | First of all, clone this project with the **`--recursive`** flag: 19 | ```Shell 20 | git clone --recursive https://github.com/HermannHesse/faster_rcnn_cplusplus.git 21 | ``` 22 | 23 | #### Requirements inherited from py-faster-rcnn 24 | 25 | 1. Requirements for `Caffe` and `pycaffe` (see: [Caffe installation instructions](http://caffe.berkeleyvision.org/installation.html)) 26 | 27 | **Note:** Caffe *must* be built with support for Python layers! 28 | ```make 29 | # In your Makefile.config, make sure to have this line uncommented 30 | WITH_PYTHON_LAYER := 1 31 | # Unrelatedly, it's also recommended that you use CUDNN 32 | USE_CUDNN := 1 33 | ``` 34 | You can download Ross Girshick's [Makefile.config](http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/Makefile.config) for reference. 35 | 36 | 2. Python packages you might not have: `cython`, `python-opencv`, `easydict` 37 | 38 | 3. Build Caffe and pycaffe 39 | ```Shell 40 | cd $ROOT_DIR/py-faster-rcnn/caffe-fast-rcnn/ 41 | # Now follow the Caffe installation instructions here: 42 | # http://caffe.berkeleyvision.org/installation.html 43 | 44 | # If you're experienced with Caffe and have all of the requirements installed 45 | # and your Makefile.config in place, then simply do: 46 | make -j8 && make pycaffe 47 | ``` 48 | 49 | 4. Build the Cython modules 50 | ```Shell 51 | cd $ROOT_DIR/py-faster-rcnn/lib/ 52 | make 53 | ``` 54 | 55 | 5. Download pre-computed Faster R-CNN detectors 56 | ```Shell 57 | cd $ROOT_DIR/py-faster-rcnn/ 58 | ./data/scripts/fetch_faster_rcnn_models.sh 59 | ``` 60 | 61 | #### Own requirements 62 | 63 | 1. Add `$ROOT_DIR/py-faster-rcnn/caffe-fast-rcnn/python/` and `$ROOT_DIR/py-faster-rcnn/lib/` to the environment variable `PYTHONPATH` 64 | ```Shell 65 | export PYTHONPATH=$ROOT_DIR/py-faster-rcnn/caffe-fast-rcnn/python/:$ROOT_DIR/py-faster-rcnn/lib/:${PYTHONPATH} 66 | ``` 67 | 68 | 2. 
Compile the project with CMake 69 | ```Shell 70 | mkdir $ROOT_DIR/build/ 71 | cd $ROOT_DIR/build/ 72 | cmake .. 73 | make 74 | mv faster_rcnn_cplusplus .. 75 | ``` 76 | 77 | 3. Run the demo 78 | ```Shell 79 | cd $ROOT_DIR 80 | ./faster_rcnn_cplusplus 81 | ``` 82 | 83 | ### Popular issues 84 | 85 | #### Issue 1 86 | 87 | ```Shell 88 | "Unknown layer type: Python" 89 | ``` 90 | Caffe has been probably compiled without PYTHON_LAYER support. Uncomment the following line 91 | ```make 92 | WITH_PYTHON_LAYER := 1 93 | ``` 94 | in `$ROOT_DIR/py-faster-rcnn/caffe-fast-rcnn/Makefile.config` file and recompile it. 95 | ```Shell 96 | cd $ROOT_DIR/py-faster-rcnn/caffe-fast-rcnn/ 97 | make clean 98 | make -j8 && make pycaffe 99 | ``` 100 | 101 | #### Issue 2 102 | 103 | ```Shell 104 | fatal error: caffe/proto/caffe.pb.h: No such file or directory 105 | #include "caffe/proto/caffe.pb.h" 106 | ^ 107 | compilation terminated. 108 | ``` 109 | `caffe.pb.h` is a header file generated by Google Protocol Buffer and it is missing for some reason. Let's create it. 110 | ```Shell 111 | cd $ROOT_DIR/py-faster-rcnn/caffe-fast-rcnn/ 112 | protoc src/caffe/proto/caffe.proto --cpp_out=. 113 | mkdir include/caffe/proto 114 | mv src/caffe/proto/caffe.pb.h include/caffe/proto 115 | ``` 116 | Reference full discussion from [here](https://github.com/NVIDIA/DIGITS/issues/105). 
117 | 118 | #### Issue 3 119 | 120 | While the net is loading, Caffe can't find the rpn Python layer: 121 | ```Shell 122 | ImportError: No module named rpn.proposal_layer 123 | ``` 124 | Add the `lib/` directory to `PYTHONPATH`: 125 | ```Shell 126 | export PYTHONPATH=$ROOT_DIR/py-faster-rcnn/lib/:${PYTHONPATH} 127 | ``` 128 | 129 | #### Issue 4 130 | 131 | Python can't find the gpu_nms library: 132 | ```Shell 133 | from nms.gpu_nms import gpu_nms 134 | ImportError: No module named gpu_nms 135 | ``` 136 | The libraries `gpu_nms.so` and `cpu_nms.so` must be compiled, which is done by running the fourth step: 137 | ```Shell 138 | cd $ROOT_DIR/py-faster-rcnn/lib/ 139 | make 140 | ``` 141 | 142 | #### Issue 5 143 | 144 | ```Shell 145 | /usr/bin/ld: cannot find -lopencv_dep_cudart 146 | collect2: error: ld returned 1 exit status 147 | ``` 148 | There is a problem with the `CMakeCache.txt` and the CMake variable `CUDA_USE_STATIC_CUDA_RUNTIME`. 149 | To solve this issue, set it to `OFF` in `CMakeLists.txt`: 150 | ```Shell 151 | # Add the following line to CMakeLists.txt and recompile 152 | set(CUDA_USE_STATIC_CUDA_RUNTIME "OFF") 153 | ``` 154 | or generate CMake files with: 155 | ```Shell 156 | cd $ROOT_DIR/build/ 157 | cmake .. -D CUDA_USE_STATIC_CUDA_RUNTIME=OFF 158 | ``` 159 | Reference full discussion from [here](https://github.com/opencv/opencv/issues/6542). 
160 | -------------------------------------------------------------------------------- /data/pascal_voc/synset_words.txt: -------------------------------------------------------------------------------- 1 | bg 2 | aeroplane 3 | bicycle 4 | bird 5 | boat 6 | bottle 7 | bus 8 | car 9 | cat 10 | chair 11 | cow 12 | diningtable 13 | dog 14 | horse 15 | motorbike 16 | person 17 | pottedplant 18 | sheep 19 | sofa 20 | train 21 | tvmonitor -------------------------------------------------------------------------------- /faster_rcnn.cpp: -------------------------------------------------------------------------------- 1 | #include "faster_rcnn.hpp" 2 | 3 | using namespace caffe; 4 | using namespace std; 5 | 6 | /* 7 | * === FUNCTION ====================================================================== 8 | * Name: Faster_RCNN 9 | * Description: Load the model file and weights file 10 | * Output: Constructor 11 | * ===================================================================================== 12 | */ 13 | Faster_RCNN::Faster_RCNN(const string& model_file, const string& weights_file, const string& labels_file, const int GPUID) { 14 | Caffe::SetDevice(GPUID); 15 | Caffe::set_mode(Caffe::GPU); 16 | net_ = std::shared_ptr >(new Net(model_file, caffe::TEST)); 17 | net_->CopyTrainedLayersFrom(weights_file); 18 | 19 | CHECK_EQ(net_->num_inputs(), 2) << "Network should have exactly two inputs."; 20 | CHECK_EQ(net_->num_outputs(), 2) << "Network should have exactly two outputs."; 21 | 22 | Blob* input_layer = net_->input_blobs()[0]; 23 | num_channels_ = input_layer->channels(); 24 | CHECK_EQ(num_channels_, 3) << "Input layer should have 3 channels."; 25 | 26 | /* Load labels. 
*/ 27 | std::ifstream labels(labels_file.c_str()); 28 | CHECK(labels) << "Unable to open labels file " << labels_file; 29 | string line; 30 | while (std::getline(labels, line)) 31 | labels_.push_back(string(line)); 32 | 33 | Blob* cls_layer = net_->output_blobs()[1]; 34 | Blob* bbox_layer = net_->output_blobs()[0]; 35 | CHECK_EQ(labels_.size(), cls_layer->channels()) << "Number of labels is different from the output layer dimension."; 36 | CHECK_EQ(labels_.size()*4, bbox_layer->channels()) << "Number of labels is different from the output layer dimension."; 37 | num_clases_ = labels_.size(); 38 | 39 | } 40 | 41 | /* 42 | * === FUNCTION ====================================================================== 43 | * Name: Detect 44 | * Description: Perform detection operation 45 | * Warning the max input size should less than 1000*600 46 | * Output: vector detections 47 | * ===================================================================================== 48 | */ 49 | void Faster_RCNN::detect(cv::Mat cv_image, vector& detections) { 50 | 51 | if(cv_image.empty()) { 52 | std::cout<<"Can not reach the image"<ForwardFrom(0); 69 | const float* bbox_delt = net_->blob_by_name("bbox_pred")->cpu_data(); 70 | const float* pred_cls = net_->blob_by_name("cls_prob")->cpu_data(); // Array n*5 con las probabilidades de las cinco clases para cada roi 71 | const float* rois = net_->blob_by_name("rois")->cpu_data(); // Array n*5 donde en for 0blob_by_name("rois")->num(); 73 | 74 | float *boxes = new float[num_rois*4]; 75 | float *pred = new float[num_rois*5*num_clases_]; 76 | int *num_keep = new int[num_clases_]; 77 | 78 | float **pred_per_class = new float*[num_clases_]; 79 | float **sorted_pred_cls = new float*[num_clases_]; 80 | int **keep = new int*[num_clases_]; 81 | for (int i = 0; i < num_clases_; i++) { 82 | pred_per_class[i] = new float[num_rois*5]; 83 | sorted_pred_cls[i] = new float[num_rois*5]; 84 | keep[i] = new int[num_rois]; 85 | } 86 | 87 | 88 | for (int n = 0; n < 
num_rois; n++) 89 | for (int c = 0; c < 4; c++) 90 | boxes[n*4+c] = rois[n*5+c+1] / img_scale; //rois[n*5] == 0 ALWAYS 91 | 92 | bbox_transform_inv(num_rois, bbox_delt, pred_cls, boxes, pred, height, width); 93 | 94 | /* Background class is ignored hereafter */ 95 | for (int i = 1; i < num_clases_; i++) { 96 | for (int j = 0; j < num_rois; j++) 97 | for (int k = 0; k < 5; k++) 98 | pred_per_class[i][j*5+k] = pred[(i*num_rois+j)*5+k]; 99 | 100 | bbox_sort(num_rois, pred_per_class[i], sorted_pred_cls[i]); 101 | _nms(keep[i], &num_keep[i], sorted_pred_cls[i], num_rois, 5, NMS_THRESH, 0); 102 | 103 | } 104 | 105 | int k = 0; 106 | for (int i = 1; i < num_clases_; i ++) { 107 | /* Test k against num_keep[i] BEFORE indexing keep[i][k]; the previous order read keep[i][k] first */ while (k < num_keep[i] && sorted_pred_cls[i][keep[i][k] * 5 + 4] > CONF_THRESH) { 108 | 109 | Detection aux; 110 | aux.x = sorted_pred_cls[i][keep[i][k] * 5 + 0]; 111 | aux.y = sorted_pred_cls[i][keep[i][k] * 5 + 1]; 112 | aux.width = sorted_pred_cls[i][keep[i][k] * 5 + 2] - aux.x; 113 | aux.height = sorted_pred_cls[i][keep[i][k] * 5 + 3] - aux.y; 114 | aux.score = sorted_pred_cls[i][keep[i][k] * 5 + 4]; 115 | aux.categoryIndex = i; 116 | aux.category = labels_[i]; 117 | detections.push_back(aux); 118 | k++; 119 | } 120 | 121 | k = 0; 122 | } 123 | 124 | delete []im_info; 125 | delete []boxes; 126 | delete []pred; delete []num_keep; /* num_keep is allocated with new[] above and was never released */ 127 | for (int i = 0; i < num_clases_; i++) { 128 | delete []pred_per_class[i]; 129 | delete []sorted_pred_cls[i]; 130 | delete []keep[i]; 131 | } 132 | delete []pred_per_class; 133 | delete []sorted_pred_cls; 134 | delete []keep; 135 | 136 | } 137 | 138 | /* 139 | * === FUNCTION ====================================================================== 140 | * Name: Preprocess 141 | * Description: Compute mean substract 142 | * Copy input image into the net 143 | * Output: float im_info[height, width, img_scale] 144 | * It is need to pass "im_info" through the functions to keep the net 145 | * blob("im_info") filled. 
146 | * "set_cpu_data" makes just a pointer to the memory 147 | * ===================================================================================== 148 | */ 149 | void Faster_RCNN::preprocess(const cv::Mat cv_image, float* im_info) { 150 | 151 | cv::Mat cv_new(cv_image.rows, cv_image.cols, CV_32FC3, cv::Scalar(0,0,0)); 152 | 153 | int height = cv_image.rows; 154 | int width = cv_image.cols; 155 | 156 | /* Mean normalization (in this case it may not be the average of the training) */ 157 | for (int h = 0; h < height; ++h ) { 158 | for (int w = 0; w < width; ++w) { 159 | cv_new.at(cv::Point(w, h))[0] = float(cv_image.at(cv::Point(w, h))[0])-float(102.9801); 160 | cv_new.at(cv::Point(w, h))[1] = float(cv_image.at(cv::Point(w, h))[1])-float(115.9465); 161 | cv_new.at(cv::Point(w, h))[2] = float(cv_image.at(cv::Point(w, h))[2])-float(122.7717); 162 | } 163 | } 164 | 165 | /* Max image size comparation to know if resize is needed */ 166 | int max_side = MAX(height, width); 167 | int min_side = MIN(height, width); 168 | 169 | float max_side_scale = float(max_side) / MAX_INPUT_SIDE; 170 | float min_side_scale = float(min_side) / MIN_INPUT_SIDE; 171 | float max_scale = MAX(max_side_scale, min_side_scale); 172 | float img_scale = 1; 173 | 174 | if(max_scale > 1) 175 | img_scale = float(1) / max_scale; 176 | 177 | int height_resized = int(height * img_scale); 178 | int width_resized = int(width * img_scale); 179 | 180 | cv::Mat cv_resized; 181 | cv::resize(cv_new, cv_resized, cv::Size(width_resized, height_resized)); 182 | 183 | float data_buf[height_resized*width_resized*3]; 184 | 185 | for (int h = 0; h < height_resized; ++h ) 186 | { 187 | for (int w = 0; w < width_resized; ++w) 188 | { 189 | data_buf[(0*height_resized+h)*width_resized+w] = float(cv_resized.at(cv::Point(w, h))[0]); 190 | data_buf[(1*height_resized+h)*width_resized+w] = float(cv_resized.at(cv::Point(w, h))[1]); 191 | data_buf[(2*height_resized+h)*width_resized+w] = float(cv_resized.at(cv::Point(w, 
h))[2]); 192 | } 193 | } 194 | 195 | net_->blob_by_name("data")->Reshape(1, num_channels_, height_resized, width_resized); 196 | //net_->blob_by_name("data")->set_cpu_data(data_buf); 197 | Blob * input_blobs= net_->input_blobs()[0]; 198 | switch(Caffe::mode()){ 199 | case Caffe::CPU: 200 | memcpy(input_blobs->mutable_cpu_data(), data_buf, sizeof(float) * input_blobs->count()); 201 | break; 202 | case Caffe::GPU: 203 | caffe_gpu_memcpy(sizeof(float)* input_blobs->count(), data_buf, input_blobs->mutable_gpu_data()); 204 | break; 205 | default: 206 | LOG(FATAL)<<"Unknow Caffe mode"; 207 | } 208 | 209 | im_info[0] = height_resized; 210 | im_info[1] = width_resized; 211 | im_info[2] = img_scale; 212 | 213 | net_->blob_by_name("im_info")->set_cpu_data(im_info); 214 | } 215 | 216 | /* 217 | * === FUNCTION ====================================================================== 218 | * Name: vis_detections 219 | * Description: Visuallize the detection result 220 | * Output: None 221 | * ===================================================================================== 222 | */ 223 | void Faster_RCNN::vis_detections(cv::Mat& cv_image, vector detections) 224 | { 225 | int fontFace = cv::FONT_HERSHEY_PLAIN; 226 | double fontScale = 2; 227 | double thickness = 1.5; 228 | int baseline = 0; 229 | 230 | for(int i = 0; i < detections.size(); i++) { 231 | cv::rectangle(cv_image, cv::Point(detections[i].x,detections[i].y), 232 | cv::Point(detections[i].x + detections[i].width,detections[i].y + detections[i].height), 233 | cv::Scalar(0, 0, 255), 2, 8, 0); 234 | string text = detections[i].category + " " + std::to_string(detections[i].score); 235 | cv::Size textSize = cv::getTextSize(text, fontFace, fontScale, thickness, &baseline); 236 | cv::rectangle(cv_image, cv::Point(detections[i].x,detections[i].y - 2), 237 | cv::Point(detections[i].x + textSize.width/1.3, detections[i].y - 2 - textSize.height), 238 | cv::Scalar::all(180), CV_FILLED); 239 | cv::putText(cv_image, text, 
cv::Point(detections[i].x,detections[i].y - 2), 240 | fontFace, thickness, cv::Scalar::all(0), fontScale, 8); 241 | } 242 | 243 | cv::imshow("Detections", cv_image); 244 | cv::waitKey(0); 245 | } 246 | 247 | /* 248 | * === FUNCTION ====================================================================== 249 | * Name: bbox_sort 250 | * Description: Sort the predictions by score, highest score first 251 | * Output: float* sorted_pred 252 | * Receives a copy of the 5-float entries of "pred", reordered by score 253 | * ===================================================================================== 254 | */ 255 | void Faster_RCNN::bbox_sort(const int num_rois, const float* pred, float* sorted_pred) 256 | { 257 | vector my; 258 | Info tmp; 259 | for (int i = 0; i< num_rois; i++) { 260 | tmp.score = pred[i*5 + 4]; /* fifth float of each entry is used as the sort key */ 261 | tmp.head = pred + i*5; /* start of this entry inside "pred" */ 262 | my.push_back(tmp); 263 | } 264 | 265 | std::sort(my.begin(), my.end()); /* ordering is defined by Info::operator< */ 266 | 267 | /* copy each 5-float entry into its sorted position */ for (int i=0; i < num_rois; i++) 268 | for (int j=0; j<5; j++) 269 | sorted_pred[i*5+j] = my[i].head[j]; 270 | } 271 | 272 | /* 273 | * === FUNCTION ====================================================================== 274 | * Name: bbox_transform_inv 275 | * Description: Compute bounding box regression value 276 | * Output: float* pred 277 | * A pointer "pred" is formed with predictions [x, y, width, height, 278 | * pred_category] ordered in such a way that all predictions of category 279 | * '0' go first, those of category '1' after... 
280 | * So that the predictions derived from roi[0] appear in the pred[0 to 4] 281 | * for category '0' and in the pred[(1*num_rois+0)*5 to 282 | * (1*num_rois+0)*5+4] for category '1'. NOTE: each 5-float entry is written as clipped corner coordinates plus the class score, i.e. [x1, y1, x2, y2, score] 283 | * ===================================================================================== 284 | */ 285 | void Faster_RCNN::bbox_transform_inv(int num_rois, const float* box_deltas, const float* pred_cls, float* boxes, float* pred, int img_height, int img_width) { 286 | 287 | float width, height, ctr_x, ctr_y, dx, dy, dw, dh, pred_ctr_x, pred_ctr_y, pred_w, pred_h; 288 | 289 | for(int i = 0; i < num_rois; i++) { 290 | 291 | /* size and centre of roi i, read from its [x1, y1, x2, y2] in "boxes" */ width = boxes[i*4+2] - boxes[i*4+0] + 1.0; 292 | height = boxes[i*4+3] - boxes[i*4+1] + 1.0; 293 | ctr_x = boxes[i*4+0] + 0.5 * width; 294 | ctr_y = boxes[i*4+1] + 0.5 * height; 295 | 296 | for (int j=0; j< num_clases_; j++) { 297 | 298 | /* box_deltas is indexed roi-major: 4 deltas per (roi, class) */ dx = box_deltas[(i*num_clases_+j)*4+0]; 299 | dy = box_deltas[(i*num_clases_+j)*4+1]; 300 | dw = box_deltas[(i*num_clases_+j)*4+2]; 301 | dh = box_deltas[(i*num_clases_+j)*4+3]; 302 | /* centre is shifted by the delta scaled with the roi size; size is scaled by exp(delta) */ pred_ctr_x = ctr_x + width*dx; 303 | pred_ctr_y = ctr_y + height*dy; 304 | pred_w = width * exp(dw); 305 | pred_h = height * exp(dh); 306 | /* pred is indexed class-major; corners are clipped to [0, img_width-1] x [0, img_height-1] */ pred[(j*num_rois+i)*5+0] = MAX(MIN(pred_ctr_x - 0.5* pred_w, img_width -1), 0); 307 | pred[(j*num_rois+i)*5+1] = MAX(MIN(pred_ctr_y - 0.5* pred_h, img_height -1), 0); 308 | pred[(j*num_rois+i)*5+2] = MAX(MIN(pred_ctr_x + 0.5* pred_w, img_width -1), 0); 309 | pred[(j*num_rois+i)*5+3] = MAX(MIN(pred_ctr_y + 0.5* pred_h, img_height -1), 0); 310 | pred[(j*num_rois+i)*5+4] = pred_cls[i*num_clases_+j]; 311 | } 312 | } 313 | 314 | } 315 | -------------------------------------------------------------------------------- /faster_rcnn.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FASTER_RCNN_HPP 2 | #define FASTER_RCNN_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "caffe/caffe.hpp" 10 | #include 
"py-faster-rcnn/lib/nms/gpu_nms.hpp" 11 | #include 12 | #include 13 | #include 14 | 15 | using namespace caffe; 16 | using namespace std; 17 | 18 | #define CONF_THRESH 0.8 /* detect() reports a box only while its score is above this */ 19 | #define NMS_THRESH 0.3 /* threshold handed to _nms() */ 20 | #define MAX_INPUT_SIDE 1000 /* preprocess() shrinks the image so its longer side fits this (no trailing ';' in the macro body) */ 21 | #define MIN_INPUT_SIDE 600 /* preprocess() shrinks the image so its shorter side fits this (no trailing ';' in the macro body) */ 22 | 23 | /* 24 | * === Struct ====================================================================== 25 | * Name: Detection 26 | * Description: Struct to return each detection 27 | * ===================================================================================== 28 | */ 29 | struct Detection { 30 | float x; 31 | float y; 32 | float width; 33 | float height; 34 | float score; 35 | int categoryIndex; 36 | string category; 37 | }; 38 | 39 | /* 40 | * === Struct ====================================================================== 41 | * Name: Info 42 | * Description: Used for bbox sort: a score plus a pointer to the 5-float entry it belongs to. 43 | * operator< compares the other way round so that std::sort yields descending scores 44 | * ===================================================================================== 44 | */ 45 | struct Info { 46 | float score; 47 | const float* head; 48 | 49 | bool operator <(const Info& info) const { 50 | return (info.score < score); 51 | } 52 | }; 53 | 54 | /* 55 | * === Class ====================================================================== 56 | * Name: Faster_RCNN 57 | * Description: FasterRCNN C++ Detector 58 | * ===================================================================================== 59 | */ 60 | class Faster_RCNN { 61 | public: 62 | Faster_RCNN(const string& model_file, const string& weights_file, 63 | const string& labels_file, const int GPUID); 64 | void detect(cv::Mat cv_image, vector& detections); 65 | void vis_detections(cv::Mat& cv_image, vector detections); 66 | 67 | private: 68 | void preprocess(const cv::Mat cv_image, float *im_info); 69 | void bbox_sort(int num, const float* pred, float *sorted_pred); 70 | void bbox_transform_inv(const int num_rois, const float* box_deltas, const float* pred_cls, 71 | float* boxes, float* pred, int img_height, int img_width); 72 | 73 | 
private: 74 | Faster_RCNN(){} 75 | std::shared_ptr > net_; 76 | std::vector labels_; 77 | int num_channels_; 78 | int num_clases_; 79 | }; 80 | 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include "faster_rcnn.hpp" 2 | 3 | int main() { 4 | 5 | string model_file = "py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt"; 6 | string weights_file = "py-faster-rcnn/data/faster_rcnn_models/ZF_faster_rcnn_final.caffemodel"; 7 | string labels_file = "data/pascal_voc/synset_words.txt"; 8 | int GPUID = 0; 9 | 10 | Faster_RCNN detector(model_file, weights_file, labels_file, GPUID); 11 | 12 | vector detections; 13 | cv::Mat image = cv::imread("py-faster-rcnn/data/demo/001763.jpg"); 14 | detector.detect(image, detections); 15 | 16 | std::cout<<"x\ty\twidth\theight\tcategory\tscore"<