├── README.md ├── demo.jpg ├── face_full_conv2.prototxt └── integrated_caffe ├── data_transformer.hpp ├── face_detection.cpp ├── face_detection.hpp └── main.cpp /README.md: -------------------------------------------------------------------------------- 1 | # caf_face_detection 2 | An implementation of the Yahoo paper: Multi-view Face Detection Using Deep Convolutional Neural Networks 3 | 4 | ### Lib 5 | 1. caffe 6 | 2. Eigen 7 | 3. OpenCV 8 | 9 | ### demo 10 | ![demo](https://github.com/LouieYang/caf_face_detection/blob/master/demo.jpg) 11 | 12 | The left side shows the result using caffe, while the right side shows the result using a simple CNN from another of my repositories 13 | 14 | ### Model 15 | The caffe model can be downloaded from [BaiduYun](http://pan.baidu.com/s/1i4Qokhn) or [GoogleDrive](https://drive.google.com/file/d/0BxvKyd83BJjYUTNnZWdURnczZ1U/view?usp=sharing) 16 | 17 | ### References: 18 | [1] https://github.com/guoyilin/FaceDetection_CNN 19 | 20 | [2] http://arxiv.org/abs/1502.02766 21 | 22 | [3] http://caffe.berkeleyvision.org 23 | -------------------------------------------------------------------------------- /demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/watersink/caf_face_detection/0756d5b7b50139d812a43698cff7ffa7b99a35cb/demo.jpg -------------------------------------------------------------------------------- /face_full_conv2.prototxt: -------------------------------------------------------------------------------- 1 | # Fully convolutional network version of CaffeNet. 
2 | name: "CaffeNetConv" 3 | input: "data" 4 | input_dim: 1 5 | input_dim: 3 6 | input_dim: 3300 7 | input_dim: 4950 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | convolution_param { 14 | num_output: 96 15 | kernel_size: 11 16 | stride: 4 17 | } 18 | } 19 | layer { 20 | name: "relu1" 21 | type: "ReLU" 22 | bottom: "conv1" 23 | top: "conv1" 24 | } 25 | layer { 26 | name: "pool1" 27 | type: "Pooling" 28 | bottom: "conv1" 29 | top: "pool1" 30 | pooling_param { 31 | pool: MAX 32 | kernel_size: 3 33 | stride: 2 34 | } 35 | } 36 | layer { 37 | name: "norm1" 38 | type: "LRN" 39 | bottom: "pool1" 40 | top: "norm1" 41 | lrn_param { 42 | local_size: 5 43 | alpha: 0.0001 44 | beta: 0.75 45 | } 46 | } 47 | layer { 48 | name: "conv2" 49 | type: "Convolution" 50 | bottom: "norm1" 51 | top: "conv2" 52 | convolution_param { 53 | num_output: 256 54 | pad: 2 55 | kernel_size: 5 56 | group: 2 57 | } 58 | } 59 | layer { 60 | name: "relu2" 61 | type: "ReLU" 62 | bottom: "conv2" 63 | top: "conv2" 64 | } 65 | layer { 66 | name: "pool2" 67 | type: "Pooling" 68 | bottom: "conv2" 69 | top: "pool2" 70 | pooling_param { 71 | pool: MAX 72 | kernel_size: 3 73 | stride: 2 74 | } 75 | } 76 | layer { 77 | name: "norm2" 78 | type: "LRN" 79 | bottom: "pool2" 80 | top: "norm2" 81 | lrn_param { 82 | local_size: 5 83 | alpha: 0.0001 84 | beta: 0.75 85 | } 86 | } 87 | layer { 88 | name: "conv3" 89 | type: "Convolution" 90 | bottom: "norm2" 91 | top: "conv3" 92 | convolution_param { 93 | num_output: 384 94 | pad: 1 95 | kernel_size: 3 96 | } 97 | } 98 | layer { 99 | name: "relu3" 100 | type: "ReLU" 101 | bottom: "conv3" 102 | top: "conv3" 103 | } 104 | layer { 105 | name: "conv4" 106 | type: "Convolution" 107 | bottom: "conv3" 108 | top: "conv4" 109 | convolution_param { 110 | num_output: 384 111 | pad: 1 112 | kernel_size: 3 113 | group: 2 114 | } 115 | } 116 | layer { 117 | name: "relu4" 118 | type: "ReLU" 119 | bottom: "conv4" 120 | top: "conv4" 121 
| } 122 | layer { 123 | name: "conv5" 124 | type: "Convolution" 125 | bottom: "conv4" 126 | top: "conv5" 127 | convolution_param { 128 | num_output: 256 129 | pad: 1 130 | kernel_size: 3 131 | group: 2 132 | } 133 | } 134 | layer { 135 | name: "relu5" 136 | type: "ReLU" 137 | bottom: "conv5" 138 | top: "conv5" 139 | } 140 | layer { 141 | name: "pool5" 142 | type: "Pooling" 143 | bottom: "conv5" 144 | top: "pool5" 145 | pooling_param { 146 | pool: MAX 147 | kernel_size: 3 148 | stride: 2 149 | } 150 | } 151 | layer { 152 | name: "fc6-conv" 153 | type: "Convolution" 154 | bottom: "pool5" 155 | top: "fc6-conv" 156 | convolution_param { 157 | num_output: 4096 158 | kernel_size: 6 159 | } 160 | } 161 | layer { 162 | name: "relu6" 163 | type: "ReLU" 164 | bottom: "fc6-conv" 165 | top: "fc6-conv" 166 | } 167 | layer { 168 | name: "drop6" 169 | type: "Dropout" 170 | bottom: "fc6-conv" 171 | top: "fc6-conv" 172 | dropout_param { 173 | dropout_ratio: 0.5 174 | } 175 | } 176 | layer { 177 | name: "fc7-conv" 178 | type: "Convolution" 179 | bottom: "fc6-conv" 180 | top: "fc7-conv" 181 | convolution_param { 182 | num_output: 4096 183 | kernel_size: 1 184 | } 185 | } 186 | layer { 187 | name: "relu7" 188 | type: "ReLU" 189 | bottom: "fc7-conv" 190 | top: "fc7-conv" 191 | } 192 | layer { 193 | name: "drop7" 194 | type: "Dropout" 195 | bottom: "fc7-conv" 196 | top: "fc7-conv" 197 | dropout_param { 198 | dropout_ratio: 0.5 199 | } 200 | } 201 | layer { 202 | name: "fc8-conv" 203 | type: "Convolution" 204 | bottom: "fc7-conv" 205 | top: "fc8-conv" 206 | convolution_param { 207 | num_output: 2 208 | kernel_size: 1 209 | } 210 | } 211 | layer { 212 | name: "prob" 213 | type: "Softmax" 214 | bottom: "fc8-conv" 215 | top: "prob" 216 | } 217 | -------------------------------------------------------------------------------- /integrated_caffe/data_transformer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef data_transformer_hpp 2 | #define 
data_transformer_hpp 3 | 4 | #ifndef CPU_ONLY 5 | #define CPU_ONLY 6 | #endif 7 | 8 | #include "opencv2/opencv.hpp" 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | /** 18 | * @Brief: The data transform from OpenCV to caffe Blob 19 | * 20 | * @param image: OpenCV Mat data vector 21 | * @Warning: Template function must be defined in the .hpp file to avoid 22 | * linking error 23 | */ 24 | template 25 | void OpenCV2Blob(const std::vector &channels, 26 | std::unique_ptr> &net) 27 | { 28 | caffe::Blob *input_layer = net->input_blobs()[0]; 29 | DType *input_data = input_layer->mutable_cpu_data(); 30 | 31 | for (const auto &ch: channels) 32 | { 33 | for (auto i = 0; i != ch.rows; ++i) 34 | { 35 | std::memcpy(input_data, ch.ptr(i), sizeof(DType) * ch.cols); 36 | input_data += ch.cols; 37 | } 38 | } 39 | } 40 | 41 | #endif /* data_transformer_hpp */ 42 | -------------------------------------------------------------------------------- /integrated_caffe/face_detection.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************************* 2 | * Copyright(c) 2015 3 | * All rights reserved. 
4 | * 5 | * Name: face detection 6 | * Description: multiview face detection 7 | * Reference: Multi-view Face Detection using deep convolutional neural networks 8 | * Lib: OpenCV2, Eigen3, caffe 9 | * Date: 2015-12-06 10 | * Author: Yang 11 | ******************************************************************/ 12 | #include "face_detection.hpp" 13 | 14 | void read_image_list(std::vector& imgFiles, std::string imgList) 15 | { 16 | /* 17 | * Description: Read imgfile in imgList to a vector 18 | */ 19 | 20 | imgFiles.clear(); 21 | std::ifstream fin(imgList, std::ios::in); 22 | 23 | for (std::string line; std::getline(fin, line);) 24 | { 25 | imgFiles.push_back(line); 26 | } 27 | fin.close(); 28 | } 29 | 30 | void generate_bounding_box(Eigen::MatrixXf prob, double scale, std::vector& bd) 31 | { 32 | const int stride = 32; 33 | const int cell_size = 227; 34 | 35 | for (int h = 0; h < prob.rows(); h++) 36 | { 37 | for (int w = 0; w < prob.cols(); w++) 38 | { 39 | /* threshold is set 0.85 */ 40 | if (prob(h, w) >= 0.85) 41 | { 42 | bd.push_back(boundingbox(cv::Rect_(float(w * stride) / scale, float(h * stride) / scale, (float)cell_size / scale, (float)cell_size / scale), prob(h, w))); 43 | } 44 | } 45 | } 46 | } 47 | 48 | void nms_max(std::vector& bd, std::vector& final_bd, double overlapped_thresh) 49 | { 50 | /* 51 | * Descripttion: Non-maximum suppression algorithm with maximizing 52 | */ 53 | 54 | std::sort(bd.begin(), bd.end(), sort_by_size); 55 | for (int i = 0; i < bd.size(); i++) 56 | { 57 | int j = 0; 58 | for (; j < final_bd.size(); j++) 59 | { 60 | /* Calculate the overlapped area */ 61 | float x11 = bd[i].first.x; 62 | float y11 = bd[i].first.y; 63 | float x12 = bd[i].first.x + bd[i].first.height; 64 | float y12 = bd[i].first.y + bd[i].first.width; 65 | 66 | float x21 = final_bd[j].first.x; 67 | float y21 = final_bd[j].first.y; 68 | float x22 = final_bd[j].first.x + final_bd[j].first.height; 69 | float y22 = final_bd[j].first.y + final_bd[j].first.width; 70 | 
71 | float x_overlap = MAX(0, MIN(x12, x22) - MAX(x11, x21)); 72 | float y_overlap = MAX(0, MIN(y12, y22) - MAX(y11, y21)); 73 | 74 | if (x_overlap * y_overlap > MIN(bd[i].first.area(), final_bd[j].first.area()) * overlapped_thresh) 75 | { 76 | if (final_bd[j].second < bd[i].second) 77 | { 78 | final_bd[j] = bd[i]; 79 | } 80 | break; 81 | } 82 | } 83 | if (j == final_bd.size()) 84 | { 85 | final_bd.push_back(bd[i]); 86 | } 87 | } 88 | } 89 | 90 | void nms_average(std::vector& bd, std::vector& final_bd, double overlapped_thresh) 91 | { 92 | /* 93 | * Descripttion: Non-maximum suppression algorithm with averaging 94 | */ 95 | 96 | std::sort(bd.begin(), bd.end(), sort_by_confidence_reverse); 97 | while (bd.size() != 0) 98 | { 99 | std::vector iddlt(1, 0); 100 | 101 | float x11 = bd[0].first.x; 102 | float y11 = bd[0].first.y; 103 | float x12 = bd[0].first.x + bd[0].first.height; 104 | float y12 = bd[0].first.y + bd[0].first.width; 105 | 106 | if (bd.size() > 1) 107 | { 108 | for (int j = 1; j < bd.size(); j++) 109 | { 110 | float x21 = bd[j].first.x; 111 | float y21 = bd[j].first.y; 112 | float x22 = bd[j].first.x + bd[j].first.height; 113 | float y22 = bd[j].first.y + bd[j].first.width; 114 | 115 | float x_overlap = MAX(0, MIN(x12, x22) - MAX(x11, x21)); 116 | float y_overlap = MAX(0, MIN(y12, y22) - MAX(y11, y21)); 117 | 118 | if (x_overlap * y_overlap > MIN(bd[0].first.area(), bd[j].first.area()) * overlapped_thresh) 119 | { 120 | iddlt.push_back(j); 121 | } 122 | } 123 | } 124 | 125 | float x_average = 0; 126 | float y_average = 0; 127 | float width = 0; 128 | float height = 0; 129 | float confidence = 0; 130 | 131 | for (int i = 0; i < iddlt.size(); i++) 132 | { 133 | x_average += bd[iddlt[i]].first.x; 134 | y_average += bd[iddlt[i]].first.y; 135 | width += bd[iddlt[i]].first.width; 136 | height += bd[iddlt[i]].first.height; 137 | confidence += bd[iddlt[i]].second; 138 | } 139 | x_average /= iddlt.size(); 140 | y_average /= iddlt.size(); 141 | width /= 
iddlt.size(); 142 | height /= iddlt.size(); 143 | confidence /= iddlt.size(); 144 | 145 | final_bd.push_back(boundingbox(cv::Rect_(y_average, x_average, width, height), confidence)); 146 | 147 | 148 | for (int i = 0; i < iddlt.size(); i++) 149 | { 150 | bd.erase(bd.begin() + iddlt[i] - i); 151 | } 152 | } 153 | } 154 | 155 | bool sort_by_confidence_reverse(const boundingbox& a, const boundingbox& b) 156 | { 157 | return a.second > b.second; 158 | } 159 | 160 | bool sort_by_size(const boundingbox& a, const boundingbox& b) 161 | { 162 | return a.first.width < b.first.width; 163 | } 164 | 165 | void draw_boxes(std::vector& bd, cv::Mat& img) 166 | { 167 | for (int k = 0; k < bd.size(); k++) 168 | { 169 | cv::rectangle(img, cv::Rect(int(bd[k].first.y), int(bd[k].first.x), int(bd[k].first.width), int(bd[k].first.height)), cv::Scalar(0, 0, 255), 2); 170 | std::stringstream ss; 171 | ss << bd[k].second; 172 | cv::putText(img, ss.str(), cv::Point(bd[k].first.y, bd[k].first.x), 1, 1, cv::Scalar(255, 0, 0)); 173 | } 174 | } 175 | 176 | std::vector scale_list(const cv::Mat &img) 177 | { 178 | int min = 0; 179 | int max = 0; 180 | double delim = 5; 181 | double factor = 0.7937; 182 | double factor_count = 0; 183 | 184 | std::vector scales; 185 | 186 | max = MAX(img.cols, img.rows); 187 | min = MIN(img.cols, img.rows); 188 | 189 | // delim = 2500 / max; 190 | while (delim > 1 + 1e-4) 191 | { 192 | scales.push_back(delim); 193 | delim *= factor; 194 | } 195 | 196 | while (min >= 227) 197 | { 198 | scales.push_back(pow(factor, factor_count++)); 199 | min *= factor; 200 | } 201 | 202 | std::cout << "Image size: " << img.cols << "(Width)" << ' ' << img.rows << "(Height)" <<'\n'; 203 | std::cout << "Scaling: "; 204 | std::for_each(scales.begin(), scales.end(), [](double scale){ std::cout << scale << ' '; }); 205 | std::cout << '\n'; 206 | return scales; 207 | } 208 | 209 | void updatePrototxt(int rows, int cols) 210 | { 211 | std::ifstream fin("face_full_conv.prototxt", 
std::ios::in); 212 | std::ofstream fout("face_full_conv2.prototxt", std::ios::out); 213 | int index = 0; 214 | for (std::string line; std::getline(fin, line); index++) 215 | { 216 | if (index == 5) 217 | { 218 | fout << "input_dim: " << rows << '\n'; 219 | } 220 | else if (index == 6) 221 | { 222 | fout << "input_dim: " << cols << '\n'; 223 | } 224 | else 225 | { 226 | fout << line << '\n'; 227 | } 228 | } 229 | fin.close(); 230 | fout.close(); 231 | } 232 | 233 | void face_detection(std::string imgList, std::string resultList) 234 | { 235 | std::vector imgFiles; 236 | read_image_list(imgFiles, imgList); 237 | 238 | std::fstream output_file(resultList, std::ios::app|std::ios::out); 239 | output_file << "#faceID" << '\t' << "imagePath" << '\t'; 240 | output_file << "faceRect.y" << '\t' << "faceRect.x" << '\t'; 241 | output_file << "faceRect.w" << '\t' << "faceRect.h" << '\n'; 242 | 243 | for (int i = 0; i < imgFiles.size(); i++) 244 | { 245 | cv::Mat img = cv::imread(imgFiles[i]); 246 | std::vector scales(scale_list(img)); 247 | std::vector bd; 248 | for (int j = 0; j < scales.size(); j++) 249 | { 250 | cv::Mat scale_img; 251 | cv::resize(img, scale_img, cv::Size(img.cols * scales[j], img.rows * scales[j])); 252 | updatePrototxt(scale_img.rows, scale_img.cols); 253 | 254 | std::vector channels; 255 | scale_img.convertTo(scale_img, CV_32FC3); 256 | 257 | cv::split(scale_img, channels); 258 | channels[0] -= ILSVRC_BLUE_MEAN; 259 | channels[1] -= ILSVRC_GREEN_MEAN; 260 | channels[2] -= ILSVRC_RED_MEAN; 261 | 262 | std::unique_ptr> 263 | net(new caffe::Net("face_full_conv2.prototxt", 264 | caffe::Phase::TEST)); 265 | net->CopyTrainedLayersFrom("face_full_conv.caffemodel"); 266 | 267 | OpenCV2Blob(channels, net); 268 | 269 | net->ForwardPrefilled(); 270 | caffe::Blob* output_layer = net->output_blobs()[0]; 271 | 272 | float* data = const_cast(output_layer->cpu_data() + output_layer->shape(2) * output_layer->shape(3)); 273 | 274 | Eigen::Map> prob(data, 
output_layer->shape(2), output_layer->shape(3)); 275 | 276 | generate_bounding_box(prob, scales[j], bd); 277 | } 278 | 279 | std::vector bd1; 280 | std::vector bdf; 281 | nms_max(bd, bd1); 282 | nms_average(bd1, bdf); 283 | 284 | for (int k = 0; k < bdf.size(); k++) 285 | { 286 | output_file << i << '\t' << imgFiles[i] << '\t'; 287 | output_file << int(bdf[k].first.y) << '\t' 288 | << int(bdf[k].first.x) << '\t'; 289 | output_file << int(bdf[k].first.width) << '\t' 290 | << int(bdf[k].first.height) << '\n'; 291 | } 292 | } 293 | } 294 | 295 | cv::Mat &face_detection(cv::Mat &img) 296 | { 297 | std::vector scales(scale_list(img)); 298 | std::vector bd; 299 | for (int j = 0; j < scales.size(); j++) 300 | { 301 | cv::Mat scale_img; 302 | cv::resize(img, scale_img, cv::Size(img.cols * scales[j], img.rows * scales[j])); 303 | updatePrototxt(scale_img.rows, scale_img.cols); 304 | 305 | std::vector channels; 306 | scale_img.convertTo(scale_img, CV_32FC3); 307 | 308 | cv::split(scale_img, channels); 309 | channels[0] -= ILSVRC_BLUE_MEAN; 310 | channels[1] -= ILSVRC_GREEN_MEAN; 311 | channels[2] -= ILSVRC_RED_MEAN; 312 | 313 | std::unique_ptr> 314 | net(new caffe::Net("face_full_conv2.prototxt", 315 | caffe::Phase::TEST)); 316 | net->CopyTrainedLayersFrom("face_full_conv.caffemodel"); 317 | 318 | OpenCV2Blob(channels, net); 319 | 320 | net->ForwardPrefilled(); 321 | caffe::Blob* output_layer = net->output_blobs()[0]; 322 | 323 | float* data = const_cast(output_layer->cpu_data() + output_layer->shape(2) * output_layer->shape(3)); 324 | 325 | Eigen::Map> prob(data, output_layer->shape(2), output_layer->shape(3)); 326 | 327 | generate_bounding_box(prob, scales[j], bd); 328 | } 329 | 330 | std::vector bd1; 331 | std::vector bdf; 332 | nms_max(bd, bd1); 333 | nms_average(bd1, bdf); 334 | 335 | draw_boxes(bdf, img); 336 | return img; 337 | } -------------------------------------------------------------------------------- /integrated_caffe/face_detection.hpp: 
-------------------------------------------------------------------------------- 1 | #ifndef face_detection_hpp 2 | #define face_detection_hpp 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "data_transformer.hpp" 11 | 12 | using Eigen::MatrixXf; 13 | using boundingbox = std::pair, double>; 14 | 15 | constexpr double ILSVRC_RED_MEAN = 104.00698793; 16 | constexpr double ILSVRC_GREEN_MEAN = 116.66876762; 17 | constexpr double ILSVRC_BLUE_MEAN = 122.67891434; 18 | 19 | void face_detection(std::string imgList, std::string resultList); 20 | cv::Mat &face_detection(cv::Mat &img); 21 | 22 | 23 | 24 | std::vector scale_list(const cv::Mat &img); 25 | void updatePrototxt(int rows, int cols); 26 | void read_image_list(std::vector& imgFiles, std::string imgList); 27 | void generate_bounding_box(Eigen::MatrixXf prob, 28 | double scale, std::vector& bounding_box); 29 | 30 | void nms_max(std::vector& bd, 31 | std::vector& final_bd, double overlapped_thresh = 0.3); 32 | void nms_average(std::vector& bd, 33 | std::vector& final_bd, double overlapped_thresh = 0.2); 34 | void draw_boxes(std::vector& bd, cv::Mat& img); 35 | 36 | bool sort_by_size(const boundingbox& a, const boundingbox& b); 37 | bool sort_by_confidence_reverse(const boundingbox& a, const boundingbox& b ); 38 | 39 | #endif /* face_detection_hpp */ -------------------------------------------------------------------------------- /integrated_caffe/main.cpp: -------------------------------------------------------------------------------- 1 | #include "face_detection.hpp" 2 | 3 | int main() 4 | { 5 | cv::Mat img = cv::imread("tmp.jpg"); 6 | cv::imshow("test", face_detection(img)); 7 | cv::waitKey(); 8 | } 9 | --------------------------------------------------------------------------------