├── README.md ├── mtcnn.cpp └── mtcnn.h /README.md: -------------------------------------------------------------------------------- 1 | # mtcnn-oneperson-detect-function 2 | 1.该算法只针对图片中只有一个人的情况。如果画面中有多个人,它会随机给出一个结果,剩下的人是不会检测的。 3 | 4 | 2.算法思路由NCNN作者nihui提出,我根据他的思路实现出来了。 5 | 6 | 3.这个接口依赖于mtcnn-ncnn,我只是写出了这个函数实现。 7 | -------------------------------------------------------------------------------- /mtcnn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "mtcnn.h" 13 | #include "det1.id.h" 14 | #include "det1.mem.h" 15 | #include "det2.id.h" 16 | #include "det2.mem.h" 17 | #include "det3.id.h" 18 | #include "det3.mem.h" 19 | 20 | using namespace std; 21 | using namespace cv; 22 | 23 | float mtcnn::nms_threshold[3] = {0.5, 0.7, 0.7}; 24 | float mtcnn::threshold[3] = {0.7, 0.8, 0.9}; 25 | float mtcnn::nms_threshold_singleface[3] = {0.5, 0.7, 0.7}; 26 | float mtcnn::nms_threshold_mulface[3] = {0.5, 0.7, 0.7}; 27 | float mtcnn::nms_threshold_register[3] = {0.5, 0.7, 0.7}; 28 | 29 | float mtcnn::mean_vals[3] = {127.5, 127.5, 127.5}; 30 | float mtcnn::norm_vals[3] = {0.0078125, 0.0078125, 0.0078125}; 31 | 32 | bool cmpScore(orderScore lsh, orderScore rsh) 33 | { 34 | if(lsh.score& boundingBox_, std::vector& bboxScore_, float scale) 62 | { 63 | int stride = 2; 64 | int cellsize = 12; 65 | int count = 0; 66 | //score p 67 | float *p = score.channel(1);//score.data + score.cstep; 68 | #ifdef USE_NCNN_NEW_VERSION 69 | float *plocal = (float *)location.data; 70 | #else 71 | float *plocal = location.data; 72 | #endif 73 | Bbox bbox; 74 | orderScore order; 75 | for(int row=0;rowthreshold[0]) 80 | { 81 | bbox.score = *p; 82 | order.score = *p; 83 | order.oriOrder = count; 84 | bbox.x1 = round((stride*col+1)/scale); 85 | bbox.y1 = round((stride*row+1)/scale); 86 | bbox.x2 = round((stride*col+1+cellsize)/scale); 87 | bbox.y2 = round((stride*row+1+cellsize)/scale); 88 | bbox.exist = true; 89 | bbox.area = (bbox.x2 - bbox.x1)*(bbox.y2 - bbox.y1); 90 | for(int channel=0;channel<4;channel++) 91 | bbox.regreCoord[channel]=location.channel(channel)[0]; 92 | boundingBox_.push_back(bbox); 93 | bboxScore_.push_back(order); 94 | count++; 95 | } 96 | p++; 97 | plocal++; 98 | } 99 | } 100 | } 101 | 102 | void mtcnn::nms(std::vector &boundingBox_, std::vector &bboxScore_, const float overlap_threshold, string modelname) 103 | { 104 | if(boundingBox_.empty()) 105 | { 106 | return; 107 | } 108 | std::vector heros; 109 | //sort the score 110 | sort(bboxScore_.begin(), bboxScore_.end(), cmpScore); 111 | 112 | int order = 0; 113 | float IOU = 0; 114 | float maxX = 0; 115 | float maxY = 0; 116 | float minX = 0; 117 | float minY = 0; 118 | while(bboxScore_.size()>0) 119 | { 120 | order = bboxScore_.back().oriOrder; 121 | bboxScore_.pop_back(); 122 | if(order<0)continue; 123 | heros.push_back(order); 124 | boundingBox_.at(order).exist = false;//delete it 125 | 126 | for(int num=0;numboundingBox_.at(order).x1)?boundingBox_.at(num).x1:boundingBox_.at(order).x1; 132 | maxY = (boundingBox_.at(num).y1>boundingBox_.at(order).y1)?boundingBox_.at(num).y1:boundingBox_.at(order).y1; 133 | minX = (boundingBox_.at(num).x20)?(minX-maxX+1):0; 137 | maxY = ((minY-maxY+1)>0)?(minY-maxY+1):0; 138 | //IOU reuse for the area of two bbox 139 | IOU = maxX * maxY; 140 | if(!modelname.compare("Union")) 141 | IOU = IOU/(boundingBox_.at(num).area + boundingBox_.at(order).area - IOU); 142 | else if(!modelname.compare("Min")) 143 | { 144 | IOU = IOU/((boundingBox_.at(num).areaoverlap_threshold) 147 | { 148 | boundingBox_.at(num).exist=false; 149 | for(vector::iterator it=bboxScore_.begin(); it!=bboxScore_.end();it++) 150 | { 151 | if((*it).oriOrder == num) 152 | { 153 | (*it).oriOrder = -1; 154 | break; 155 | } 156 | } 157 | } 158 | } 159 | } 160 | } 161 | for(int i=0;i &vecBbox, const int &height, const int &width) 166 | { 167 | if(vecBbox.empty()) 168 | { 169 | cout<<"Bbox is empty!!"<::iterator it=vecBbox.begin(); it!=vecBbox.end();it++) 176 | { 177 | if((*it).exist) 178 | { 179 | bbw = (*it).x2 - (*it).x1 + 1; 180 | bbh = (*it).y2 - (*it).y1 + 1; 181 | x1 = (*it).x1 + (*it).regreCoord[0]*bbw; 182 | y1 = (*it).y1 + (*it).regreCoord[1]*bbh; 183 | x2 = (*it).x2 + (*it).regreCoord[2]*bbw; 184 | y2 = (*it).y2 + (*it).regreCoord[3]*bbh; 185 | 186 | w = x2 - x1 + 1; 187 | h = y2 - y1 + 1; 188 | 189 | maxSide = (h>w)?h:w; 190 | x1 = x1 + w*0.5 - maxSide*0.5; 191 | y1 = y1 + h*0.5 - maxSide*0.5; 192 | (*it).x2 = round(x1 + maxSide - 1); 193 | (*it).y2 = round(y1 + maxSide - 1); 194 | (*it).x1 = round(x1); 195 | (*it).y1 = round(y1); 196 | 197 | //boundary check 198 | if((*it).x1<0)(*it).x1=0; 199 | if((*it).y1<0)(*it).y1=0; 200 | if((*it).x2>width)(*it).x2 = width - 1; 201 | if((*it).y2>height)(*it).y2 = height - 1; 202 | 203 | it->area = (it->x2 - it->x1)*(it->y2 - it->y1); 204 | } 205 | } 206 | } 207 | 208 | void mtcnn::cleanUp() 209 | { 210 | firstBbox_.clear(); 211 | firstOrderScore_.clear(); 212 | secondBbox_.clear(); 213 | secondBboxScore_.clear(); 214 | thirdBbox_.clear(); 215 | thirdBboxScore_.clear(); 216 | } 217 | 218 | //优化手段:1.针对注册检测算法,只能检测一个人,检到人退出之后的循环,节省时间 219 | //备注:注册检测算法只针对注册图片中只有一个人。 220 | void mtcnn::mtcnn_oneperson_detect(Mat &image_rgb, vector &rect_res, vector > &points) 221 | { 222 | cleanUp(); 223 | static int fps = 0; 224 | fps++; 225 | ncnn::Mat img_; 226 | //深拷贝一张Mat 227 | Mat image_tmp; 228 | image_rgb.copyTo(image_tmp); 229 | //构造NCNN的Mat 230 | //img_ = ncnn::Mat::from_pixels(image_tmp.data, ncnn::Mat::PIXEL_BGR2RGB, image_tmp.cols, image_tmp.rows); 231 | img_ = ncnn::Mat::from_pixels(image_tmp.data, ncnn::Mat::PIXEL_BGR2RGB, image_tmp.cols, image_tmp.rows); 232 | img = img_; 233 | img_w = img.w; 234 | img_h = img.h; 235 | //减均值除方差,归一化 236 | img.substract_mean_normalize(mean_vals, norm_vals); 237 | //计算图像金字塔乘法因子 238 | float minl = img_w scales_; 245 | vector scales_extend; 246 | while(minl>MIN_DET_SIZE) 247 | { 248 | if(factor_count>0)m = m*factor; 249 | scales_.push_back(m); 250 | minl *= factor; 251 | factor_count++; 252 | } 253 | orderScore order; 254 | int count = 0; 255 | //先跑小图,再跑大图(scales_倒序),这样速度快些 256 | scales_extend.clear(); 257 | for (int i = 0; i>>>>scales_extend.size=%d,scales_extend[%d]=%f\n",scales_extend.size(),i,scales_extend[i]); 265 | //第一层:进入Pnet网络 266 | int hs = (int)ceil(img_h*scales_extend[i]); 267 | int ws = (int)ceil(img_w*scales_extend[i]); 268 | ncnn::Mat in; 269 | resize_bilinear(img_, in, ws, hs); 270 | //gettimeofday(&tm_before,NULL); 271 | ncnn::Extractor ex = Pnet.create_extractor(); 272 | ex.set_light_mode(true); 273 | #ifdef USE_TWO_THREAD 274 | ex.set_num_threads(2); 275 | #else 276 | ex.set_num_threads(1); 277 | #endif 278 | ex.input(det1_param_id::LAYER_data, in); 279 | ncnn::Mat score_, location_; 280 | ex.extract(det1_param_id::BLOB_prob1, score_); 281 | ex.extract(det1_param_id::BLOB_conv4_2, location_); 282 | //gettimeofday(&tm_after,NULL); 283 | //printf("=====>>>>>Pnet spend time %d us\n",(tm_after.tv_sec-tm_before.tv_sec)*1000000+(tm_after.tv_usec-tm_before.tv_usec)); 284 | std::vector boundingBox_; 285 | std::vector bboxScore_; 286 | 287 | generateBbox(score_, location_, boundingBox_, bboxScore_, scales_extend[i]); 288 | nms(boundingBox_, bboxScore_, nms_threshold_register[0]); 289 | 290 | count = 0; 291 | for(vector::iterator it=boundingBox_.begin(); it!=boundingBox_.end();it++) 292 | { 293 | if((*it).exist) 294 | { 295 | //printf("%d jinzita,[%d,%d],[%d,%d]\n",i,(*it).x1,(*it).y1,(*it).x2,(*it).y2); 296 | firstBbox_.push_back(*it); 297 | order.score = (*it).score; 298 | order.oriOrder = count; 299 | firstOrderScore_.push_back(order); 300 | count++; 301 | } 302 | } 303 | bboxScore_.clear(); 304 | boundingBox_.clear(); 305 | 306 | //the first stage's nms 307 | if(count<1) 308 | { 309 | cleanUp(); 310 | continue; 311 | } 312 | nms(firstBbox_, firstOrderScore_, nms_threshold_register[0]); 313 | refineAndSquareBbox(firstBbox_, img_h, img_w); 314 | 315 | //第二步:进入Rnet网络 316 | count = 0; 317 | for(vector::iterator it=firstBbox_.begin(); it!=firstBbox_.end();it++) 318 | { 319 | if((*it).exist) 320 | { 321 | ncnn::Mat tempIm; 322 | if((*it).y1 > img_h || (*it).x1 > img_w) 323 | { 324 | (*it).exist=false; 325 | continue; 326 | } 327 | //printf("Pnet Out: [%d,%d,%d,%d]\n",(*it).y1, img_h-(*it).y2, (*it).x1, img_w-(*it).x2); 328 | copy_cut_border(img, tempIm, (*it).y1, img_h-(*it).y2, (*it).x1, img_w-(*it).x2); 329 | ncnn::Mat in; 330 | resize_bilinear(tempIm, in, 24, 24); 331 | 332 | ncnn::Extractor ex = Rnet.create_extractor(); 333 | //gettimeofday(&tm_before,NULL); 334 | ex.set_light_mode(true); 335 | #ifdef USE_TWO_THREAD 336 | ex.set_num_threads(2); 337 | #else 338 | ex.set_num_threads(1); 339 | #endif 340 | ex.input(det2_param_id::LAYER_data, in); 341 | ncnn::Mat score, bbox; 342 | ex.extract(det2_param_id::BLOB_prob1, score); 343 | ex.extract(det2_param_id::BLOB_conv5_2, bbox); 344 | //gettimeofday(&tm_after,NULL); 345 | //printf("=====>>>>>Rnet spend time %d ms\n",(tm_after.tv_sec-tm_before.tv_sec)*1000+(tm_after.tv_usec-tm_before.tv_usec)/1000); 346 | #ifdef USE_NCNN_NEW_VERSION 347 | float* ptr = (float *)(score.channel(0)); 348 | if(ptr[1]>threshold[1]) 349 | { 350 | for(int channel=0;channel<4;channel++) 351 | { 352 | //printf("Rnet::regreCoord:%f\n",bbox.channel(0)[channel]); 353 | it->regreCoord[channel]=bbox.channel(0)[channel];//*(bbox.data+channel*bbox.cstep); 354 | } 355 | it->area = (it->x2 - it->x1)*(it->y2 - it->y1); 356 | it->score = ptr[1];//*(score.data+score.cstep); 357 | //printf("it->score = %f\n",it->score); 358 | secondBbox_.push_back(*it); 359 | order.score = it->score; 360 | order.oriOrder = count++; 361 | secondBboxScore_.push_back(order); 362 | } 363 | #else 364 | if(*(score.data+score.cstep)>threshold[1]) 365 | { 366 | for(int channel=0;channel<4;channel++) 367 | { 368 | it->regreCoord[channel]=bbox.channel(channel)[0];//*(bbox.data+channel*bbox.cstep); 369 | } 370 | it->area = (it->x2 - it->x1)*(it->y2 - it->y1); 371 | it->score = score.channel(1)[0];//*(score.data+score.cstep); 372 | secondBbox_.push_back(*it); 373 | order.score = it->score; 374 | order.oriOrder = count++; 375 | secondBboxScore_.push_back(order); 376 | } 377 | #endif 378 | else 379 | { 380 | (*it).exist=false; 381 | } 382 | } 383 | } 384 | firstBbox_.clear(); 385 | firstOrderScore_.clear(); 386 | 387 | if(count<1) 388 | { 389 | cleanUp(); 390 | continue; 391 | } 392 | 393 | nms(secondBbox_, secondBboxScore_, nms_threshold_register[1]); 394 | refineAndSquareBbox(secondBbox_, img_h, img_w); 395 | 396 | //第三步:进入Onet网络 397 | count = 0; 398 | for(vector::iterator it=secondBbox_.begin(); it!=secondBbox_.end();it++) 399 | { 400 | if((*it).exist) 401 | { 402 | ncnn::Mat tempIm; 403 | copy_cut_border(img, tempIm, (*it).y1, img_h-(*it).y2, (*it).x1, img_w-(*it).x2); 404 | ncnn::Mat in; 405 | resize_bilinear(tempIm, in, 48, 48); 406 | 407 | ncnn::Extractor ex = Onet.create_extractor(); 408 | ex.set_light_mode(true); 409 | #ifdef USE_TWO_THREAD 410 | ex.set_num_threads(2); 411 | #else 412 | ex.set_num_threads(1); 413 | #endif 414 | ex.input(det3_param_id::LAYER_data, in); 415 | ncnn::Mat score, bbox, keyPoint; 416 | ex.extract(det3_param_id::BLOB_prob1, score); 417 | ex.extract(det3_param_id::BLOB_conv6_2, bbox); 418 | ex.extract(det3_param_id::BLOB_conv6_3, keyPoint); 419 | 420 | #ifdef USE_NCNN_NEW_VERSION 421 | if(score.channel(0)[1]>threshold[2]) 422 | { 423 | for(int channel=0;channel<4;channel++) 424 | { 425 | it->regreCoord[channel]=bbox.channel(0)[channel]; 426 | } 427 | it->area = (it->x2 - it->x1)*(it->y2 - it->y1); 428 | it->score = score.channel(0)[1]; 429 | for(int num=0;num<5;num++) 430 | { 431 | (it->ppoint)[num] = it->x1 + (it->x2 - it->x1)*keyPoint.channel(0)[num]; 432 | (it->ppoint)[num+5] = it->y1 + (it->y2 - it->y1)*keyPoint.channel(0)[num+5]; 433 | } 434 | thirdBbox_.push_back(*it); 435 | order.score = it->score; 436 | order.oriOrder = count++; 437 | thirdBboxScore_.push_back(order); 438 | } 439 | #else 440 | if(score.channel(1)[0]>threshold[2]) 441 | { 442 | for(int channel=0;channel<4;channel++) 443 | { 444 | it->regreCoord[channel]=bbox.channel(channel)[0]; 445 | } 446 | it->area = (it->x2 - it->x1)*(it->y2 - it->y1); 447 | it->score = score.channel(1)[0]; 448 | for(int num=0;num<5;num++) 449 | { 450 | (it->ppoint)[num] = it->x1 + (it->x2 - it->x1)*keyPoint.channel(num)[0]; 451 | (it->ppoint)[num+5] = it->y1 + (it->y2 - it->y1)*keyPoint.channel(num+5)[0]; 452 | } 453 | 454 | thirdBbox_.push_back(*it); 455 | order.score = it->score; 456 | order.oriOrder = count++; 457 | thirdBboxScore_.push_back(order); 458 | } 459 | #endif 460 | else 461 | { 462 | (*it).exist=false; 463 | } 464 | } 465 | } 466 | secondBbox_.clear(); 467 | secondBboxScore_.clear(); 468 | 469 | if(count<1) 470 | { 471 | cleanUp(); 472 | continue; 473 | } 474 | 475 | refineAndSquareBbox(thirdBbox_, img_h, img_w); 476 | nms(thirdBbox_, thirdBboxScore_, nms_threshold_register[2], "Min"); 477 | 478 | rect_res.clear(); 479 | //返回最终结果 480 | for(vector::iterator it=thirdBbox_.begin(); it!=thirdBbox_.end();it++) 481 | { 482 | if((*it).exist) 483 | { 484 | Rect face_rect; 485 | 486 | face_rect.x = (*it).x1; 487 | face_rect.y = (*it).y1; 488 | face_rect.width = (*it).x2-(*it).x1; 489 | face_rect.height = (*it).y2-(*it).y1; 490 | 491 | //printf("=====>>>>>[%d,%d,%d,%d]\n",face_rect.x,face_rect.y,face_rect.width,face_rect.height); 492 | //rectangle(image_tmp, face_rect, Scalar(0,0,255), 2,8,0); 493 | //char tmp_name[128]={0}; 494 | //sprintf(tmp_name,"result_%d.bmp",fps); 495 | 496 | rect_res.push_back(face_rect); 497 | vector vp2d; 498 | for(int num=0;num<5;num++) 499 | { 500 | cv::Point2d vpd((*it).ppoint[num],(*it).ppoint[num+5]); 501 | vp2d.push_back(vpd); 502 | //cv::circle(image_tmp,Point((*it).ppoint[2*num],(*it).ppoint[2*num+1]),3,Scalar(0,255,255), -1); 503 | } 504 | points.push_back(vp2d); 505 | //imwrite(tmp_name,image_tmp); 506 | } 507 | } 508 | 509 | thirdBbox_.clear(); 510 | thirdBboxScore_.clear(); 511 | if(rect_res.size()>0) 512 | { 513 | break; 514 | } 515 | } 516 | } 517 | 518 | void mtcnn::clearNet() 519 | { 520 | Pnet.clear(); 521 | Rnet.clear(); 522 | Onet.clear(); 523 | } 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | -------------------------------------------------------------------------------- /mtcnn.h: -------------------------------------------------------------------------------- 1 | #ifndef MTCNN_H 2 | #define MTCNN_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | using namespace std; 9 | using namespace cv; 10 | 11 | struct Bbox 12 | { 13 | float score; 14 | int x1; 15 | int y1; 16 | int x2; 17 | int y2; 18 | float area; 19 | bool exist; 20 | float ppoint[10]; 21 | float regreCoord[4]; 22 | }; 23 | 24 | struct orderScore 25 | { 26 | float score; 27 | int oriOrder; 28 | }; 29 | 30 | bool cmpScore(orderScore lsh, orderScore rsh); 31 | 32 | class mtcnn 33 | { 34 | public: 35 | mtcnn(int minsize); 36 | void reset_minsize(int minsize); 37 | int get_minsize_value(void); 38 | //void detect(ncnn::Mat& img_, std::vector &ret_res); 39 | void mtcnn_oneperson_detect(Mat &image, vector &rect_res, vector > &points); 40 | void clearNet(); 41 | ~mtcnn(){ 42 | clearNet(); 43 | }; 44 | 45 | private: 46 | void generateBbox(ncnn::Mat score, ncnn::Mat location, vector& boundingBox_, vector& bboxScore_, float scale); 47 | void nms(vector &boundingBox_, std::vector &bboxScore_, const float overlap_threshold, string modelname="Union"); 48 | void refineAndSquareBbox(vector &vecBbox, const int &height, const int &width); 49 | void cleanUp(); 50 | 51 | ncnn::Net Pnet, Rnet, Onet; 52 | ncnn::Mat img; 53 | 54 | static float nms_threshold[3]; 55 | static float threshold[3]; 56 | static float nms_threshold_singleface[3]; 57 | static float nms_threshold_mulface[3]; 58 | static float nms_threshold_register[3]; 59 | 60 | 61 | static float mean_vals[3]; 62 | static float norm_vals[3]; 63 | 64 | std::vector firstBbox_, secondBbox_,thirdBbox_; 65 | std::vector firstOrderScore_, secondBboxScore_, thirdBboxScore_; 66 | int img_w, img_h; 67 | int min_size; 68 | }; 69 | 70 | 71 | #endif 72 | --------------------------------------------------------------------------------