├── CMakeLists.txt
├── README.md
├── coco_eval
    └── coco_eval.py
├── demo
    ├── CMakeLists.txt
    └── demo.cpp
├── eval
    ├── CMakeLists.txt
    └── eval.cpp
├── images
    ├── .ptp-sync-folder
    ├── dog.jpg
    └── person.jpg
├── prototxt
    ├── mobilenet_v1_yolov3.prototxt
    ├── mobilenet_v2_yolov3.prototxt
    ├── yolov3-spp.prototxt
    ├── yolov3-tiny.prototxt
    ├── yolov3_416x416.prototxt
    ├── yolov3_512x512.prototxt
    ├── yolov3_608x608.prototxt
    └── yolov4.prototxt
└── src
    ├── activation_kernels.cu
    ├── activations.h
    ├── blas.h
    ├── blas_kernels.cu
    ├── box.cpp
    ├── box.h
    ├── cuda.cpp
    ├── cuda.h
    ├── detector.cpp
    ├── detector.h
    ├── image.cpp
    ├── image.h
    ├── image_opencv.cpp
    ├── image_opencv.h
    ├── yolo_layer.cpp
    └── yolo_layer.h


/CMakeLists.txt:
--------------------------------------------------------------------------------
# Top-level build script for caffe-yolov3.
#
# Builds the shared library `yolov3-plugin` from src/*.cpp and src/*.cu with
# the FindCUDA module, links it against Caffe, glog, gflags, Boost.System,
# GLEW and OpenCV, copies the public headers into the build tree, and then
# adds the demo/ and eval/ subdirectories.
#
# Machine-specific locations are cache variables; override them on the
# command line instead of editing this file, e.g.
#   cmake -DCAFFE_ROOT=/opt/caffe -DYOLOV3_CUDA_ARCHS="61;75" ..

# 2.8 is no longer accepted by current CMake releases (compatibility with
# versions older than 3.5 was deprecated in 3.27 and removed in 4.0).
cmake_minimum_required(VERSION 3.5)
project(caffe-yolov3)

# Kept as a raw flag (rather than CMAKE_CXX_STANDARD) so the value is part of
# CMAKE_CXX_FLAGS, which FindCUDA can forward to nvcc's host compiler.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")  # -std=gnu++11

# ---------------------------------------------------------------------------
# Dependencies
# ---------------------------------------------------------------------------

# FindCUDA provides cuda_add_library()/cuda_add_executable(); demo/ calls
# cuda_add_executable() as well, so a missing toolkit must stop the
# configuration here instead of failing later with "unknown command".
find_package(CUDA REQUIRED)

# Located before any target is created so that its include directories reach
# the library and the subdirectories.
find_package(OpenCV REQUIRED)

# Root of the Caffe tree. The default keeps the location this file used to
# hard-code; the tree is expected to provide include/, build/include/ and
# build/lib/libcaffe.so.
set(CAFFE_ROOT "/home/chen/caffe" CACHE PATH "Root directory of the Caffe tree")

# yolov3_find_required_library(<out_var> <find_library arguments...>)
#
# Thin wrapper around find_library() that aborts the configuration with a
# readable message when the library cannot be located. <out_var> is the cache
# variable receiving the library path; it can be preset with -D<out_var>=...
#
# Example:
#   yolov3_find_required_library(GLOG_LIBRARY NAMES glog)
function(yolov3_find_required_library out_var)
  find_library(${out_var} ${ARGN})
  # ${out_var} expands to the variable *name*, which if() then dereferences.
  if(NOT ${out_var})
    message(FATAL_ERROR
      "Required library for ${out_var} was not found; "
      "pass -D${out_var}=<full path to the library>")
  endif()
endfunction()

yolov3_find_required_library(CAFFE_LIBRARY
  NAMES caffe
  HINTS "${CAFFE_ROOT}/build/lib" "${CAFFE_ROOT}/lib")
yolov3_find_required_library(GLOG_LIBRARY NAMES glog)
# The versioned file names are the ones that used to be linked by absolute
# path; they are kept as fallbacks for systems without the unversioned
# development symlinks.
yolov3_find_required_library(GFLAGS_LIBRARY NAMES gflags libgflags.so.2)
yolov3_find_required_library(BOOST_SYSTEM_LIBRARY NAMES boost_system)
yolov3_find_required_library(GLEW_LIBRARY NAMES GLEW libGLEW.so.1.13)

# ---------------------------------------------------------------------------
# nvcc flags
# ---------------------------------------------------------------------------

# Default GPU architectures: 53 (Tegra TX1), 61 (GTX 1060), 62 (Tegra TX2).
set(yolov3_default_cuda_archs 53 61 62)
if(CUDA_VERSION_MAJOR GREATER 9)
  message(STATUS "CUDA ${CUDA_VERSION_MAJOR} detected, enabling SM_72 and SM_75")
  # 72 (Xavier) and 75 (RTX 2080) are only requested for CUDA 10 and newer.
  list(APPEND yolov3_default_cuda_archs 72 75)
endif()
set(YOLOV3_CUDA_ARCHS "${yolov3_default_cuda_archs}" CACHE STRING
  "Semicolon-separated CUDA compute capabilities to generate code for")

list(APPEND CUDA_NVCC_FLAGS -O3)
foreach(arch ${YOLOV3_CUDA_ARCHS})
  list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_${arch},code=sm_${arch})
endforeach()

# ---------------------------------------------------------------------------
# Output layout: <build>/<processor>/{bin,lib,include}
# ---------------------------------------------------------------------------

set(PROJECT_OUTPUT_DIR ${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR})
set(PROJECT_INCLUDE_DIR ${PROJECT_OUTPUT_DIR}/include)

file(MAKE_DIRECTORY ${PROJECT_INCLUDE_DIR})
file(MAKE_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)

message(STATUS "system arch: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "output path: ${PROJECT_OUTPUT_DIR}")

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)

# ---------------------------------------------------------------------------
# Library
# ---------------------------------------------------------------------------

# Directory-scoped on purpose: nvcc compilations driven by FindCUDA and the
# demo/ subdirectory (which declares no include paths of its own; eval/ is
# presumably the same) rely on inheriting these paths.
include_directories(
  ${PROJECT_INCLUDE_DIR}
  ${CAFFE_ROOT}/include
  ${CAFFE_ROOT}/build/include
  ${OpenCV_INCLUDE_DIRS}
)
# GIE_PATH is optional and not set anywhere in this file; only use it when the
# user supplied it, instead of adding a bare "/include".
if(GIE_PATH)
  include_directories(${GIE_PATH}/include)
endif()

file(GLOB inferenceSources
  "${PROJECT_SOURCE_DIR}/src/*.cpp"
  "${PROJECT_SOURCE_DIR}/src/*.cu")
file(GLOB inferenceIncludes "${PROJECT_SOURCE_DIR}/src/*.h")

cuda_add_library(yolov3-plugin SHARED ${inferenceSources})

# Plain (keyword-less) signature on purpose: cuda_add_library() links the CUDA
# runtime with the plain signature by default, and CMake forbids mixing plain
# and keyword signatures on one target.
target_link_libraries(yolov3-plugin
  ${CAFFE_LIBRARY}
  ${GLOG_LIBRARY}
  ${GFLAGS_LIBRARY}
  ${BOOST_SYSTEM_LIBRARY}
  ${GLEW_LIBRARY}
  ${OpenCV_LIBS}
)

# Copy all public headers into the build-tree include directory.
foreach(header ${inferenceIncludes})
  get_filename_component(header_name "${header}" NAME)
  message(STATUS "Copying ${header}")
  configure_file("${header}" "${PROJECT_INCLUDE_DIR}/${header_name}" COPYONLY)
endforeach()

# Expose the network data next to the binaries. The repository listing shows
# no data/ directory, so the link is only created when the directory exists
# (previously a dangling symlink was created unconditionally).
set(yolov3_network_data_dir "${PROJECT_SOURCE_DIR}/data/networks")
if(EXISTS "${yolov3_network_data_dir}")
  execute_process(
    COMMAND "${CMAKE_COMMAND}" -E create_symlink
            "${yolov3_network_data_dir}"
            "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/networks")
endif()

add_subdirectory(demo)
add_subdirectory(eval)

# ---------------------------------------------------------------------------
# Install
# ---------------------------------------------------------------------------

# public headers
foreach(header ${inferenceIncludes})
  install(FILES "${header}" DESTINATION include/yolov3-plugin)
endforeach()

# the shared library
install(TARGETS yolov3-plugin DESTINATION lib/yolov3-plugin EXPORT yolov3-pluginConfig)

# the cmake project, for importing
install(EXPORT yolov3-pluginConfig DESTINATION share/yolov3-plugin/cmake)

--------------------------------------------------------------------------------
/README.md:
-------------------------------------------------------------------------------- 1 | # caffe-yolov3 2 | # Platform 3 | Tested on Ubuntu16.04LTS with Jetson-TX2 and Ubuntu16.04LTS with gtx1060. 4 | 5 | NOTE: You need to change CMakeLists.txt on Ubuntu16.04LTS with GTX1060. 6 | 7 | # Install 8 | git clone https://github.com/ChenYingpeng/caffe-yolov3 9 | 10 | cd caffe-yolov3 11 | 12 | mkdir build 13 | 14 | cd build 15 | 16 | cmake .. 17 | 18 | make -j6 19 | 20 | # Darknet2Caffe 21 | darknet2caffe link [github](https://github.com/ChenYingpeng/darknet2caffe) 22 | 23 | 24 | # Demo 25 | First, download the model and put it into the `caffemodel` directory. 26 | 27 | $ `./x86_64/bin/demo ../prototxt/yolov4.prototxt ../caffemodel/yolov4.caffemodel ../images/dog.jpg` 28 | 29 | # Eval 30 | 1. Run 31 | $ `./x86_64/bin/eval ../prototxt/yolov4.prototxt ../caffemodel/yolov4.caffemodel /path/to/coco/val2017/` 32 | 33 | This generates `coco_results.json` in `results/`. 34 | 35 | 2. Run 36 | $ `python coco_eval/coco_eval.py --gt-json path/to/coco/annotations/instances_val2017.json --pred-json results/coco_results.json` 37 | 38 | 3. Eval results Yolov4 input size 608x608 from this repo. 
39 | ``` 40 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.428 41 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.664 42 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.461 43 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.241 44 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.492 45 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.575 46 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.331 47 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.517 48 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.544 49 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.363 50 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.609 51 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.710 52 | 53 | ``` 54 | 55 | 4. Eval results Yolov4 input size 608x608 from official model [AlexeyAB/YoloV4](https://github.com/AlexeyAB/darknet). 
56 | ``` 57 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505 58 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.749 59 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.557 60 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.357 61 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.559 62 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.613 63 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.368 64 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.598 65 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.634 66 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.500 67 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.680 68 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.757 69 | 70 | ``` 71 | 72 | # Download Model 73 | 74 | Baidu link [model](https://pan.baidu.com/s/1yiCrnmsOm0hbweJBiiUScQ) 75 | 76 | 77 | # Note 78 | 79 | 1. Inference is supported only on GPU platforms, such as RTX2080, GTX1060, Jetson Tegra X1, TX2, Nano, Xavier, etc. 80 | 81 | 2. Supported models include yolov4, yolov3, yolov3-spp, yolov3-tiny, etc. 
82 | 83 | 84 | ### References 85 | Appreciate the great work from the following repositories: 86 | - [official/Yolo](https://pjreddie.com/darknet/yolo/) 87 | - [AlexeyAB/YoloV4](https://github.com/AlexeyAB/darknet) 88 | -------------------------------------------------------------------------------- /coco_eval/coco_eval.py: -------------------------------------------------------------------------------- 1 | #Company: Synthesis 2 | #Author: Chen 3 | #Date: 2020/04/26 4 | 5 | """ 6 | COCO-Style Evaluations 7 | 8 | put images here datasets/your_project_name/annotations/val_set_name/*.jpg 9 | put annotations here datasets/your_project_name/annotations/instances_{val_set_name}.json 10 | put weights here /path/to/your/weights/*.pth 11 | change compound_coef 12 | 13 | """ 14 | 15 | import json 16 | import os 17 | 18 | import argparse 19 | import torch 20 | import yaml 21 | from tqdm import tqdm 22 | from pycocotools.coco import COCO 23 | from pycocotools.cocoeval import COCOeval 24 | 25 | def eval(coco_gt, image_ids, pred_json_path): 26 | # load results in COCO evaluation tool 27 | coco_pred = coco_gt.loadRes(pred_json_path) 28 | 29 | # run COCO evaluation 30 | print('BBox') 31 | coco_eval = COCOeval(coco_gt, coco_pred, 'bbox') 32 | coco_eval.params.imgIds = image_ids 33 | coco_eval.evaluate() 34 | coco_eval.accumulate() 35 | coco_eval.summarize() 36 | 37 | if __name__ == '__main__': 38 | ap = argparse.ArgumentParser() 39 | ap.add_argument('--gt-json', type=str, default='/home/chen/data/coco2017/annotations/instances_val2017.json', help='coco val2017 annotations json files') 40 | ap.add_argument('--pred-json', type=str, default='results/darknet_yolov3_coco_results.json', help='pred coco val2017 annotations json files') 41 | args = ap.parse_args() 42 | print(args) 43 | 44 | pred_json_path = args.pred_json 45 | 46 | MAX_IMAGES = 10000 47 | coco_gt = COCO(args.gt_json) 48 | image_ids = coco_gt.getImgIds()[:MAX_IMAGES] 49 | 50 | eval(coco_gt, image_ids, pred_json_path) 51 | 
-------------------------------------------------------------------------------- /demo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | file(GLOB demoSources *.cpp) 3 | #file(GLOB detectnetIncludes *.h ) 4 | 5 | cuda_add_executable(demo ${demoSources}) 6 | target_link_libraries(demo yolov3-plugin) 7 | -------------------------------------------------------------------------------- /demo/demo.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Company: Synthesis 4 | * Author: Chen 5 | * Date: 2018/06/04 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "detector.h" 14 | 15 | using namespace cv; 16 | 17 | 18 | bool signal_recieved = false; 19 | 20 | 21 | void sig_handler(int signo){ 22 | if( signo == SIGINT ){ 23 | printf("received SIGINT\n"); 24 | signal_recieved = true; 25 | } 26 | } 27 | 28 | uint64_t current_timestamp() { 29 | struct timeval te; 30 | gettimeofday(&te, NULL); // get current time 31 | return te.tv_sec*1000LL + te.tv_usec/1000; // caculate milliseconds 32 | } 33 | 34 | int main( int argc, char** argv ) 35 | { 36 | std::string model_file; 37 | std::string weights_file; 38 | std::string image_path; 39 | if(4 == argc){ 40 | model_file = argv[1]; 41 | weights_file = argv[2]; 42 | image_path = argv[3]; 43 | } 44 | else{ 45 | LOG(ERROR) << "Input error: please input ./xx [model_path] [weights_path] [image_path]"; 46 | return -1; 47 | } 48 | int gpu_id = 0; 49 | //init network 50 | Detector detector = Detector(model_file,weights_file,gpu_id); 51 | 52 | //load image with opencv 53 | Mat img = imread(image_path); 54 | 55 | //detect 56 | float thresh = 0.3; 57 | std::vector bbox_vec = detector.detect(img,thresh); 58 | 59 | //show detection results 60 | for (int i=0;i 9 | #include 10 | #include 11 | #include 12 | 13 | #include "detector.h" 14 | 15 | using namespace cv; 16 | 17 | 18 | bool signal_recieved = false; 19 | 20 
| static int coco_ids[] = { 1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90 }; 21 | 22 | 23 | void sig_handler(int signo){ 24 | if( signo == SIGINT ){ 25 | printf("received SIGINT\n"); 26 | signal_recieved = true; 27 | } 28 | } 29 | 30 | uint64_t current_timestamp() { 31 | struct timeval te; 32 | gettimeofday(&te, NULL); // get current time 33 | return te.tv_sec*1000LL + te.tv_usec/1000; // caculate milliseconds 34 | } 35 | 36 | int main( int argc, char** argv ) 37 | { 38 | std::string model_file; 39 | std::string weights_file; 40 | std::string file_path; 41 | if(4 == argc){ 42 | model_file = argv[1]; 43 | weights_file = argv[2]; 44 | file_path = argv[3]; 45 | } 46 | else{ 47 | LOG(ERROR) << "Input error: please input ./xx [model_path] [weights_path] [file_path]"; 48 | return -1; 49 | } 50 | 51 | //init network 52 | Detector detector = Detector(model_file,weights_file,0); 53 | 54 | std::vector files; 55 | file_path = file_path + "/*.jpg"; 56 | LOG(INFO) << "images dir path is " << file_path; 57 | glob(file_path,files,false); 58 | 59 | char* prefix = "../results"; 60 | char* outfile = "coco_results"; 61 | FILE *fp = 0; 62 | 63 | char buff1[1024]; 64 | snprintf(buff1, 1024, "%s/%s.json", prefix, outfile); 65 | fp = fopen(buff1, "w"); 66 | fprintf(fp, "[\n"); 67 | 68 | for(int i=0;i bbox_vec = detector.detect(img,thresh); 86 | 87 | //show detection results 88 | for (int i=0;i 1) return .001f*(x-1.f) + 1.f; 16 | return x; 17 | } 18 | 19 | __device__ float hardtan_activate_kernel(float x) 20 | { 21 | if (x < -1) return -1; 22 | if (x > 1) return 1; 23 | return x; 24 | } 25 | 26 | __device__ float linear_activate_kernel(float x){return x;} 27 | __device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} 28 | __device__ float loggy_activate_kernel(float x){return 2.f/(1.f 
+ expf(-x)) - 1;} 29 | __device__ float relu_activate_kernel(float x){return x*(x>0);} 30 | __device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} 31 | __device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} 32 | __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} 33 | __device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;} 34 | __device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);} 35 | __device__ float plse_activate_kernel(float x) 36 | { 37 | if(x < -4) return .01f * (x + 4); 38 | if(x > 4) return .01f * (x - 4) + 1; 39 | return .125f*x + .5f; 40 | } 41 | __device__ float stair_activate_kernel(float x) 42 | { 43 | int n = floorf(x); 44 | if (n%2 == 0) return floorf(x/2); 45 | else return (x - n) + floorf(x/2); 46 | } 47 | 48 | __device__ float activate_kernel(float x, ACTIVATION a) 49 | { 50 | switch(a){ 51 | case LINEAR: 52 | return linear_activate_kernel(x); 53 | case LOGISTIC: 54 | return logistic_activate_kernel(x); 55 | case LOGGY: 56 | return loggy_activate_kernel(x); 57 | case RELU: 58 | return relu_activate_kernel(x); 59 | case ELU: 60 | return elu_activate_kernel(x); 61 | case RELIE: 62 | return relie_activate_kernel(x); 63 | case RAMP: 64 | return ramp_activate_kernel(x); 65 | case LEAKY: 66 | return leaky_activate_kernel(x); 67 | case TANH: 68 | return tanh_activate_kernel(x); 69 | case PLSE: 70 | return plse_activate_kernel(x); 71 | case STAIR: 72 | return stair_activate_kernel(x); 73 | case HARDTAN: 74 | return hardtan_activate_kernel(x); 75 | case LHTAN: 76 | return lhtan_activate_kernel(x); 77 | } 78 | return 0; 79 | } 80 | 81 | __global__ void activate_array_kernel(float *x, int n, ACTIVATION a) 82 | { 83 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 84 | if(i < n) x[i] = activate_kernel(x[i], a); 85 | } 86 | 87 | void activate_array_gpu(float *x, int n, ACTIVATION a) 88 | { 89 | 
activate_array_kernel<<>>(x, n, a); 90 | check_error(cudaPeekAtLastError()); 91 | } 92 | -------------------------------------------------------------------------------- /src/activations.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | 7 | #ifndef __ACTIVATIONS_H_ 8 | #define __ACTIVATIONS_H_ 9 | 10 | typedef enum{ 11 | LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN 12 | } ACTIVATION; 13 | 14 | void activate_array_gpu(float* x,int n,ACTIVATION a); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /src/blas.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | 7 | #ifndef __BLAS_H_ 8 | #define __BLAS_H_ 9 | 10 | void copy_gpu(int N,float* X,int INCX,float* Y,int INCY); 11 | 12 | void fill_gpu(int N, float ALPHA, float * X, int INCX); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /src/blas_kernels.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | 7 | 8 | #include 9 | 10 | #include "cuda.h" 11 | #include "blas.h" 12 | 13 | __global__ void copy_kernel(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY) 14 | { 15 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 16 | if(i < N) Y[i*INCY + OFFY] = X[i*INCX + OFFX]; 17 | } 18 | 19 | __global__ void fill_kernel(int N, float ALPHA, float *X, int INCX) 20 | { 21 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 22 | if(i < N) X[i*INCX] = ALPHA; 23 | } 24 | 25 | void copy_gpu_offset(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY) 26 | { 27 | 
copy_kernel<<>>(N,X,OFFX,INCX,Y,OFFY,INCY); 28 | check_error(cudaPeekAtLastError()); 29 | } 30 | 31 | void copy_gpu(int N,float* X,int INCX,float* Y,int INCY) 32 | { 33 | copy_gpu_offset(N,X,0,INCX,Y,0,INCY); 34 | } 35 | 36 | 37 | void fill_gpu(int N, float ALPHA, float * X, int INCX) 38 | { 39 | fill_kernel<<>>(N, ALPHA, X, INCX); 40 | check_error(cudaPeekAtLastError()); 41 | } 42 | -------------------------------------------------------------------------------- /src/box.cpp: -------------------------------------------------------------------------------- 1 | #include "box.h" 2 | #include 3 | #include 4 | #include 5 | 6 | int nms_comparator(const void *pa, const void *pb) 7 | { 8 | detection a = *(detection *)pa; 9 | detection b = *(detection *)pb; 10 | float diff = 0; 11 | if(b.sort_class >= 0){ 12 | diff = a.prob[b.sort_class] - b.prob[b.sort_class]; 13 | } else { 14 | diff = a.objectness - b.objectness; 15 | } 16 | if(diff < 0) return 1; 17 | else if(diff > 0) return -1; 18 | return 0; 19 | } 20 | 21 | float overlap(float x1, float w1, float x2, float w2) 22 | { 23 | float l1 = x1 - w1/2; 24 | float l2 = x2 - w2/2; 25 | float left = l1 > l2 ? l1 : l2; 26 | float r1 = x1 + w1/2; 27 | float r2 = x2 + w2/2; 28 | float right = r1 < r2 ? 
r1 : r2; 29 | return right - left; 30 | } 31 | 32 | float box_intersection(box a, box b) 33 | { 34 | float w = overlap(a.x, a.w, b.x, b.w); 35 | float h = overlap(a.y, a.h, b.y, b.h); 36 | if(w < 0 || h < 0) return 0; 37 | float area = w*h; 38 | return area; 39 | } 40 | 41 | float box_union(box a, box b) 42 | { 43 | float i = box_intersection(a, b); 44 | float u = a.w*a.h + b.w*b.h - i; 45 | return u; 46 | } 47 | 48 | float box_iou(box a, box b) 49 | { 50 | return box_intersection(a, b)/box_union(a, b); 51 | } 52 | 53 | void do_nms_sort(detection *dets, int total, int classes, float thresh) 54 | { 55 | int i, j, k; 56 | k = total-1; 57 | for(i = 0; i <= k; ++i){ 58 | if(dets[i].objectness == 0){ 59 | detection swap = dets[i]; 60 | dets[i] = dets[k]; 61 | dets[k] = swap; 62 | --k; 63 | --i; 64 | } 65 | } 66 | total = k+1; 67 | 68 | for(k = 0; k < classes; ++k){ 69 | for(i = 0; i < total; ++i){ 70 | dets[i].sort_class = k; 71 | } 72 | qsort(dets, total, sizeof(detection), nms_comparator); 73 | for(i = 0; i < total; ++i){ 74 | if(dets[i].prob[k] == 0) continue; 75 | box a = dets[i].bbox; 76 | for(j = i+1; j < total; ++j){ 77 | box b = dets[j].bbox; 78 | if (box_iou(a, b) > thresh){ 79 | dets[j].prob[k] = 0; 80 | } 81 | } 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/box.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | 7 | #ifndef __BOX_H_ 8 | #define __BOX_H_ 9 | #include "yolo_layer.h" 10 | 11 | 12 | void do_nms_sort(detection *dets, int total, int classes, float thresh); 13 | 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /src/cuda.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | #include "cuda.h" 7 | 
#include "blas.h" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | void error(const char* s) 15 | { 16 | perror(s); 17 | assert(0); 18 | exit(-1); 19 | } 20 | 21 | void check_error(cudaError_t status) 22 | { 23 | //cudaDeviceSynchronize(); 24 | cudaError_t status2 = cudaGetLastError(); 25 | if (status != cudaSuccess) 26 | { 27 | const char *s = cudaGetErrorString(status); 28 | char buffer[256]; 29 | printf("CUDA Error: %s\n", s); 30 | assert(0); 31 | snprintf(buffer, 256, "CUDA Error: %s", s); 32 | error(buffer); 33 | } 34 | if (status2 != cudaSuccess) 35 | { 36 | const char *s = cudaGetErrorString(status); 37 | char buffer[256]; 38 | printf("CUDA Error Prev: %s\n", s); 39 | assert(0); 40 | snprintf(buffer, 256, "CUDA Error Prev: %s", s); 41 | error(buffer); 42 | } 43 | } 44 | 45 | dim3 cuda_gridsize(size_t n){ 46 | size_t k = (n-1) / BLOCK + 1; 47 | size_t x = k; 48 | size_t y = 1; 49 | if(x > 65535){ 50 | x = ceil(sqrt(k)); 51 | y = (n-1)/(x*BLOCK) + 1; 52 | } 53 | dim3 d = {x, y, 1}; 54 | //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); 55 | return d; 56 | } 57 | 58 | float* cuda_make_array(float* x,size_t n) 59 | { 60 | float *x_gpu; 61 | size_t size = sizeof(float)*n; 62 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 63 | check_error(status); 64 | if(x){ 65 | status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 66 | check_error(status); 67 | } else { 68 | fill_gpu(n, 0, x_gpu, 1); 69 | } 70 | if(!x_gpu) error("Cuda malloc failed\n"); 71 | return x_gpu; 72 | } 73 | 74 | void cuda_free(float* x_gpu) 75 | { 76 | cudaError_t status = cudaFree(x_gpu); 77 | check_error(status); 78 | } 79 | 80 | void cuda_push_array(float *x_gpu,float* x,size_t n) 81 | { 82 | size_t size = sizeof(float)*n; 83 | cudaError_t status = cudaMemcpy(x_gpu,x,size,cudaMemcpyHostToDevice); 84 | check_error(status); 85 | } 86 | 87 | 88 | 89 | void cuda_pull_array(float *x_gpu,float* x,size_t n) 90 | { 91 | size_t size = sizeof(float)*n; 92 | cudaError_t 
status = cudaMemcpy(x,x_gpu,size,cudaMemcpyDeviceToHost); 93 | check_error(status); 94 | } 95 | -------------------------------------------------------------------------------- /src/cuda.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | 7 | #ifndef __CUDA_H_ 8 | #define __CUDA_H_ 9 | #include "cuda_runtime.h" 10 | #include "curand.h" 11 | #include "cublas_v2.h" 12 | 13 | #define BLOCK 512 14 | 15 | void check_error(cudaError_t status); 16 | 17 | dim3 cuda_gridsize(size_t n); 18 | 19 | float* cuda_make_array(float* x,size_t n); 20 | 21 | void cuda_free(float* x_gpu); 22 | 23 | void cuda_push_array(float *x_gpu,float* x,size_t n); 24 | 25 | void cuda_pull_array(float *x_gpu,float* x,size_t n); 26 | 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/detector.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2020/04/26 5 | */ 6 | 7 | #include "detector.h" 8 | 9 | int max_index(float *a, int n) 10 | { 11 | if(n <= 0) return -1; 12 | int i, max_i = 0; 13 | float max = a[0]; 14 | for(i = 1; i < n; ++i){ 15 | if(a[i] > max){ 16 | max = a[i]; 17 | max_i = i; 18 | } 19 | } 20 | return max_i; 21 | } 22 | 23 | Detector::Detector(std::string prototxt,std::string caffemodel,int gpu_id){ 24 | // set device 25 | Caffe::set_mode(Caffe::GPU); 26 | 27 | if (gpu_id >= 0){ 28 | Caffe::SetDevice(gpu_id); 29 | LOG(INFO) << "Using GPU #" << gpu_id; 30 | } 31 | else{ 32 | LOG(ERROR) << "Not supported CPU!"; 33 | } 34 | 35 | /* load and init network. 
*/ 36 | m_net.reset(new Net(prototxt, TEST)); 37 | m_net->CopyTrainedLayersFrom(caffemodel); 38 | LOG(INFO) << "net inputs numbers is " << m_net->num_inputs(); 39 | LOG(INFO) << "net outputs numbers is " << m_net->num_outputs(); 40 | 41 | CHECK_EQ(m_net->num_inputs(), 1) << "Network should have exactly one input."; 42 | 43 | m_net_input_data_blobs = m_net->input_blobs()[0]; 44 | LOG(INFO) << "input data layer channels is " << m_net_input_data_blobs->channels(); 45 | LOG(INFO) << "input data layer width is " << m_net_input_data_blobs->width(); 46 | LOG(INFO) << "input data layer height is " << m_net_input_data_blobs->height(); 47 | 48 | 49 | 50 | } 51 | 52 | Detector::~Detector(){ 53 | 54 | //release memory 55 | // free_image(m_sized); 56 | // free_image(m_im); 57 | 58 | } 59 | 60 | 61 | 62 | 63 | std::vector Detector::detect(std::string image_path,float thresh){ 64 | //load image 65 | image im = load_image_color((char*)image_path.c_str(),0,0); 66 | image sized = letterbox_image(im,m_net_input_data_blobs->width(),m_net_input_data_blobs->height()); 67 | 68 | //copy data from cpu to gpu 69 | int size = m_net_input_data_blobs->channels()*m_net_input_data_blobs->width()*m_net_input_data_blobs->height(); 70 | cuda_push_array(m_net_input_data_blobs->mutable_gpu_data(),sized.data,size); 71 | 72 | //clean blobs 73 | m_blobs.clear(); 74 | 75 | int nboxes = 0; 76 | detection *dets = NULL; 77 | 78 | // forward 79 | m_net->Forward(); 80 | for(int i =0;inum_outputs();++i){ 81 | m_blobs.push_back(m_net->output_blobs()[i]); 82 | } 83 | 84 | dets = get_detections(m_blobs,im.w,im.h, 85 | m_net_input_data_blobs->width(),m_net_input_data_blobs->height(),m_thresh, m_classes, &nboxes); 86 | 87 | //deal with results 88 | std::vector bbox_vec; 89 | for (int i = 0; i < nboxes; ++i) { 90 | box b = dets[i].bbox; 91 | int const obj_id = max_index(dets[i].prob, m_classes); 92 | float const prob = dets[i].prob[obj_id]; 93 | 94 | if (prob > thresh) 95 | { 96 | bbox_t bbox; 97 | bbox.x = 
std::max((double)0, (b.x - b.w / 2.)*im.w); 98 | bbox.y = std::max((double)0, (b.y - b.h / 2.)*im.h); 99 | bbox.w = b.w*im.w; 100 | bbox.h = b.h*im.h; 101 | bbox.obj_id = obj_id; 102 | bbox.prob = prob; 103 | 104 | bbox_vec.push_back(bbox); 105 | } 106 | } 107 | 108 | free_detections(dets,nboxes); 109 | free_image(sized); 110 | free_image(im); 111 | return bbox_vec; 112 | } 113 | 114 | 115 | 116 | std::vector Detector::detect(cv::Mat mat,float thresh){ 117 | //convert mat to image 118 | if(mat.data == NULL) 119 | throw std::runtime_error("Mat is empty"); 120 | image im = mat_to_image(mat); 121 | image sized = letterbox_image(im,m_net_input_data_blobs->width(),m_net_input_data_blobs->height()); 122 | 123 | //copy data from cpu to gpu 124 | int size = m_net_input_data_blobs->channels()*m_net_input_data_blobs->width()*m_net_input_data_blobs->height(); 125 | cuda_push_array(m_net_input_data_blobs->mutable_gpu_data(),sized.data,size); 126 | 127 | //clean blobs 128 | m_blobs.clear(); 129 | 130 | int nboxes = 0; 131 | detection *dets = NULL; 132 | 133 | // forward 134 | m_net->Forward(); 135 | for(int i =0;inum_outputs();++i){ 136 | m_blobs.push_back(m_net->output_blobs()[i]); 137 | } 138 | 139 | dets = get_detections(m_blobs,im.w,im.h, 140 | m_net_input_data_blobs->width(),m_net_input_data_blobs->height(),m_thresh, m_classes, &nboxes); 141 | 142 | //deal with results 143 | std::vector bbox_vec; 144 | for (int i = 0; i < nboxes; ++i) { 145 | box b = dets[i].bbox; 146 | int const obj_id = max_index(dets[i].prob, m_classes); 147 | float const prob = dets[i].prob[obj_id]; 148 | 149 | if (prob > thresh) 150 | { 151 | bbox_t bbox; 152 | bbox.x = std::max((double)0, (b.x - b.w / 2.)*im.w); 153 | bbox.y = std::max((double)0, (b.y - b.h / 2.)*im.h); 154 | bbox.w = b.w*im.w; 155 | bbox.h = b.h*im.h; 156 | bbox.obj_id = obj_id; 157 | bbox.prob = prob; 158 | 159 | bbox_vec.push_back(bbox); 160 | } 161 | } 162 | 163 | free_detections(dets,nboxes); 164 | free_image(sized); 165 | 
free_image(im); 166 | return bbox_vec; 167 | } 168 | -------------------------------------------------------------------------------- /src/detector.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2020/04/26 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | 14 | #include "image_opencv.h" 15 | #include "yolo_layer.h" 16 | 17 | using namespace caffe; 18 | // using namespace cv; 19 | 20 | 21 | struct bbox_t{ 22 | unsigned int x,y,w,h; //(x,y) - top-left corner, (w,h) - width & height of bounded box 23 | float prob; // confidence - probability that the object was found correctly 24 | unsigned int obj_id; // class of object - from range [0,classes - 1] 25 | }; 26 | 27 | class Detector{ 28 | public: 29 | Detector(std::string prototxt,std::string caffemodel,int gpu_id); 30 | ~Detector(); 31 | 32 | std::vector detect(std::string image_path,float thresh); 33 | std::vector detect(cv::Mat mat,float thresh); 34 | 35 | private: 36 | shared_ptr > m_net; 37 | Blob * m_net_input_data_blobs; 38 | vector*> m_blobs; 39 | 40 | float m_thresh = 0.001; 41 | int m_classes = 80; //coco classes 42 | }; 43 | -------------------------------------------------------------------------------- /src/image.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "image.h" 3 | 4 | #include 5 | 6 | using namespace cv; 7 | 8 | void rgbgr_image(image im) 9 | { 10 | int i; 11 | for(i = 0; i < im.w*im.h; ++i){ 12 | float swap = im.data[i]; 13 | im.data[i] = im.data[i+im.w*im.h*2]; 14 | im.data[i+im.w*im.h*2] = swap; 15 | } 16 | } 17 | 18 | void ipl_into_image(IplImage* src, image im) 19 | { 20 | unsigned char *data = (unsigned char *)src->imageData; 21 | int h = src->height; 22 | int w = src->width; 23 | int c = src->nChannels; 24 | int step = src->widthStep; 25 | int i, j, k; 26 | 27 | for(i = 0; i < h; ++i){ 28 | for(k= 0; 
k < c; ++k){ 29 | for(j = 0; j < w; ++j){ 30 | im.data[k*w*h + i*w + j] = data[i*step + j*c + k]/255.; 31 | } 32 | } 33 | } 34 | } 35 | 36 | image make_empty_image(int w, int h, int c) 37 | { 38 | image out; 39 | out.data = 0; 40 | out.h = h; 41 | out.w = w; 42 | out.c = c; 43 | return out; 44 | } 45 | 46 | image make_image(int w, int h, int c) 47 | { 48 | image out = make_empty_image(w,h,c); 49 | out.data = (float*)calloc(h*w*c, sizeof(float)); 50 | return out; 51 | } 52 | 53 | image ipl_to_image(IplImage* src) 54 | { 55 | int h = src->height; 56 | int w = src->width; 57 | int c = src->nChannels; 58 | image out = make_image(w, h, c); 59 | ipl_into_image(src, out); 60 | return out; 61 | } 62 | 63 | 64 | 65 | 66 | 67 | image load_image_cv(char *filename, int channels) 68 | { 69 | IplImage* src = 0; 70 | int flag = -1; 71 | if (channels == 0) flag = -1; 72 | else if (channels == 1) flag = 0; 73 | else if (channels == 3) flag = 1; 74 | else { 75 | fprintf(stderr, "OpenCV can't force load with %d channels\n", channels); 76 | } 77 | 78 | if( (src = cvLoadImage(filename, flag)) == 0 ) 79 | { 80 | fprintf(stderr, "Cannot load image \"%s\"\n", filename); 81 | char buff[256]; 82 | sprintf(buff, "echo %s >> bad.list", filename); 83 | system(buff); 84 | return make_image(10,10,3); 85 | //exit(0); 86 | } 87 | image out = ipl_to_image(src); 88 | cvReleaseImage(&src); 89 | rgbgr_image(out); 90 | return out; 91 | } 92 | 93 | void free_image(image m) 94 | { 95 | if(m.data){ 96 | free(m.data); 97 | } 98 | } 99 | 100 | image resize_image(image im, int w, int h) 101 | { 102 | image resized = make_image(w, h, im.c); 103 | image part = make_image(w, im.h, im.c); 104 | int r, c, k; 105 | float w_scale = (float)(im.w - 1) / (w - 1); 106 | float h_scale = (float)(im.h - 1) / (h - 1); 107 | for(k = 0; k < im.c; ++k){ 108 | for(r = 0; r < im.h; ++r){ 109 | for(c = 0; c < w; ++c){ 110 | float val = 0; 111 | if(c == w-1 || im.w == 1){ 112 | val = get_pixel(im, im.w-1, r, k); 113 | } else { 
114 | float sx = c*w_scale; 115 | int ix = (int) sx; 116 | float dx = sx - ix; 117 | val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k); 118 | } 119 | set_pixel(part, c, r, k, val); 120 | } 121 | } 122 | } 123 | for(k = 0; k < im.c; ++k){ 124 | for(r = 0; r < h; ++r){ 125 | float sy = r*h_scale; 126 | int iy = (int) sy; 127 | float dy = sy - iy; 128 | for(c = 0; c < w; ++c){ 129 | float val = (1-dy) * get_pixel(part, c, iy, k); 130 | set_pixel(resized, c, r, k, val); 131 | } 132 | if(r == h-1 || im.h == 1) continue; 133 | for(c = 0; c < w; ++c){ 134 | float val = dy * get_pixel(part, c, iy+1, k); 135 | add_pixel(resized, c, r, k, val); 136 | } 137 | } 138 | } 139 | 140 | free_image(part); 141 | return resized; 142 | } 143 | 144 | image load_image(char* filename,int w,int h,int c) 145 | { 146 | image out = load_image_cv(filename,c); 147 | 148 | if((h && w) && (h != out.h || w != out.w)) 149 | { 150 | image resized = resize_image(out,w,h); 151 | free_image(out); 152 | out = resized; 153 | } 154 | return out; 155 | } 156 | 157 | image load_image_color(char* filename,int w,int h) 158 | { 159 | return load_image(filename,w,h,3); 160 | } 161 | 162 | void fill_image(image m, float s) 163 | { 164 | int i; 165 | for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s; 166 | } 167 | 168 | static float get_pixel(image m, int x, int y, int c) 169 | { 170 | assert(x < m.w && y < m.h && c < m.c); 171 | return m.data[c*m.h*m.w + y*m.w + x]; 172 | } 173 | 174 | static void set_pixel(image m, int x, int y, int c, float val) 175 | { 176 | if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; 177 | assert(x < m.w && y < m.h && c < m.c); 178 | m.data[c*m.h*m.w + y*m.w + x] = val; 179 | } 180 | 181 | static void add_pixel(image m, int x, int y, int c, float val) 182 | { 183 | assert(x < m.w && y < m.h && c < m.c); 184 | m.data[c*m.h*m.w + y*m.w + x] += val; 185 | } 186 | 187 | void embed_image(image source, image dest, int dx, int dy) 188 | { 189 | int 
x,y,k; 190 | for(k = 0; k < source.c; ++k){ 191 | for(y = 0; y < source.h; ++y){ 192 | for(x = 0; x < source.w; ++x){ 193 | float val = get_pixel(source, x,y,k); 194 | set_pixel(dest, dx+x, dy+y, k, val); 195 | } 196 | } 197 | } 198 | } 199 | 200 | 201 | image letterbox_image(image im, int w, int h) 202 | { 203 | int new_w = im.w; 204 | int new_h = im.h; 205 | if (((float)w/im.w) < ((float)h/im.h)) { 206 | new_w = w; 207 | new_h = (im.h * w)/im.w; 208 | } else { 209 | new_h = h; 210 | new_w = (im.w * h)/im.h; 211 | } 212 | image resized = resize_image(im, new_w, new_h); 213 | image boxed = make_image(w, h, im.c); 214 | fill_image(boxed, .5); 215 | //int i; 216 | //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0; 217 | embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); 218 | free_image(resized); 219 | return boxed; 220 | } 221 | -------------------------------------------------------------------------------- /src/image.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/07 5 | */ 6 | #ifndef __IMAGE_H_ 7 | #define __IMAGE_H_ 8 | 9 | typedef struct 10 | { 11 | int w; 12 | int h; 13 | int c; 14 | float *data; 15 | }image; 16 | 17 | image make_image(int w, int h, int c); 18 | 19 | image make_empty_image(int w, int h, int c); 20 | 21 | 22 | image load_image_color(char* filename,int w,int h); 23 | 24 | void free_image(image m); 25 | 26 | image letterbox_image(image im, int w, int h); 27 | 28 | static float get_pixel(image m, int x, int y, int c); 29 | 30 | static void set_pixel(image m, int x, int y, int c, float val); 31 | 32 | static void add_pixel(image m, int x, int y, int c, float val); 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /src/image_opencv.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * 
Date: 2020/04/26 5 | */ 6 | #include "image_opencv.h" 7 | image mat_to_image(cv::Mat mat) 8 | { 9 | int w = mat.cols; 10 | int h = mat.rows; 11 | int c = mat.channels(); 12 | image im = make_image(w, h, c); 13 | unsigned char *data = (unsigned char *)mat.data; 14 | int step = mat.step; 15 | for (int y = 0; y < h; ++y) { 16 | for (int k = 0; k < c; ++k) { 17 | for (int x = 0; x < w; ++x) { 18 | //uint8_t val = mat.ptr(y)[c * x + k]; 19 | //uint8_t val = mat.at(y, x).val[k]; 20 | //im.data[k*w*h + y*w + x] = val / 255.0f; 21 | 22 | im.data[k*w*h + y*w + x] = data[y*step + x*c + k] / 255.0f; 23 | } 24 | } 25 | } 26 | return im; 27 | } 28 | 29 | 30 | cv::Mat image_to_mat(image im) 31 | { 32 | int channels = im.c; 33 | int width = im.w; 34 | int height = im.h; 35 | cv::Mat mat = cv::Mat(height, width, CV_8UC(channels)); 36 | int step = mat.step; 37 | 38 | for (int y = 0; y < im.h; ++y) { 39 | for (int x = 0; x < im.w; ++x) { 40 | for (int c = 0; c < im.c; ++c) { 41 | float val = im.data[c*im.h*im.w + y*im.w + x]; 42 | mat.data[y*step + x*im.c + c] = (unsigned char)(val * 255); 43 | } 44 | } 45 | } 46 | return mat; 47 | } -------------------------------------------------------------------------------- /src/image_opencv.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2020/04/26 5 | */ 6 | 7 | #ifndef __IMAGE_OPENCV_H_ 8 | #define __IMAGE_OPENCV_H_ 9 | 10 | #include 11 | #include "image.h" 12 | 13 | image mat_to_image(cv::Mat mat); 14 | 15 | cv::Mat image_to_mat(image im); 16 | 17 | #endif -------------------------------------------------------------------------------- /src/yolo_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | 7 | #include "yolo_layer.h" 8 | #include "blas.h" 9 | #include "cuda.h" 10 | #include "activations.h" 11 | #include "box.h" 
12 | #include 13 | #include /* NOTE(review): the header names of the two #include lines above are missing from this listing -- restore them from the repository */ 14 | 15 | //yolov3 16 | //float biases[18] = {10,13,16,30,33,23,30,61,62,45,59,119,116,90,156,198,373,326}; 17 | 18 | //yolov4 19 | float biases[18] = {12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401}; 20 | 21 | //yolov3-tiny 22 | float biases_tiny[12] = {10,14,23,27,37,58,81,82,135,169,344,319}; 23 | /* Builds the parameter record for one YOLO output layer: stores batch, n, total, classes and the grid size (w,h), sets c = n*(classes+4+1), inputs = w*h*c, and allocates biases (total*2 floats) and mask (n ints) with calloc. */ 24 | layer make_yolo_layer(int batch,int w,int h,int net_w,int net_h,int n,int total,int classes) 25 | { 26 | layer l = {0}; 27 | l.n = n; 28 | l.total = total; 29 | l.batch = batch; 30 | l.h = h; 31 | l.w = w; 32 | l.c = n*(classes+ 4 + 1); 33 | l.out_w = l.w; 34 | l.out_h = l.h; 35 | l.out_c = l.c; 36 | l.classes = classes; 37 | l.inputs = l.w*l.h*l.c; 38 | 39 | l.biases = (float*)calloc(total*2,sizeof(float)); 40 | 41 | l.mask = (int*)calloc(n,sizeof(int)); 42 | if(9 == total){ 43 | for(int i =0;i /* NOTE(review): text is missing from the listing at this point -- the embedded line numbers jump from 43 to 138, so the rest of make_yolo_layer and the definitions that followed it cannot be read here; restore them from the repository before changing this code */ thresh) 138 | ++count; 139 | } 140 | } 141 | } 142 | return count; 143 | } 144 | 145 | int num_detections(std::vector layers_params,float thresh) 146 | { 147 | int i; 148 | int s=0; 149 | for(i=0;i /* NOTE(review): listing truncated again (149 -> 157); the remainder of num_detections and the start of the next signature are missing */ layers_params,float thresh,int* num) 158 | { 159 | layer l = layers_params[0]; 160 | int i; 161 | int nboxes = num_detections(layers_params,thresh); 162 | if(num) *num = nboxes; 163 | detection *dets = (detection*)calloc(nboxes,sizeof(detection)); 164 | for(i=0;i /* NOTE(review): listing truncated (164 -> 166) */ 4) 167 | //{ 168 | // dets[i].mask = (float*)(l.coords-4,sizeof(float)); 169 | //} 170 | } 171 | return dets; 172 | } 173 | 174 | /* Rewrites the bbox of each of the n entries in dets: new_w x new_h is the aspect-preserving size of a w x h image inside netw x neth; the centring offset is subtracted from x/y and the scale is undone on x, y, w, h. When relative is 0 the result is additionally multiplied by w (x, w) and h (y, h). */ 175 | void correct_yolo_boxes(detection* dets,int n,int w,int h,int netw,int neth,int relative) 176 | { 177 | int i; 178 | int new_w=0; 179 | int new_h=0; 180 | if (((float)netw/w) < ((float)neth/h)){ 181 | new_w = netw; 182 | new_h = (h * netw)/w; 183 | } 184 | else{ 185 | new_h = neth; 186 | new_w = (w * neth)/h; 187 | } 188 | for (i = 0; i < n; ++i){ 189 | box b = dets[i].bbox; 190 | b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); 191 | b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); 192 | b.w *=
(float)netw/new_w; 193 | b.h *= (float)neth/new_h; 194 | if(!relative){ 195 | b.x *= w; 196 | b.w *= w; 197 | b.y *= h; 198 | b.h *= h; 199 | } 200 | dets[i].bbox = b; 201 | } 202 | } 203 | 204 | /* Decodes one box from the raw values in x, read at index + k*stride for k = 0..3: x = (i + value0)/lw, y = (j + value1)/lh, w = exp(value2)*biases[2*n]/w, h = exp(value3)*biases[2*n+1]/h. */ 205 | box get_yolo_box(float* x,float* biases,int n,int index,int i,int j,int lw, int lh,int w, int h,int stride) 206 | { 207 | box b; 208 | b.x = (i + x[index + 0*stride]) / lw; 209 | b.y = (j + x[index + 1*stride]) / lh; 210 | b.w = exp(x[index + 2*stride]) * biases[2*n] / w; 211 | b.h = exp(x[index + 3*stride]) * biases[2*n + 1] / h; 212 | return b; 213 | } 214 | 215 | /* Reads l.output, fills dets, then passes the filled entries to correct_yolo_boxes and returns their count. */ 216 | int get_yolo_detections(layer l,int w, int h, int netw,int neth,float thresh,int *map,int relative,detection *dets) 217 | { 218 | int i,j,n,b; 219 | float* predictions = l.output; 220 | int count = 0; 221 | for(b = 0;b < l.batch;++b){ 222 | for(i=0;i /* NOTE(review): listing truncated (222 -> 237); the loop body that decodes each box is missing */ thresh) ? prob : 0; 238 | } 239 | ++count; 240 | } 241 | } 242 | } 243 | correct_yolo_boxes(dets,count,w,h,netw,neth,relative); 244 | return count; 245 | } 246 | 247 | 248 | void fill_network_boxes(std::vector layers_params,int img_w,int img_h,int net_w,int net_h,float thresh, float hier, int *map,int relative,detection *dets) 249 | { 250 | int j; 251 | for(j=0;j /* NOTE(review): listing truncated (251 -> 259); the body of fill_network_boxes and the start of the get_network_boxes signature are missing */ layers_params, 260 | int img_w,int img_h,int net_w,int net_h,float thresh,float hier,int* map,int relative,int *num) 261 | { 262 | //make network boxes 263 | detection *dets = make_network_boxes(layers_params,thresh,num); 264 | 265 | //fill network boxes 266 | fill_network_boxes(layers_params,img_w,img_h,net_w,net_h,thresh,hier,map,relative,dets); 267 | return dets; 268 | } 269 | /* Entry point: builds one layer record per blob and runs forward_yolo_layer_gpu on the blob's gpu_data(), collects the boxes with get_network_boxes (hier_thresh and relative come from yolo_layer.h), frees the layer records, then runs do_nms_sort when nms_thresh is non-zero. Returns the detection array. */ 270 | //get detection result 271 | detection* get_detections(std::vector*> blobs,int img_w,int img_h,int net_w,int net_h, float thresh, int classes, int *nboxes) 272 | { 273 | std::vector layers_params; 274 | layers_params.clear(); 275 | for(int i=0;i /* NOTE(review): listing truncated (275 -> 277); the loop header and the start of the call that produces l_params are missing */ num(),blobs[i]->width(),blobs[i]->height(),net_w,net_h,num_bboxes,blobs.size()*dev_num_anchors,classes); 278 | layers_params.push_back(l_params); 279 |
forward_yolo_layer_gpu(blobs[i]->gpu_data(),l_params); 280 | } 281 | 282 | //get network boxes 283 | detection* dets = get_network_boxes(layers_params,img_w,img_h,net_w,net_h,thresh,hier_thresh,0,relative,nboxes); 284 | 285 | //release layer memory 286 | for(int index =0;index < layers_params.size();++index){ 287 | free_yolo_layer(layers_params[index]); 288 | } 289 | 290 | //do nms 291 | if(nms_thresh) do_nms_sort(dets,(*nboxes),classes,nms_thresh); 292 | 293 | return dets; 294 | } 295 | 296 | 297 | //release detection memory 298 | void free_detections(detection *dets,int nboxes) 299 | { 300 | int i; 301 | for(i = 0;i /* NOTE(review): listing truncated here -- the rest of free_detections, the file separator and the first lines of src/yolo_layer.h (numbering restarts at 10) are missing */ 10 | #include 11 | #include 12 | 13 | using namespace caffe; 14 | 15 | 16 | // const int classes = 80; 17 | // const float thresh = 0.5; /* Fixed settings used by get_detections() */ 18 | const float hier_thresh = 0.5; 19 | const float nms_thresh = 0.45; 20 | const int num_bboxes = 3; 21 | const int relative = 1; 22 | 23 | const int dev_num_anchors = 3; 24 | /* Box as four floats x, y, w, h */ 25 | typedef struct{ 26 | float x,y,w,h; 27 | }box; 28 | /* One candidate detection: its box plus the prob and mask arrays and an objectness value */ 29 | typedef struct{ 30 | box bbox; 31 | int classes; 32 | float* prob; 33 | float* mask; 34 | float objectness; 35 | int sort_class; 36 | }detection; 37 | /* Parameters and buffers of one YOLO output layer, as filled in by make_yolo_layer() */ 38 | typedef struct layer{ 39 | int batch; 40 | int total; 41 | int n,c,h,w; 42 | int out_n,out_c,out_h,out_w; 43 | int classes; 44 | int inputs,outputs; 45 | int *mask; 46 | float* biases; 47 | float* output; 48 | float* output_gpu; 49 | }layer; 50 | /* NOTE(review): this prototype has 6 parameters, but the definition in yolo_layer.cpp takes 8 (it adds net_w and net_h after h) -- the declaration looks stale */ 51 | layer make_yolo_layer(int batch,int w,int h,int n,int total,int classes); 52 | 53 | void free_yolo_layer(layer l); 54 | /* NOTE(review): declared with 3 parameters, while get_detections() calls forward_yolo_layer_gpu with 2 arguments; the definition is not readable in this listing -- confirm which is right */ 55 | void forward_yolo_layer_gpu(const float* input,layer l, float* output); 56 | 57 | detection* get_detections(std::vector*> blobs,int img_w,int img_h,int net_w,int net_h,float thresh, int classes, int* nboxes); 58 | 59 | void free_detections(detection *dets,int nboxes); 60 | 61 | 62 | 63 | 64 | #endif 65 | --------------------------------------------------------------------------------