├── CMakeLists.txt ├── CMakePreBuild.sh ├── README.md ├── activation_kernels.cu ├── activations.h ├── blas.h ├── blas_kernels.cu ├── box.cpp ├── box.h ├── cuda.cpp ├── cuda.h ├── detectnet ├── CMakeLists.txt └── detectnet.cpp ├── image.cpp ├── image.h ├── make_method_by_laymu.txt ├── max_pool_1d.cu ├── max_pool_1d.h ├── model_convert ├── Readme └── yolov3_darknet2caffe.py ├── yolo_layer.cpp └── yolo_layer.h /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(sysDetectSpeed) 3 | 4 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # -std=gnu++11 5 | 6 | # setup CUDA (REQUIRED: cuda_add_library/cuda_add_executable used below are provided by FindCUDA) 7 | find_package(CUDA REQUIRED) 8 | 9 | set( 10 | CUDA_NVCC_FLAGS 11 | ${CUDA_NVCC_FLAGS}; 12 | -O3 13 | -gencode arch=compute_53,code=sm_53 #tegra tx1 14 | -gencode arch=compute_61,code=sm_61 #gtx 1060 15 | -gencode arch=compute_62,code=sm_62 #tegra tx2 16 | -gencode arch=compute_75,code=[sm_75,compute_75] 17 | 18 | ) 19 | 20 | # setup project output paths 21 | set(PROJECT_OUTPUT_DIR ${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR}) 22 | set(PROJECT_INCLUDE_DIR ${PROJECT_OUTPUT_DIR}/include) 23 | 24 | file(MAKE_DIRECTORY ${PROJECT_INCLUDE_DIR}) 25 | file(MAKE_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin) 26 | 27 | message("-- system arch: ${CMAKE_SYSTEM_PROCESSOR}") 28 | message("-- output path: ${PROJECT_OUTPUT_DIR}") 29 | 30 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin) 31 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib) 32 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib) 33 | # Caffe checkout used for headers and libcaffe.so; override with -DCAFFE_ROOT=/path/to/caffe 34 | set(CAFFE_ROOT "/home/hs/software/caffe" CACHE PATH "Root of the Caffe source/build tree") 35 | # build C/C++ interface 36 | include_directories(${PROJECT_INCLUDE_DIR} ${GIE_PATH}/include) 37 | include_directories(${PROJECT_INCLUDE_DIR} 38 | ${CAFFE_ROOT}/include 39 | ${CAFFE_ROOT}/.build_release/src 40 | /usr/include/openblas/ 41 | ) 42 | 43 | 44 | file(GLOB inferenceSources *.cpp *.cu ) 45 | file(GLOB inferenceIncludes *.h ) 46 | 47 | 
cuda_add_library(sysDetectSpeed SHARED ${inferenceSources}) 48 | target_link_libraries(sysDetectSpeed 49 | ${CAFFE_ROOT}/.build_release/lib/libcaffe.so 50 | /usr/lib64/libglog.so 51 | /usr/lib64/libgflags.so 52 | /usr/lib64/libgflags.so.2.1 53 | /usr/lib64/boost/lib/libboost_system.so 54 | /root/anaconda3/envs/venv/lib/libstdc++.so.6 55 | /usr/lib64/libtiff.so 56 | ) 57 | 58 | 59 | # transfer all headers to the include directory 60 | foreach(include ${inferenceIncludes}) 61 | message("-- Copying ${include}") 62 | configure_file(${include} ${PROJECT_INCLUDE_DIR} COPYONLY) 63 | endforeach() 64 | 65 | 66 | # create symbolic link for network data 67 | execute_process( COMMAND "${CMAKE_COMMAND}" "-E" "create_symlink" "${PROJECT_SOURCE_DIR}/data/networks" "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/networks" ) 68 | 69 | add_subdirectory(detectnet) 70 | 71 | 72 | #setup opencv 73 | FIND_PACKAGE(OpenCV REQUIRED) 74 | 75 | TARGET_LINK_LIBRARIES(sysDetectSpeed ${OpenCV_LIBS}) 76 | 77 | # install 78 | foreach(include ${inferenceIncludes}) 79 | install(FILES "${include}" DESTINATION include/sysDetectSpeed) 80 | endforeach() 81 | 82 | # install the shared library 83 | install(TARGETS sysDetectSpeed DESTINATION lib/sysDetectSpeed EXPORT sysDetectSpeedConfig) 84 | 85 | # install the cmake project, for importing 86 | install(EXPORT sysDetectSpeedConfig DESTINATION share/sysDetectSpeed/cmake) 87 | -------------------------------------------------------------------------------- /CMakePreBuild.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # this script is automatically run from CMakeLists.txt 3 | 4 | BUILD_ROOT=$PWD 5 | TORCH_PREFIX=$PWD/torch 6 | 7 | echo "[Pre-build] dependency installer script running..." 
8 | echo "[Pre-build] build root directory: $BUILD_ROOT" 9 | 10 | 11 | # break on errors 12 | #set -e 13 | 14 | 15 | # install packages 16 | sudo apt-get update 17 | sudo apt-get install -y libqt4-dev qt4-dev-tools libglew-dev glew-utils libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev libglib2.0-dev 18 | sudo apt-get update 19 | 20 | sudo rm /usr/lib/aarch64-linux-gnu/libGL.so 21 | sudo ln -s /usr/lib/aarch64-linux-gnu/tegra/libGL.so /usr/lib/aarch64-linux-gnu/libGL.so 22 | 23 | # maximize performance 24 | sudo nvpmodel -m 0 25 | sudo ~/jetson_clock.sh 26 | echo "[Pre-build] Finished CMakePreBuild script" 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # caffe-yolov3 2 | # Platform 3 | Tested on CentOS 7 with a 2080 Ti. 4 | 5 | NOTE: You need to change CMakeLists.txt for your environment. 6 | This repository is forked from https://github.com/ChenYingpeng/caffe-yolov3 7 | I have modified some places; see make_method_by_laymu.txt for the details. 8 | Thanks, ChenYingpeng! 9 | 10 | # Convert model 11 | You can refer to the following repo for model conversion: 12 | https://github.com/passion3394/pytorch-caffe-darknet-convert-laymu 13 | 14 | I have tested yolov3_darknet2caffe.py; it should work. 15 | 16 | # Install 17 | git clone https://github.com/passion3394/darknet2caffe_yolov3 18 | 19 | cd darknet2caffe_yolov3 20 | 21 | mkdir build 22 | 23 | cd build 24 | 25 | cmake .. 
26 | 27 | make -j6 28 | 29 | # Test 30 | 31 | Example 1: yolov3 32 | 33 | $ ./x86_64/bin/detectnet 0 ../../data/yolov3/prototxt/yolov3.prototxt ../../data/yolov3/caffemodel/yolov3.caffemodel img_path 34 | 35 | Example 2: yolov3-spp 36 | 37 | $ ./x86_64/bin/detectnet 0 ../../data/yolov3/prototxt/yolov3-spp.prototxt ../../data/yolov3/caffemodel/yolov3-spp.caffemodel img_path 38 | 39 | Example 3: mobilenet_v1 + yolov3 40 | 41 | $ ./x86_64/bin/detectnet 0 ../../data/yolov3/prototxt/mobilenet_v1_yolov3.prototxt ../../data/yolov3/caffemodel/mobilenet_v1_yolov3.caffemodel img_path 42 | 43 | Example 4: yolov3-tiny 44 | 45 | $ ./x86_64/bin/detectnet 1 ../../data/yolov3/prototxt/yolov3-tiny-1.prototxt ../../data/yolov3/prototxt/yolov3-tiny-2.prototxt ../../data/yolov3/caffemodel/yolov3-tiny.caffemodel img_path 46 | 47 | Warning: a yolov3-tiny caffemodel converted from Darknet may give incorrect test results. This conclusion comes from my own experiments. 48 | 49 | # Download Model 50 | 51 | Baidu link [model](https://pan.baidu.com/s/1yiCrnmsOm0hbweJBiiUScQ) 52 | 53 | 54 | # Note 55 | 56 | 1. Inference only. 57 | 58 | 2. Supports models such as yolov3, yolov3-spp, yolov3-tiny and mobilenet_v1_yolov3, with network input sizes such as 320x320, 416x416 and 608x608. 59 | 60 | 3. Mobilenet_v1 + yolov3 (tested on COCO, mAP = 0.3798; to be optimized). 61 | 62 | 4. Yolov3-tiny: Caffe cannot reproduce the max-pool layer with kernel_size = 2 and stride = 1, so a max_pool_1d function was written to reproduce it. 
63 | -------------------------------------------------------------------------------- /activation_kernels.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | #include "activations.h" 7 | #include "cuda.h" 8 | #include "blas.h" 9 | 10 | 11 | 12 | __device__ float lhtan_activate_kernel(float x) 13 | { 14 | if(x < 0) return .001f*x; 15 | if(x > 1) return .001f*(x-1.f) + 1.f; 16 | return x; 17 | } 18 | 19 | __device__ float hardtan_activate_kernel(float x) 20 | { 21 | if (x < -1) return -1; 22 | if (x > 1) return 1; 23 | return x; 24 | } 25 | 26 | __device__ float linear_activate_kernel(float x){return x;} 27 | __device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} 28 | __device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;} 29 | __device__ float relu_activate_kernel(float x){return x*(x>0);} 30 | __device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} 31 | __device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} 32 | __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} 33 | __device__ float leaky_activate_kernel(float x){return (x>0) ? 
x : .1f*x;} 34 | __device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);} 35 | __device__ float plse_activate_kernel(float x) 36 | { 37 | if(x < -4) return .01f * (x + 4); 38 | if(x > 4) return .01f * (x - 4) + 1; 39 | return .125f*x + .5f; 40 | } 41 | __device__ float stair_activate_kernel(float x) 42 | { 43 | int n = floorf(x); 44 | if (n%2 == 0) return floorf(x/2); 45 | else return (x - n) + floorf(x/2); 46 | } 47 | // Dispatches to the per-activation device function selected by a; returns 0 for an unhandled value. 48 | __device__ float activate_kernel(float x, ACTIVATION a) 49 | { 50 | switch(a){ 51 | case LINEAR: 52 | return linear_activate_kernel(x); 53 | case LOGISTIC: 54 | return logistic_activate_kernel(x); 55 | case LOGGY: 56 | return loggy_activate_kernel(x); 57 | case RELU: 58 | return relu_activate_kernel(x); 59 | case ELU: 60 | return elu_activate_kernel(x); 61 | case RELIE: 62 | return relie_activate_kernel(x); 63 | case RAMP: 64 | return ramp_activate_kernel(x); 65 | case LEAKY: 66 | return leaky_activate_kernel(x); 67 | case TANH: 68 | return tanh_activate_kernel(x); 69 | case PLSE: 70 | return plse_activate_kernel(x); 71 | case STAIR: 72 | return stair_activate_kernel(x); 73 | case HARDTAN: 74 | return hardtan_activate_kernel(x); 75 | case LHTAN: 76 | return lhtan_activate_kernel(x); 77 | } 78 | return 0; 79 | } 80 | // One thread per element: applies activation a in place to x[i] for i < n. 81 | __global__ void activate_array_kernel(float *x, int n, ACTIVATION a) 82 | { 83 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 84 | if(i < n) x[i] = activate_kernel(x[i], a); 85 | } 86 | // Host-side launcher: runs activate_array_kernel over the n elements of x, then checks for a launch error. 87 | void activate_array_gpu(float *x, int n, ACTIVATION a) 88 | { 89 | activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a); // grid/block must match the kernel's index math: BLOCK threads per block, grid from cuda_gridsize 90 | check_error(cudaPeekAtLastError()); 91 | } 92 | -------------------------------------------------------------------------------- /activations.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | 7 | #ifndef __ACTIVATIONS_H_ 8 | #define __ACTIVATIONS_H_ 9 | 10 | typedef 
enum{ 11 | LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN 12 | } ACTIVATION; 13 | 14 | void activate_array_gpu(float* x,int n,ACTIVATION a); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /blas.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | 7 | #ifndef __BLAS_H_ 8 | #define __BLAS_H_ 9 | 10 | void copy_gpu(int N,float* X,int INCX,float* Y,int INCY); 11 | 12 | void fill_gpu(int N, float ALPHA, float * X, int INCX); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /blas_kernels.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | 7 | 8 | #include 9 | 10 | #include "cuda.h" 11 | #include "blas.h" 12 | // One thread per element: Y[i*INCY + OFFY] = X[i*INCX + OFFX] for i < N. 13 | __global__ void copy_kernel(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY) 14 | { 15 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 16 | if(i < N) Y[i*INCY + OFFY] = X[i*INCX + OFFX]; 17 | } 18 | // One thread per element: X[i*INCX] = ALPHA for i < N. 19 | __global__ void fill_kernel(int N, float ALPHA, float *X, int INCX) 20 | { 21 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 22 | if(i < N) X[i*INCX] = ALPHA; 23 | } 24 | // Host-side launcher for copy_kernel; aborts via check_error if the launch fails. 25 | void copy_gpu_offset(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY) 26 | { 27 | copy_kernel<<<cuda_gridsize(N), BLOCK>>>(N,X,OFFX,INCX,Y,OFFY,INCY); // BLOCK threads per block, grid from cuda_gridsize, matching the kernel's index math 28 | check_error(cudaPeekAtLastError()); 29 | } 30 | 31 | void copy_gpu(int N,float* X,int INCX,float* Y,int INCY) 32 | { 33 | copy_gpu_offset(N,X,0,INCX,Y,0,INCY); 34 | } 35 | 36 | // Host-side launcher for fill_kernel; aborts via check_error if the launch fails. 37 | void fill_gpu(int N, float ALPHA, float * X, int INCX) 38 | { 39 | fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX); // same launch geometry as copy_gpu_offset 40 | check_error(cudaPeekAtLastError()); 41 | } 42 | 
-------------------------------------------------------------------------------- /box.cpp: -------------------------------------------------------------------------------- 1 | #include "box.h" 2 | #include 3 | #include 4 | #include 5 | 6 | int nms_comparator(const void *pa, const void *pb) 7 | { 8 | detection a = *(detection *)pa; 9 | detection b = *(detection *)pb; 10 | float diff = 0; 11 | if(b.sort_class >= 0){ 12 | diff = a.prob[b.sort_class] - b.prob[b.sort_class]; 13 | } else { 14 | diff = a.objectness - b.objectness; 15 | } 16 | if(diff < 0) return 1; 17 | else if(diff > 0) return -1; 18 | return 0; 19 | } 20 | 21 | float overlap(float x1, float w1, float x2, float w2) 22 | { 23 | float l1 = x1 - w1/2; 24 | float l2 = x2 - w2/2; 25 | float left = l1 > l2 ? l1 : l2; 26 | float r1 = x1 + w1/2; 27 | float r2 = x2 + w2/2; 28 | float right = r1 < r2 ? r1 : r2; 29 | return right - left; 30 | } 31 | 32 | float box_intersection(box a, box b) 33 | { 34 | float w = overlap(a.x, a.w, b.x, b.w); 35 | float h = overlap(a.y, a.h, b.y, b.h); 36 | if(w < 0 || h < 0) return 0; 37 | float area = w*h; 38 | return area; 39 | } 40 | 41 | float box_union(box a, box b) 42 | { 43 | float i = box_intersection(a, b); 44 | float u = a.w*a.h + b.w*b.h - i; 45 | return u; 46 | } 47 | 48 | float box_iou(box a, box b) 49 | { 50 | return box_intersection(a, b)/box_union(a, b); 51 | } 52 | 53 | void do_nms_sort(detection *dets, int total, int classes, float thresh) 54 | { 55 | int i, j, k; 56 | k = total-1; 57 | for(i = 0; i <= k; ++i){ 58 | if(dets[i].objectness == 0){ 59 | detection swap = dets[i]; 60 | dets[i] = dets[k]; 61 | dets[k] = swap; 62 | --k; 63 | --i; 64 | } 65 | } 66 | total = k+1; 67 | 68 | for(k = 0; k < classes; ++k){ 69 | for(i = 0; i < total; ++i){ 70 | dets[i].sort_class = k; 71 | } 72 | qsort(dets, total, sizeof(detection), nms_comparator); 73 | for(i = 0; i < total; ++i){ 74 | if(dets[i].prob[k] == 0) continue; 75 | box a = dets[i].bbox; 76 | for(j = i+1; j < total; 
++j){ 77 | box b = dets[j].bbox; 78 | if (box_iou(a, b) > thresh){ 79 | dets[j].prob[k] = 0; 80 | } 81 | } 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /box.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | 7 | #ifndef __BOX_H_ 8 | #define __BOX_H_ 9 | #include "yolo_layer.h" 10 | 11 | 12 | void do_nms_sort(detection *dets, int total, int classes, float thresh); 13 | 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /cuda.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | #include "cuda.h" 7 | #include "blas.h" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | void error(const char* s) 15 | { 16 | perror(s); 17 | assert(0); 18 | exit(-1); 19 | } 20 | 21 | void check_error(cudaError_t status) 22 | { 23 | //cudaDeviceSynchronize(); 24 | cudaError_t status2 = cudaGetLastError(); 25 | if (status != cudaSuccess) 26 | { 27 | const char *s = cudaGetErrorString(status); 28 | char buffer[256]; 29 | printf("CUDA Error: %s\n", s); 30 | assert(0); 31 | snprintf(buffer, 256, "CUDA Error: %s", s); 32 | error(buffer); 33 | } 34 | if (status2 != cudaSuccess) 35 | { 36 | const char *s = cudaGetErrorString(status); 37 | char buffer[256]; 38 | printf("CUDA Error Prev: %s\n", s); 39 | assert(0); 40 | snprintf(buffer, 256, "CUDA Error Prev: %s", s); 41 | error(buffer); 42 | } 43 | } 44 | 45 | dim3 cuda_gridsize(size_t n){ 46 | size_t k = (n-1) / BLOCK + 1; 47 | size_t x = k; 48 | size_t y = 1; 49 | if(x > 65535){ 50 | x = ceil(sqrt(k)); 51 | y = (n-1)/(x*BLOCK) + 1; 52 | } 53 | dim3 d = {x, y, 1}; 54 | //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); 55 | return d; 56 | } 57 | 58 | float* cuda_make_array(float* 
x,size_t n) 59 | { 60 | float *x_gpu; 61 | size_t size = sizeof(float)*n; 62 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 63 | check_error(status); 64 | if(x){ 65 | status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 66 | check_error(status); 67 | } else { 68 | fill_gpu(n, 0, x_gpu, 1); 69 | } 70 | if(!x_gpu) error("Cuda malloc failed\n"); 71 | return x_gpu; 72 | } 73 | 74 | void cuda_free(float* x_gpu) 75 | { 76 | cudaError_t status = cudaFree(x_gpu); 77 | check_error(status); 78 | } 79 | 80 | void cuda_push_array(float *x_gpu,float* x,size_t n) 81 | { 82 | size_t size = sizeof(float)*n; 83 | cudaError_t status = cudaMemcpy(x_gpu,x,size,cudaMemcpyHostToDevice); 84 | check_error(status); 85 | } 86 | 87 | 88 | 89 | void cuda_pull_array(float *x_gpu,float* x,size_t n) 90 | { 91 | size_t size = sizeof(float)*n; 92 | cudaError_t status = cudaMemcpy(x,x_gpu,size,cudaMemcpyDeviceToHost); 93 | check_error(status); 94 | } 95 | -------------------------------------------------------------------------------- /cuda.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | 7 | #ifndef __CUDA_H_ 8 | #define __CUDA_H_ 9 | #include "cuda_runtime.h" 10 | #include "curand.h" 11 | #include "cublas_v2.h" 12 | 13 | #define BLOCK 512 14 | 15 | void check_error(cudaError_t status); 16 | 17 | dim3 cuda_gridsize(size_t n); 18 | 19 | float* cuda_make_array(float* x,size_t n); 20 | 21 | void cuda_free(float* x_gpu); 22 | 23 | void cuda_push_array(float *x_gpu,float* x,size_t n); 24 | 25 | void cuda_pull_array(float *x_gpu,float* x,size_t n); 26 | 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /detectnet/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | file(GLOB detectnetSources *.cpp) 3 | file(GLOB detectnetIncludes *.h ) 4 | 5 | 
cuda_add_executable(detectnet ${detectnetSources}) 6 | target_link_libraries(detectnet sysDetectSpeed) 7 | -------------------------------------------------------------------------------- /detectnet/detectnet.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Company: Synthesis 4 | * Author: Chen 5 | * Date: 2018/06/04 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "yolo_layer.h" 14 | #include "image.h" 15 | #include "cuda.h" 16 | #include "max_pool_1d.h" 17 | #include "blas.h" 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | using namespace caffe; 26 | using namespace cv; 27 | 28 | const char* imgFilename = "/home/chen/projects/data/images/dog.jpg"; //modify your images file path 29 | 30 | uint64_t current_timestamp() { 31 | struct timeval te; 32 | gettimeofday(&te, NULL); // get current time 33 | return te.tv_sec*1000LL + te.tv_usec/1000; // caculate milliseconds 34 | } 35 | 36 | bool signal_recieved = false; 37 | void sig_handler(int signo) 38 | { 39 | if( signo == SIGINT ){ 40 | printf("received SIGINT\n"); 41 | signal_recieved = true; 42 | } 43 | } 44 | 45 | //! 
Note: Net的Blob是指,每个层的输出数据,即Feature Maps 46 | unsigned int get_blob_index(boost::shared_ptr< Net > & net, char *query_blob_name) 47 | { 48 | std::string str_query(query_blob_name); 49 | vector< string > const & blob_names = net->blob_names(); 50 | for( unsigned int i = 0; i != blob_names.size(); ++i ) 51 | { 52 | //std::cout << "blob names " << i << " is " << blob_names[i] << std::endl; 53 | if( str_query == blob_names[i] ) 54 | { 55 | return i; 56 | } 57 | } 58 | LOG(FATAL) << "Unknown blob name: " << str_query; 59 | } 60 | 61 | 62 | int main( int argc, char** argv ) 63 | { 64 | printf("detectnet\n args (%i): ", argc); 65 | //YOLOV3 66 | string model_file; 67 | string weights_file; 68 | 69 | //yolov3-tiny 70 | string model1_file; 71 | string model2_file; 72 | string tiny_weights_file; 73 | 74 | if(5 == argc){ 75 | assert(0 == atoi(argv[1])); 76 | model_file = argv[2]; 77 | weights_file = argv[3]; 78 | imgFilename = argv[4]; 79 | } 80 | else if(6 == argc){ 81 | assert(1 == atoi(argv[1])); 82 | model1_file = argv[2]; 83 | model2_file = argv[3]; 84 | tiny_weights_file = argv[4]; 85 | imgFilename = argv[5]; 86 | } 87 | else{ 88 | printf("Input error: please input ./xx YOLOV3 [model_path] [weights_path] [img_path] or ./xx YOLOV3_TINY [model1_path] [model2_path] [weights_path] [img_path]\n"); 89 | return -1; 90 | } 91 | 92 | printf("\n\n"); 93 | 94 | // Initialize the network. 95 | Caffe::set_mode(Caffe::GPU); 96 | 97 | image im,sized; 98 | vector*> blobs; 99 | blobs.clear(); 100 | 101 | int nboxes = 0; 102 | int size; 103 | detection *dets = NULL; 104 | 105 | NetType type = (NetType)atoi(argv[1]); 106 | 107 | if(0 == type){ 108 | /* Load the network. 
*/ 109 | shared_ptr > net; 110 | net.reset(new Net(model_file, TEST)); 111 | net->CopyTrainedLayersFrom(weights_file); 112 | 113 | printf("net num_inputs is %d\n",net->num_inputs()); 114 | printf("net num_outputs is %d\n",net->num_outputs()); 115 | CHECK_EQ(net->num_inputs(), 1) << "Network should have exactly one input."; 116 | CHECK_EQ(net->num_outputs(), 3) << "Network should have exactly three outputs."; 117 | 118 | Blob *input_data_blobs = net->input_blobs()[0]; 119 | LOG(INFO) << "Input data layer channels is " << input_data_blobs->channels(); 120 | LOG(INFO) << "Input data layer width is " << input_data_blobs->width(); 121 | LOG(INFO) << "Input data layer height is " << input_data_blobs->height(); 122 | 123 | size = input_data_blobs->channels()*input_data_blobs->width()*input_data_blobs->height(); 124 | 125 | //load image 126 | uint64_t beginDataTime = current_timestamp(); 127 | im = load_image_color((char*)imgFilename,0,0); 128 | sized = letterbox_image(im,input_data_blobs->width(),input_data_blobs->height()); 129 | cuda_push_array(input_data_blobs->mutable_gpu_data(),sized.data,size); 130 | uint64_t endDataTime = current_timestamp(); 131 | 132 | //YOLOV3 objection detection implementation with Caffe 133 | net->Forward(); 134 | 135 | Blob* out_blob1 = net->output_blobs()[1]; 136 | blobs.push_back(out_blob1); 137 | Blob* out_blob2 = net->output_blobs()[2]; 138 | blobs.push_back(out_blob2); 139 | Blob* out_blob3 = net->output_blobs()[0]; 140 | blobs.push_back(out_blob3); 141 | 142 | dets = get_detections(blobs,im.w,im.h,input_data_blobs->width(),input_data_blobs->height(),&nboxes,type); 143 | uint64_t endDetectTime = current_timestamp(); 144 | 145 | printf("object-detection: processing data operation avergae time is (%zu)ms\n", endDataTime - beginDataTime); 146 | printf("object-detection: processing network yolov3 avergae time is (%zu)ms\n", endDetectTime - endDataTime); 147 | 148 | } 149 | 150 | if(1 == type){ 151 | 152 | /* Load the network. 
*/ 153 | shared_ptr > net1,net2; 154 | net1.reset(new Net(model1_file, TEST)); 155 | net2.reset(new Net(model2_file, TEST)); 156 | net1->CopyTrainedLayersFrom(tiny_weights_file); 157 | net2->CopyTrainedLayersFrom(tiny_weights_file); 158 | 159 | printf("net1 num_inputs is %d\n",net1->num_inputs()); 160 | printf("net1 num_outputs is %d\n",net1->num_outputs()); 161 | printf("net2 num_inputs is %d\n",net2->num_inputs()); 162 | printf("net2 num_outputs is %d\n",net2->num_outputs()); 163 | 164 | CHECK_EQ(net1->num_inputs(), 1) << "Network should have exactly one input."; 165 | CHECK_EQ(net1->num_outputs(), 1) << "Network should have exactly three outputs."; 166 | 167 | CHECK_EQ(net2->num_inputs(), 2) << "Network should have exactly one input."; 168 | CHECK_EQ(net2->num_outputs(), 2) << "Network should have exactly three outputs."; 169 | 170 | Blob *net1_input1_data_blobs = net1->input_blobs()[0]; 171 | Blob *net2_input1_data_blobs = net2->input_blobs()[0]; 172 | Blob *net2_input2_data_blobs = net2->input_blobs()[1]; 173 | LOG(INFO) << "Input1 data layer channels is " << net1_input1_data_blobs->channels(); 174 | LOG(INFO) << "Input1 data layer width is " << net1_input1_data_blobs->width(); 175 | LOG(INFO) << "Input1 data layer height is " << net1_input1_data_blobs->height(); 176 | 177 | LOG(INFO) << "Input2 data1 layer channels is " << net2_input1_data_blobs->channels(); 178 | LOG(INFO) << "Input2 data1 layer width is " << net2_input1_data_blobs->width(); 179 | LOG(INFO) << "Input2 data1 layer height is " << net2_input1_data_blobs->height(); 180 | 181 | LOG(INFO) << "Input2 data2 layer channels is " << net2_input2_data_blobs->channels(); 182 | LOG(INFO) << "Input2 data2 layer width is " << net2_input2_data_blobs->width(); 183 | LOG(INFO) << "Input2 data2 layer height is " << net2_input2_data_blobs->height(); 184 | 185 | size = net1_input1_data_blobs->channels()*net1_input1_data_blobs->width()*net1_input1_data_blobs->height(); 186 | 187 | //load image 188 | printf("start 
forward yolov3-tiny!\n"); 189 | uint64_t beginDataTime = current_timestamp(); 190 | im = load_image_color((char*)imgFilename,0,0); 191 | sized = letterbox_image(im,net1_input1_data_blobs->width(),net1_input1_data_blobs->height()); 192 | cuda_push_array(net1_input1_data_blobs->mutable_gpu_data(),sized.data,size); 193 | uint64_t endDataTime = current_timestamp(); 194 | 195 | net1->Forward(); 196 | 197 | //temp output 198 | Blob* out1_blob1 = net1->output_blobs()[0]; 199 | LOG(INFO) << "temp output data layer channels is " << out1_blob1->channels(); 200 | LOG(INFO) << "temp outputdata layer width is " << out1_blob1->width(); 201 | LOG(INFO) << "temp output data layer height is " << out1_blob1->height(); 202 | 203 | char *query_blob_name = "layer9-conv"; 204 | unsigned int blob_id = get_blob_index(net1, query_blob_name); 205 | boost::shared_ptr > out1_blob2 = net1->blobs()[blob_id]; 206 | 207 | //load input data1 208 | //Note: size = 2 stride = 1 209 | int kernel_size = 2; 210 | int stride = 1; 211 | int pad = kernel_size - stride; 212 | max_pool_1d_gpu(out1_blob1->mutable_gpu_data(),1,out1_blob1->channels(),out1_blob1->height(),out1_blob1->width(),kernel_size,stride,pad,net2_input1_data_blobs->mutable_gpu_data()); 213 | 214 | //load input data2 215 | copy_gpu(out1_blob2->count(),(float*)out1_blob2->mutable_gpu_data(),1,net2_input2_data_blobs->mutable_gpu_data(),1); 216 | 217 | net2->Forward(); 218 | 219 | Blob* out2_blob1 = net2->output_blobs()[0]; 220 | blobs.push_back(out2_blob1); 221 | Blob* out2_blob2 = net2->output_blobs()[1]; 222 | blobs.push_back(out2_blob2); 223 | dets = get_detections(blobs,im.w,im.h,net1_input1_data_blobs->width(),net1_input1_data_blobs->height(),&nboxes,type); 224 | 225 | uint64_t endDetectTime = current_timestamp(); 226 | 227 | printf("object-detection: processing data operation avergae time is (%zu)ms\n", endDataTime - beginDataTime); 228 | printf("object-detection: processing network yolov3 tiny avergae time is (%zu)ms\n", endDetectTime 
- endDataTime); 229 | } 230 | 231 | //show detection results 232 | Mat img = imread(imgFilename); 233 | int i,j; 234 | for(i=0;i< nboxes;++i){ 235 | char labelstr[4096] = {0}; 236 | int cls = -1; 237 | for(j=0;j<2;++j){ 238 | if(dets[i].prob[j] > 0.5){ 239 | if(cls < 0){ 240 | cls = j; 241 | } 242 | printf("%d: %.0f%%\n",j,dets[i].prob[j]*100); // print the class index j that this probability belongs to (was: cls, the first class over threshold) 243 | } 244 | } 245 | if(cls >= 0){ 246 | box b = dets[i].bbox; 247 | printf("x = %f,y = %f,w = %f,h = %f\n",b.x,b.y,b.w,b.h); 248 | 249 | int left = (b.x-b.w/2.)*im.w; 250 | int right = (b.x+b.w/2.)*im.w; 251 | int top = (b.y-b.h/2.)*im.h; 252 | int bot = (b.y+b.h/2.)*im.h; 253 | rectangle(img,Point(left,top),Point(right,bot),Scalar(0,0,255),3,8,0); 254 | printf("left = %d,right = %d,top = %d,bot = %d\n",left,right,top,bot); 255 | } 256 | } 257 | 258 | imwrite("caffe-yolov3_test.jpg", img); 259 | namedWindow("show",CV_WINDOW_AUTOSIZE); 260 | imshow("show",img); 261 | waitKey(0); 262 | 263 | free_detections(dets,nboxes); 264 | free_image(im); 265 | free_image(sized); 266 | 267 | printf("done.\n"); 268 | return 0; 269 | } 270 | 271 | -------------------------------------------------------------------------------- /image.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "image.h" 3 | 4 | #include 5 | 6 | using namespace cv; 7 | 8 | void rgbgr_image(image im) 9 | { 10 | int i; 11 | for(i = 0; i < im.w*im.h; ++i){ 12 | float swap = im.data[i]; 13 | im.data[i] = im.data[i+im.w*im.h*2]; 14 | im.data[i+im.w*im.h*2] = swap; 15 | } 16 | } 17 | 18 | void ipl_into_image(IplImage* src, image im) 19 | { 20 | unsigned char *data = (unsigned char *)src->imageData; 21 | int h = src->height; 22 | int w = src->width; 23 | int c = src->nChannels; 24 | int step = src->widthStep; 25 | int i, j, k; 26 | 27 | for(i = 0; i < h; ++i){ 28 | for(k= 0; k < c; ++k){ 29 | for(j = 0; j < w; ++j){ 30 | im.data[k*w*h + i*w + j] = data[i*step + j*c + k]/255.; 31 | } 32 | } 33 | } 34 | } 35 | 36 | image 
make_empty_image(int w, int h, int c) 37 | { 38 | image out; 39 | out.data = 0; 40 | out.h = h; 41 | out.w = w; 42 | out.c = c; 43 | return out; 44 | } 45 | 46 | image make_image(int w, int h, int c) 47 | { 48 | image out = make_empty_image(w,h,c); 49 | out.data = (float*)calloc(h*w*c, sizeof(float)); 50 | return out; 51 | } 52 | 53 | image ipl_to_image(IplImage* src) 54 | { 55 | int h = src->height; 56 | int w = src->width; 57 | int c = src->nChannels; 58 | image out = make_image(w, h, c); 59 | ipl_into_image(src, out); 60 | return out; 61 | } 62 | 63 | 64 | 65 | 66 | 67 | image load_image_cv(char *filename, int channels) 68 | { 69 | IplImage* src = 0; 70 | int flag = -1; 71 | if (channels == 0) flag = -1; 72 | else if (channels == 1) flag = 0; 73 | else if (channels == 3) flag = 1; 74 | else { 75 | fprintf(stderr, "OpenCV can't force load with %d channels\n", channels); 76 | } 77 | 78 | if( (src = cvLoadImage(filename, flag)) == 0 ) 79 | { 80 | fprintf(stderr, "Cannot load image \"%s\"\n", filename); 81 | char buff[256]; 82 | sprintf(buff, "echo %s >> bad.list", filename); 83 | system(buff); 84 | return make_image(10,10,3); 85 | //exit(0); 86 | } 87 | image out = ipl_to_image(src); 88 | cvReleaseImage(&src); 89 | rgbgr_image(out); 90 | return out; 91 | } 92 | 93 | void free_image(image m) 94 | { 95 | if(m.data){ 96 | free(m.data); 97 | } 98 | } 99 | 100 | image resize_image(image im, int w, int h) 101 | { 102 | image resized = make_image(w, h, im.c); 103 | image part = make_image(w, im.h, im.c); 104 | int r, c, k; 105 | float w_scale = (float)(im.w - 1) / (w - 1); 106 | float h_scale = (float)(im.h - 1) / (h - 1); 107 | for(k = 0; k < im.c; ++k){ 108 | for(r = 0; r < im.h; ++r){ 109 | for(c = 0; c < w; ++c){ 110 | float val = 0; 111 | if(c == w-1 || im.w == 1){ 112 | val = get_pixel(im, im.w-1, r, k); 113 | } else { 114 | float sx = c*w_scale; 115 | int ix = (int) sx; 116 | float dx = sx - ix; 117 | val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, 
ix+1, r, k); 118 | } 119 | set_pixel(part, c, r, k, val); 120 | } 121 | } 122 | } 123 | for(k = 0; k < im.c; ++k){ 124 | for(r = 0; r < h; ++r){ 125 | float sy = r*h_scale; 126 | int iy = (int) sy; 127 | float dy = sy - iy; 128 | for(c = 0; c < w; ++c){ 129 | float val = (1-dy) * get_pixel(part, c, iy, k); 130 | set_pixel(resized, c, r, k, val); 131 | } 132 | if(r == h-1 || im.h == 1) continue; 133 | for(c = 0; c < w; ++c){ 134 | float val = dy * get_pixel(part, c, iy+1, k); 135 | add_pixel(resized, c, r, k, val); 136 | } 137 | } 138 | } 139 | 140 | free_image(part); 141 | return resized; 142 | } 143 | 144 | image load_image(char* filename,int w,int h,int c) 145 | { 146 | image out = load_image_cv(filename,c); 147 | 148 | if((h && w) && (h != out.h || w != out.w)) 149 | { 150 | image resized = resize_image(out,w,h); 151 | free_image(out); 152 | out = resized; 153 | } 154 | return out; 155 | } 156 | 157 | image load_image_color(char* filename,int w,int h) 158 | { 159 | return load_image(filename,w,h,3); 160 | } 161 | 162 | void fill_image(image m, float s) 163 | { 164 | int i; 165 | for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s; 166 | } 167 | 168 | static float get_pixel(image m, int x, int y, int c) 169 | { 170 | assert(x < m.w && y < m.h && c < m.c); 171 | return m.data[c*m.h*m.w + y*m.w + x]; 172 | } 173 | 174 | static void set_pixel(image m, int x, int y, int c, float val) 175 | { 176 | if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; 177 | assert(x < m.w && y < m.h && c < m.c); 178 | m.data[c*m.h*m.w + y*m.w + x] = val; 179 | } 180 | 181 | static void add_pixel(image m, int x, int y, int c, float val) 182 | { 183 | assert(x < m.w && y < m.h && c < m.c); 184 | m.data[c*m.h*m.w + y*m.w + x] += val; 185 | } 186 | 187 | void embed_image(image source, image dest, int dx, int dy) 188 | { 189 | int x,y,k; 190 | for(k = 0; k < source.c; ++k){ 191 | for(y = 0; y < source.h; ++y){ 192 | for(x = 0; x < source.w; ++x){ 193 | float val = 
get_pixel(source, x,y,k); 194 | set_pixel(dest, dx+x, dy+y, k, val); 195 | } 196 | } 197 | } 198 | } 199 | 200 | 201 | image letterbox_image(image im, int w, int h) 202 | { 203 | int new_w = im.w; 204 | int new_h = im.h; 205 | if (((float)w/im.w) < ((float)h/im.h)) { 206 | new_w = w; 207 | new_h = (im.h * w)/im.w; 208 | } else { 209 | new_h = h; 210 | new_w = (im.w * h)/im.h; 211 | } 212 | image resized = resize_image(im, new_w, new_h); 213 | image boxed = make_image(w, h, im.c); 214 | fill_image(boxed, .5); 215 | //int i; 216 | //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0; 217 | embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2); 218 | free_image(resized); 219 | return boxed; 220 | } 221 | -------------------------------------------------------------------------------- /image.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/07 5 | */ 6 | #ifndef __IMAGE_H_ 7 | #define __IMAGE_H_ 8 | 9 | typedef struct 10 | { 11 | int w; 12 | int h; 13 | int c; 14 | float *data; 15 | }image; 16 | 17 | image load_image_color(char* filename,int w,int h); 18 | 19 | void free_image(image m); 20 | 21 | image letterbox_image(image im, int w, int h); 22 | 23 | static float get_pixel(image m, int x, int y, int c); 24 | 25 | static void set_pixel(image m, int x, int y, int c, float val); 26 | 27 | static void add_pixel(image m, int x, int y, int c, float val); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /make_method_by_laymu.txt: -------------------------------------------------------------------------------- 1 | 1 2 | modify the gencode of your GPU. 
My GPU is 2080ti, so gencode changed to the following: 3 | -gencode arch=compute_75,code=[sm_75,compute_75] 4 | 5 | 6 | 2 7 | include some include files: 8 | /home/hs/software/caffe/include 9 | /home/hs/software/caffe/.build_release/src 10 | /usr/include/openblas/ 11 | 12 | 13 | 3 14 | include some libs: 15 | /home/hs/software/caffe/.build_release/lib/libcaffe.so 16 | /usr/lib64/libglog.so 17 | /usr/lib64/libgflags.so 18 | /usr/lib64/libgflags.so.2.1 19 | /usr/lib64/boost/lib/libboost_system.so 20 | /root/anaconda3/envs/venv/lib/libstdc++.so.6 21 | /usr/lib64/libtiff.so 22 | 23 | 4 24 | cmake .. -DCUDA_PROPAGATE_HOST_FLAGS=OFF 25 | 26 | 5 27 | make 28 | 29 | 6 30 | modify the following places of yolo_layer.cpp: 31 | 32 | height 33 | width 34 | classes 35 | anchors 36 | 37 | 7 38 | modify line 237 of detectnet.cpp as: 39 | 2 is the total classes num of my dataset. 40 | 41 | for(j=0;j<2;++j){ 42 | 43 | 8 44 | ./x86_64/bin/detectnet 0 ../../pytorch-caffe-darknet-convert/yolov3.prototxt ../../pytorch-caffe-darknet-convert/yolov3.caffemodel pic_path
--------------------------------------------------------------------------------
/max_pool_1d.cu:
--------------------------------------------------------------------------------
/*
 * Company: Synthesis
 * Author: Chen
 * Date: 2019/01/24
 */
#include "max_pool_1d.h"
#include "cuda.h"


/*
 * Max-pooling forward pass, one thread per output element.
 *
 * n                 number of output elements; threads with id >= n exit
 * in_h, in_w, in_c  input height, width and channel count
 * stride, size      pooling stride and window edge length
 * pad               total padding; the window is shifted by -pad/2 on both axes
 * input, output     device buffers indexed as w + W*(h + H*(c + C*b))
 *
 * Window taps that fall outside the input contribute -INFINITY, so they
 * never win the maximum.
 */
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output)
{
    /* output extent */
    int h = (in_h + pad - size)/stride + 1;
    int w = (in_w + pad - size)/stride + 1;
    int c = in_c;

    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id >= n) return;

    /* unravel the flat id into (b, k, i, j) = (batch, channel, row, column) */
    int j = id % w;
    id /= w;
    int i = id % h;
    id /= h;
    int k = id % c;
    id /= c;
    int b = id;

    int w_offset = -pad/2;
    int h_offset = -pad/2;

    int out_index = j + w*(i + h*(k + c*b));
    float max = -INFINITY;
    int l, m;
    for(l = 0; l < size; ++l){
        for(m = 0; m < size; ++m){
            int cur_h = h_offset + i*stride + l;
            int cur_w = w_offset + j*stride + m;
            int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c));
            int valid = (cur_h >= 0 && cur_h < in_h &&
                         cur_w >= 0 && cur_w < in_w);
            /* input[] is only dereferenced for in-range taps */
            float val = (valid != 0) ? input[index] : -INFINITY;
            max = (val > max) ? val : max;
        }
    }
    output[out_index] = max;
}

/*
 * Host wrapper: max-pool a batch_size x c x h x w device tensor into
 * output_data_gpu. The output extent per axis is
 * (extent + pad - size)/stride + 1, matching the kernel.
 */
void max_pool_1d_gpu(float* input_data_gpu,int batch_size,int c,int h,int w,int size,int stride,int pad,float* output_data_gpu)
{
    /* Launch one thread per OUTPUT element. Sizing the launch from the input
     * extent (h*w*c*batch_size) only works when output and input have the
     * same extent; with a smaller output the surplus threads decode a batch
     * index past the end and write out of bounds. */
    int out_h = (h + pad - size)/stride + 1;
    int out_w = (w + pad - size)/stride + 1;
    if(out_h <= 0 || out_w <= 0 || c <= 0 || batch_size <= 0) return;
    size_t n = (size_t)out_h*out_w*c*batch_size;

    /* NOTE(review): the launch configuration was missing from this copy of
     * the file ("<<>>"). Restored to the darknet convention, which matches
     * the kernel's 2-D grid indexing; confirm that cuda.h provides
     * cuda_gridsize() and BLOCK. */
    forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, h, w, c, stride, size, pad, input_data_gpu, output_data_gpu);

    check_error(cudaPeekAtLastError());
}
--------------------------------------------------------------------------------
/max_pool_1d.h:
--------------------------------------------------------------------------------
/*
 * Company: Synthesis
 * Author: Chen
 * Date: 2019/01/24
 */

#ifndef __MAX_POOL_1D_H__
#define __MAX_POOL_1D_H__

/* Max-pool a batch_size x c x h x w device tensor; see max_pool_1d.cu. */
void max_pool_1d_gpu(float* input_data_gpu,int batch_size,int c,int h,int w,int size,int stride,int pad,float* output);

#endif
--------------------------------------------------------------------------------
/model_convert/Readme:
--------------------------------------------------------------------------------
1 | Steps: 2 | 3 | First,you should install this repo https://github.com/marvis/pytorch-caffe-darknet-convert; 4 | 5 | Note:this repo need install pytorch and caffe. 6 | 7 | Second,you should install upsample_layer into caffe,please check this link https://github.com/BVLC/caffe/pull/6384/commits/4d2400e7ae692b25f034f02ff8e8cd3621725f5c. 8 | 9 | Finally,download yolov3.weights and run yolov3_darknet2caffe.py this file.
10 | 11 | 1) download yolov3.weights 12 | 13 | $ wget https://pjreddie.com/media/files/yolov3.weights 14 | 15 | 2) run yolov3_darknet2caffe.py this file in this folder (/home/xx/pytorch-caffe-darknet-convert/). 16 | 17 | $ python yolov3_darknet2caffe.py yolov3.cfg yolov3.weights yolov3.prototxt yolov3.caffemodel 18 | -------------------------------------------------------------------------------- /model_convert/yolov3_darknet2caffe.py: -------------------------------------------------------------------------------- 1 | # The caffe module needs to be on the Python path; 2 | # we'll add it here explicitly. 3 | caffe_root='/home/chen/caffe/' 4 | #os.chdir(caffe_root) 5 | import sys 6 | sys.path.insert(0,caffe_root+'python') 7 | import caffe 8 | import numpy as np 9 | from collections import OrderedDict 10 | from cfg import * 11 | from prototxt import * 12 | 13 | def darknet2caffe(cfgfile, weightfile, protofile, caffemodel): 14 | net_info = cfg2prototxt(cfgfile) 15 | save_prototxt(net_info , protofile, region=False) 16 | 17 | net = caffe.Net(protofile, caffe.TEST) 18 | params = net.params 19 | 20 | blocks = parse_cfg(cfgfile) 21 | 22 | #Open the weights file 23 | fp = open(weightfile, "rb") 24 | 25 | #The first 4 values are header information 26 | # 1. Major version number 27 | # 2. Minor Version Number 28 | # 3. Subversion number 29 | # 4. 
IMages seen 30 | header = np.fromfile(fp, dtype = np.int32, count = 5) 31 | 32 | #fp = open(weightfile, 'rb') 33 | #header = np.fromfile(fp, count=5, dtype=np.int32) 34 | #header = np.ndarray(shape=(5,),dtype='int32',buffer=fp.read(20)) 35 | #print(header) 36 | buf = np.fromfile(fp, dtype = np.float32) 37 | #print(buf) 38 | fp.close() 39 | 40 | layers = [] 41 | layer_id = 1 42 | start = 0 43 | for block in blocks: 44 | if start >= buf.size: 45 | break 46 | 47 | if block['type'] == 'net': 48 | continue 49 | elif block['type'] == 'convolutional': 50 | batch_normalize = int(block['batch_normalize']) 51 | if block.has_key('name'): 52 | conv_layer_name = block['name'] 53 | bn_layer_name = '%s-bn' % block['name'] 54 | scale_layer_name = '%s-scale' % block['name'] 55 | else: 56 | conv_layer_name = 'layer%d-conv' % layer_id 57 | bn_layer_name = 'layer%d-bn' % layer_id 58 | scale_layer_name = 'layer%d-scale' % layer_id 59 | 60 | if batch_normalize: 61 | start = load_conv_bn2caffe(buf, start, params[conv_layer_name], params[bn_layer_name], params[scale_layer_name]) 62 | else: 63 | start = load_conv2caffe(buf, start, params[conv_layer_name]) 64 | layer_id = layer_id+1 65 | elif block['type'] == 'depthwise_convolutional': 66 | batch_normalize = int(block['batch_normalize']) 67 | if block.has_key('name'): 68 | conv_layer_name = block['name'] 69 | bn_layer_name = '%s-bn' % block['name'] 70 | scale_layer_name = '%s-scale' % block['name'] 71 | else: 72 | conv_layer_name = 'layer%d-dwconv' % layer_id 73 | bn_layer_name = 'layer%d-bn' % layer_id 74 | scale_layer_name = 'layer%d-scale' % layer_id 75 | 76 | if batch_normalize: 77 | start = load_conv_bn2caffe(buf, start, params[conv_layer_name], params[bn_layer_name], params[scale_layer_name]) 78 | else: 79 | start = load_conv2caffe(buf, start, params[conv_layer_name]) 80 | layer_id = layer_id+1 81 | elif block['type'] == 'connected': 82 | if block.has_key('name'): 83 | fc_layer_name = block['name'] 84 | else: 85 | fc_layer_name = 
'layer%d-fc' % layer_id 86 | start = load_fc2caffe(buf, start, params[fc_layer_name]) 87 | layer_id = layer_id+1 88 | elif block['type'] == 'maxpool': 89 | layer_id = layer_id+1 90 | elif block['type'] == 'avgpool': 91 | layer_id = layer_id+1 92 | elif block['type'] == 'region': 93 | layer_id = layer_id + 1 94 | elif block['type'] == 'route': 95 | layer_id = layer_id + 1 96 | elif block['type'] == 'shortcut': 97 | layer_id = layer_id + 1 98 | elif block['type'] == 'softmax': 99 | layer_id = layer_id + 1 100 | elif block['type'] == 'cost': 101 | layer_id = layer_id + 1 102 | elif block['type'] == 'upsample': 103 | layer_id = layer_id + 1 104 | else: 105 | print('unknow layer type %s ' % block['type']) 106 | layer_id = layer_id + 1 107 | print('save prototxt to %s' % protofile) 108 | save_prototxt(net_info , protofile, region=True) 109 | print('save caffemodel to %s' % caffemodel) 110 | net.save(caffemodel) 111 | 112 | def load_conv2caffe(buf, start, conv_param): 113 | weight = conv_param[0].data 114 | bias = conv_param[1].data 115 | conv_param[1].data[...] = np.reshape(buf[start:start+bias.size], bias.shape); start = start + bias.size 116 | conv_param[0].data[...] = np.reshape(buf[start:start+weight.size], weight.shape); start = start + weight.size 117 | return start 118 | 119 | def load_fc2caffe(buf, start, fc_param): 120 | weight = fc_param[0].data 121 | bias = fc_param[1].data 122 | fc_param[1].data[...] = np.reshape(buf[start:start+bias.size], bias.shape); start = start + bias.size 123 | fc_param[0].data[...] = np.reshape(buf[start:start+weight.size], weight.shape); start = start + weight.size 124 | return start 125 | 126 | 127 | def load_conv_bn2caffe(buf, start, conv_param, bn_param, scale_param): 128 | conv_weight = conv_param[0].data 129 | running_mean = bn_param[0].data 130 | running_var = bn_param[1].data 131 | scale_weight = scale_param[0].data 132 | scale_bias = scale_param[1].data 133 | 134 | 135 | 136 | scale_param[1].data[...] 
= np.reshape(buf[start:start+scale_bias.size], scale_bias.shape); start = start + scale_bias.size 137 | #print scale_bias.size 138 | #print scale_bias 139 | 140 | scale_param[0].data[...] = np.reshape(buf[start:start+scale_weight.size], scale_weight.shape); start = start + scale_weight.size 141 | #print scale_weight.size 142 | 143 | bn_param[0].data[...] = np.reshape(buf[start:start+running_mean.size], running_mean.shape); start = start + running_mean.size 144 | #print running_mean.size 145 | 146 | bn_param[1].data[...] = np.reshape(buf[start:start+running_var.size], running_var.shape); start = start + running_var.size 147 | #print running_var.size 148 | 149 | bn_param[2].data[...] = np.array([1.0]) 150 | conv_param[0].data[...] = np.reshape(buf[start:start+conv_weight.size], conv_weight.shape); start = start + conv_weight.size 151 | #print conv_weight.size 152 | 153 | return start 154 | 155 | def cfg2prototxt(cfgfile): 156 | blocks = parse_cfg(cfgfile) 157 | 158 | prev_filters = 3 159 | layers = [] 160 | props = OrderedDict() 161 | bottom = 'data' 162 | layer_id = 1 163 | topnames = dict() 164 | for block in blocks: 165 | if block['type'] == 'net': 166 | props['name'] = 'Darkent2Caffe' 167 | props['input'] = 'data' 168 | props['input_dim'] = ['1'] 169 | props['input_dim'].append(block['channels']) 170 | props['input_dim'].append(block['height']) 171 | props['input_dim'].append(block['width']) 172 | continue 173 | elif block['type'] == 'convolutional': 174 | conv_layer = OrderedDict() 175 | conv_layer['bottom'] = bottom 176 | if block.has_key('name'): 177 | conv_layer['top'] = block['name'] 178 | conv_layer['name'] = block['name'] 179 | else: 180 | conv_layer['top'] = 'layer%d-conv' % layer_id 181 | conv_layer['name'] = 'layer%d-conv' % layer_id 182 | conv_layer['type'] = 'Convolution' 183 | convolution_param = OrderedDict() 184 | convolution_param['num_output'] = block['filters'] 185 | prev_filters = block['filters'] 186 | convolution_param['kernel_size'] = 
block['size'] 187 | if block['pad'] == '1': 188 | convolution_param['pad'] = str(int(convolution_param['kernel_size'])/2) 189 | convolution_param['stride'] = block['stride'] 190 | if block['batch_normalize'] == '1': 191 | convolution_param['bias_term'] = 'false' 192 | else: 193 | convolution_param['bias_term'] = 'true' 194 | conv_layer['convolution_param'] = convolution_param 195 | layers.append(conv_layer) 196 | bottom = conv_layer['top'] 197 | 198 | if block['batch_normalize'] == '1': 199 | bn_layer = OrderedDict() 200 | bn_layer['bottom'] = bottom 201 | bn_layer['top'] = bottom 202 | if block.has_key('name'): 203 | bn_layer['name'] = '%s-bn' % block['name'] 204 | else: 205 | bn_layer['name'] = 'layer%d-bn' % layer_id 206 | bn_layer['type'] = 'BatchNorm' 207 | batch_norm_param = OrderedDict() 208 | batch_norm_param['use_global_stats'] = 'true' 209 | bn_layer['batch_norm_param'] = batch_norm_param 210 | layers.append(bn_layer) 211 | 212 | scale_layer = OrderedDict() 213 | scale_layer['bottom'] = bottom 214 | scale_layer['top'] = bottom 215 | if block.has_key('name'): 216 | scale_layer['name'] = '%s-scale' % block['name'] 217 | else: 218 | scale_layer['name'] = 'layer%d-scale' % layer_id 219 | scale_layer['type'] = 'Scale' 220 | scale_param = OrderedDict() 221 | scale_param['bias_term'] = 'true' 222 | scale_layer['scale_param'] = scale_param 223 | layers.append(scale_layer) 224 | 225 | if block['activation'] != 'linear': 226 | relu_layer = OrderedDict() 227 | relu_layer['bottom'] = bottom 228 | relu_layer['top'] = bottom 229 | if block.has_key('name'): 230 | relu_layer['name'] = '%s-act' % block['name'] 231 | else: 232 | relu_layer['name'] = 'layer%d-act' % layer_id 233 | relu_layer['type'] = 'ReLU' 234 | if block['activation'] == 'leaky': 235 | relu_param = OrderedDict() 236 | relu_param['negative_slope'] = '0.1' 237 | relu_layer['relu_param'] = relu_param 238 | layers.append(relu_layer) 239 | topnames[layer_id] = bottom 240 | layer_id = layer_id+1 241 | elif 
block['type'] == 'depthwise_convolutional': 242 | conv_layer = OrderedDict() 243 | conv_layer['bottom'] = bottom 244 | if block.has_key('name'): 245 | conv_layer['top'] = block['name'] 246 | conv_layer['name'] = block['name'] 247 | else: 248 | conv_layer['top'] = 'layer%d-dwconv' % layer_id 249 | conv_layer['name'] = 'layer%d-dwconv' % layer_id 250 | conv_layer['type'] = 'ConvolutionDepthwise' 251 | convolution_param = OrderedDict() 252 | convolution_param['num_output'] = prev_filters 253 | convolution_param['kernel_size'] = block['size'] 254 | if block['pad'] == '1': 255 | convolution_param['pad'] = str(int(convolution_param['kernel_size'])/2) 256 | convolution_param['stride'] = block['stride'] 257 | if block['batch_normalize'] == '1': 258 | convolution_param['bias_term'] = 'false' 259 | else: 260 | convolution_param['bias_term'] = 'true' 261 | conv_layer['convolution_param'] = convolution_param 262 | layers.append(conv_layer) 263 | bottom = conv_layer['top'] 264 | 265 | if block['batch_normalize'] == '1': 266 | bn_layer = OrderedDict() 267 | bn_layer['bottom'] = bottom 268 | bn_layer['top'] = bottom 269 | if block.has_key('name'): 270 | bn_layer['name'] = '%s-bn' % block['name'] 271 | else: 272 | bn_layer['name'] = 'layer%d-bn' % layer_id 273 | bn_layer['type'] = 'BatchNorm' 274 | batch_norm_param = OrderedDict() 275 | batch_norm_param['use_global_stats'] = 'true' 276 | bn_layer['batch_norm_param'] = batch_norm_param 277 | layers.append(bn_layer) 278 | 279 | scale_layer = OrderedDict() 280 | scale_layer['bottom'] = bottom 281 | scale_layer['top'] = bottom 282 | if block.has_key('name'): 283 | scale_layer['name'] = '%s-scale' % block['name'] 284 | else: 285 | scale_layer['name'] = 'layer%d-scale' % layer_id 286 | scale_layer['type'] = 'Scale' 287 | scale_param = OrderedDict() 288 | scale_param['bias_term'] = 'true' 289 | scale_layer['scale_param'] = scale_param 290 | layers.append(scale_layer) 291 | 292 | if block['activation'] != 'linear': 293 | relu_layer = 
OrderedDict() 294 | relu_layer['bottom'] = bottom 295 | relu_layer['top'] = bottom 296 | if block.has_key('name'): 297 | relu_layer['name'] = '%s-act' % block['name'] 298 | else: 299 | relu_layer['name'] = 'layer%d-act' % layer_id 300 | relu_layer['type'] = 'ReLU' 301 | if block['activation'] == 'leaky': 302 | relu_param = OrderedDict() 303 | relu_param['negative_slope'] = '0.1' 304 | relu_layer['relu_param'] = relu_param 305 | layers.append(relu_layer) 306 | topnames[layer_id] = bottom 307 | layer_id = layer_id+1 308 | elif block['type'] == 'maxpool': 309 | max_layer = OrderedDict() 310 | max_layer['bottom'] = bottom 311 | if block.has_key('name'): 312 | max_layer['top'] = block['name'] 313 | max_layer['name'] = block['name'] 314 | else: 315 | max_layer['top'] = 'layer%d-maxpool' % layer_id 316 | max_layer['name'] = 'layer%d-maxpool' % layer_id 317 | max_layer['type'] = 'Pooling' 318 | pooling_param = OrderedDict() 319 | pooling_param['stride'] = block['stride'] 320 | pooling_param['pool'] = 'MAX' 321 | if (int(block['size']) - int(block['stride'])) % 2 == 0: 322 | pooling_param['kernel_size'] = block['size'] 323 | pooling_param['pad'] = str((int(block['size'])-1)/2) 324 | 325 | if (int(block['size']) - int(block['stride'])) % 2 == 1: 326 | pooling_param['kernel_size'] = str(int(block['size']) + 1) 327 | pooling_param['pad'] = str((int(block['size']) + 1)/2) 328 | 329 | max_layer['pooling_param'] = pooling_param 330 | layers.append(max_layer) 331 | bottom = max_layer['top'] 332 | topnames[layer_id] = bottom 333 | layer_id = layer_id+1 334 | elif block['type'] == 'avgpool': 335 | avg_layer = OrderedDict() 336 | avg_layer['bottom'] = bottom 337 | if block.has_key('name'): 338 | avg_layer['top'] = block['name'] 339 | avg_layer['name'] = block['name'] 340 | else: 341 | avg_layer['top'] = 'layer%d-avgpool' % layer_id 342 | avg_layer['name'] = 'layer%d-avgpool' % layer_id 343 | avg_layer['type'] = 'Pooling' 344 | pooling_param = OrderedDict() 345 | 
pooling_param['kernel_size'] = 7 346 | pooling_param['stride'] = 1 347 | pooling_param['pool'] = 'AVE' 348 | avg_layer['pooling_param'] = pooling_param 349 | layers.append(avg_layer) 350 | bottom = avg_layer['top'] 351 | topnames[layer_id] = bottom 352 | layer_id = layer_id+1 353 | elif block['type'] == 'region': 354 | if True: 355 | region_layer = OrderedDict() 356 | region_layer['bottom'] = bottom 357 | if block.has_key('name'): 358 | region_layer['top'] = block['name'] 359 | region_layer['name'] = block['name'] 360 | else: 361 | region_layer['top'] = 'layer%d-region' % layer_id 362 | region_layer['name'] = 'layer%d-region' % layer_id 363 | region_layer['type'] = 'Region' 364 | region_param = OrderedDict() 365 | region_param['anchors'] = block['anchors'].strip() 366 | region_param['classes'] = block['classes'] 367 | region_param['num'] = block['num'] 368 | region_layer['region_param'] = region_param 369 | layers.append(region_layer) 370 | bottom = region_layer['top'] 371 | topnames[layer_id] = bottom 372 | layer_id = layer_id + 1 373 | 374 | elif block['type'] == 'route': 375 | route_layer = OrderedDict() 376 | layer_name = str(block['layers']).split(',') 377 | #print(layer_name[0]) 378 | bottom_layer_size = len(str(block['layers']).split(',')) 379 | #print(bottom_layer_size) 380 | if(1 == bottom_layer_size): 381 | prev_layer_id = layer_id + int(block['layers']) 382 | bottom = topnames[prev_layer_id] 383 | #topnames[layer_id] = bottom 384 | route_layer['bottom'] = bottom 385 | if(2 == bottom_layer_size): 386 | prev_layer_id1 = layer_id + int(layer_name[0]) 387 | #print(prev_layer_id1) 388 | prev_layer_id2 = int(layer_name[1]) + 1 389 | print(topnames) 390 | bottom1 = topnames[prev_layer_id1] 391 | bottom2 = topnames[prev_layer_id2] 392 | route_layer['bottom'] = [bottom1, bottom2] 393 | if(4 == bottom_layer_size): 394 | prev_layer_id1 = layer_id + int(layer_name[0]) 395 | prev_layer_id2 = layer_id + int(layer_name[1]) 396 | prev_layer_id3 = layer_id + 
int(layer_name[2]) 397 | prev_layer_id4 = layer_id + int(layer_name[3]) 398 | 399 | bottom1 = topnames[prev_layer_id1] 400 | bottom2 = topnames[prev_layer_id2] 401 | bottom3 = topnames[prev_layer_id3] 402 | bottom4 = topnames[prev_layer_id4] 403 | route_layer['bottom'] = [bottom1, bottom2,bottom3,bottom4] 404 | if block.has_key('name'): 405 | route_layer['top'] = block['name'] 406 | route_layer['name'] = block['name'] 407 | else: 408 | route_layer['top'] = 'layer%d-route' % layer_id 409 | route_layer['name'] = 'layer%d-route' % layer_id 410 | route_layer['type'] = 'Concat' 411 | print(route_layer) 412 | layers.append(route_layer) 413 | bottom = route_layer['top'] 414 | print(layer_id) 415 | topnames[layer_id] = bottom 416 | layer_id = layer_id + 1 417 | 418 | elif block['type'] == 'upsample': 419 | upsample_layer = OrderedDict() 420 | print(block['stride']) 421 | upsample_layer['bottom'] = bottom 422 | if block.has_key('name'): 423 | upsample_layer['top'] = block['name'] 424 | upsample_layer['name'] = block['name'] 425 | else: 426 | upsample_layer['top'] = 'layer%d-upsample' % layer_id 427 | upsample_layer['name'] = 'layer%d-upsample' % layer_id 428 | upsample_layer['type'] = 'Upsample' 429 | upsample_param = OrderedDict() 430 | upsample_param['scale'] = block['stride'] 431 | upsample_layer['upsample_param'] = upsample_param 432 | print(upsample_layer) 433 | layers.append(upsample_layer) 434 | bottom = upsample_layer['top'] 435 | print('upsample:',layer_id) 436 | topnames[layer_id] = bottom 437 | layer_id = layer_id + 1 438 | 439 | elif block['type'] == 'shortcut': 440 | prev_layer_id1 = layer_id + int(block['from']) 441 | prev_layer_id2 = layer_id - 1 442 | bottom1 = topnames[prev_layer_id1] 443 | bottom2= topnames[prev_layer_id2] 444 | shortcut_layer = OrderedDict() 445 | shortcut_layer['bottom'] = [bottom1, bottom2] 446 | if block.has_key('name'): 447 | shortcut_layer['top'] = block['name'] 448 | shortcut_layer['name'] = block['name'] 449 | else: 450 | 
shortcut_layer['top'] = 'layer%d-shortcut' % layer_id 451 | shortcut_layer['name'] = 'layer%d-shortcut' % layer_id 452 | shortcut_layer['type'] = 'Eltwise' 453 | eltwise_param = OrderedDict() 454 | eltwise_param['operation'] = 'SUM' 455 | shortcut_layer['eltwise_param'] = eltwise_param 456 | layers.append(shortcut_layer) 457 | bottom = shortcut_layer['top'] 458 | 459 | if block['activation'] != 'linear': 460 | relu_layer = OrderedDict() 461 | relu_layer['bottom'] = bottom 462 | relu_layer['top'] = bottom 463 | if block.has_key('name'): 464 | relu_layer['name'] = '%s-act' % block['name'] 465 | else: 466 | relu_layer['name'] = 'layer%d-act' % layer_id 467 | relu_layer['type'] = 'ReLU' 468 | if block['activation'] == 'leaky': 469 | relu_param = OrderedDict() 470 | relu_param['negative_slope'] = '0.1' 471 | relu_layer['relu_param'] = relu_param 472 | layers.append(relu_layer) 473 | topnames[layer_id] = bottom 474 | layer_id = layer_id + 1 475 | 476 | elif block['type'] == 'connected': 477 | fc_layer = OrderedDict() 478 | fc_layer['bottom'] = bottom 479 | if block.has_key('name'): 480 | fc_layer['top'] = block['name'] 481 | fc_layer['name'] = block['name'] 482 | else: 483 | fc_layer['top'] = 'layer%d-fc' % layer_id 484 | fc_layer['name'] = 'layer%d-fc' % layer_id 485 | fc_layer['type'] = 'InnerProduct' 486 | fc_param = OrderedDict() 487 | fc_param['num_output'] = int(block['output']) 488 | fc_layer['inner_product_param'] = fc_param 489 | layers.append(fc_layer) 490 | bottom = fc_layer['top'] 491 | 492 | if block['activation'] != 'linear': 493 | relu_layer = OrderedDict() 494 | relu_layer['bottom'] = bottom 495 | relu_layer['top'] = bottom 496 | if block.has_key('name'): 497 | relu_layer['name'] = '%s-act' % block['name'] 498 | else: 499 | relu_layer['name'] = 'layer%d-act' % layer_id 500 | relu_layer['type'] = 'ReLU' 501 | if block['activation'] == 'leaky': 502 | relu_param = OrderedDict() 503 | relu_param['negative_slope'] = '0.1' 504 | relu_layer['relu_param'] = 
relu_param 505 | layers.append(relu_layer) 506 | topnames[layer_id] = bottom 507 | layer_id = layer_id+1 508 | else: 509 | print('unknow layer type %s ' % block['type']) 510 | topnames[layer_id] = bottom 511 | layer_id = layer_id + 1 512 | 513 | net_info = OrderedDict() 514 | net_info['props'] = props 515 | net_info['layers'] = layers 516 | return net_info 517 | 518 | if __name__ == '__main__': 519 | import sys 520 | if len(sys.argv) != 5: 521 | print('try:') 522 | print('python darknet2caffe.py tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-voc.prototxt tiny-yolo-voc.caffemodel') 523 | print('') 524 | print('please add name field for each block to avoid generated name') 525 | exit() 526 | 527 | cfgfile = sys.argv[1] 528 | #net_info = cfg2prototxt(cfgfile) 529 | #print_prototxt(net_info) 530 | #save_prototxt(net_info, 'tmp.prototxt') 531 | weightfile = sys.argv[2] 532 | protofile = sys.argv[3] 533 | caffemodel = sys.argv[4] 534 | darknet2caffe(cfgfile, weightfile, protofile, caffemodel) 535 | -------------------------------------------------------------------------------- /yolo_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Company: Synthesis 3 | * Author: Chen 4 | * Date: 2018/06/04 5 | */ 6 | 7 | #include "yolo_layer.h" 8 | #include "blas.h" 9 | #include "cuda.h" 10 | #include "activations.h" 11 | #include "box.h" 12 | #include 13 | #include 14 | 15 | //yolov3 16 | float biases[18] = {10,10, 20,21, 39,29, 28,55, 66,48, 49,109, 96,75, 246,37, 132,11}; 17 | 18 | //yolov3-tiny 19 | float biases_tiny[12] = {10,14,23,27,37,58,81,82,135,169,344,319}; 20 | 21 | layer make_yolo_layer(int batch,int w,int h,int net_w,int net_h,int n,int total,int classes) 22 | { 23 | layer l = {0}; 24 | l.n = n; 25 | l.total = total; 26 | l.batch = batch; 27 | l.h = h; 28 | l.w = w; 29 | l.c = n*(classes+ 4 + 1); 30 | l.out_w = l.w; 31 | l.out_h = l.h; 32 | l.out_c = l.c; 33 | l.classes = classes; 34 | l.inputs = l.w*l.h*l.c; 35 
| 36 | l.biases = (float*)calloc(total*2,sizeof(float)); 37 | 38 | l.mask = (int*)calloc(n,sizeof(int)); 39 | if(9 == total){ 40 | for(int i =0;i thresh) 135 | ++count; 136 | } 137 | 138 | } 139 | } 140 | //printf("count = %d\n",count); 141 | return count; 142 | } 143 | 144 | int num_detections(vector layers_params,float thresh) 145 | { 146 | int i; 147 | int s=0; 148 | for(i=0;i layers_params,float thresh,int* num) 157 | { 158 | layer l = layers_params[0]; 159 | int i; 160 | int nboxes = num_detections(layers_params,thresh); 161 | if(num) *num = nboxes; 162 | detection *dets = (detection*)calloc(nboxes,sizeof(detection)); 163 | for(i=0;i 4) 166 | //{ 167 | // dets[i].mask = (float*)(l.coords-4,sizeof(float)); 168 | //} 169 | } 170 | return dets; 171 | } 172 | 173 | 174 | void correct_yolo_boxes(detection* dets,int n,int w,int h,int netw,int neth,int relative) 175 | { 176 | int i; 177 | int new_w=0; 178 | int new_h=0; 179 | if (((float)netw/w) < ((float)neth/h)){ 180 | new_w = netw; 181 | new_h = (h * netw)/w; 182 | } 183 | else{ 184 | new_h = neth; 185 | new_w = (w * neth)/h; 186 | } 187 | for (i = 0; i < n; ++i){ 188 | box b = dets[i].bbox; 189 | b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); 190 | b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); 191 | b.w *= (float)netw/new_w; 192 | b.h *= (float)neth/new_h; 193 | if(!relative){ 194 | b.x *= w; 195 | b.w *= w; 196 | b.y *= h; 197 | b.h *= h; 198 | } 199 | dets[i].bbox = b; 200 | } 201 | } 202 | 203 | 204 | box get_yolo_box(float* x,float* biases,int n,int index,int i,int j,int lw, int lh,int w, int h,int stride) 205 | { 206 | box b; 207 | b.x = (i + x[index + 0*stride]) / lw; 208 | b.y = (j + x[index + 1*stride]) / lh; 209 | b.w = exp(x[index + 2*stride]) * biases[2*n] / w; 210 | b.h = exp(x[index + 3*stride]) * biases[2*n + 1] / h; 211 | return b; 212 | } 213 | 214 | 215 | int get_yolo_detections(layer l,int w, int h, int netw,int neth,float thresh,int *map,int relative,detection *dets) 
216 | { 217 | int i,j,n,b; 218 | float* predictions = l.output; 219 | int count = 0; 220 | for(b = 0;b < l.batch;++b){ 221 | for(i=0;i thresh) ? prob : 0; 237 | } 238 | ++count; 239 | } 240 | } 241 | } 242 | correct_yolo_boxes(dets,count,w,h,netw,neth,relative); 243 | return count; 244 | } 245 | 246 | 247 | void fill_network_boxes(vector layers_params,int img_w,int img_h,int net_w,int net_h,float thresh, float hier, int *map,int relative,detection *dets) 248 | { 249 | int j; 250 | for(j=0;j layers_params, 259 | int img_w,int img_h,int net_w,int net_h,float thresh,float hier,int* map,int relative,int *num) 260 | { 261 | //make network boxes 262 | detection *dets = make_network_boxes(layers_params,thresh,num); 263 | 264 | //fill network boxes 265 | fill_network_boxes(layers_params,img_w,img_h,net_w,net_h,thresh,hier,map,relative,dets); 266 | return dets; 267 | } 268 | 269 | //get detection result 270 | detection* get_detections(vector*> blobs,int img_w,int img_h,int net_w,int net_h,int *nboxes,NetType type) 271 | { 272 | vector layers_params; 273 | layers_params.clear(); 274 | for(int i=0;iwidth(),blobs[i]->height(),net_w,net_h,numBBoxes,yolov3_numAnchors,classes); 278 | } 279 | else if(YOLOV3_TINY == type){ 280 | l_params = make_yolo_layer(1,blobs[i]->width(),blobs[i]->height(),net_w,net_h,numBBoxes,yolov3_tiny_numAnchors,classes); 281 | } 282 | 283 | layers_params.push_back(l_params); 284 | forward_yolo_layer_gpu(blobs[i]->gpu_data(),l_params); 285 | } 286 | 287 | 288 | //get network boxes 289 | detection* dets = get_network_boxes(layers_params,img_w,img_h,net_w,net_h,thresh,hier_thresh,0,relative,nboxes); 290 | 291 | //release layer memory 292 | for(int index =0;index < layers_params.size();++index){ 293 | free_yolo_layer(layers_params[index]); 294 | } 295 | 296 | if(nms) { 297 | do_nms_sort(dets,(*nboxes),classes,nms); 298 | printf("have done nms\n"); 299 | } 300 | return dets; 301 | } 302 | 303 | 304 | //release detection memory 305 | void 
free_detections(detection *dets,int nboxes) 306 | { 307 | int i; 308 | for(i = 0;i 10 | #include 11 | #include 12 | 13 | using namespace caffe; 14 | 15 | 16 | const int classes = 2; 17 | const float thresh = 0.5; 18 | const float hier_thresh = 0.5; 19 | const float nms = 0.45; 20 | const int numBBoxes = 3; 21 | const int relative = 1; 22 | 23 | const int yolov3_numAnchors = 9; 24 | const int yolov3_tiny_numAnchors = 6; 25 | 26 | enum NetType{ 27 | YOLOV3 = 0, 28 | YOLOV3_TINY = 1 29 | }; 30 | 31 | typedef struct{ 32 | float x,y,w,h; 33 | }box; 34 | 35 | typedef struct{ 36 | box bbox; 37 | int classes; 38 | float* prob; 39 | float* mask; 40 | float objectness; 41 | int sort_class; 42 | }detection; 43 | 44 | typedef struct layer{ 45 | int batch; 46 | int total; 47 | int n,c,h,w; 48 | int out_n,out_c,out_h,out_w; 49 | int classes; 50 | int inputs,outputs; 51 | int *mask; 52 | float* biases; 53 | float* output; 54 | float* output_gpu; 55 | }layer; 56 | 57 | layer make_yolo_layer(int batch,int w,int h,int n,int total,int classes); 58 | 59 | void free_yolo_layer(layer l); 60 | 61 | void forward_yolo_layer_gpu(const float* input,layer l, float* output); 62 | 63 | detection* get_detections(vector*> blobs,int img_w,int img_h,int net_w,int net_h,int* nboxes,NetType type); 64 | 65 | void free_detections(detection *dets,int nboxes); 66 | 67 | 68 | 69 | 70 | #endif 71 | --------------------------------------------------------------------------------