├── CMakeLists.txt
├── CMakePreBuild.sh
├── README.md
├── activation_kernels.cu
├── activations.h
├── blas.h
├── blas_kernels.cu
├── box.cpp
├── box.h
├── cuda.cpp
├── cuda.h
├── detectnet
    ├── CMakeLists.txt
    └── detectnet.cpp
├── image.cpp
├── image.h
├── make_method_by_laymu.txt
├── max_pool_1d.cu
├── max_pool_1d.h
├── model_convert
    ├── Readme
    └── yolov3_darknet2caffe.py
├── yolo_layer.cpp
└── yolo_layer.h


/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.8)
 2 | project(sysDetectSpeed)
 3 | 
 4 | # Compile all host C++ sources as C++11.
 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 6 | 
 7 | # CUDA is mandatory: the cuda_add_library()/cuda_add_executable() calls below are
 8 | # unconditional, so stop at configure time with a clear message when it is missing.
 9 | find_package(CUDA REQUIRED)
10 | 
11 | # Optimised device code for each GPU generation this project targets.
12 | list(APPEND CUDA_NVCC_FLAGS
13 | 	-O3
14 | 	-gencode arch=compute_53,code=sm_53  # tegra tx1
15 | 	-gencode arch=compute_61,code=sm_61  # gtx 1060
16 | 	-gencode arch=compute_62,code=sm_62  # tegra tx2
17 | 	-gencode arch=compute_75,code=[sm_75,compute_75]
18 | )
19 | 
20 | # Per-architecture output tree: <build>/<arch>/{bin,lib,include}
21 | set(PROJECT_OUTPUT_DIR  "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR}")
22 | set(PROJECT_INCLUDE_DIR "${PROJECT_OUTPUT_DIR}/include")
23 | 
24 | file(MAKE_DIRECTORY "${PROJECT_INCLUDE_DIR}" "${PROJECT_OUTPUT_DIR}/bin")
25 | 
26 | message("-- system arch:  ${CMAKE_SYSTEM_PROCESSOR}")
27 | message("-- output path:  ${PROJECT_OUTPUT_DIR}")
28 | 
29 | # Executables go to bin/; shared and static libraries both go to lib/.
30 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_OUTPUT_DIR}/bin")
31 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_OUTPUT_DIR}/lib")
32 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_OUTPUT_DIR}/lib")
33 | 
34 | 
35 | # build C/C++ interface
36 | 
37 | # Caffe tree to build against. The default is the path this file used to
38 | # hard-code; override with -DCAFFE_ROOT=/path/to/caffe on other machines.
39 | set(CAFFE_ROOT "/home/hs/software/caffe" CACHE PATH "Root of the Caffe tree (uses include/ and .build_release/)")
40 | 
41 | include_directories(
42 | 	${PROJECT_INCLUDE_DIR}
43 | 	${CAFFE_ROOT}/include
44 | 	${CAFFE_ROOT}/.build_release/src
45 | 	/usr/include/openblas/
46 | )
47 | 
48 | # GIE_PATH is not defined anywhere in this file. Add its include directory only
49 | # when the user supplies it, instead of silently adding a bare "/include".
50 | if(GIE_PATH)
51 | 	include_directories(${GIE_PATH}/include)
52 | endif()
53 | 
54 | # Library sources and public headers (inferenceIncludes is reused further down
55 | # for the header copy and the install rules).
56 | file(GLOB inferenceSources *.cpp *.cu )
57 | file(GLOB inferenceIncludes *.h )
58 | 
59 | # NOTE: FindCUDA links the CUDA runtime with the keyword-less form of
60 | # target_link_libraries(), so every call on this target must stay keyword-less.
61 | cuda_add_library(sysDetectSpeed SHARED ${inferenceSources})
62 | target_link_libraries(sysDetectSpeed
63 |         ${CAFFE_ROOT}/.build_release/lib/libcaffe.so
64 |         /usr/lib64/libglog.so
65 |         /usr/lib64/libgflags.so
66 |         /usr/lib64/libgflags.so.2.1
67 |         /usr/lib64/boost/lib/libboost_system.so
68 |         /root/anaconda3/envs/venv/lib/libstdc++.so.6
69 |         /usr/lib64/libtiff.so
70 | )
57 | 
58 | 
59 | # Mirror every public header into the build-tree include directory.
60 | foreach(header ${inferenceIncludes})
61 | 	message("-- Copying ${header}")
62 | 	configure_file("${header}" "${PROJECT_INCLUDE_DIR}" COPYONLY)
63 | endforeach()
64 | 
65 | 
66 | # Link the network data directory next to the binaries (done at configure time).
67 | execute_process(
68 | 	COMMAND "${CMAKE_COMMAND}" -E create_symlink
69 | 		"${PROJECT_SOURCE_DIR}/data/networks"
70 | 		"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/networks"
71 | )
72 | 
73 | # The sample executable is built from its own directory.
74 | add_subdirectory(detectnet)
75 | 
76 | # OpenCV is mandatory; link it into the library (keyword-less form on purpose).
77 | find_package(OpenCV REQUIRED)
78 | target_link_libraries(sysDetectSpeed ${OpenCV_LIBS})
76 | 
77 | # install: public headers
78 | foreach(header ${inferenceIncludes})
79 |     install(FILES "${header}" DESTINATION include/sysDetectSpeed)
80 | endforeach()
81 | 
82 | # install the shared library and record it in the export set
83 | install(
84 |     TARGETS sysDetectSpeed
85 |     DESTINATION lib/sysDetectSpeed
86 |     EXPORT sysDetectSpeedConfig
87 | )
88 | 
89 | # install the cmake project, for importing
90 | install(EXPORT sysDetectSpeedConfig DESTINATION share/sysDetectSpeed/cmake)
87 | 


--------------------------------------------------------------------------------
/CMakePreBuild.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # this script is automatically run from CMakeLists.txt
 3 | 
 4 | BUILD_ROOT=$PWD
 5 | TORCH_PREFIX=$PWD/torch
 6 | 
 7 | echo "[Pre-build]  dependency installer script running..."
 8 | echo "[Pre-build]  build root directory:       $BUILD_ROOT"
 9 | 
10 | 
11 | # break on errors
12 | #set -e
13 | 
14 | 
15 | # install packages
16 | sudo apt-get update
17 | sudo apt-get install -y libqt4-dev qt4-dev-tools libglew-dev glew-utils libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev libglib2.0-dev
18 | sudo apt-get update
19 | 
20 | sudo rm /usr/lib/aarch64-linux-gnu/libGL.so
21 | sudo ln -s /usr/lib/aarch64-linux-gnu/tegra/libGL.so /usr/lib/aarch64-linux-gnu/libGL.so
22 | 
23 | # maximize performance
24 | sudo nvpmodel -m 0
25 | sudo ~/jetson_clock.sh
26 | echo "[Pre-build]  Finished CMakePreBuild script"
27 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # caffe-yolov3
 2 | # Platform
 3 | Tested on CentOS 7 with a 2080 Ti.
 4 | 
 5 | NOTE: You need to change CMakeLists.txt to match your system.
 6 | This repository is forked from https://github.com/ChenYingpeng/caffe-yolov3 
 7 | I have modified several places; see the file make_method_by_laymu.txt for reference.
 8 | Thanks, ChenYingpeng!
 9 | 
10 | # Convert model
11 | Refer to the following repo for model conversion:
12 | https://github.com/passion3394/pytorch-caffe-darknet-convert-laymu
13 | 
14 | I have tested yolov3_darknet2caffe.py; it should work.
15 | 
16 | # Install
17 | git clone https://github.com/passion3394/darknet2caffe_yolov3
18 | 
19 | cd darknet2caffe_yolov3
20 | 
21 | mkdir build
22 | 
23 | cd build
24 | 
25 | cmake ..
26 | 
27 | make -j6
28 | 
29 | # Test
30 | 
31 | Example 1: yolov3
32 | 
33 | $ ./x86_64/bin/detectnet 0 ../../data/yolov3/prototxt/yolov3.prototxt ../../data/yolov3/caffemodel/yolov3.caffemodel img_path
34 | 
35 | Example 2: yolov3-spp
36 | 
37 | $ ./x86_64/bin/detectnet 0 ../../data/yolov3/prototxt/yolov3-spp.prototxt ../../data/yolov3/caffemodel/yolov3-spp.caffemodel img_path 
38 | 
39 | Example 3: mobilenet_v1 + yolov3
40 | 
41 | $ ./x86_64/bin/detectnet 0 ../../data/yolov3/prototxt/mobilenet_v1_yolov3.prototxt ../../data/yolov3/caffemodel/mobilenet_v1_yolov3.caffemodel img_path
42 | 
43 | Example 4:yolov3-tiny
44 | 
45 | $ ./x86_64/bin/detectnet 1 ../../data/yolov3/prototxt/yolov3-tiny-1.prototxt ../../data/yolov3/prototxt/yolov3-tiny-2.prototxt ../../data/yolov3/caffemodel/yolov3-tiny.caffemodel img_path
46 | 
47 | Warning: a yolov3-tiny caffemodel converted from Darknet may produce incorrect test results. This conclusion comes from my own experiments.
48 | 
49 | # Download Model
50 | 
51 | Baidu link [model](https://pan.baidu.com/s/1yiCrnmsOm0hbweJBiiUScQ)
52 | 
53 | 
54 | # Note
55 | 
56 | 1.Only inference
57 | 
58 | 2. Supports models such as yolov3, yolov3-spp, yolov3-tiny and mobilenet_v1_yolov3, with network input sizes such as 320x320, 416x416 and 608x608.
59 | 
60 | 3.Mobilenet_v1 + yolov3 (test COCO,mAP = 0.3798,To be optimized)
61 | 
62 | 4. Yolov3-tiny: Caffe cannot reproduce the max-pool layer with kernel_size = 2 and stride = 1, so a max_pool_1d function was written to replicate it.
63 | 


--------------------------------------------------------------------------------
/activation_kernels.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Company:	Synthesis
 3 |  * Author: 	Chen
 4 |  * Date:	2018/06/04
 5 |  */
 6 | #include "activations.h"
 7 | #include "cuda.h"
 8 | #include "blas.h"
 9 | 
10 | 
11 | 
12 | // Leaky hard-tanh: identity on [0, 1], slope 0.001 on either side of it.
13 | __device__ float lhtan_activate_kernel(float x)
14 | {
15 |     const float below = .001f*x, above = .001f*(x-1.f) + 1.f;
16 |     return (x < 0) ? below : ((x > 1) ? above : x);
17 | }
18 | 
19 | // Hard tanh: clamp x to the closed interval [-1, 1].
20 | __device__ float hardtan_activate_kernel(float x)
21 | {
22 |     const float capped = (x > 1) ? 1.f : x;
23 |     return (x < -1) ? -1.f : capped;
24 | }
25 | 
26 | // Element-wise activation functions; each maps one float to one float.
27 | __device__ float linear_activate_kernel(float x){return x;}
28 | __device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));}
29 | __device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;}
30 | __device__ float relu_activate_kernel(float x){return x*(x>0);}
31 | // ELU selects a branch instead of blending both: the former
32 | // (x >= 0)*x + (x < 0)*(expf(x)-1) evaluated 0*inf = NaN as soon as expf(x)
33 | // overflowed, i.e. for large positive inputs.
34 | __device__ float elu_activate_kernel(float x){return (x >= 0) ? x : expf(x)-1;}
35 | __device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;}
36 | __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;}
37 | __device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;}
38 | __device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);}
35 | // Piecewise-linear squashing: slope 1/8 on [-4, 4], slope 0.01 outside it.
36 | __device__ float plse_activate_kernel(float x)
37 | {
38 |     return (x < -4) ? .01f * (x + 4)
39 |          : (x > 4)  ? .01f * (x - 4) + 1
40 |          :            .125f*x + .5f;
41 | }
41 | // Staircase: flat where floor(x) is even, rising with slope 1 where it is odd.
42 | __device__ float stair_activate_kernel(float x)
43 | {
44 |     const int whole = floorf(x);
45 |     const float half_floor = floorf(x/2);
46 |     return (whole%2 == 0) ? half_floor : (x - whole) + half_floor;
47 | }
47 | 
48 | // Dispatch x to the activation selected by a; an unrecognised value yields 0.
49 | __device__ float activate_kernel(float x, ACTIVATION a)
50 | {
51 |     switch(a){
52 |         case LINEAR:   return linear_activate_kernel(x);
53 |         case LOGISTIC: return logistic_activate_kernel(x);
54 |         case LOGGY:    return loggy_activate_kernel(x);
55 |         case RELU:     return relu_activate_kernel(x);
56 |         case ELU:      return elu_activate_kernel(x);
57 |         case RELIE:    return relie_activate_kernel(x);
58 |         case RAMP:     return ramp_activate_kernel(x);
59 |         case LEAKY:    return leaky_activate_kernel(x);
60 |         case TANH:     return tanh_activate_kernel(x);
61 |         case PLSE:     return plse_activate_kernel(x);
62 |         case STAIR:    return stair_activate_kernel(x);
63 |         case HARDTAN:  return hardtan_activate_kernel(x);
64 |         case LHTAN:    return lhtan_activate_kernel(x);
65 |     }
66 |     return 0;
67 | }
80 | 
81 | // One thread per element: overwrite x[idx] with its activated value.
82 | __global__ void activate_array_kernel(float *x, int n, ACTIVATION a)
83 | {
84 |     const int idx = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
85 |     if(idx >= n) return;   // threads past the end of the array do nothing
86 |     x[idx] = activate_kernel(x[idx], a);
87 | }
86 | 
87 | // Host wrapper: launch activate_array_kernel over n elements and check the launch.
88 | void activate_array_gpu(float *x, int n, ACTIVATION a)
89 | {
90 |     const dim3 grid = cuda_gridsize(n);
91 |     activate_array_kernel<<<grid, BLOCK>>>(x, n, a);
92 |     check_error(cudaPeekAtLastError());
93 | }
92 | 


--------------------------------------------------------------------------------
/activations.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Company:	Synthesis
 3 |  * Author: 	Chen
 4 |  * Date:	2018/06/04
 5 |  */
 6 | 
 7 | #ifndef __ACTIVATIONS_H_
 8 | #define __ACTIVATIONS_H_
 9 | 
10 | typedef enum{  // selects which activation activate_array_gpu() applies
11 |     LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN
12 | } ACTIVATION;
13 | 
14 | void activate_array_gpu(float* x,int n,ACTIVATION a);  // applies activation a in place to the first n floats of x via a CUDA kernel
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/blas.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Company:	Synthesis
 3 |  * Author: 	Chen
 4 |  * Date:	2018/06/04
 5 |  */
 6 | 
 7 | #ifndef __BLAS_H_
 8 | #define __BLAS_H_
 9 | 
10 | void copy_gpu(int N,float* X,int INCX,float* Y,int INCY);  // Y[i*INCY] = X[i*INCX] for i in [0, N), done by a CUDA kernel
11 | 
12 | void fill_gpu(int N, float ALPHA, float * X, int INCX);  // X[i*INCX] = ALPHA for i in [0, N), done by a CUDA kernel
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/blas_kernels.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Company:	Synthesis
 3 |  * Author: 	Chen
 4 |  * Date:	2018/06/04
 5 |  */
 6 | 
 7 | 
 8 | #include <assert.h>
 9 | 
10 | #include "cuda.h"
11 | #include "blas.h"
12 | 
13 | // One thread per element: Y[idx*INCY + OFFY] = X[idx*INCX + OFFX].
14 | __global__ void copy_kernel(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY)
15 | {
16 |     const int idx = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
17 |     if(idx >= N) return;
18 |     Y[idx*INCY + OFFY] = X[idx*INCX + OFFX];
19 | }
18 | 
19 | // One thread per element: X[idx*INCX] = ALPHA.
20 | __global__ void fill_kernel(int N, float ALPHA, float *X, int INCX)
21 | {
22 |     const int idx = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
23 |     if(idx >= N) return;
24 |     X[idx*INCX] = ALPHA;
25 | }
24 | 
25 | // Strided copy of N floats from X (start OFFX, stride INCX) to Y (start OFFY, stride INCY).
26 | void copy_gpu_offset(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY)
27 | {
28 |     const dim3 grid = cuda_gridsize(N);
29 |     copy_kernel<<<grid,BLOCK>>>(N,X,OFFX,INCX,Y,OFFY,INCY);
30 |     check_error(cudaPeekAtLastError());
31 | }
30 | 
31 | // Strided copy with both start offsets fixed at zero.
32 | void copy_gpu(int N,float* X,int INCX,float* Y,int INCY)
33 | {
34 |     const int no_offset = 0;
35 |     copy_gpu_offset(N,X,no_offset,INCX,Y,no_offset,INCY);
36 | }
35 | 
36 | 
37 | // Set N strided elements of X to ALPHA on the GPU and check the launch.
38 | void fill_gpu(int N, float ALPHA, float * X, int INCX)
39 | {
40 |     const dim3 grid = cuda_gridsize(N);
41 |     fill_kernel<<<grid, BLOCK>>>(N, ALPHA, X, INCX);
42 |     check_error(cudaPeekAtLastError());
43 | }
42 | 


--------------------------------------------------------------------------------
/box.cpp:
--------------------------------------------------------------------------------
 1 | #include "box.h"
 2 | #include <stdio.h>
 3 | #include <math.h>
 4 | #include <stdlib.h>
 5 | 
 6 | // qsort comparator producing descending order: by prob[sort_class] when the
 7 | // second element's sort_class is non-negative, otherwise by objectness.
 8 | int nms_comparator(const void *pa, const void *pb)
 9 | {
10 |     const detection *lhs = (const detection *)pa;
11 |     const detection *rhs = (const detection *)pb;
12 |     const int cls = rhs->sort_class;
13 |     const float diff = (cls >= 0) ? lhs->prob[cls] - rhs->prob[cls]
14 |                                   : lhs->objectness - rhs->objectness;
15 |     if(diff < 0) return 1;
16 |     if(diff > 0) return -1;
17 |     return 0;
18 | }
20 | 
21 | // Signed overlap of two 1-D intervals given as (centre, width); a negative
22 | // result means the intervals are disjoint by that distance.
23 | float overlap(float x1, float w1, float x2, float w2)
24 | {
25 |     const float lo1 = x1 - w1/2, hi1 = x1 + w1/2;
26 |     const float lo2 = x2 - w2/2, hi2 = x2 + w2/2;
27 |     const float left  = (lo1 > lo2) ? lo1 : lo2;   // larger of the two left edges
28 |     const float right = (hi1 < hi2) ? hi1 : hi2;   // smaller of the two right edges
29 |     return right - left;
30 | }
31 | 
32 | // Intersection area of two centre/size boxes; 0 when they do not overlap.
33 | float box_intersection(box a, box b)
34 | {
35 |     const float w = overlap(a.x, a.w, b.x, b.w);
36 |     const float h = overlap(a.y, a.h, b.y, b.h);
37 |     return (w < 0 || h < 0) ? 0 : w*h;
38 | }
40 | 
41 | // Union area: both box areas minus the part counted twice.
42 | float box_union(box a, box b)
43 | {
44 |     const float shared = box_intersection(a, b);
45 |     return a.w*a.h + b.w*b.h - shared;
46 | }
47 | 
48 | // Intersection-over-union of two boxes.
49 | // Returns 0 when the union area is not positive (e.g. two zero-area boxes),
50 | // where the plain quotient would be NaN or meaningless. The intersection is
51 | // computed once and reused for the union.
52 | float box_iou(box a, box b)
53 | {
54 |     const float inter = box_intersection(a, b);
55 |     const float uni = a.w*a.h + b.w*b.h - inter;
56 |     if(uni <= 0) return 0;
57 |     return inter/uni;
58 | }
52 | 
53 | // Per-class non-maximum suppression over dets[0..total).
54 | // Detections with objectness == 0 are first moved to the tail and ignored.
55 | // Then, for each class, the rest are sorted by that class score (descending)
56 | // and any box whose IoU with a higher-ranked surviving box exceeds thresh has
57 | // its score for that class zeroed.
58 | void do_nms_sort(detection *dets, int total, int classes, float thresh)
59 | {
60 |     int last = total - 1;
61 |     int idx = 0;
62 |     while(idx <= last){
63 |         if(dets[idx].objectness == 0){
64 |             detection tmp = dets[idx];
65 |             dets[idx] = dets[last];
66 |             dets[last] = tmp;
67 |             --last;   // slot idx now holds the former tail; examine it next
68 |         } else {
69 |             ++idx;
70 |         }
71 |     }
72 |     total = last + 1;
73 | 
74 |     for(int cls = 0; cls < classes; ++cls){
75 |         for(int n = 0; n < total; ++n) dets[n].sort_class = cls;
76 |         qsort(dets, total, sizeof(detection), nms_comparator);
77 | 
78 |         for(int keep = 0; keep < total; ++keep){
79 |             if(dets[keep].prob[cls] == 0) continue;
80 |             const box a = dets[keep].bbox;
81 |             for(int other = keep+1; other < total; ++other){
82 |                 const box b = dets[other].bbox;
83 |                 if (box_iou(a, b) > thresh) dets[other].prob[cls] = 0;
84 |             }
85 |         }
86 |     }
87 | }
85 | 


--------------------------------------------------------------------------------
/box.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Company:	Synthesis
 3 |  * Author: 	Chen
 4 |  * Date:	2018/06/04
 5 |  */
 6 | 
 7 | #ifndef __BOX_H_
 8 | #define __BOX_H_
 9 | #include "yolo_layer.h"
10 | 
11 | 
12 | void do_nms_sort(detection *dets, int total, int classes, float thresh);  // per-class NMS: zeroes prob[k] of boxes whose IoU with a higher-scoring box exceeds thresh
13 | 
14 | 
15 | #endif
16 | 


--------------------------------------------------------------------------------
/cuda.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Company:	Synthesis
 3 |  * Author: 	Chen
 4 |  * Date:	2018/06/04
 5 |  */
 6 | #include "cuda.h"
 7 | #include "blas.h"
 8 | 
 9 | #include <assert.h>
10 | #include <stdlib.h>
11 | #include <time.h>
12 | #include <stdio.h>
13 | 
14 | void error(const char* s)
15 | {
16 |     perror(s);
17 |     assert(0);
18 |     exit(-1);
19 | }
20 | 
21 | // Abort with a diagnostic when a CUDA call failed.
22 | //   status: result of the call being checked.
23 | // The runtime's last recorded error (cudaGetLastError) is checked as well and
24 | // reported with a "Prev" tag; that report now prints the text for that error
25 | // code rather than for `status`.
26 | void check_error(cudaError_t status)
27 | {
28 |     //cudaDeviceSynchronize();
29 |     cudaError_t status2 = cudaGetLastError();
30 |     if (status != cudaSuccess)
31 |     {
32 |         const char *s = cudaGetErrorString(status);
33 |         char buffer[256];
34 |         printf("CUDA Error: %s\n", s);
35 |         assert(0);
36 |         snprintf(buffer, 256, "CUDA Error: %s", s);
37 |         error(buffer);
38 |     }
39 |     if (status2 != cudaSuccess)
40 |     {
41 |         const char *s = cudaGetErrorString(status2);
42 |         char buffer[256];
43 |         printf("CUDA Error Prev: %s\n", s);
44 |         assert(0);
45 |         snprintf(buffer, 256, "CUDA Error Prev: %s", s);
46 |         error(buffer);
47 |     }
48 | }
44 | 
45 | // Grid dimensions that provide at least n threads with BLOCK threads per block.
46 | // A 1-D grid is used while it fits in 65535 blocks; beyond that the blocks are
47 | // spread over a roughly square 2-D grid.
48 | // n == 0 yields a single block instead of underflowing n-1; kernels guard
49 | // with `if(i < n)`, so that block does no work.
50 | dim3 cuda_gridsize(size_t n){
51 |     if(n == 0) n = 1;
52 |     size_t k = (n-1) / BLOCK + 1;
53 |     size_t x = k;
54 |     size_t y = 1;
55 |     if(x > 65535){
56 |         x = (size_t)ceil(sqrt((double)k));
57 |         y = (n-1)/(x*BLOCK) + 1;
58 |     }
59 |     // Explicit casts: brace-initialising dim3 from size_t is a narrowing conversion.
60 |     dim3 d((unsigned int)x, (unsigned int)y, 1);
61 |     //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK);
62 |     return d;
63 | }
57 | 
58 | // Allocate n floats with cudaMalloc.
59 | //   x: optional host buffer of n floats; copied to the new allocation when
60 | //      non-NULL, otherwise the allocation is filled with zeros.
61 | // Returns the new pointer. Failures end the process through
62 | // check_error()/error(). The pointer starts out NULL and is validated before
63 | // its first use rather than after it.
64 | float* cuda_make_array(float* x,size_t n)
65 | {
66 |     float *x_gpu = NULL;
67 |     size_t size = sizeof(float)*n;
68 |     cudaError_t status = cudaMalloc((void **)&x_gpu, size);
69 |     check_error(status);
70 |     if(!x_gpu) error("Cuda malloc failed\n");
71 |     if(x){
72 |         status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
73 |         check_error(status);
74 |     } else {
75 |         fill_gpu(n, 0, x_gpu, 1);
76 |     }
77 |     return x_gpu;
78 | }
73 | 
74 | // Release memory obtained from cuda_make_array(); a failure is fatal.
75 | void cuda_free(float* x_gpu)
76 | {
77 |     check_error(cudaFree(x_gpu));
78 | }
79 | 
80 | // Copy n floats from host buffer x to device buffer x_gpu; a failure is fatal.
81 | void cuda_push_array(float *x_gpu,float* x,size_t n)
82 | {
83 |     const size_t bytes = n*sizeof(float);
84 |     check_error(cudaMemcpy(x_gpu,x,bytes,cudaMemcpyHostToDevice));
85 | }
86 | 
87 | 
88 | 
89 | // Copy n floats from device buffer x_gpu to host buffer x; a failure is fatal.
90 | void cuda_pull_array(float *x_gpu,float* x,size_t n)
91 | {
92 |     const size_t bytes = n*sizeof(float);
93 |     check_error(cudaMemcpy(x,x_gpu,bytes,cudaMemcpyDeviceToHost));
94 | }
95 | 


--------------------------------------------------------------------------------
/cuda.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Company:	Synthesis
 3 |  * Author: 	Chen
 4 |  * Date:	2018/06/04
 5 |  */
 6 | 
 7 | #ifndef __CUDA_H_
 8 | #define __CUDA_H_
 9 | #include "cuda_runtime.h"
10 | #include "curand.h"
11 | #include "cublas_v2.h"
12 | 
13 | #define BLOCK 512  // threads per block used for every kernel launch in this project
14 | 
15 | void check_error(cudaError_t status);  // prints a message and terminates when status (or the runtime's last error) is not cudaSuccess
16 | 
17 | dim3 cuda_gridsize(size_t n);  // grid dimensions giving at least n threads with BLOCK threads per block
18 | 
19 | float* cuda_make_array(float* x,size_t n);  // cudaMalloc n floats; copies x when non-NULL, zero-fills otherwise
20 | 
21 | void cuda_free(float* x_gpu);  // cudaFree wrapper with error checking
22 | 
23 | void cuda_push_array(float *x_gpu,float* x,size_t n);  // host x -> device x_gpu, n floats
24 | 
25 | void cuda_pull_array(float *x_gpu,float* x,size_t n);  // device x_gpu -> host x, n floats
26 | 
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/detectnet/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Sample command-line detector built on top of the sysDetectSpeed library.
2 | file(GLOB detectnetSources *.cpp)
3 | 
4 | # FindCUDA links with the keyword-less target_link_libraries() form; keep it keyword-less here too.
5 | cuda_add_executable(detectnet ${detectnetSources})
6 | target_link_libraries(detectnet sysDetectSpeed)
7 | 


--------------------------------------------------------------------------------
/detectnet/detectnet.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | /*
  3 |  * Company:	Synthesis
  4 |  * Author: 	Chen
  5 |  * Date:	2018/06/04	
  6 |  */
  7 | 
  8 | #include <stdio.h>
  9 | #include <signal.h>
 10 | #include <unistd.h>
 11 | #include <iostream>
 12 | 
 13 | #include "yolo_layer.h"
 14 | #include "image.h"
 15 | #include "cuda.h"
 16 | #include "max_pool_1d.h"
 17 | #include "blas.h"
 18 | 
 19 | #include <caffe/caffe.hpp>
 20 | #include <opencv2/opencv.hpp>
 21 | #include <string>
 22 | #include <vector>
 23 | #include <sys/time.h>
 24 | 
 25 | using namespace caffe;
 26 | using namespace cv;
 27 | 
 28 | const char* imgFilename = "/home/chen/projects/data/images/dog.jpg"; //modify your images file path
 29 | 
 30 | // Wall-clock time from gettimeofday(), expressed in milliseconds.
 31 | uint64_t current_timestamp() {
 32 |     struct timeval now;
 33 |     gettimeofday(&now, NULL);
 34 |     const long long millis = now.tv_sec*1000LL + now.tv_usec/1000;
 35 |     return millis;
 36 | }
 35 | 
 36 | // Set by sig_handler() once SIGINT has been seen.
 37 | bool signal_recieved = false;
 38 | 
 39 | // Signal handler: records SIGINT in signal_recieved; other signals are ignored.
 40 | void sig_handler(int signo)
 41 | {
 42 |     if( signo != SIGINT ) return;
 43 |     printf("received SIGINT\n");
 44 |     signal_recieved = true;
 45 | }
 44 | 
 45 | //! Note: a Net's blobs are the output data of each layer, i.e. the feature maps.
 46 | // Look up a blob by name in net->blob_names().
 47 | //   net:             network to search.
 48 | //   query_blob_name: blob name to find (now const; existing callers passing
 49 | //                    char* still compile).
 50 | // Returns the index of the first matching name. An unknown name is reported
 51 | // through LOG(FATAL).
 52 | unsigned int get_blob_index(boost::shared_ptr< Net<float> > & net, const char *query_blob_name)
 53 | {
 54 |     const std::string str_query(query_blob_name);
 55 |     vector< string > const & blob_names = net->blob_names();
 56 |     for( unsigned int i = 0; i != blob_names.size(); ++i )
 57 |     {
 58 |         if( str_query == blob_names[i] )
 59 |         {
 60 |             return i;
 61 |         }
 62 |     }
 63 |     LOG(FATAL) << "Unknown blob name: " << str_query;
 64 |     return 0;   // every path of a non-void function needs a return value
 65 | }
 60 | 
 61 | 
 62 | // Command-line detector.
 63 | //   detectnet 0 <model.prototxt> <weights.caffemodel> <image>            YOLOv3
 64 | //   detectnet 1 <model1.prototxt> <model2.prototxt> <weights> <image>    YOLOv3-tiny
 65 | // Runs one forward pass on the image, prints timings and boxes, writes the
 66 | // annotated image to caffe-yolov3_test.jpg and shows it in a window.
 67 | // Returns 0 on success, -1 on bad arguments or when the image cannot be read
 68 | // for display.
 69 | int main( int argc, char** argv )
 70 | {
 71 |     printf("detectnet\n  args (%i):  ", argc);
 72 | 
 73 |     // YOLOv3: one prototxt and one caffemodel.
 74 |     string model_file;
 75 |     string weights_file;
 76 | 
 77 |     // YOLOv3-tiny: two prototxts sharing one caffemodel.
 78 |     string model1_file;
 79 |     string model2_file;
 80 |     string tiny_weights_file;
 81 | 
 82 |     // argv[1] selects the pipeline and must agree with the argument count.
 83 |     // Checked explicitly: assert() vanishes when NDEBUG is defined, which let
 84 |     // a mismatched invocation continue with empty file names.
 85 |     const int requested_type = (argc > 1) ? atoi(argv[1]) : -1;
 86 | 
 87 |     if(5 == argc && 0 == requested_type){
 88 |         model_file = argv[2];
 89 |         weights_file = argv[3];
 90 |         imgFilename = argv[4];
 91 |     }
 92 |     else if(6 == argc && 1 == requested_type){
 93 |         model1_file = argv[2];
 94 |         model2_file = argv[3];
 95 |         tiny_weights_file = argv[4];
 96 |         imgFilename = argv[5];
 97 |     }
 98 |     else{
 99 |         printf("Input error: please input ./xx YOLOV3 [model_path] [weights_path]  [img_path] or ./xx YOLOV3_TINY [model1_path] [model2_path] [weights_path] [img_path]\n");
100 |         return -1;
101 |     }
102 | 
103 |     printf("\n\n");
104 | 
105 |     // Initialize the network.
106 |     Caffe::set_mode(Caffe::GPU);
107 | 
108 |     image im,sized;
109 |     vector<Blob<float>*> blobs;
110 | 
111 |     int nboxes = 0;
112 |     int size;
113 |     detection *dets = NULL;
114 | 
115 |     NetType type = (NetType)requested_type;
116 | 
117 |     if(0 == type){
118 |         /* Load the network. */
119 |         shared_ptr<Net<float> > net;
120 |         net.reset(new Net<float>(model_file, TEST));
121 |         net->CopyTrainedLayersFrom(weights_file);
122 | 
123 |         printf("net num_inputs is %d\n",net->num_inputs());
124 |         printf("net num_outputs is %d\n",net->num_outputs());
125 |         CHECK_EQ(net->num_inputs(), 1) << "Network should have exactly one input.";
126 |         CHECK_EQ(net->num_outputs(), 3) << "Network should have exactly three outputs.";
127 | 
128 |         Blob<float> *input_data_blobs = net->input_blobs()[0];
129 |         LOG(INFO) << "Input data layer channels is  " << input_data_blobs->channels();
130 |         LOG(INFO) << "Input data layer width is  " << input_data_blobs->width();
131 |         LOG(INFO) << "Input data layer height is  " << input_data_blobs->height();
132 | 
133 |         size = input_data_blobs->channels()*input_data_blobs->width()*input_data_blobs->height();
134 | 
135 |         // Load the image, letterbox it to the network input size and upload it.
136 |         uint64_t beginDataTime =  current_timestamp();
137 |         im = load_image_color((char*)imgFilename,0,0);
138 |         sized = letterbox_image(im,input_data_blobs->width(),input_data_blobs->height());
139 |         cuda_push_array(input_data_blobs->mutable_gpu_data(),sized.data,size);
140 |         uint64_t endDataTime =  current_timestamp();
141 | 
142 |         //YOLOV3 objection detection implementation with Caffe
143 |         net->Forward();
144 | 
145 |         // Output blobs are handed to get_detections() in the order 1, 2, 0.
146 |         Blob<float>* out_blob1 = net->output_blobs()[1];
147 |         blobs.push_back(out_blob1);
148 |         Blob<float>* out_blob2 = net->output_blobs()[2];
149 |         blobs.push_back(out_blob2);
150 |         Blob<float>* out_blob3 = net->output_blobs()[0];
151 |         blobs.push_back(out_blob3);
152 | 
153 |         dets = get_detections(blobs,im.w,im.h,input_data_blobs->width(),input_data_blobs->height(),&nboxes,type);
154 |         uint64_t endDetectTime = current_timestamp();
155 | 
156 |         // uint64_t is printed via unsigned long long / %llu (%zu is for size_t).
157 |         printf("object-detection:  processing data operation avergae time is  (%llu)ms\n", (unsigned long long)(endDataTime - beginDataTime));
158 |         printf("object-detection:  processing network yolov3 avergae time is (%llu)ms\n", (unsigned long long)(endDetectTime - endDataTime));
159 | 
160 |     }
161 | 
162 |     if(1 == type){
163 | 
164 |         /* Load the network. */
165 |         shared_ptr<Net<float> > net1,net2;
166 |         net1.reset(new Net<float>(model1_file, TEST));
167 |         net2.reset(new Net<float>(model2_file, TEST));
168 |         net1->CopyTrainedLayersFrom(tiny_weights_file);
169 |         net2->CopyTrainedLayersFrom(tiny_weights_file);
170 | 
171 |         printf("net1 num_inputs is %d\n",net1->num_inputs());
172 |         printf("net1 num_outputs is %d\n",net1->num_outputs());
173 |         printf("net2 num_inputs is %d\n",net2->num_inputs());
174 |         printf("net2 num_outputs is %d\n",net2->num_outputs());
175 | 
176 |         // The messages now state the count that is actually being checked.
177 |         CHECK_EQ(net1->num_inputs(), 1) << "Network 1 should have exactly one input.";
178 |         CHECK_EQ(net1->num_outputs(), 1) << "Network 1 should have exactly one output.";
179 | 
180 |         CHECK_EQ(net2->num_inputs(), 2) << "Network 2 should have exactly two inputs.";
181 |         CHECK_EQ(net2->num_outputs(), 2) << "Network 2 should have exactly two outputs.";
182 | 
183 |         Blob<float> *net1_input1_data_blobs = net1->input_blobs()[0];
184 |         Blob<float> *net2_input1_data_blobs = net2->input_blobs()[0];
185 |         Blob<float> *net2_input2_data_blobs = net2->input_blobs()[1];
186 |         LOG(INFO) << "Input1 data layer channels is  " << net1_input1_data_blobs->channels();
187 |         LOG(INFO) << "Input1 data layer width is  " << net1_input1_data_blobs->width();
188 |         LOG(INFO) << "Input1 data layer height is  " << net1_input1_data_blobs->height();
189 | 
190 |         LOG(INFO) << "Input2 data1 layer channels is  " << net2_input1_data_blobs->channels();
191 |         LOG(INFO) << "Input2 data1 layer width is  " << net2_input1_data_blobs->width();
192 |         LOG(INFO) << "Input2 data1 layer height is  " << net2_input1_data_blobs->height();
193 | 
194 |         LOG(INFO) << "Input2 data2 layer channels is  " << net2_input2_data_blobs->channels();
195 |         LOG(INFO) << "Input2 data2 layer width is  " << net2_input2_data_blobs->width();
196 |         LOG(INFO) << "Input2 data2 layer height is  " << net2_input2_data_blobs->height();
197 | 
198 |         size = net1_input1_data_blobs->channels()*net1_input1_data_blobs->width()*net1_input1_data_blobs->height();
199 | 
200 |         // Load the image, letterbox it to the network input size and upload it.
201 |         printf("start forward yolov3-tiny!\n");
202 |         uint64_t beginDataTime =  current_timestamp();
203 |         im = load_image_color((char*)imgFilename,0,0);
204 |         sized = letterbox_image(im,net1_input1_data_blobs->width(),net1_input1_data_blobs->height());
205 |         cuda_push_array(net1_input1_data_blobs->mutable_gpu_data(),sized.data,size);
206 |         uint64_t endDataTime =  current_timestamp();
207 | 
208 |         net1->Forward();
209 | 
210 |         //temp output
211 |         Blob<float>* out1_blob1 = net1->output_blobs()[0];
212 |         LOG(INFO) << "temp output data layer channels is  " << out1_blob1->channels();
213 |         LOG(INFO) << "temp outputdata layer width is  " << out1_blob1->width();
214 |         LOG(INFO) << "temp output data layer height is  " << out1_blob1->height();
215 | 
216 |         // A mutable array instead of binding a string literal to char*.
217 |         char query_blob_name[] = "layer9-conv";
218 |         unsigned int blob_id = get_blob_index(net1, query_blob_name);
219 |         boost::shared_ptr<Blob<float> > out1_blob2 = net1->blobs()[blob_id];
220 | 
221 |         // First input of net2: net1's output passed through the custom
222 |         // max pool with kernel size 2 and stride 1.
223 |         int kernel_size = 2;
224 |         int stride = 1;
225 |         int pad = kernel_size - stride;
226 |         max_pool_1d_gpu(out1_blob1->mutable_gpu_data(),1,out1_blob1->channels(),out1_blob1->height(),out1_blob1->width(),kernel_size,stride,pad,net2_input1_data_blobs->mutable_gpu_data());
227 | 
228 |         // Second input of net2: a copy of net1's "layer9-conv" blob.
229 |         copy_gpu(out1_blob2->count(),(float*)out1_blob2->mutable_gpu_data(),1,net2_input2_data_blobs->mutable_gpu_data(),1);
230 | 
231 |         net2->Forward();
232 | 
233 |         Blob<float>* out2_blob1 = net2->output_blobs()[0];
234 |         blobs.push_back(out2_blob1);
235 |         Blob<float>* out2_blob2 = net2->output_blobs()[1];
236 |         blobs.push_back(out2_blob2);
237 |         dets = get_detections(blobs,im.w,im.h,net1_input1_data_blobs->width(),net1_input1_data_blobs->height(),&nboxes,type);
238 | 
239 |         uint64_t endDetectTime = current_timestamp();
240 | 
241 |         printf("object-detection:  processing data operation avergae time is  (%llu)ms\n", (unsigned long long)(endDataTime - beginDataTime));
242 |         printf("object-detection:  processing network yolov3 tiny avergae time is (%llu)ms\n", (unsigned long long)(endDetectTime - endDataTime));
243 |     }
244 | 
245 |     //show detection results
246 |     Mat img = imread(imgFilename);
247 |     if(img.empty()){
248 |         // Drawing on or showing an empty Mat would fail; release and stop.
249 |         fprintf(stderr, "Cannot read image \"%s\" for display\n", imgFilename);
250 |         free_detections(dets,nboxes);
251 |         free_image(im);
252 |         free_image(sized);
253 |         return -1;
254 |     }
255 | 
256 |     // NOTE(review): only the first two class scores of each detection are
257 |     // inspected here; confirm this matches the class count of the model in use.
258 |     int i,j;
259 |     for(i=0;i< nboxes;++i){
260 |         int cls = -1;
261 |         for(j=0;j<2;++j){
262 |             if(dets[i].prob[j] > 0.5){
263 |                 if(cls < 0){
264 |                     cls = j;
265 |                 }
266 |                 // Print the class this score belongs to (j), not the first hit.
267 |                 printf("%d: %.0f%%\n",j,dets[i].prob[j]*100);
268 |             }
269 |         }
270 |         if(cls >= 0){
271 |             box b = dets[i].bbox;
272 |             printf("x = %f,y =  %f,w = %f,h =  %f\n",b.x,b.y,b.w,b.h);
273 | 
274 |             // Box centre/size are scaled by the image size to get pixel corners.
275 |             int left  = (b.x-b.w/2.)*im.w;
276 |             int right = (b.x+b.w/2.)*im.w;
277 |             int top   = (b.y-b.h/2.)*im.h;
278 |             int bot   = (b.y+b.h/2.)*im.h;
279 |             rectangle(img,Point(left,top),Point(right,bot),Scalar(0,0,255),3,8,0);
280 |             printf("left = %d,right =  %d,top = %d,bot =  %d\n",left,right,top,bot);
281 |         }
282 |     }
283 | 
284 |     imwrite("caffe-yolov3_test.jpg", img);
285 |     namedWindow("show",CV_WINDOW_AUTOSIZE);
286 |     imshow("show",img);
287 |     waitKey(0);
288 | 
289 |     free_detections(dets,nboxes);
290 |     free_image(im);
291 |     free_image(sized);
292 | 
293 |     printf("done.\n");
294 |     return 0;
295 | }
270 | 
271 | 


--------------------------------------------------------------------------------
/image.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "image.h"
  3 | 
  4 | #include <opencv2/opencv.hpp>
  5 | 
  6 | using namespace cv;
  7 | 
  8 | // Swap the first and third channel planes of a planar image in place
  9 | // (RGB <-> BGR).
 10 | // Images with fewer than three channels are left untouched: they have no
 11 | // third plane, so the swap would run past the end of im.data.
 12 | void rgbgr_image(image im)
 13 | {
 14 |     if(im.c < 3) return;
 15 |     const int plane = im.w*im.h;
 16 |     for(int i = 0; i < plane; ++i){
 17 |         float swap = im.data[i];
 18 |         im.data[i] = im.data[i+plane*2];
 19 |         im.data[i+plane*2] = swap;
 20 |     }
 21 | }
 17 | 
 18 | // Convert an interleaved 8-bit IplImage into the planar float layout of
 19 | // `image`, scaling each sample by 1/255. im.data must already hold
 20 | // width*height*nChannels floats.
 21 | void ipl_into_image(IplImage* src, image im)
 22 | {
 23 |     const unsigned char *pixels = (unsigned char *)src->imageData;
 24 |     const int rows = src->height;
 25 |     const int cols = src->width;
 26 |     const int chans = src->nChannels;
 27 |     const int stride = src->widthStep;   // step between source rows, as reported by OpenCV
 28 | 
 29 |     for(int ch = 0; ch < chans; ++ch){
 30 |         float *plane = im.data + ch*cols*rows;
 31 |         for(int r = 0; r < rows; ++r){
 32 |             for(int col = 0; col < cols; ++col){
 33 |                 plane[r*cols + col] = pixels[r*stride + col*chans + ch]/255.;
 34 |             }
 35 |         }
 36 |     }
 37 | }
 35 | 
 36 | image make_empty_image(int w, int h, int c)
 37 | {
 38 |     image out;
 39 |     out.data = 0;
 40 |     out.h = h;
 41 |     out.w = w;
 42 |     out.c = c;
 43 |     return out;
 44 | }
 45 | 
 46 | image make_image(int w, int h, int c)
 47 | {
 48 |     image out = make_empty_image(w,h,c);
 49 |     out.data = (float*)calloc(h*w*c, sizeof(float));
 50 |     return out;
 51 | }
 52 | 
 53 | image ipl_to_image(IplImage* src)
 54 | {
 55 |     int h = src->height;
 56 |     int w = src->width;
 57 |     int c = src->nChannels;
 58 |     image out = make_image(w, h, c);
 59 |     ipl_into_image(src, out);
 60 |     return out;
 61 | }
 62 | 
 63 | 
 64 | 
 65 | 
 66 | 
 67 | image load_image_cv(char *filename, int channels)
 68 | {
 69 |     IplImage* src = 0;
 70 |     int flag = -1;
 71 |     if (channels == 0) flag = -1;
 72 |     else if (channels == 1) flag = 0;
 73 |     else if (channels == 3) flag = 1;
 74 |     else {
 75 |         fprintf(stderr, "OpenCV can't force load with %d channels\n", channels);
 76 |     }
 77 | 
 78 |     if( (src = cvLoadImage(filename, flag)) == 0 )
 79 |     {
 80 |         fprintf(stderr, "Cannot load image \"%s\"\n", filename);
 81 |         char buff[256];
 82 |         sprintf(buff, "echo %s >> bad.list", filename);
 83 |         system(buff);
 84 |         return make_image(10,10,3);
 85 |         //exit(0);
 86 |     }
 87 |     image out = ipl_to_image(src);
 88 |     cvReleaseImage(&src);
 89 |     rgbgr_image(out);
 90 |     return out;
 91 | }
 92 | 
 93 | void free_image(image m)
 94 | {
 95 |     if(m.data){
 96 |         free(m.data);
 97 |     }
 98 | }
 99 | 
100 | image resize_image(image im, int w, int h)
101 | {
102 |     image resized = make_image(w, h, im.c);
103 |     image part = make_image(w, im.h, im.c);
104 |     int r, c, k;
105 |     float w_scale = (float)(im.w - 1) / (w - 1);
106 |     float h_scale = (float)(im.h - 1) / (h - 1);
107 |     for(k = 0; k < im.c; ++k){
108 |         for(r = 0; r < im.h; ++r){
109 |             for(c = 0; c < w; ++c){
110 |                 float val = 0;
111 |                 if(c == w-1 || im.w == 1){
112 |                     val = get_pixel(im, im.w-1, r, k);
113 |                 } else {
114 |                     float sx = c*w_scale;
115 |                     int ix = (int) sx;
116 |                     float dx = sx - ix;
117 |                     val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
118 |                 }
119 |                 set_pixel(part, c, r, k, val);
120 |             }
121 |         }
122 |     }
123 |     for(k = 0; k < im.c; ++k){
124 |         for(r = 0; r < h; ++r){
125 |             float sy = r*h_scale;
126 |             int iy = (int) sy;
127 |             float dy = sy - iy;
128 |             for(c = 0; c < w; ++c){
129 |                 float val = (1-dy) * get_pixel(part, c, iy, k);
130 |                 set_pixel(resized, c, r, k, val);
131 |             }
132 |             if(r == h-1 || im.h == 1) continue;
133 |             for(c = 0; c < w; ++c){
134 |                 float val = dy * get_pixel(part, c, iy+1, k);
135 |                 add_pixel(resized, c, r, k, val);
136 |             }
137 |         }
138 |     }
139 | 
140 |     free_image(part);
141 |     return resized;
142 | }
143 | 
144 | image load_image(char* filename,int w,int h,int c)
145 | {
146 |     image out = load_image_cv(filename,c);
147 | 
148 |     if((h && w) && (h != out.h || w != out.w))
149 |     {
150 |         image resized = resize_image(out,w,h);
151 |         free_image(out);
152 |         out = resized;
153 |     }
154 |     return out;
155 | }
156 | 
157 | image load_image_color(char* filename,int w,int h)
158 | {
159 |     return load_image(filename,w,h,3);
160 | }
161 | 
162 | void fill_image(image m, float s)
163 | {
164 |     int i;
165 |     for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s;
166 | }
167 | 
168 | static float get_pixel(image m, int x, int y, int c)
169 | {
170 |     assert(x < m.w && y < m.h && c < m.c);
171 |     return m.data[c*m.h*m.w + y*m.w + x];
172 | }
173 | 
174 | static void set_pixel(image m, int x, int y, int c, float val)
175 | {
176 |     if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return;
177 |     assert(x < m.w && y < m.h && c < m.c);
178 |     m.data[c*m.h*m.w + y*m.w + x] = val;
179 | }
180 | 
181 | static void add_pixel(image m, int x, int y, int c, float val)
182 | {
183 |     assert(x < m.w && y < m.h && c < m.c);
184 |     m.data[c*m.h*m.w + y*m.w + x] += val;
185 | }
186 | 
187 | void embed_image(image source, image dest, int dx, int dy)
188 | {
189 |     int x,y,k;
190 |     for(k = 0; k < source.c; ++k){
191 |         for(y = 0; y < source.h; ++y){
192 |             for(x = 0; x < source.w; ++x){
193 |                 float val = get_pixel(source, x,y,k);
194 |                 set_pixel(dest, dx+x, dy+y, k, val);
195 |             }
196 |         }
197 |     }
198 | }
199 | 
200 | 
201 | image letterbox_image(image im, int w, int h)
202 | {
203 |     int new_w = im.w;
204 |     int new_h = im.h;
205 |     if (((float)w/im.w) < ((float)h/im.h)) {
206 |         new_w = w;
207 |         new_h = (im.h * w)/im.w;
208 |     } else {
209 |         new_h = h;
210 |         new_w = (im.w * h)/im.h;
211 |     }
212 |     image resized = resize_image(im, new_w, new_h);
213 |     image boxed = make_image(w, h, im.c);
214 |     fill_image(boxed, .5);
215 |     //int i;
216 |     //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0;
217 |     embed_image(resized, boxed, (w-new_w)/2, (h-new_h)/2);
218 |     free_image(resized);
219 |     return boxed;
220 | }
221 | 


--------------------------------------------------------------------------------
/image.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Company:	Synthesis
 3 |  * Author: 	Chen
 4 |  * Date:	2018/06/07
 5 |  */
 6 | #ifndef __IMAGE_H_
 7 | #define __IMAGE_H_
 8 | 
 9 | typedef struct  // planar float image passed by value; copies share the same data buffer
10 | {
11 |     int w;  // width in pixels (columns)
12 |     int h;  // height in pixels (rows)
13 |     int c;  // number of channel planes
14 |     float *data;  // w*h*c samples, indexed as data[ch*h*w + y*w + x]; released with free_image()
15 | }image;
16 | 
17 | image load_image_color(char* filename,int w,int h);
18 | 
19 | void free_image(image m);
20 | 
21 | image letterbox_image(image im, int w, int h);
22 | 
23 | static float get_pixel(image m, int x, int y, int c);
24 | 
25 | static void set_pixel(image m, int x, int y, int c, float val);
26 | 
27 | static void add_pixel(image m, int x, int y, int c, float val);
28 | 
29 | #endif
30 | 


--------------------------------------------------------------------------------
/make_method_by_laymu.txt:
--------------------------------------------------------------------------------
 1 | 1 
 2 | Set the -gencode flags in CMakeLists.txt to match your GPU. My GPU is an RTX 2080 Ti, so I added the following:
 3 | -gencode arch=compute_75,code=[sm_75,compute_75]
 4 | 
 5 | 
 6 | 2
 7 | Add the following include directories (adjust the paths to your own installation):
 8 | /home/hs/software/caffe/include 
 9 | /home/hs/software/caffe/.build_release/src
10 | /usr/include/openblas/
11 | 
12 | 
13 | 3
14 | Link against the following libraries (adjust the paths to your own installation):
15 | /home/hs/software/caffe/.build_release/lib/libcaffe.so
16 | /usr/lib64/libglog.so
17 | /usr/lib64/libgflags.so
18 | /usr/lib64/libgflags.so.2.1
19 | /usr/lib64/boost/lib/libboost_system.so
20 | /root/anaconda3/envs/venv/lib/libstdc++.so.6
21 | /usr/lib64/libtiff.so
22 | 
23 | 4
24 | cmake .. -DCUDA_PROPAGATE_HOST_FLAGS=OFF
25 | 
26 | 5
27 | make
28 | 
29 | 6
30 | modify the following places of yolo_layer.cpp:
31 | 
32 | height
33 | width
34 | classes
35 | anchors
36 | 
37 | 7
38 | Change the loop bound on line 237 of detectnet.cpp to the total number of classes in your dataset.
39 | My dataset has 2 classes, so the line becomes:
40 | 
41 | for(j=0;j<2;++j){
42 | 
43 | 8
44 | ./x86_64/bin/detectnet 0 ../../pytorch-caffe-darknet-convert/yolov3.prototxt ../../pytorch-caffe-darknet-convert/yolov3.caffemodel pic_path
45 | 


--------------------------------------------------------------------------------
/max_pool_1d.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Company:	Synthesis
 3 |  * Author: 	Chen
 4 |  * Date:	2019/01/24	
 5 |  */
 6 | #include "max_pool_1d.h"
 7 | #include "cuda.h"
 8 | 
 9 | 
// Max-pooling forward pass: each thread with id < n produces one output
// element.
//
//   n               number of output elements to compute
//   in_h/in_w/in_c  input height, width and channel count
//   stride, size    pooling stride and (square) window size
//   pad             total padding; the window origin is shifted by -pad/2
//   input/output    device buffers, indexed as x + w*(y + h*(ch + c*batch))
//
// Window positions that fall outside the input are skipped, so an output
// whose window lies entirely outside stays at -INFINITY.
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output)
{
    const int out_h = (in_h + pad - size)/stride + 1;
    const int out_w = (in_w + pad - size)/stride + 1;

    int tid = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(tid >= n) return;

    // Unflatten the thread id into (batch, channel, row, col) of the output.
    const int col = tid % out_w;
    tid /= out_w;
    const int row = tid % out_h;
    tid /= out_h;
    const int chan = tid % in_c;
    tid /= in_c;
    const int batch = tid;

    const int row_origin = -pad/2 + row*stride;
    const int col_origin = -pad/2 + col*stride;

    float best = -INFINITY;
    for(int dy = 0; dy < size; ++dy){
        const int src_row = row_origin + dy;
        for(int dx = 0; dx < size; ++dx){
            const int src_col = col_origin + dx;
            const bool outside = src_row < 0 || src_row >= in_h ||
                                 src_col < 0 || src_col >= in_w;
            if(outside) continue;
            const float sample = input[src_col + in_w*(src_row + in_h*(chan + batch*in_c))];
            if(sample > best) best = sample;
        }
    }
    output[col + out_w*(row + out_h*(chan + in_c*batch))] = best;
}
49 | 
// Launch the max-pooling kernel on device buffers.
//
//   input_data_gpu   device input, batch_size x c x h x w floats
//   batch_size,c,h,w input dimensions
//   size, stride     pooling window size and stride
//   pad              total padding (the kernel shifts the window by -pad/2)
//   output_data_gpu  device output; must hold batch_size x c x out_h x out_w
//                    floats, where out = (in + pad - size)/stride + 1
//
// The thread count is derived from the OUTPUT dimensions. The previous
// version used the input h*w, which only matches when stride == 1 and
// pad == size - 1; for any other configuration the surplus threads decoded
// batch indices past the end and read/wrote out of bounds.
void max_pool_1d_gpu(float* input_data_gpu,int batch_size,int c,int h,int w,int size,int stride,int pad,float* output_data_gpu)
{
    const int out_h = (h + pad - size)/stride + 1;
    const int out_w = (w + pad - size)/stride + 1;
    size_t n = (size_t)out_h*out_w*c*batch_size;

    forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, h, w, c, stride, size, pad, input_data_gpu, output_data_gpu);

    check_error(cudaPeekAtLastError());
}
58 | 


--------------------------------------------------------------------------------
/max_pool_1d.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Company:	Synthesis
 3 |  * Author: 	Chen
 4 |  * Date:	2019/01/24	
 5 |  */
 6 | 
 7 | #ifndef __MAX_POOL_1D_H__
 8 | #define __MAX_POOL_1D_H__
 9 | 
10 | void max_pool_1d_gpu(float* input_data_gpu,int batch_size,int c,int h,int w,int size,int stride,int pad,float* output);
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/model_convert/Readme:
--------------------------------------------------------------------------------
 1 | Steps:
 2 | 
 3 |   First, install this repo: https://github.com/marvis/pytorch-caffe-darknet-convert
 4 | 
 5 |   Note: this repo requires both PyTorch and Caffe to be installed.
 6 | 
 7 |   Second, add the upsample layer to your Caffe build; see https://github.com/BVLC/caffe/pull/6384/commits/4d2400e7ae692b25f034f02ff8e8cd3621725f5c.
 8 | 
 9 |   Finally, download yolov3.weights and run yolov3_darknet2caffe.py:
10 | 
11 |   	1) download yolov3.weights 
12 | 
13 | 	$ wget https://pjreddie.com/media/files/yolov3.weights
14 | 
15 | 	2) copy yolov3_darknet2caffe.py into the converter repo folder (/home/xx/pytorch-caffe-darknet-convert/) and run it from there.
16 | 
17 | 	$ python yolov3_darknet2caffe.py yolov3.cfg yolov3.weights yolov3.prototxt yolov3.caffemodel 
18 | 


--------------------------------------------------------------------------------
/model_convert/yolov3_darknet2caffe.py:
--------------------------------------------------------------------------------
  1 | # The caffe module needs to be on the Python path;
  2 | #  we'll add it here explicitly.
  3 | caffe_root='/home/chen/caffe/'
  4 | #os.chdir(caffe_root)
  5 | import sys
  6 | sys.path.insert(0,caffe_root+'python')
  7 | import caffe
  8 | import numpy as np
  9 | from collections import OrderedDict
 10 | from cfg import *
 11 | from prototxt import *
 12 | 
 13 | def darknet2caffe(cfgfile, weightfile, protofile, caffemodel):
 14 |     net_info = cfg2prototxt(cfgfile)
 15 |     save_prototxt(net_info , protofile, region=False)
 16 | 
 17 |     net = caffe.Net(protofile, caffe.TEST)
 18 |     params = net.params
 19 | 
 20 |     blocks = parse_cfg(cfgfile)
 21 | 
 22 |     #Open the weights file
 23 |     fp = open(weightfile, "rb")
 24 | 
 25 |     #The first 4 values are header information 
 26 |     # 1. Major version number
 27 |     # 2. Minor Version Number
 28 |     # 3. Subversion number 
 29 |     # 4. IMages seen 
 30 |     header = np.fromfile(fp, dtype = np.int32, count = 5)
 31 | 
 32 |     #fp = open(weightfile, 'rb')
 33 |     #header = np.fromfile(fp, count=5, dtype=np.int32)
 34 |     #header = np.ndarray(shape=(5,),dtype='int32',buffer=fp.read(20))
 35 |     #print(header)
 36 |     buf = np.fromfile(fp, dtype = np.float32)
 37 |     #print(buf)
 38 |     fp.close()
 39 | 
 40 |     layers = []
 41 |     layer_id = 1
 42 |     start = 0
 43 |     for block in blocks:
 44 |         if start >= buf.size:
 45 |             break
 46 | 
 47 |         if block['type'] == 'net':
 48 |             continue
 49 |         elif block['type'] == 'convolutional':
 50 |             batch_normalize = int(block['batch_normalize'])
 51 |             if block.has_key('name'):
 52 |                 conv_layer_name = block['name']
 53 |                 bn_layer_name = '%s-bn' % block['name']
 54 |                 scale_layer_name = '%s-scale' % block['name']
 55 |             else:
 56 |                 conv_layer_name = 'layer%d-conv' % layer_id
 57 |                 bn_layer_name = 'layer%d-bn' % layer_id
 58 |                 scale_layer_name = 'layer%d-scale' % layer_id
 59 | 
 60 |             if batch_normalize:
 61 |                 start = load_conv_bn2caffe(buf, start, params[conv_layer_name], params[bn_layer_name], params[scale_layer_name])
 62 |             else:
 63 |                 start = load_conv2caffe(buf, start, params[conv_layer_name])
 64 |             layer_id = layer_id+1
 65 |         elif block['type'] == 'depthwise_convolutional':
 66 |             batch_normalize = int(block['batch_normalize'])
 67 |             if block.has_key('name'):
 68 |                 conv_layer_name = block['name']
 69 |                 bn_layer_name = '%s-bn' % block['name']
 70 |                 scale_layer_name = '%s-scale' % block['name']
 71 |             else:
 72 |                 conv_layer_name = 'layer%d-dwconv' % layer_id
 73 |                 bn_layer_name = 'layer%d-bn' % layer_id
 74 |                 scale_layer_name = 'layer%d-scale' % layer_id
 75 | 
 76 |             if batch_normalize:
 77 |                 start = load_conv_bn2caffe(buf, start, params[conv_layer_name], params[bn_layer_name], params[scale_layer_name])
 78 |             else:
 79 |                 start = load_conv2caffe(buf, start, params[conv_layer_name])
 80 |             layer_id = layer_id+1
 81 |         elif block['type'] == 'connected':
 82 |             if block.has_key('name'):
 83 |                 fc_layer_name = block['name']
 84 |             else:
 85 |                 fc_layer_name = 'layer%d-fc' % layer_id
 86 |             start = load_fc2caffe(buf, start, params[fc_layer_name])
 87 |             layer_id = layer_id+1
 88 |         elif block['type'] == 'maxpool':
 89 |             layer_id = layer_id+1
 90 |         elif block['type'] == 'avgpool':
 91 |             layer_id = layer_id+1
 92 |         elif block['type'] == 'region':
 93 |             layer_id = layer_id + 1
 94 |         elif block['type'] == 'route':
 95 |             layer_id = layer_id + 1
 96 |         elif block['type'] == 'shortcut':
 97 |             layer_id = layer_id + 1
 98 |         elif block['type'] == 'softmax':
 99 |             layer_id = layer_id + 1
100 |         elif block['type'] == 'cost':
101 |             layer_id = layer_id + 1
102 | 	elif block['type'] == 'upsample':
103 | 	    layer_id = layer_id + 1
104 |         else:
105 |             print('unknow layer type %s ' % block['type'])
106 |             layer_id = layer_id + 1
107 |     print('save prototxt to %s' % protofile)
108 |     save_prototxt(net_info , protofile, region=True)
109 |     print('save caffemodel to %s' % caffemodel)
110 |     net.save(caffemodel)
111 | 
def load_conv2caffe(buf, start, conv_param):
    """Fill a convolution layer without batch norm from the flat weight buffer.

    The bias (conv_param[1]) is read first, then the kernel weights
    (conv_param[0]). Returns the buffer offset just past the consumed values.
    """
    weight_blob, bias_blob = conv_param[0], conv_param[1]
    for blob in (bias_blob, weight_blob):
        count = blob.data.size
        blob.data[...] = np.reshape(buf[start:start + count], blob.data.shape)
        start = start + count
    return start
118 | 
def load_fc2caffe(buf, start, fc_param):
    """Fill a fully connected layer from the flat weight buffer.

    The bias (fc_param[1]) is read first, then the weight matrix
    (fc_param[0]). Returns the buffer offset just past the consumed values.
    """
    weight_blob, bias_blob = fc_param[0], fc_param[1]

    bias_end = start + bias_blob.data.size
    bias_blob.data[...] = np.reshape(buf[start:bias_end], bias_blob.data.shape)

    weight_end = bias_end + weight_blob.data.size
    weight_blob.data[...] = np.reshape(buf[bias_end:weight_end], weight_blob.data.shape)

    return weight_end
125 | 
126 | 
def load_conv_bn2caffe(buf, start, conv_param, bn_param, scale_param):
    """Fill a conv + BatchNorm + Scale layer triple from the flat weight buffer.

    Values are consumed in this order: scale bias, scale weight, running
    mean, running variance, convolution weights. bn_param[2] is set to 1.0
    and consumes nothing from the buffer. Returns the buffer offset just
    past the consumed values.
    """
    conv_blob = conv_param[0]
    mean_blob, var_blob = bn_param[0], bn_param[1]
    gamma_blob, beta_blob = scale_param[0], scale_param[1]

    def consume(blob, offset):
        # Copy blob.data.size values starting at `offset` into the blob and
        # return the advanced offset.
        stop = offset + blob.data.size
        blob.data[...] = np.reshape(buf[offset:stop], blob.data.shape)
        return stop

    for blob in (beta_blob, gamma_blob, mean_blob, var_blob):
        start = consume(blob, start)

    bn_param[2].data[...] = np.array([1.0])

    return consume(conv_blob, start)
154 | 
155 | def cfg2prototxt(cfgfile):
156 |     blocks = parse_cfg(cfgfile)
157 | 
158 |     prev_filters = 3
159 |     layers = []
160 |     props = OrderedDict() 
161 |     bottom = 'data'
162 |     layer_id = 1
163 |     topnames = dict()
164 |     for block in blocks:
165 |         if block['type'] == 'net':
166 |             props['name'] = 'Darkent2Caffe'
167 |             props['input'] = 'data'
168 |             props['input_dim'] = ['1']
169 |             props['input_dim'].append(block['channels'])
170 |             props['input_dim'].append(block['height'])
171 |             props['input_dim'].append(block['width'])
172 |             continue
173 |         elif block['type'] == 'convolutional':
174 |             conv_layer = OrderedDict()
175 |             conv_layer['bottom'] = bottom
176 |             if block.has_key('name'):
177 |                 conv_layer['top'] = block['name']
178 |                 conv_layer['name'] = block['name']
179 |             else:
180 |                 conv_layer['top'] = 'layer%d-conv' % layer_id
181 |                 conv_layer['name'] = 'layer%d-conv' % layer_id
182 |             conv_layer['type'] = 'Convolution'
183 |             convolution_param = OrderedDict()
184 |             convolution_param['num_output'] = block['filters']
185 |             prev_filters = block['filters']
186 |             convolution_param['kernel_size'] = block['size']
187 |             if block['pad'] == '1':
188 |                 convolution_param['pad'] = str(int(convolution_param['kernel_size'])/2)
189 |             convolution_param['stride'] = block['stride']
190 |             if block['batch_normalize'] == '1':
191 |                 convolution_param['bias_term'] = 'false'
192 |             else:
193 |                 convolution_param['bias_term'] = 'true'
194 |             conv_layer['convolution_param'] = convolution_param
195 |             layers.append(conv_layer)
196 |             bottom = conv_layer['top']
197 | 
198 |             if block['batch_normalize'] == '1':
199 |                 bn_layer = OrderedDict()
200 |                 bn_layer['bottom'] = bottom
201 |                 bn_layer['top'] = bottom
202 |                 if block.has_key('name'):
203 |                     bn_layer['name'] = '%s-bn' % block['name']
204 |                 else:
205 |                     bn_layer['name'] = 'layer%d-bn' % layer_id
206 |                 bn_layer['type'] = 'BatchNorm'
207 |                 batch_norm_param = OrderedDict()
208 |                 batch_norm_param['use_global_stats'] = 'true'
209 |                 bn_layer['batch_norm_param'] = batch_norm_param
210 |                 layers.append(bn_layer)
211 | 
212 |                 scale_layer = OrderedDict()
213 |                 scale_layer['bottom'] = bottom
214 |                 scale_layer['top'] = bottom
215 |                 if block.has_key('name'):
216 |                     scale_layer['name'] = '%s-scale' % block['name']
217 |                 else:
218 |                     scale_layer['name'] = 'layer%d-scale' % layer_id
219 |                 scale_layer['type'] = 'Scale'
220 |                 scale_param = OrderedDict()
221 |                 scale_param['bias_term'] = 'true'
222 |                 scale_layer['scale_param'] = scale_param
223 |                 layers.append(scale_layer)
224 | 
225 |             if block['activation'] != 'linear':
226 |                 relu_layer = OrderedDict()
227 |                 relu_layer['bottom'] = bottom
228 |                 relu_layer['top'] = bottom
229 |                 if block.has_key('name'):
230 |                     relu_layer['name'] = '%s-act' % block['name']
231 |                 else:
232 |                     relu_layer['name'] = 'layer%d-act' % layer_id
233 |                 relu_layer['type'] = 'ReLU'
234 |                 if block['activation'] == 'leaky':
235 |                     relu_param = OrderedDict()
236 |                     relu_param['negative_slope'] = '0.1'
237 |                     relu_layer['relu_param'] = relu_param
238 |                 layers.append(relu_layer)
239 |             topnames[layer_id] = bottom
240 |             layer_id = layer_id+1
241 |         elif block['type'] == 'depthwise_convolutional':
242 |             conv_layer = OrderedDict()
243 |             conv_layer['bottom'] = bottom
244 |             if block.has_key('name'):
245 |                 conv_layer['top'] = block['name']
246 |                 conv_layer['name'] = block['name']
247 |             else:
248 |                 conv_layer['top'] = 'layer%d-dwconv' % layer_id
249 |                 conv_layer['name'] = 'layer%d-dwconv' % layer_id
250 |             conv_layer['type'] = 'ConvolutionDepthwise'
251 |             convolution_param = OrderedDict()
252 |             convolution_param['num_output'] = prev_filters
253 |             convolution_param['kernel_size'] = block['size']
254 |             if block['pad'] == '1':
255 |                 convolution_param['pad'] = str(int(convolution_param['kernel_size'])/2)
256 |             convolution_param['stride'] = block['stride']
257 |             if block['batch_normalize'] == '1':
258 |                 convolution_param['bias_term'] = 'false'
259 |             else:
260 |                 convolution_param['bias_term'] = 'true'
261 |             conv_layer['convolution_param'] = convolution_param
262 |             layers.append(conv_layer)
263 |             bottom = conv_layer['top']
264 | 
265 |             if block['batch_normalize'] == '1':
266 |                 bn_layer = OrderedDict()
267 |                 bn_layer['bottom'] = bottom
268 |                 bn_layer['top'] = bottom
269 |                 if block.has_key('name'):
270 |                     bn_layer['name'] = '%s-bn' % block['name']
271 |                 else:
272 |                     bn_layer['name'] = 'layer%d-bn' % layer_id
273 |                 bn_layer['type'] = 'BatchNorm'
274 |                 batch_norm_param = OrderedDict()
275 |                 batch_norm_param['use_global_stats'] = 'true'
276 |                 bn_layer['batch_norm_param'] = batch_norm_param
277 |                 layers.append(bn_layer)
278 | 
279 |                 scale_layer = OrderedDict()
280 |                 scale_layer['bottom'] = bottom
281 |                 scale_layer['top'] = bottom
282 |                 if block.has_key('name'):
283 |                     scale_layer['name'] = '%s-scale' % block['name']
284 |                 else:
285 |                     scale_layer['name'] = 'layer%d-scale' % layer_id
286 |                 scale_layer['type'] = 'Scale'
287 |                 scale_param = OrderedDict()
288 |                 scale_param['bias_term'] = 'true'
289 |                 scale_layer['scale_param'] = scale_param
290 |                 layers.append(scale_layer)
291 | 
292 |             if block['activation'] != 'linear':
293 |                 relu_layer = OrderedDict()
294 |                 relu_layer['bottom'] = bottom
295 |                 relu_layer['top'] = bottom
296 |                 if block.has_key('name'):
297 |                     relu_layer['name'] = '%s-act' % block['name']
298 |                 else:
299 |                     relu_layer['name'] = 'layer%d-act' % layer_id
300 |                 relu_layer['type'] = 'ReLU'
301 |                 if block['activation'] == 'leaky':
302 |                     relu_param = OrderedDict()
303 |                     relu_param['negative_slope'] = '0.1'
304 |                     relu_layer['relu_param'] = relu_param
305 |                 layers.append(relu_layer)
306 |             topnames[layer_id] = bottom
307 |             layer_id = layer_id+1
308 |         elif block['type'] == 'maxpool':
309 |             max_layer = OrderedDict()
310 |             max_layer['bottom'] = bottom
311 |             if block.has_key('name'):
312 |                 max_layer['top'] = block['name']
313 |                 max_layer['name'] = block['name']
314 |             else:
315 |                 max_layer['top'] = 'layer%d-maxpool' % layer_id
316 |                 max_layer['name'] = 'layer%d-maxpool' % layer_id
317 |             max_layer['type'] = 'Pooling'
318 |             pooling_param = OrderedDict()
319 |             pooling_param['stride'] = block['stride']
320 |             pooling_param['pool'] = 'MAX'
321 |             if (int(block['size']) - int(block['stride'])) % 2 == 0:
322 | 		pooling_param['kernel_size'] = block['size']
323 |             	pooling_param['pad'] = str((int(block['size'])-1)/2)
324 | 
325 |             if (int(block['size']) - int(block['stride'])) % 2 == 1:
326 |                 pooling_param['kernel_size'] = str(int(block['size']) + 1)
327 |             	pooling_param['pad'] = str((int(block['size']) + 1)/2)
328 |             
329 |             max_layer['pooling_param'] = pooling_param
330 |             layers.append(max_layer)
331 |             bottom = max_layer['top']
332 |             topnames[layer_id] = bottom
333 |             layer_id = layer_id+1
334 |         elif block['type'] == 'avgpool':
335 |             avg_layer = OrderedDict()
336 |             avg_layer['bottom'] = bottom
337 |             if block.has_key('name'):
338 |                 avg_layer['top'] = block['name']
339 |                 avg_layer['name'] = block['name']
340 |             else:
341 |                 avg_layer['top'] = 'layer%d-avgpool' % layer_id
342 |                 avg_layer['name'] = 'layer%d-avgpool' % layer_id
343 |             avg_layer['type'] = 'Pooling'
344 |             pooling_param = OrderedDict()
345 |             pooling_param['kernel_size'] = 7
346 |             pooling_param['stride'] = 1
347 |             pooling_param['pool'] = 'AVE'
348 |             avg_layer['pooling_param'] = pooling_param
349 |             layers.append(avg_layer)
350 |             bottom = avg_layer['top']
351 |             topnames[layer_id] = bottom
352 |             layer_id = layer_id+1
353 |         elif block['type'] == 'region':
354 |             if True:
355 |                 region_layer = OrderedDict()
356 |                 region_layer['bottom'] = bottom
357 |                 if block.has_key('name'):
358 |                     region_layer['top'] = block['name']
359 |                     region_layer['name'] = block['name']
360 |                 else:
361 |                     region_layer['top'] = 'layer%d-region' % layer_id
362 |                     region_layer['name'] = 'layer%d-region' % layer_id
363 |                 region_layer['type'] = 'Region'
364 |                 region_param = OrderedDict()
365 |                 region_param['anchors'] = block['anchors'].strip()
366 |                 region_param['classes'] = block['classes']
367 |                 region_param['num'] = block['num']
368 |                 region_layer['region_param'] = region_param
369 |                 layers.append(region_layer)
370 |                 bottom = region_layer['top']
371 |             topnames[layer_id] = bottom
372 |             layer_id = layer_id + 1
373 | 
374 |         elif block['type'] == 'route':
375 |     	    route_layer = OrderedDict()
376 | 	    layer_name = str(block['layers']).split(',')
377 | 	    #print(layer_name[0])
378 | 	    bottom_layer_size = len(str(block['layers']).split(','))
379 |   	    #print(bottom_layer_size)
380 | 	    if(1 == bottom_layer_size):
381 |             	prev_layer_id = layer_id + int(block['layers'])
382 |             	bottom = topnames[prev_layer_id]
383 |             	#topnames[layer_id] = bottom
384 | 		route_layer['bottom'] = bottom
385 | 	    if(2 == bottom_layer_size):
386 | 		prev_layer_id1 = layer_id + int(layer_name[0])
387 | 		#print(prev_layer_id1)
388 | 		prev_layer_id2 = int(layer_name[1]) + 1
389 | 		print(topnames)
390 | 		bottom1 = topnames[prev_layer_id1]
391 | 	 	bottom2 = topnames[prev_layer_id2]
392 | 		route_layer['bottom'] = [bottom1, bottom2]
393 | 	    if(4 == bottom_layer_size):
394 | 		prev_layer_id1 = layer_id + int(layer_name[0])
395 | 		prev_layer_id2 = layer_id + int(layer_name[1])
396 | 		prev_layer_id3 = layer_id + int(layer_name[2])
397 | 		prev_layer_id4 = layer_id + int(layer_name[3])
398 | 
399 | 		bottom1 = topnames[prev_layer_id1]
400 | 	 	bottom2 = topnames[prev_layer_id2]
401 | 		bottom3 = topnames[prev_layer_id3]
402 | 	 	bottom4 = topnames[prev_layer_id4]
403 | 		route_layer['bottom'] = [bottom1, bottom2,bottom3,bottom4]
404 | 	    if block.has_key('name'):
405 |                 route_layer['top'] = block['name']
406 |                 route_layer['name'] = block['name']
407 |             else:
408 |                 route_layer['top'] = 'layer%d-route' % layer_id
409 |                 route_layer['name'] = 'layer%d-route' % layer_id
410 | 	    route_layer['type'] = 'Concat'
411 | 	    print(route_layer)
412 | 	    layers.append(route_layer)
413 | 	    bottom = route_layer['top']
414 | 	    print(layer_id)
415 |             topnames[layer_id] = bottom
416 | 	    layer_id = layer_id + 1
417 | 
418 | 	elif block['type'] == 'upsample':
419 | 	    upsample_layer = OrderedDict()
420 | 	    print(block['stride'])
421 | 	    upsample_layer['bottom'] = bottom
422 | 	    if block.has_key('name'):
423 |                 upsample_layer['top'] = block['name']
424 |                 upsample_layer['name'] = block['name']
425 |             else:
426 |                 upsample_layer['top'] = 'layer%d-upsample' % layer_id
427 |                 upsample_layer['name'] = 'layer%d-upsample' % layer_id
428 | 	    upsample_layer['type'] = 'Upsample'
429 | 	    upsample_param = OrderedDict()
430 | 	    upsample_param['scale'] = block['stride']
431 | 	    upsample_layer['upsample_param'] = upsample_param
432 |  	    print(upsample_layer)
433 | 	    layers.append(upsample_layer)
434 | 	    bottom = upsample_layer['top']
435 | 	    print('upsample:',layer_id)
436 |             topnames[layer_id] = bottom
437 | 	    layer_id = layer_id + 1
438 | 
439 |         elif block['type'] == 'shortcut':
440 |             prev_layer_id1 = layer_id + int(block['from'])
441 |             prev_layer_id2 = layer_id - 1
442 |             bottom1 = topnames[prev_layer_id1]
443 |             bottom2= topnames[prev_layer_id2]
444 |             shortcut_layer = OrderedDict()
445 |             shortcut_layer['bottom'] = [bottom1, bottom2]
446 |             if block.has_key('name'):
447 |                 shortcut_layer['top'] = block['name']
448 |                 shortcut_layer['name'] = block['name']
449 |             else:
450 |                 shortcut_layer['top'] = 'layer%d-shortcut' % layer_id
451 |                 shortcut_layer['name'] = 'layer%d-shortcut' % layer_id
452 |             shortcut_layer['type'] = 'Eltwise'
453 |             eltwise_param = OrderedDict()
454 |             eltwise_param['operation'] = 'SUM'
455 |             shortcut_layer['eltwise_param'] = eltwise_param
456 |             layers.append(shortcut_layer)
457 |             bottom = shortcut_layer['top']
458 |  
459 |             if block['activation'] != 'linear':
460 |                 relu_layer = OrderedDict()
461 |                 relu_layer['bottom'] = bottom
462 |                 relu_layer['top'] = bottom
463 |                 if block.has_key('name'):
464 |                     relu_layer['name'] = '%s-act' % block['name']
465 |                 else:
466 |                     relu_layer['name'] = 'layer%d-act' % layer_id
467 |                 relu_layer['type'] = 'ReLU'
468 |                 if block['activation'] == 'leaky':
469 |                     relu_param = OrderedDict()
470 |                     relu_param['negative_slope'] = '0.1'
471 |                     relu_layer['relu_param'] = relu_param
472 |                 layers.append(relu_layer)
473 |             topnames[layer_id] = bottom
474 |             layer_id = layer_id + 1           
475 |             
476 |         elif block['type'] == 'connected':
477 |             fc_layer = OrderedDict()
478 |             fc_layer['bottom'] = bottom
479 |             if block.has_key('name'):
480 |                 fc_layer['top'] = block['name']
481 |                 fc_layer['name'] = block['name']
482 |             else:
483 |                 fc_layer['top'] = 'layer%d-fc' % layer_id
484 |                 fc_layer['name'] = 'layer%d-fc' % layer_id
485 |             fc_layer['type'] = 'InnerProduct'
486 |             fc_param = OrderedDict()
487 |             fc_param['num_output'] = int(block['output'])
488 |             fc_layer['inner_product_param'] = fc_param
489 |             layers.append(fc_layer)
490 |             bottom = fc_layer['top']
491 | 
492 |             if block['activation'] != 'linear':
493 |                 relu_layer = OrderedDict()
494 |                 relu_layer['bottom'] = bottom
495 |                 relu_layer['top'] = bottom
496 |                 if block.has_key('name'):
497 |                     relu_layer['name'] = '%s-act' % block['name']
498 |                 else:
499 |                     relu_layer['name'] = 'layer%d-act' % layer_id
500 |                 relu_layer['type'] = 'ReLU'
501 |                 if block['activation'] == 'leaky':
502 |                     relu_param = OrderedDict()
503 |                     relu_param['negative_slope'] = '0.1'
504 |                     relu_layer['relu_param'] = relu_param
505 |                 layers.append(relu_layer)
506 |             topnames[layer_id] = bottom
507 |             layer_id = layer_id+1
508 |         else:
509 |             print('unknow layer type %s ' % block['type'])
510 |             topnames[layer_id] = bottom
511 |             layer_id = layer_id + 1
512 | 
513 |     net_info = OrderedDict()
514 |     net_info['props'] = props
515 |     net_info['layers'] = layers
516 |     return net_info
517 | 
if __name__ == '__main__':
    # Command-line entry point: convert a darknet cfg/weights pair into a
    # Caffe prototxt/caffemodel pair.
    #   argv[1] darknet .cfg      (input)
    #   argv[2] darknet .weights  (input)
    #   argv[3] Caffe .prototxt   (output)
    #   argv[4] Caffe .caffemodel (output)
    import sys

    if len(sys.argv) != 5:
        print('try:')
        print('python darknet2caffe.py tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-voc.prototxt tiny-yolo-voc.caffemodel')
        print('')
        print('please add name field for each block to avoid generated name')
        # A wrong invocation is an error, so report it through a non-zero
        # exit status. sys.exit is used instead of the bare exit() helper,
        # which is injected by the site module and is not guaranteed to exist.
        sys.exit(1)

    cfgfile, weightfile, protofile, caffemodel = sys.argv[1:5]
    darknet2caffe(cfgfile, weightfile, protofile, caffemodel)
535 | 


--------------------------------------------------------------------------------
/yolo_layer.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Company:	Synthesis
  3 |  * Author: 	Chen
  4 |  * Date:	2018/06/04
  5 |  */
  6 | 
  7 | #include "yolo_layer.h"
  8 | #include "blas.h"
  9 | #include "cuda.h"
 10 | #include "activations.h"
 11 | #include "box.h"
 12 | #include <stdio.h>
 13 | #include <math.h>
 14 | 
 15 | //yolov3
 16 | float biases[18] = {10,10,  20,21,  39,29,  28,55,  66,48,  49,109,  96,75,  246,37,  132,11};
 17 | 
 18 | //yolov3-tiny
 19 | float biases_tiny[12] = {10,14,23,27,37,58,81,82,135,169,344,319};
 20 | 
 21 | layer make_yolo_layer(int batch,int w,int h,int net_w,int net_h,int n,int total,int classes)
 22 | {
 23 |     layer l = {0};
 24 |     l.n = n;
 25 |     l.total = total;
 26 |     l.batch = batch;
 27 |     l.h = h;
 28 |     l.w = w;
 29 |     l.c = n*(classes+ 4 + 1);
 30 |     l.out_w = l.w;
 31 |     l.out_h = l.h;
 32 |     l.out_c = l.c;
 33 |     l.classes = classes;
 34 |     l.inputs = l.w*l.h*l.c;
 35 | 
 36 |     l.biases = (float*)calloc(total*2,sizeof(float));
 37 | 
 38 |     l.mask = (int*)calloc(n,sizeof(int));
 39 |     if(9 == total){
 40 |         for(int i =0;i<total*2;++i){
 41 |             l.biases[i] = biases[i];
 42 |         }
 43 |         if(l.w == net_w / 32){
 44 |             int j = 6;
 45 |             for(int i =0;i<l.n;++i)
 46 |                 l.mask[i] = j++;
 47 |         }
 48 |         if(l.w == net_w / 16){
 49 |             int j = 3;
 50 |             for(int i =0;i<l.n;++i)
 51 |                 l.mask[i] = j++;
 52 |         }
 53 |         if(l.w == net_w / 8){
 54 |             int j = 0;
 55 |             for(int i =0;i<l.n;++i)
 56 |                 l.mask[i] = j++;
 57 |         }
 58 |     }
 59 | 
 60 |     if(6 == total){
 61 |         for(int i =0;i<total*2;++i){
 62 |             l.biases[i] = biases_tiny[i];
 63 |         }
 64 |         if(l.w == net_w / 32){
 65 |             int j = 3;
 66 |             for(int i =0;i<l.n;++i)
 67 |                 l.mask[i] = j++;
 68 |         }
 69 |         if(l.w == net_w / 16){
 70 |             int j = 0;
 71 |             for(int i =0;i<l.n;++i)
 72 |                 l.mask[i] = j++;
 73 |         }
 74 |     }
 75 |     l.outputs = l.inputs;
 76 |     l.output = (float*)calloc(batch*l.outputs,sizeof(float));
 77 |     l.output_gpu = cuda_make_array(l.output,batch*l.outputs);
 78 |     
 79 |     return l;
 80 | }
 81 | 
 82 | void free_yolo_layer(layer l)
 83 | {
 84 |     if(NULL != l.biases){
 85 |         free(l.biases);
 86 |         l.biases = NULL;
 87 |     }
 88 | 
 89 |     if(NULL != l.mask){
 90 |         free(l.mask);
 91 |         l.mask = NULL;
 92 |     }
 93 |     if(NULL != l.output){
 94 |         free(l.output);
 95 |         l.output = NULL;
 96 |     }
 97 | 
 98 |     if(NULL != l.output_gpu)
 99 |         cuda_free(l.output_gpu);
100 | }
101 | 
102 | static int entry_index(layer l,int batch,int location,int entry)
103 | {
104 |     int n = location / (l.w*l.h);
105 |     int loc = location % (l.w*l.h);
106 |     return batch*l.outputs + n*l.w*l.h*(4 + l.classes + 1) + entry*l.w*l.h + loc;
107 |  }
108 | 
109 | void forward_yolo_layer_gpu(const float* input,layer l)
110 | {
111 |     copy_gpu(l.batch*l.inputs,(float*)input,1,l.output_gpu,1);
112 |     int b,n;
113 |     for(b = 0;b < l.batch;++b){
114 |   	for(n =0;n< l.n;++n){
115 | 	    int index = entry_index(l,b,n*l.w*l.h,0);
116 |             activate_array_gpu(l.output_gpu + index, 2*l.w*l.h,LOGISTIC);
117 |             index = entry_index(l,b,n*l.w*l.h,4);
118 |             activate_array_gpu(l.output_gpu + index,(1 + l.classes)*l.w*l.h,LOGISTIC);
119 | 	}
120 |     }
121 |     cuda_pull_array(l.output_gpu,l.output,l.batch*l.outputs);
122 | }
123 | 
124 | 
125 | 
126 | int yolo_num_detections(layer l,float thresh)
127 | {
128 |     int i,n,b;
129 |     int count = 0;
130 |   for(b = 0;b < l.batch;++b){
131 |     for(i=0;i<l.w*l.h;++i){
132 |         for(n=0;n<l.n;++n){
133 |             int obj_index = entry_index(l,b,n*l.w*l.h+i,4);
134 |             if(l.output[obj_index] > thresh)
135 |                 ++count;
136 |         }
137 | 	
138 |     }
139 |   }
140 |   //printf("count = %d\n",count);
141 |     return count;
142 | }
143 | 
144 | int num_detections(vector<layer> layers_params,float thresh)
145 | {
146 |     int i;
147 |     int s=0;
148 |     for(i=0;i<layers_params.size();++i){
149 |         layer l  = layers_params[i];
150 |         s += yolo_num_detections(l,thresh);
151 |     }
152 |     return s;
153 | 
154 | }
155 | 
156 | detection* make_network_boxes(vector<layer> layers_params,float thresh,int* num)
157 | {
158 |     layer l = layers_params[0];
159 |     int i;
160 |     int nboxes = num_detections(layers_params,thresh);
161 |     if(num) *num = nboxes;
162 |     detection *dets = (detection*)calloc(nboxes,sizeof(detection));
163 |     for(i=0;i<nboxes;++i){
164 |         dets[i].prob = (float*)calloc(l.classes,sizeof(float));
165 |         //if(l.coords > 4)
166 |         //{
167 |         //    dets[i].mask = (float*)(l.coords-4,sizeof(float));
168 |         //}
169 |     }
170 |     return dets;
171 | }
172 | 
173 | 
174 | void correct_yolo_boxes(detection* dets,int n,int w,int h,int netw,int neth,int relative)
175 | {
176 |     int i;
177 |     int new_w=0;
178 |     int new_h=0;
179 |     if (((float)netw/w) < ((float)neth/h)){
180 |         new_w = netw;
181 |         new_h = (h * netw)/w;
182 |     }
183 |     else{
184 |         new_h = neth;
185 |         new_w = (w * neth)/h;
186 |     }
187 |     for (i = 0; i < n; ++i){
188 |         box b = dets[i].bbox;
189 |         b.x =  (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw);
190 |         b.y =  (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth);
191 |         b.w *= (float)netw/new_w;
192 |         b.h *= (float)neth/new_h;
193 |         if(!relative){
194 |             b.x *= w;
195 |             b.w *= w;
196 |             b.y *= h;
197 |             b.h *= h;
198 |         }
199 |         dets[i].bbox = b;
200 |     }
201 | }
202 | 
203 | 
204 | box get_yolo_box(float* x,float* biases,int n,int index,int i,int j,int lw, int lh,int w, int h,int stride)
205 | {
206 |     box b;
207 |     b.x = (i + x[index + 0*stride]) / lw;
208 |     b.y = (j + x[index + 1*stride]) / lh;
209 |     b.w = exp(x[index + 2*stride]) * biases[2*n] / w;
210 |     b.h = exp(x[index + 3*stride]) * biases[2*n + 1] / h;
211 |     return b;
212 | }
213 | 
214 | 
215 | int get_yolo_detections(layer l,int w, int h, int netw,int neth,float thresh,int *map,int relative,detection *dets)
216 | {
217 |     int i,j,n,b;
218 |     float* predictions = l.output;
219 |     int count = 0;
220 |   for(b = 0;b < l.batch;++b){
221 |     for(i=0;i<l.w*l.h;++i){
222 |         int row = i/l.w;
223 |         int col = i%l.w;
224 |         for(n = 0;n<l.n;++n){           
225 |             int obj_index = entry_index(l,b,n*l.w*l.h + i,4);
226 |             float objectness = predictions[obj_index];
227 |             if(objectness <= thresh) continue;
228 |             int box_index = entry_index(l,b,n*l.w*l.h + i,0);
229 | 
230 |             dets[count].bbox = get_yolo_box(predictions,l.biases,l.mask[n],box_index,col,row,l.w,l.h,netw,neth,l.w*l.h);
231 |             dets[count].objectness = objectness;
232 |             dets[count].classes = l.classes;
233 |             for(j=0;j<l.classes;++j){
234 |                 int class_index = entry_index(l,b,n*l.w*l.h+i,4+1+j);
235 |                 float prob = objectness*predictions[class_index];
236 |                 dets[count].prob[j] = (prob > thresh) ? prob : 0;
237 |             }
238 |             ++count;
239 |         }
240 |     }
241 |   }
242 |     correct_yolo_boxes(dets,count,w,h,netw,neth,relative);
243 |     return count;
244 | }
245 | 
246 | 
247 | void fill_network_boxes(vector<layer> layers_params,int img_w,int img_h,int net_w,int net_h,float thresh, float hier, int *map,int relative,detection *dets)
248 | {
249 |     int j;
250 |     for(j=0;j<layers_params.size();++j){
251 |         layer l = layers_params[j];
252 |         int count = get_yolo_detections(l,img_w,img_h,net_w,net_h,thresh,map,relative,dets);
253 |         dets += count;
254 |     }
255 | }
256 | 
257 | 
258 | detection* get_network_boxes(vector<layer> layers_params,
259 |                              int img_w,int img_h,int net_w,int net_h,float thresh,float hier,int* map,int relative,int *num)
260 | {
261 |     //make network boxes
262 |     detection *dets = make_network_boxes(layers_params,thresh,num);
263 | 
264 |     //fill network boxes
265 |     fill_network_boxes(layers_params,img_w,img_h,net_w,net_h,thresh,hier,map,relative,dets);
266 |     return dets;
267 | }
268 | 
269 | //get detection result
270 | detection* get_detections(vector<Blob<float>*> blobs,int img_w,int img_h,int net_w,int net_h,int *nboxes,NetType type)
271 | {
272 |     vector<layer> layers_params;
273 |     layers_params.clear();
274 |     for(int i=0;i<blobs.size();++i){
275 |         layer l_params;
276 |         if(YOLOV3 == type){
277 |             l_params = make_yolo_layer(1,blobs[i]->width(),blobs[i]->height(),net_w,net_h,numBBoxes,yolov3_numAnchors,classes);
278 |         }
279 |         else if(YOLOV3_TINY == type){
280 |             l_params = make_yolo_layer(1,blobs[i]->width(),blobs[i]->height(),net_w,net_h,numBBoxes,yolov3_tiny_numAnchors,classes);
281 |         }
282 | 
283 |         layers_params.push_back(l_params);
284 |         forward_yolo_layer_gpu(blobs[i]->gpu_data(),l_params);
285 |     }
286 |     
287 | 
288 |     //get network boxes
289 |     detection* dets = get_network_boxes(layers_params,img_w,img_h,net_w,net_h,thresh,hier_thresh,0,relative,nboxes);
290 | 
291 |     //release layer memory
292 |     for(int index =0;index < layers_params.size();++index){
293 |         free_yolo_layer(layers_params[index]);
294 |     }
295 | 
296 |     if(nms) {
297 |         do_nms_sort(dets,(*nboxes),classes,nms);
298 |         printf("have done nms\n");
299 |     }
300 |     return dets;       
301 | }
302 | 
303 | 
304 | //release detection memory
305 | void free_detections(detection *dets,int nboxes)
306 | {
307 |     int i;
308 |     for(i = 0;i<nboxes;++i){
309 |         free(dets[i].prob);
310 |     }
311 |     free(dets);
312 | }
313 | 


--------------------------------------------------------------------------------
/yolo_layer.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Company:	Synthesis
 3 |  * Author: 	Chen
 4 |  * Date:	2018/06/04	
 5 |  */
 6 | 
 7 | #ifndef __YOLO_LAYER_H_
 8 | #define __YOLO_LAYER_H_
 9 | #include <caffe/caffe.hpp>
10 | #include <string>
11 | #include <vector>
12 | 
13 | using namespace caffe;
14 | 
15 | 
// ---- model description ----
const int classes = 2;                  // number of object classes
const int numBBoxes = 3;                // anchors predicted per cell by each YOLO layer
const int yolov3_numAnchors = 9;        // total anchors of a yolov3 network
const int yolov3_tiny_numAnchors = 6;   // total anchors of a yolov3-tiny network

// ---- post-processing ----
const float thresh = 0.5;               // objectness / class probability threshold
const float hier_thresh = 0.5;          // passed through as `hier`
const float nms = 0.45;                 // threshold handed to do_nms_sort; 0 disables NMS
const int relative = 1;                 // non-zero: boxes stay relative to the image size
25 | 
// Network flavour; selects which anchor set get_detections() uses.
enum NetType
{
    YOLOV3      = 0,    // anchors: yolov3_numAnchors
    YOLOV3_TINY = 1     // anchors: yolov3_tiny_numAnchors
};
30 | 
// Bounding box described by a position (x, y) and a size (w, h).
typedef struct
{
    float x;
    float y;
    float w;
    float h;
} box;
34 | 
35 | typedef struct{
36 |     box bbox;
37 |     int classes;
38 |     float* prob;
39 |     float* mask;
40 |     float objectness;
41 |     int sort_class;
42 | }detection;
43 | 
// Host-side description of one YOLO output layer and the buffers it owns.
typedef struct layer
{
    int batch;          // number of images per batch
    int total;          // total number of anchors of the network
    int n;              // anchors predicted per cell by this layer
    int c;              // input channels
    int h;              // feature map height
    int w;              // feature map width
    int out_n;
    int out_c;          // output channels
    int out_h;          // output height
    int out_w;          // output width
    int classes;        // number of object classes
    int inputs;         // values per batch item on input
    int outputs;        // values per batch item on output
    int *mask;          // n indices into biases, selecting this layer's anchors
    float* biases;      // total*2 anchor values, stored as pairs
    float* output;      // host copy of the layer output
    float* output_gpu;  // device copy of the layer output
} layer;
56 | 
57 | layer make_yolo_layer(int batch,int w,int h,int n,int total,int classes);
58 | 
59 | void free_yolo_layer(layer l);
60 | 
61 | void forward_yolo_layer_gpu(const float* input,layer l, float* output);
62 | 
63 | detection* get_detections(vector<Blob<float>*> blobs,int img_w,int img_h,int net_w,int net_h,int* nboxes,NetType type);
64 | 
65 | void free_detections(detection *dets,int nboxes);
66 | 
67 | 
68 | 
69 | 
70 | #endif
71 | 


--------------------------------------------------------------------------------