├── .gitignore ├── CMakeLists.txt ├── README.md ├── cmake_uninstall.cmake.in └── darknet ├── CMakeLists.txt ├── include ├── activation_layer.h ├── activations.h ├── avgpool_layer.h ├── batchnorm_layer.h ├── blas.h ├── box.h ├── classifier.h ├── col2im.h ├── connected_layer.h ├── convolutional_layer.h ├── cost_layer.h ├── crnn_layer.h ├── crop_layer.h ├── cuda.h ├── darknet.h ├── data.h ├── deconvolutional_layer.h ├── demo.h ├── detection_layer.h ├── dropout_layer.h ├── gemm.h ├── gru_layer.h ├── im2col.h ├── image.h ├── l2norm_layer.h ├── layer.h ├── list.h ├── local_layer.h ├── logistic_layer.h ├── lstm_layer.h ├── matrix.h ├── maxpool_layer.h ├── network.h ├── normalization_layer.h ├── option_list.h ├── parser.h ├── region_layer.h ├── reorg_layer.h ├── rnn_layer.h ├── route_layer.h ├── shortcut_layer.h ├── softmax_layer.h ├── stb_image.h ├── stb_image_write.h ├── tree.h ├── upsample_layer.h ├── utils.h └── yolo_layer.h └── src ├── CMakeLists.txt ├── activation_kernels.cu ├── activation_layer.cpp ├── activations.cpp ├── avgpool_layer.cpp ├── avgpool_layer_kernels.cu ├── batchnorm_layer.cpp ├── blas.cpp ├── blas_kernels.cu ├── box.cpp ├── col2im.cpp ├── col2im_kernels.cu ├── connected_layer.cpp ├── convolutional_kernels.cu ├── convolutional_layer.cpp ├── cost_layer.cpp ├── crnn_layer.cpp ├── crop_layer.cpp ├── crop_layer_kernels.cu ├── cuda.cpp ├── data.cpp ├── deconvolutional_kernels.cu ├── deconvolutional_layer.cpp ├── demo.cpp ├── detection_layer.cpp ├── dropout_layer.cpp ├── dropout_layer_kernels.cu ├── gemm.cpp ├── gru_layer.cpp ├── im2col.cpp ├── im2col_kernels.cu ├── image.cpp ├── l2norm_layer.cpp ├── layer.cpp ├── list.cpp ├── local_layer.cpp ├── logistic_layer.cpp ├── lstm_layer.cpp ├── matrix.cpp ├── maxpool_layer.cpp ├── maxpool_layer_kernels.cu ├── network.cpp ├── normalization_layer.cpp ├── option_list.cpp ├── parser.cpp ├── region_layer.cpp ├── reorg_layer.cpp ├── rnn_layer.cpp ├── route_layer.cpp ├── shortcut_layer.cpp ├── softmax_layer.cpp ├── tree.cpp ├── upsample_layer.cpp ├── utils.cpp └── yolo_layer.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9) 2 | 3 | project(darknet_cpp LANGUAGES CXX CUDA) 4 | if(CMAKE_BUILD_TYPE STREQUAL "DEBUG") 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Winline") 6 | else() 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG") 8 | endif() 9 | message(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 10 | 11 | set(CUDA_PROPAGATE_HOST_FLAGS OFF) 12 | set(CUDA_WARNING "cross-execution-space-call") 13 | # new flags introduced in CUDA 9 set(CUDA_WARNING "reorder,cross-execution- 14 | # space-call,deprecated-declarations") 15 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror ${CUDA_WARNING} -restrict") 16 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_61,code=sm_61") 17 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_52,code=sm_52") 18 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_50,code=sm_50") 19 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_35,code=sm_35") 20 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_30,code=sm_30") 21 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_75,code=sm_75") 22 | set(CMAKE_CUDA_FLAGS 23 | "${CMAKE_CUDA_FLAGS} -gencode 
arch=compute_75,code=compute_75") 24 | 25 | message(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}") 26 | 27 | if(${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR}) 28 | export(TARGETS FILE ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Targets.cmake) 29 | endif() 30 | 31 | file( 32 | WRITE 33 | ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Config.cmake 34 | "include(CMakeFindDependencyMacro)\n" 35 | "include(\${CMAKE_CURRENT_LIST_DIR}/${CMAKE_PROJECT_NAME}Targets.cmake)\n") 36 | 37 | if(NOT TARGET uninstall) 38 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in 39 | ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake 40 | IMMEDIATE 41 | @ONLY) 42 | 43 | add_custom_target(uninstall 44 | COMMAND ${CMAKE_COMMAND} -P 45 | ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) 46 | endif() 47 | 48 | add_subdirectory(darknet) 49 | 50 | install(FILES ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Config.cmake 51 | DESTINATION lib/cmake/${CMAKE_PROJECT_NAME}) 52 | 53 | install(EXPORT ${CMAKE_PROJECT_NAME}Targets 54 | DESTINATION lib/cmake/${CMAKE_PROJECT_NAME}) 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # darknet-cpp 2 | 3 | Darknet is well known for yolo implementation. However, darknet is built in c and it makes it little painful to integrate with other c++ projects. In this project, I will update darknet c library to c++. 4 | 5 | Current Status: 6 | - Cmake files added 7 | - Yolo v3 added 8 | - Network detection memory leak fix after darknet update 9 | 10 | Future work: 11 | - Replace c style raw pointers to smart pointers 12 | - Improve modularity 13 | -------------------------------------------------------------------------------- /cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 2 | message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 3 | endif(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 4 | 5 | if (NOT DEFINED CMAKE_INSTALL_PREFIX) 6 | set (CMAKE_INSTALL_PREFIX "@CMAKE_INSTALL_PREFIX@") 7 | endif () 8 | message(${CMAKE_INSTALL_PREFIX}) 9 | 10 | file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) 11 | string(REGEX REPLACE "\n" ";" files "${files}") 12 | foreach(file ${files}) 13 | message(STATUS "Uninstalling $ENV{DESTDIR}${file}") 14 | if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 15 | exec_program( 16 | "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" 17 | OUTPUT_VARIABLE rm_out 18 | RETURN_VALUE rm_retval 19 | ) 20 | if(NOT "${rm_retval}" STREQUAL 0) 21 | message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") 22 | endif(NOT "${rm_retval}" STREQUAL 0) 23 | else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 24 | message(STATUS "File $ENV{DESTDIR}${file} does not exist.") 25 | endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 26 | endforeach(file) -------------------------------------------------------------------------------- /darknet/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9) 2 | 3 | project(darknet_cpp) 4 | 5 | file(GLOB YOLO_SOURCE_FILES src/*.cpp) 6 | file(GLOB CUDA_SOURCE_FILES src/*.cu) 7 | file(GLOB DARKNET_INCS include/*.h) 8 | 9 | set(PROJECT_SRCS 10 | 
${YOLO_SOURCE_FILES} 11 | ${CUDA_SOURCE_FILES} 12 | ) 13 | 14 | set(PROJECT_INCS 15 | ${DARKNET_INCS} 16 | ) 17 | 18 | find_package( OpenCV REQUIRED ) 19 | find_package( CUDA REQUIRED ) 20 | 21 | add_library(${PROJECT_NAME} SHARED ${PROJECT_SRCS} ${PROJECT_INCS}) 22 | target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_14) 23 | 24 | target_compile_definitions(${PROJECT_NAME} PUBLIC 25 | DKGPU) 26 | 27 | target_include_directories(${PROJECT_NAME} PUBLIC 28 | $ 29 | $) 30 | 31 | target_include_directories(${PROJECT_NAME} SYSTEM PUBLIC 32 | ${OpenCV_INCLUDE_DIRS} 33 | ${CUDA_INCLUDE_DIRS}) 34 | 35 | target_link_libraries(${PROJECT_NAME} 36 | ${OpenCV_LIBS} 37 | ${CUDA_LIBRARIES} 38 | ${CUDA_curand_LIBRARY} 39 | ${CUDA_CUBLAS_LIBRARIES}) 40 | 41 | install(TARGETS ${PROJECT_NAME} 42 | EXPORT ${CMAKE_PROJECT_NAME}Targets 43 | ARCHIVE DESTINATION lib 44 | LIBRARY DESTINATION lib 45 | RUNTIME DESTINATION bin) 46 | 47 | install(FILES ${DARKNET_INCS} 48 | DESTINATION include/Darknet/) 49 | 50 | export(TARGETS ${PROJECT_NAME} 51 | APPEND FILE ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Targets.cmake) 52 | 53 | file(APPEND ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Config.cmake 54 | "find_dependency(OpenCV REQUIRED)\n" 55 | "find_dependency(CUDA REQUIRED)\n") 56 | -------------------------------------------------------------------------------- /darknet/include/activation_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTIVATION_LAYER_H 2 | #define ACTIVATION_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | layer make_activation_layer(int batch, int inputs, ACTIVATION activation); 14 | 15 | void forward_activation_layer(layer l, network net); 16 | void backward_activation_layer(layer l, network net); 17 | 18 | #ifdef DKGPU 19 | void forward_activation_layer_gpu(layer l, network net); 20 | void backward_activation_layer_gpu(layer l, network net); 21 | #endif 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /darknet/include/activations.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTIVATIONS_H 2 | #define ACTIVATIONS_H 3 | #include "cuda.h" 4 | #include "darknet.h" 5 | #include "math.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | 12 | ACTIVATION get_activation(char *s); 13 | 14 | char *get_activation_string(ACTIVATION a); 15 | float activate(float x, ACTIVATION a); 16 | float gradient(float x, ACTIVATION a); 17 | void gradient_array(const float *x, 18 | const int n, 19 | const ACTIVATION a, 20 | float *delta); 21 | void activate_array(float *x, const int n, const ACTIVATION a); 22 | #ifdef DKGPU 23 | void activate_array_gpu(float *x, int n, ACTIVATION a); 24 | void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta); 25 | #endif 26 | 27 | static inline float stair_activate(float x) 28 | { 29 | int n = floor(x); 30 | if (n % 2 == 0) 31 | return floor(x / 2.); 32 | else 33 | return (x - n) + floor(x / 2.); 34 | } 35 | static inline float hardtan_activate(float x) 36 | { 37 | if (x < -1) return -1; 38 | if (x > 1) return 1; 39 | return x; 40 | } 41 | static inline float linear_activate(float x) 42 | { 43 | return x; 44 | } 45 | static inline float logistic_activate(float x) 46 | { 47 | return 1. / (1. 
+ exp(-x)); 48 | } 49 | static inline float loggy_activate(float x) 50 | { 51 | return 2. / (1. + exp(-x)) - 1; 52 | } 53 | static inline float relu_activate(float x) 54 | { 55 | return x * (x > 0); 56 | } 57 | static inline float elu_activate(float x) 58 | { 59 | return (x >= 0) * x + (x < 0) * (exp(x) - 1); 60 | } 61 | static inline float relie_activate(float x) 62 | { 63 | return (x > 0) ? x : .01 * x; 64 | } 65 | static inline float ramp_activate(float x) 66 | { 67 | return x * (x > 0) + .1 * x; 68 | } 69 | static inline float leaky_activate(float x) 70 | { 71 | return (x > 0) ? x : .1 * x; 72 | } 73 | static inline float tanh_activate(float x) 74 | { 75 | return (exp(2 * x) - 1) / (exp(2 * x) + 1); 76 | } 77 | static inline float plse_activate(float x) 78 | { 79 | if (x < -4) return .01 * (x + 4); 80 | if (x > 4) return .01 * (x - 4) + 1; 81 | return .125 * x + .5; 82 | } 83 | 84 | static inline float lhtan_activate(float x) 85 | { 86 | if (x < 0) return .001 * x; 87 | if (x > 1) return .001 * (x - 1) + 1; 88 | return x; 89 | } 90 | static inline float lhtan_gradient(float x) 91 | { 92 | if (x > 0 && x < 1) return 1; 93 | return .001; 94 | } 95 | 96 | static inline float hardtan_gradient(float x) 97 | { 98 | if (x > -1 && x < 1) return 1; 99 | return 0; 100 | } 101 | static inline float linear_gradient(float x) 102 | { 103 | return 1; 104 | } 105 | static inline float logistic_gradient(float x) 106 | { 107 | return (1 - x) * x; 108 | } 109 | static inline float loggy_gradient(float x) 110 | { 111 | float y = (x + 1.) / 2.; 112 | return 2 * (1 - y) * y; 113 | } 114 | static inline float stair_gradient(float x) 115 | { 116 | if (floor(x) == x) return 0; 117 | return 1; 118 | } 119 | static inline float relu_gradient(float x) 120 | { 121 | return (x > 0); 122 | } 123 | static inline float elu_gradient(float x) 124 | { 125 | return (x >= 0) + (x < 0) * (x + 1); 126 | } 127 | static inline float relie_gradient(float x) 128 | { 129 | return (x > 0) ? 1 : .01; 130 | } 131 | static inline float ramp_gradient(float x) 132 | { 133 | return (x > 0) + .1; 134 | } 135 | static inline float leaky_gradient(float x) 136 | { 137 | return (x > 0) ? 1 : .1; 138 | } 139 | static inline float tanh_gradient(float x) 140 | { 141 | return 1 - x * x; 142 | } 143 | static inline float plse_gradient(float x) 144 | { 145 | return (x < 0 || x > 1) ? 
.01 : .125; 146 | } 147 | 148 | #ifdef __cplusplus 149 | } 150 | #endif 151 | 152 | #endif 153 | -------------------------------------------------------------------------------- /darknet/include/avgpool_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef AVGPOOL_LAYER_H 2 | #define AVGPOOL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | #ifdef __cplusplus 10 | extern "C" 11 | { 12 | #endif 13 | 14 | typedef layer avgpool_layer; 15 | 16 | image get_avgpool_image(avgpool_layer l); 17 | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); 18 | void resize_avgpool_layer(avgpool_layer *l, int w, int h); 19 | void forward_avgpool_layer(const avgpool_layer l, network net); 20 | void backward_avgpool_layer(const avgpool_layer l, network net); 21 | 22 | #ifdef DKGPU 23 | void forward_avgpool_layer_gpu(avgpool_layer l, network net); 24 | void backward_avgpool_layer_gpu(avgpool_layer l, network net); 25 | #endif 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /darknet/include/batchnorm_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCHNORM_LAYER_H 2 | #define BATCHNORM_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | layer make_batchnorm_layer(int batch, int w, int h, int c); 14 | void forward_batchnorm_layer(layer l, network net); 15 | void backward_batchnorm_layer(layer l, network net); 16 | 17 | #ifdef DKGPU 18 | void forward_batchnorm_layer_gpu(layer l, network net); 19 | void backward_batchnorm_layer_gpu(layer l, network net); 20 | void pull_batchnorm_layer(layer l); 21 | void push_batchnorm_layer(layer l); 22 | #endif 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /darknet/include/box.h: -------------------------------------------------------------------------------- 1 | #ifndef BOX_H 2 | #define BOX_H 3 | #include "darknet.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" 7 | { 8 | #endif 9 | 10 | typedef struct 11 | { 12 | float dx, dy, dw, dh; 13 | } dbox; 14 | 15 | float box_rmse(box a, box b); 16 | dbox diou(box a, box b); 17 | box decode_box(box b, box anchor); 18 | box encode_box(box b, box anchor); 19 | #ifdef __cplusplus 20 | } 21 | #endif 22 | #endif 23 | -------------------------------------------------------------------------------- /darknet/include/classifier.h: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /darknet/include/col2im.h: -------------------------------------------------------------------------------- 1 | #ifndef COL2IM_H 2 | #define COL2IM_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | 9 | void col2im_cpu(float* data_col, 10 | int channels, 11 | int height, 12 | int width, 13 | int ksize, 14 | int stride, 15 | int pad, 16 | float* data_im); 17 | 18 | #ifdef DKGPU 19 | void col2im_gpu(float* data_col, 20 | int channels, 21 | int height, 22 | int width, 23 | int ksize, 24 | int stride, 25 | int pad, 26 | float* data_im); 27 | #endif 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 
-------------------------------------------------------------------------------- /darknet/include/connected_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CONNECTED_LAYER_H 2 | #define CONNECTED_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | layer make_connected_layer(int batch, 14 | int inputs, 15 | int outputs, 16 | ACTIVATION activation, 17 | int batch_normalize, 18 | int adam); 19 | 20 | void forward_connected_layer(layer l, network net); 21 | void backward_connected_layer(layer l, network net); 22 | void update_connected_layer(layer l, update_args a); 23 | 24 | #ifdef DKGPU 25 | void forward_connected_layer_gpu(layer l, network net); 26 | void backward_connected_layer_gpu(layer l, network net); 27 | void update_connected_layer_gpu(layer l, update_args a); 28 | void push_connected_layer(layer l); 29 | void pull_connected_layer(layer l); 30 | #endif 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /darknet/include/convolutional_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CONVOLUTIONAL_LAYER_H 2 | #define CONVOLUTIONAL_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "cuda.h" 6 | #include "image.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | #ifdef __cplusplus 11 | extern "C" 12 | { 13 | #endif 14 | 15 | typedef layer convolutional_layer; 16 | 17 | #ifdef DKGPU 18 | void forward_convolutional_layer_gpu(convolutional_layer layer, network net); 19 | void backward_convolutional_layer_gpu(convolutional_layer layer, network net); 20 | void update_convolutional_layer_gpu(convolutional_layer layer, update_args a); 21 | 22 | void push_convolutional_layer(convolutional_layer layer); 23 | void pull_convolutional_layer(convolutional_layer layer); 24 | 25 | void add_bias_gpu(float *output, float *biases, int batch, int n, int size); 26 | void backward_bias_gpu( 27 | float *bias_updates, float *delta, int batch, int n, int size); 28 | void adam_update_gpu(float *w, 29 | float *d, 30 | float *m, 31 | float *v, 32 | float B1, 33 | float B2, 34 | float eps, 35 | float decay, 36 | float rate, 37 | int n, 38 | int batch, 39 | int t); 40 | #ifdef CUDNN 41 | void cudnn_convolutional_setup(layer *l); 42 | #endif 43 | #endif 44 | 45 | convolutional_layer make_convolutional_layer(int batch, 46 | int h, 47 | int w, 48 | int c, 49 | int n, 50 | int groups, 51 | int size, 52 | int stride, 53 | int padding, 54 | ACTIVATION activation, 55 | int batch_normalize, 56 | int binary, 57 | int xnor, 58 | int adam); 59 | void resize_convolutional_layer(convolutional_layer *layer, int w, int h); 60 | void forward_convolutional_layer(const convolutional_layer layer, 61 | network net); 62 | void update_convolutional_layer(convolutional_layer layer, update_args a); 63 | image *visualize_convolutional_layer(convolutional_layer layer, 64 | char *window, 65 | image *prev_weights); 66 | void binarize_weights(float *weights, int n, int size, float *binary); 67 | void swap_binary(convolutional_layer *l); 68 | void binarize_weights2( 69 | float *weights, int n, int size, char *binary, float *scales); 70 | 71 | void backward_convolutional_layer(convolutional_layer layer, network net); 72 | 73 | void add_bias(float *output, float *biases, int batch, int n, int size); 74 | void backward_bias( 75 | 
float *bias_updates, float *delta, int batch, int n, int size); 76 | 77 | image get_convolutional_image(convolutional_layer layer); 78 | image get_convolutional_delta(convolutional_layer layer); 79 | image get_convolutional_weight(convolutional_layer layer, int i); 80 | 81 | int convolutional_out_height(convolutional_layer layer); 82 | int convolutional_out_width(convolutional_layer layer); 83 | 84 | #ifdef __cplusplus 85 | } 86 | #endif 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /darknet/include/cost_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef COST_LAYER_H 2 | #define COST_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" 8 | { 9 | #endif 10 | 11 | typedef layer cost_layer; 12 | 13 | COST_TYPE get_cost_type(char *s); 14 | char *get_cost_string(COST_TYPE a); 15 | cost_layer make_cost_layer(int batch, 16 | int inputs, 17 | COST_TYPE type, 18 | float scale); 19 | void forward_cost_layer(const cost_layer l, network net); 20 | void backward_cost_layer(const cost_layer l, network net); 21 | void resize_cost_layer(cost_layer *l, int inputs); 22 | 23 | #ifdef DKGPU 24 | void forward_cost_layer_gpu(cost_layer l, network net); 25 | void backward_cost_layer_gpu(const cost_layer l, network net); 26 | #endif 27 | 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /darknet/include/crnn_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CRNN_LAYER_H 2 | #define CRNN_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | layer make_crnn_layer(int batch, 14 | int h, 15 | int w, 16 | int c, 17 | int hidden_filters, 18 | int output_filters, 19 | int steps, 20 | ACTIVATION activation, 21 | int batch_normalize); 22 | 23 | void forward_crnn_layer(layer l, network net); 24 | void backward_crnn_layer(layer l, network net); 25 | void update_crnn_layer(layer l, update_args a); 26 | 27 | #ifdef DKGPU 28 | void forward_crnn_layer_gpu(layer l, network net); 29 | void backward_crnn_layer_gpu(layer l, network net); 30 | void update_crnn_layer_gpu(layer l, update_args a); 31 | void push_crnn_layer(layer l); 32 | void pull_crnn_layer(layer l); 33 | #endif 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /darknet/include/crop_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CROP_LAYER_H 2 | #define CROP_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | typedef layer crop_layer; 14 | 15 | image get_crop_image(crop_layer l); 16 | crop_layer make_crop_layer(int batch, 17 | int h, 18 | int w, 19 | int c, 20 | int crop_height, 21 | int crop_width, 22 | int flip, 23 | float angle, 24 | float saturation, 25 | float exposure); 26 | void forward_crop_layer(const crop_layer l, network net); 27 | void resize_crop_layer(layer *l, int w, int h); 28 | 29 | #ifdef DKGPU 30 | void forward_crop_layer_gpu(crop_layer l, network net); 31 | #endif 32 | 33 | #ifdef __cplusplus 34 | } 35 | #endif 36 | 37 | #endif 38 | 
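These layer headers all follow the same convention: a CPU entry point (forward_*_layer, backward_*_layer) plus *_gpu variants that are only declared when DKGPU is defined, which darknet/CMakeLists.txt above sets as a PUBLIC compile definition. Below is a minimal sketch of what that compile-time gating looks like from calling code, using the avgpool declarations already shown; the wrapper function itself is illustrative and not part of the library.

#include "avgpool_layer.h"

/* Illustrative wrapper: the GPU entry point exists only when the library
 * (and, via the PUBLIC definition, the consumer) was compiled with DKGPU. */
static void run_avgpool(avgpool_layer l, network net)
{
#ifdef DKGPU
    forward_avgpool_layer_gpu(l, net);  /* declared only under #ifdef DKGPU */
#else
    forward_avgpool_layer(l, net);      /* plain CPU path */
#endif
}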
-------------------------------------------------------------------------------- /darknet/include/cuda.h: -------------------------------------------------------------------------------- 1 | #ifndef CUDA_H 2 | #define CUDA_H 3 | 4 | #include "darknet.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" 8 | { 9 | #endif 10 | 11 | #ifdef DKGPU 12 | 13 | void check_error(cudaError_t status); 14 | cublasHandle_t blas_handle(); 15 | int *cuda_make_int_array(int *x, size_t n); 16 | void cuda_random(float *x_gpu, size_t n); 17 | float cuda_compare(float *x_gpu, float *x, size_t n, char *s); 18 | dim3 cuda_gridsize(size_t n); 19 | 20 | #ifdef CUDNN 21 | cudnnHandle_t cudnn_handle(); 22 | #endif 23 | 24 | #endif 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /darknet/include/data.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_H 2 | #define DATA_H 3 | #include 4 | 5 | #include "darknet.h" 6 | #include "image.h" 7 | #include "list.h" 8 | #include "matrix.h" 9 | #include "tree.h" 10 | 11 | #ifdef __cplusplus 12 | extern "C" 13 | { 14 | #endif 15 | 16 | static inline float distance_from_edge(int x, int max) 17 | { 18 | int dx = (max / 2) - x; 19 | if (dx < 0) dx = -dx; 20 | dx = (max / 2) + 1 - dx; 21 | dx *= 2; 22 | float dist = (float)dx / max; 23 | if (dist > 1) dist = 1; 24 | return dist; 25 | } 26 | void load_data_blocking(load_args args); 27 | 28 | void print_letters(float *pred, int n); 29 | data load_data_captcha(char **paths, int n, int m, int k, int w, int h); 30 | data load_data_captcha_encode(char **paths, int n, int m, int w, int h); 31 | data load_data_detection(int n, 32 | char **paths, 33 | int m, 34 | int w, 35 | int h, 36 | int boxes, 37 | int classes, 38 | float jitter, 39 | float hue, 40 | float saturation, 41 | float exposure); 42 | data load_data_tag(char **paths, 43 | int n, 44 | int m, 45 | int k, 46 | int min, 47 | int max, 48 | int size, 49 | float angle, 50 | float aspect, 51 | float hue, 52 | float saturation, 53 | float exposure); 54 | matrix load_image_augment_paths(char **paths, 55 | int n, 56 | int min, 57 | int max, 58 | int size, 59 | float angle, 60 | float aspect, 61 | float hue, 62 | float saturation, 63 | float exposure, 64 | int center); 65 | data load_data_super(char **paths, int n, int m, int w, int h, int scale); 66 | data load_data_augment(char **paths, 67 | int n, 68 | int m, 69 | char **labels, 70 | int k, 71 | tree *hierarchy, 72 | int min, 73 | int max, 74 | int size, 75 | float angle, 76 | float aspect, 77 | float hue, 78 | float saturation, 79 | float exposure, 80 | int center); 81 | data load_data_regression(char **paths, 82 | int n, 83 | int m, 84 | int classes, 85 | int min, 86 | int max, 87 | int size, 88 | float angle, 89 | float aspect, 90 | float hue, 91 | float saturation, 92 | float exposure); 93 | data load_go(char *filename); 94 | 95 | data load_data_writing( 96 | char **paths, int n, int m, int w, int h, int out_w, int out_h); 97 | 98 | void get_random_batch(data d, int n, float *X, float *y); 99 | data get_data_part(data d, int part, int total); 100 | data get_random_data(data d, int num); 101 | data load_categorical_data_csv(char *filename, int target, int k); 102 | void normalize_data_rows(data d); 103 | void scale_data_rows(data d, float s); 104 | void translate_data_rows(data d, float s); 105 | void randomize_data(data d); 106 | data *split_data(data d, int part, int total); 107 | data 
concat_datas(data *d, int n); 108 | void fill_truth(char *path, char **labels, int k, float *truth); 109 | 110 | #ifdef __cplusplus 111 | } 112 | #endif 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /darknet/include/deconvolutional_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DECONVOLUTIONAL_LAYER_H 2 | #define DECONVOLUTIONAL_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "cuda.h" 6 | #include "image.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | #ifdef __cplusplus 11 | extern "C" 12 | { 13 | #endif 14 | 15 | #ifdef DKGPU 16 | void forward_deconvolutional_layer_gpu(layer l, network net); 17 | void backward_deconvolutional_layer_gpu(layer l, network net); 18 | void update_deconvolutional_layer_gpu(layer l, update_args a); 19 | void push_deconvolutional_layer(layer l); 20 | void pull_deconvolutional_layer(layer l); 21 | #endif 22 | 23 | layer make_deconvolutional_layer(int batch, 24 | int h, 25 | int w, 26 | int c, 27 | int n, 28 | int size, 29 | int stride, 30 | int padding, 31 | ACTIVATION activation, 32 | int batch_normalize, 33 | int adam); 34 | void resize_deconvolutional_layer(layer *l, int h, int w); 35 | void forward_deconvolutional_layer(const layer l, network net); 36 | void update_deconvolutional_layer(layer l, update_args a); 37 | void backward_deconvolutional_layer(layer l, network net); 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /darknet/include/demo.h: -------------------------------------------------------------------------------- 1 | #ifndef DEMO_H 2 | #define DEMO_H 3 | 4 | #include "image.h" 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /darknet/include/detection_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DETECTION_LAYER_H 2 | #define DETECTION_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | 12 | typedef layer detection_layer; 13 | 14 | detection_layer make_detection_layer(int batch, 15 | int inputs, 16 | int n, 17 | int size, 18 | int classes, 19 | int coords, 20 | int rescore); 21 | void forward_detection_layer(const detection_layer l, network net); 22 | void backward_detection_layer(const detection_layer l, network net); 23 | 24 | #ifdef DKGPU 25 | void forward_detection_layer_gpu(const detection_layer l, network net); 26 | void backward_detection_layer_gpu(detection_layer l, network net); 27 | #endif 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /darknet/include/dropout_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DROPOUT_LAYER_H 2 | #define DROPOUT_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | 12 | typedef layer dropout_layer; 13 | 14 | dropout_layer make_dropout_layer(int batch, int inputs, float probability); 15 | 16 | void forward_dropout_layer(dropout_layer l, network net); 17 | void backward_dropout_layer(dropout_layer l, network net); 18 | void resize_dropout_layer(dropout_layer *l, int inputs); 19 | 20 | #ifdef DKGPU 21 | void forward_dropout_layer_gpu(dropout_layer l, network net); 22 | void 
backward_dropout_layer_gpu(dropout_layer l, network net); 23 | 24 | #endif 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /darknet/include/gemm.h: -------------------------------------------------------------------------------- 1 | #ifndef GEMM_H 2 | #define GEMM_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | 9 | void gemm_bin(int M, 10 | int N, 11 | int K, 12 | float ALPHA, 13 | char *A, 14 | int lda, 15 | float *B, 16 | int ldb, 17 | float *C, 18 | int ldc); 19 | 20 | void gemm(int TA, 21 | int TB, 22 | int M, 23 | int N, 24 | int K, 25 | float ALPHA, 26 | float *A, 27 | int lda, 28 | float *B, 29 | int ldb, 30 | float BETA, 31 | float *C, 32 | int ldc); 33 | 34 | void gemm_cpu(int TA, 35 | int TB, 36 | int M, 37 | int N, 38 | int K, 39 | float ALPHA, 40 | float *A, 41 | int lda, 42 | float *B, 43 | int ldb, 44 | float BETA, 45 | float *C, 46 | int ldc); 47 | 48 | #ifdef DKGPU 49 | void gemm_gpu(int TA, 50 | int TB, 51 | int M, 52 | int N, 53 | int K, 54 | float ALPHA, 55 | float *A_gpu, 56 | int lda, 57 | float *B_gpu, 58 | int ldb, 59 | float BETA, 60 | float *C_gpu, 61 | int ldc); 62 | 63 | void gemm_gpu(int TA, 64 | int TB, 65 | int M, 66 | int N, 67 | int K, 68 | float ALPHA, 69 | float *A, 70 | int lda, 71 | float *B, 72 | int ldb, 73 | float BETA, 74 | float *C, 75 | int ldc); 76 | #endif 77 | #ifdef __cplusplus 78 | } 79 | #endif 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /darknet/include/gru_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef GRU_LAYER_H 3 | #define GRU_LAYER_H 4 | 5 | #include "activations.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | #ifdef __cplusplus 10 | extern "C" 11 | { 12 | #endif 13 | 14 | layer make_gru_layer(int batch, 15 | int inputs, 16 | int outputs, 17 | int steps, 18 | int batch_normalize, 19 | int adam); 20 | 21 | void forward_gru_layer(layer l, network state); 22 | void backward_gru_layer(layer l, network state); 23 | void update_gru_layer(layer l, update_args a); 24 | 25 | #ifdef DKGPU 26 | void forward_gru_layer_gpu(layer l, network state); 27 | void backward_gru_layer_gpu(layer l, network state); 28 | void update_gru_layer_gpu(layer l, update_args a); 29 | void push_gru_layer(layer l); 30 | void pull_gru_layer(layer l); 31 | #endif 32 | 33 | #ifdef __cplusplus 34 | } 35 | #endif 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /darknet/include/im2col.h: -------------------------------------------------------------------------------- 1 | #ifndef IM2COL_H 2 | #define IM2COL_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | 9 | void im2col_cpu(float* data_im, 10 | int channels, 11 | int height, 12 | int width, 13 | int ksize, 14 | int stride, 15 | int pad, 16 | float* data_col); 17 | 18 | #ifdef DKGPU 19 | 20 | void im2col_gpu(float* im, 21 | int channels, 22 | int height, 23 | int width, 24 | int ksize, 25 | int stride, 26 | int pad, 27 | float* data_col); 28 | 29 | #endif 30 | 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | #endif 35 | -------------------------------------------------------------------------------- /darknet/include/image.h: -------------------------------------------------------------------------------- 1 | #ifndef IMAGE_H 2 | #define IMAGE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 
#include "box.h" 10 | #include "darknet.h" 11 | 12 | #ifdef __cplusplus 13 | extern "C" 14 | { 15 | #endif 16 | 17 | #ifndef __cplusplus 18 | #ifdef OPENCV 19 | int fill_image_from_stream(CvCapture *cap, image im); 20 | image ipl_to_image(IplImage *src); 21 | void ipl_into_image(IplImage *src, image im); 22 | void flush_stream_buffer(CvCapture *cap, int n); 23 | void show_image_cv(image p, const char *name, IplImage *disp); 24 | #endif 25 | #endif 26 | 27 | float get_color(int c, int x, int max); 28 | void draw_box( 29 | image a, int x1, int y1, int x2, int y2, float r, float g, float b); 30 | void draw_bbox(image a, box bbox, int w, float r, float g, float b); 31 | void write_label( 32 | image a, int r, int c, image *characters, char *string, float *rgb); 33 | image image_distance(image a, image b); 34 | void scale_image(image m, float s); 35 | image rotate_crop_image(image im, 36 | float rad, 37 | float s, 38 | int w, 39 | int h, 40 | float dx, 41 | float dy, 42 | float aspect); 43 | image random_crop_image(image im, int w, int h); 44 | image random_augment_image( 45 | image im, float angle, float aspect, int low, int high, int w, int h); 46 | augment_args random_augment_args( 47 | image im, float angle, float aspect, int low, int high, int w, int h); 48 | void letterbox_image_into(image im, int w, int h, image boxed); 49 | image resize_max(image im, int max); 50 | void translate_image(image m, float s); 51 | void embed_image(image source, image dest, int dx, int dy); 52 | void place_image(image im, int w, int h, int dx, int dy, image canvas); 53 | void saturate_image(image im, float sat); 54 | void exposure_image(image im, float sat); 55 | void distort_image(image im, float hue, float sat, float val); 56 | void saturate_exposure_image(image im, float sat, float exposure); 57 | void rgb_to_hsv(image im); 58 | void hsv_to_rgb(image im); 59 | void yuv_to_rgb(image im); 60 | void rgb_to_yuv(image im); 61 | 62 | image collapse_image_layers(image source, int border); 63 | image collapse_images_horz(image *ims, int n); 64 | image collapse_images_vert(image *ims, int n); 65 | 66 | void show_image_normalized(image im, const char *name); 67 | void show_images(image *ims, int n, char *window); 68 | void show_image_layers(image p, char *name); 69 | void show_image_collapsed(image p, char *name); 70 | 71 | void print_image(image m); 72 | 73 | image make_empty_image(int w, int h, int c); 74 | void copy_image_into(image src, image dest); 75 | 76 | image get_image_layer(image m, int l); 77 | 78 | #ifdef __cplusplus 79 | } 80 | #endif 81 | #endif 82 | -------------------------------------------------------------------------------- /darknet/include/l2norm_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef L2NORM_LAYER_H 2 | #define L2NORM_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" 8 | { 9 | #endif 10 | 11 | layer make_l2norm_layer(int batch, int inputs); 12 | void forward_l2norm_layer(const layer l, network net); 13 | void backward_l2norm_layer(const layer l, network net); 14 | 15 | #ifdef DKGPU 16 | void forward_l2norm_layer_gpu(const layer l, network net); 17 | void backward_l2norm_layer_gpu(const layer l, network net); 18 | #endif 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /darknet/include/layer.h: -------------------------------------------------------------------------------- 1 | #include 
"darknet.h" 2 | -------------------------------------------------------------------------------- /darknet/include/list.h: -------------------------------------------------------------------------------- 1 | #ifndef LIST_H 2 | #define LIST_H 3 | #include "darknet.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" 7 | { 8 | #endif 9 | 10 | list *make_list(); 11 | int list_find(list *l, void *val); 12 | 13 | void list_insert(list *, void *); 14 | 15 | void free_list_contents(list *l); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /darknet/include/local_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LOCAL_LAYER_H 2 | #define LOCAL_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "cuda.h" 6 | #include "image.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | #ifdef __cplusplus 11 | extern "C" 12 | { 13 | #endif 14 | 15 | typedef layer local_layer; 16 | 17 | #ifdef DKGPU 18 | void forward_local_layer_gpu(local_layer layer, network net); 19 | void backward_local_layer_gpu(local_layer layer, network net); 20 | void update_local_layer_gpu(local_layer layer, update_args a); 21 | 22 | void push_local_layer(local_layer layer); 23 | void pull_local_layer(local_layer layer); 24 | #endif 25 | 26 | local_layer make_local_layer(int batch, 27 | int h, 28 | int w, 29 | int c, 30 | int n, 31 | int size, 32 | int stride, 33 | int pad, 34 | ACTIVATION activation); 35 | 36 | void forward_local_layer(const local_layer layer, network net); 37 | void backward_local_layer(local_layer layer, network net); 38 | void update_local_layer(local_layer layer, update_args a); 39 | 40 | void bias_output(float *output, float *biases, int batch, int n, int size); 41 | void backward_bias( 42 | float *bias_updates, float *delta, int batch, int n, int size); 43 | 44 | #ifdef __cplusplus 45 | } 46 | #endif 47 | #endif 48 | -------------------------------------------------------------------------------- /darknet/include/logistic_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGISTIC_LAYER_H 2 | #define LOGISTIC_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" 8 | { 9 | #endif 10 | 11 | layer make_logistic_layer(int batch, int inputs); 12 | void forward_logistic_layer(const layer l, network net); 13 | void backward_logistic_layer(const layer l, network net); 14 | 15 | #ifdef DKGPU 16 | void forward_logistic_layer_gpu(const layer l, network net); 17 | void backward_logistic_layer_gpu(const layer l, network net); 18 | #endif 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /darknet/include/lstm_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LSTM_LAYER_H 2 | #define LSTM_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | #define USET 8 | 9 | #ifdef __cplusplus 10 | extern "C" 11 | { 12 | #endif 13 | 14 | layer make_lstm_layer(int batch, 15 | int inputs, 16 | int outputs, 17 | int steps, 18 | int batch_normalize, 19 | int adam); 20 | 21 | void forward_lstm_layer(layer l, network net); 22 | void update_lstm_layer(layer l, update_args a); 23 | 24 | #ifdef DKGPU 25 | void forward_lstm_layer_gpu(layer l, network net); 26 | void backward_lstm_layer_gpu(layer l, network net); 27 | void 
update_lstm_layer_gpu(layer l, update_args a); 28 | 29 | #endif 30 | #ifdef __cplusplus 31 | } 32 | #endif 33 | #endif 34 | -------------------------------------------------------------------------------- /darknet/include/matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_H 2 | #define MATRIX_H 3 | #include "darknet.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" 7 | { 8 | #endif 9 | matrix copy_matrix(matrix m); 10 | void print_matrix(matrix m); 11 | 12 | matrix hold_out_matrix(matrix *m, int n); 13 | matrix resize_matrix(matrix m, int size); 14 | 15 | float *pop_column(matrix *m, int c); 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | #endif 20 | -------------------------------------------------------------------------------- /darknet/include/maxpool_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef MAXPOOL_LAYER_H 2 | #define MAXPOOL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | typedef layer maxpool_layer; 13 | 14 | image get_maxpool_image(maxpool_layer l); 15 | maxpool_layer make_maxpool_layer( 16 | int batch, int h, int w, int c, int size, int stride, int padding); 17 | void resize_maxpool_layer(maxpool_layer *l, int w, int h); 18 | void forward_maxpool_layer(const maxpool_layer l, network net); 19 | void backward_maxpool_layer(const maxpool_layer l, network net); 20 | 21 | #ifdef DKGPU 22 | void forward_maxpool_layer_gpu(maxpool_layer l, network net); 23 | void backward_maxpool_layer_gpu(maxpool_layer l, network net); 24 | #endif 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | #endif 29 | -------------------------------------------------------------------------------- /darknet/include/network.h: -------------------------------------------------------------------------------- 1 | // Oh boy, why am I about to do this.... 
2 | #ifndef NETWORK_H 3 | #define NETWORK_H 4 | #include "darknet.h" 5 | 6 | #include "data.h" 7 | #include "image.h" 8 | #include "layer.h" 9 | #include "tree.h" 10 | #ifdef __cplusplus 11 | extern "C" 12 | { 13 | #endif 14 | #ifdef DKGPU 15 | void pull_network_output(network *net); 16 | #endif 17 | 18 | void compare_networks(network *n1, network *n2, data d); 19 | char *get_layer_string(LAYER_TYPE a); 20 | 21 | network *make_network(int n); 22 | 23 | float network_accuracy_multi(network *net, data d, int n); 24 | int get_predicted_class_network(network *net); 25 | void print_network(network *net); 26 | int resize_network(network *net, int w, int h); 27 | void calc_network_cost(network *net); 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | #endif 32 | -------------------------------------------------------------------------------- /darknet/include/normalization_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef NORMALIZATION_LAYER_H 2 | #define NORMALIZATION_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | layer make_normalization_layer(int batch, 12 | int w, 13 | int h, 14 | int c, 15 | int size, 16 | float alpha, 17 | float beta, 18 | float kappa); 19 | void resize_normalization_layer(layer *layer, int h, int w); 20 | void forward_normalization_layer(const layer layer, network net); 21 | void backward_normalization_layer(const layer layer, network net); 22 | void visualize_normalization_layer(layer layer, char *window); 23 | 24 | #ifdef DKGPU 25 | void forward_normalization_layer_gpu(const layer layer, network net); 26 | void backward_normalization_layer_gpu(const layer layer, network net); 27 | #endif 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | #endif 32 | -------------------------------------------------------------------------------- /darknet/include/option_list.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTION_LIST_H 2 | #define OPTION_LIST_H 3 | #include "list.h" 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | typedef struct 9 | { 10 | char *key; 11 | char *val; 12 | int used; 13 | } kvp; 14 | 15 | int read_option(char *s, list *options); 16 | void option_insert(list *l, char *key, char *val); 17 | char *option_find(list *l, char *key); 18 | float option_find_float(list *l, char *key, float def); 19 | float option_find_float_quiet(list *l, char *key, float def); 20 | void option_unused(list *l); 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /darknet/include/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_H 2 | #define PARSER_H 3 | #include "darknet.h" 4 | #include "network.h" 5 | #ifdef __cplusplus 6 | extern "C" 7 | { 8 | #endif 9 | void save_network(network net, char *filename); 10 | void save_weights_double(network net, char *filename); 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | #endif 15 | -------------------------------------------------------------------------------- /darknet/include/region_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef REGION_LAYER_H 2 | #define REGION_LAYER_H 3 | 4 | #include "darknet.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | layer make_region_layer( 12 | int batch, 
int w, int h, int n, int classes, int coords); 13 | void forward_region_layer(const layer l, network net); 14 | void backward_region_layer(const layer l, network net); 15 | void resize_region_layer(layer *l, int w, int h); 16 | 17 | #ifdef DKGPU 18 | void forward_region_layer_gpu(const layer l, network net); 19 | void backward_region_layer_gpu(layer l, network net); 20 | #endif 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /darknet/include/reorg_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef REORG_LAYER_H 2 | #define REORG_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | layer make_reorg_layer(int batch, 13 | int w, 14 | int h, 15 | int c, 16 | int stride, 17 | int reverse, 18 | int flatten, 19 | int extra); 20 | void resize_reorg_layer(layer *l, int w, int h); 21 | void forward_reorg_layer(const layer l, network net); 22 | void backward_reorg_layer(const layer l, network net); 23 | 24 | #ifdef DKGPU 25 | void forward_reorg_layer_gpu(layer l, network net); 26 | void backward_reorg_layer_gpu(layer l, network net); 27 | #endif 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | #endif 32 | -------------------------------------------------------------------------------- /darknet/include/rnn_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef RNN_LAYER_H 3 | #define RNN_LAYER_H 4 | 5 | #include "activations.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | #define USET 9 | #ifdef __cplusplus 10 | extern "C" 11 | { 12 | #endif 13 | layer make_rnn_layer(int batch, 14 | int inputs, 15 | int outputs, 16 | int steps, 17 | ACTIVATION activation, 18 | int batch_normalize, 19 | int adam); 20 | 21 | void forward_rnn_layer(layer l, network net); 22 | void backward_rnn_layer(layer l, network net); 23 | void update_rnn_layer(layer l, update_args a); 24 | 25 | #ifdef DKGPU 26 | void forward_rnn_layer_gpu(layer l, network net); 27 | void backward_rnn_layer_gpu(layer l, network net); 28 | void update_rnn_layer_gpu(layer l, update_args a); 29 | void push_rnn_layer(layer l); 30 | void pull_rnn_layer(layer l); 31 | #endif 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | #endif 36 | -------------------------------------------------------------------------------- /darknet/include/route_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef ROUTE_LAYER_H 2 | #define ROUTE_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | #ifdef __cplusplus 6 | extern "C" 7 | { 8 | #endif 9 | typedef layer route_layer; 10 | 11 | route_layer make_route_layer(int batch, 12 | int n, 13 | int *input_layers, 14 | int *input_size); 15 | void forward_route_layer(const route_layer l, network net); 16 | void backward_route_layer(const route_layer l, network net); 17 | void resize_route_layer(route_layer *l, network *net); 18 | 19 | #ifdef DKGPU 20 | void forward_route_layer_gpu(const route_layer l, network net); 21 | void backward_route_layer_gpu(const route_layer l, network net); 22 | #endif 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /darknet/include/shortcut_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef 
SHORTCUT_LAYER_H 2 | #define SHORTCUT_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | #ifdef __cplusplus 7 | extern "C" 8 | { 9 | #endif 10 | layer make_shortcut_layer( 11 | int batch, int index, int w, int h, int c, int w2, int h2, int c2); 12 | void forward_shortcut_layer(const layer l, network net); 13 | void backward_shortcut_layer(const layer l, network net); 14 | void resize_shortcut_layer(layer *l, int w, int h); 15 | 16 | #ifdef DKGPU 17 | void forward_shortcut_layer_gpu(const layer l, network net); 18 | void backward_shortcut_layer_gpu(const layer l, network net); 19 | #endif 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /darknet/include/softmax_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef SOFTMAX_LAYER_H 2 | #define SOFTMAX_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | #ifdef __cplusplus 6 | extern "C" 7 | { 8 | #endif 9 | typedef layer softmax_layer; 10 | 11 | void softmax_array(float *input, int n, float temp, float *output); 12 | softmax_layer make_softmax_layer(int batch, int inputs, int groups); 13 | void forward_softmax_layer(const softmax_layer l, network net); 14 | void backward_softmax_layer(const softmax_layer l, network net); 15 | 16 | #ifdef DKGPU 17 | void pull_softmax_layer_output(const softmax_layer l); 18 | void forward_softmax_layer_gpu(const softmax_layer l, network net); 19 | void backward_softmax_layer_gpu(const softmax_layer l, network net); 20 | #endif 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /darknet/include/tree.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_H 2 | #define TREE_H 3 | #include "darknet.h" 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | int hierarchy_top_prediction(float *predictions, 9 | tree *hier, 10 | float thresh, 11 | int stride); 12 | float get_hierarchy_probability(float *x, tree *hier, int c, int stride); 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | #endif 17 | -------------------------------------------------------------------------------- /darknet/include/upsample_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef UPSAMPLE_LAYER_H 2 | #define UPSAMPLE_LAYER_H 3 | #include "darknet.h" 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | layer make_upsample_layer(int batch, int w, int h, int c, int stride); 9 | void forward_upsample_layer(const layer l, network net); 10 | void backward_upsample_layer(const layer l, network net); 11 | void resize_upsample_layer(layer *l, int w, int h); 12 | 13 | #ifdef DKGPU 14 | void forward_upsample_layer_gpu(const layer l, network net); 15 | void backward_upsample_layer_gpu(const layer l, network net); 16 | #endif 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /darknet/include/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | #include 4 | #include 5 | #include "darknet.h" 6 | #include "list.h" 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | #define TIME(a) \ 12 | do \ 13 | { \ 14 | double start = what_time_is_it_now(); \ 15 | a; \ 16 | printf("%s took: %f seconds\n", #a, what_time_is_it_now() - start); \ 17 | } 
while (0) 18 | 19 | #define TWO_PI 6.2831853071795864769252866f 20 | 21 | double what_time_is_it_now(); 22 | void shuffle(void *arr, size_t n, size_t size); 23 | void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); 24 | void free_ptrs(void **ptrs, int n); 25 | int alphanum_to_int(char c); 26 | char int_to_alphanum(int i); 27 | int read_int(int fd); 28 | void write_int(int fd, int n); 29 | void read_all(int fd, char *buffer, size_t bytes); 30 | void write_all(int fd, char *buffer, size_t bytes); 31 | int read_all_fail(int fd, char *buffer, size_t bytes); 32 | int write_all_fail(int fd, char *buffer, size_t bytes); 33 | void find_replace(char *str, char *orig, char *rep, char *output); 34 | void malloc_error(); 35 | void file_error(char *s); 36 | void strip(char *s); 37 | void strip_char(char *s, char bad); 38 | list *split_str(char *s, char delim); 39 | char *fgetl(FILE *fp); 40 | list *parse_csv_line(char *line); 41 | char *copy_string(char *s); 42 | int count_fields(char *line); 43 | float *parse_fields(char *line, int n); 44 | void translate_array(float *a, int n, float s); 45 | float constrain(float min, float max, float a); 46 | int constrain_int(int a, int min, int max); 47 | float rand_scale(float s); 48 | int rand_int(int min, int max); 49 | void mean_arrays(float **a, int n, int els, float *avg); 50 | float dist_array(float *a, float *b, int n, int sub); 51 | float **one_hot_encode(float *a, int n, int k); 52 | float sec(clock_t clocks); 53 | void print_statistics(float *a, int n); 54 | int int_index(int *a, int val, int n); 55 | #ifdef __cplusplus 56 | } 57 | #endif 58 | #endif 59 | -------------------------------------------------------------------------------- /darknet/include/yolo_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef YOLO_LAYER_H 2 | #define YOLO_LAYER_H 3 | 4 | #include "darknet.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | layer make_yolo_layer( 14 | int batch, int w, int h, int n, int total, int *mask, int classes); 15 | void forward_yolo_layer(const layer l, network net); 16 | void backward_yolo_layer(const layer l, network net); 17 | void resize_yolo_layer(layer *l, int w, int h); 18 | int yolo_num_detections(layer l, float thresh); 19 | 20 | #ifdef DKGPU 21 | void forward_yolo_layer_gpu(const layer l, network net); 22 | void backward_yolo_layer_gpu(layer l, network net); 23 | #endif 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /darknet/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(MODULENAME ${PROJECT_NAME}_core) 4 | 5 | file(GLOB YOLO_SOURCE_FILES *.cpp) 6 | file(GLOB CUDA_SOURCE_FILES *.cu) 7 | 8 | set(DARKNET_CORE_LIBNAME ${MODULENAME} CACHE INTERNAL "${MODULENAME}: core library" FORCE) 9 | set(DARKNET_CUDA_LIBNAME ${MODULENAME}_cuda CACHE INTERNAL "${MODULENAME}: cuda library" FORCE) 10 | set(YOLO_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR} CACHE INTERNAL "${MODULENAME}: Include Directories" FORCE) 11 | 12 | CUDA_ADD_LIBRARY( 13 | ${DARKNET_CUDA_LIBNAME} SHARED 14 | ${CUDA_SOURCE_FILES}) 15 | 16 | add_library(${DARKNET_CORE_LIBNAME} SHARED ${YOLO_SOURCE_FILES}) 17 | target_link_libraries( 18 | ${DARKNET_CORE_LIBNAME} 19 | ${DARKNET_CUDA_LIBNAME} 20 | ${OpenCV_LIBRARIES} 21 | ${CUDA_LIBRARIES} 22 | 
${CUDA_CUBLAS_LIBRARIES} 23 | ${CUDA_curand_LIBRARY} 24 | ${CMAKE_THREAD_LIBS_INIT}) 25 | -------------------------------------------------------------------------------- /darknet/src/activation_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "activations.h" 7 | #include "cuda.h" 8 | } 9 | 10 | 11 | __device__ float lhtan_activate_kernel(float x) 12 | { 13 | if(x < 0) return .001f*x; 14 | if(x > 1) return .001f*(x-1.f) + 1.f; 15 | return x; 16 | } 17 | __device__ float lhtan_gradient_kernel(float x) 18 | { 19 | if(x > 0 && x < 1) return 1; 20 | return .001; 21 | } 22 | 23 | __device__ float hardtan_activate_kernel(float x) 24 | { 25 | if (x < -1) return -1; 26 | if (x > 1) return 1; 27 | return x; 28 | } 29 | __device__ float linear_activate_kernel(float x){return x;} 30 | __device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} 31 | __device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;} 32 | __device__ float relu_activate_kernel(float x){return x*(x>0);} 33 | __device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} 34 | __device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} 35 | __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} 36 | __device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;} 37 | __device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);} 38 | __device__ float plse_activate_kernel(float x) 39 | { 40 | if(x < -4) return .01f * (x + 4); 41 | if(x > 4) return .01f * (x - 4) + 1; 42 | return .125f*x + .5f; 43 | } 44 | __device__ float stair_activate_kernel(float x) 45 | { 46 | int n = floorf(x); 47 | if (n%2 == 0) return floorf(x/2); 48 | else return (x - n) + floorf(x/2); 49 | } 50 | 51 | 52 | __device__ float hardtan_gradient_kernel(float x) 53 | { 54 | if (x > -1 && x < 1) return 1; 55 | return 0; 56 | } 57 | __device__ float linear_gradient_kernel(float x){return 1;} 58 | __device__ float logistic_gradient_kernel(float x){return (1-x)*x;} 59 | __device__ float loggy_gradient_kernel(float x) 60 | { 61 | float y = (x+1)/2; 62 | return 2*(1-y)*y; 63 | } 64 | __device__ float relu_gradient_kernel(float x){return (x>0);} 65 | __device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} 66 | __device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;} 67 | __device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;} 68 | __device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;} 69 | __device__ float tanh_gradient_kernel(float x){return 1-x*x;} 70 | __device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
.01f : .125f;} 71 | __device__ float stair_gradient_kernel(float x) 72 | { 73 | if (floorf(x) == x) return 0; 74 | return 1; 75 | } 76 | 77 | __device__ float activate_kernel(float x, ACTIVATION a) 78 | { 79 | switch(a){ 80 | case LINEAR: 81 | return linear_activate_kernel(x); 82 | case LOGISTIC: 83 | return logistic_activate_kernel(x); 84 | case LOGGY: 85 | return loggy_activate_kernel(x); 86 | case RELU: 87 | return relu_activate_kernel(x); 88 | case ELU: 89 | return elu_activate_kernel(x); 90 | case RELIE: 91 | return relie_activate_kernel(x); 92 | case RAMP: 93 | return ramp_activate_kernel(x); 94 | case LEAKY: 95 | return leaky_activate_kernel(x); 96 | case TANH: 97 | return tanh_activate_kernel(x); 98 | case PLSE: 99 | return plse_activate_kernel(x); 100 | case STAIR: 101 | return stair_activate_kernel(x); 102 | case HARDTAN: 103 | return hardtan_activate_kernel(x); 104 | case LHTAN: 105 | return lhtan_activate_kernel(x); 106 | } 107 | return 0; 108 | } 109 | 110 | __device__ float gradient_kernel(float x, ACTIVATION a) 111 | { 112 | switch(a){ 113 | case LINEAR: 114 | return linear_gradient_kernel(x); 115 | case LOGISTIC: 116 | return logistic_gradient_kernel(x); 117 | case LOGGY: 118 | return loggy_gradient_kernel(x); 119 | case RELU: 120 | return relu_gradient_kernel(x); 121 | case ELU: 122 | return elu_gradient_kernel(x); 123 | case RELIE: 124 | return relie_gradient_kernel(x); 125 | case RAMP: 126 | return ramp_gradient_kernel(x); 127 | case LEAKY: 128 | return leaky_gradient_kernel(x); 129 | case TANH: 130 | return tanh_gradient_kernel(x); 131 | case PLSE: 132 | return plse_gradient_kernel(x); 133 | case STAIR: 134 | return stair_gradient_kernel(x); 135 | case HARDTAN: 136 | return hardtan_gradient_kernel(x); 137 | case LHTAN: 138 | return lhtan_gradient_kernel(x); 139 | } 140 | return 0; 141 | } 142 | 143 | __global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx) 144 | { 145 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 146 | int i = id % s; 147 | int b = id / s; 148 | float x1 = x[b*s + i]; 149 | float x2 = x[b*s + s/2 + i]; 150 | if(id < n) { 151 | float de = dy[id]; 152 | dx[b*s + i] = x2*de; 153 | dx[b*s + s/2 + i] = x1*de; 154 | } 155 | } 156 | 157 | extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y) 158 | { 159 | binary_gradient_array_kernel<<>>(x, dx, n/2, size, a, y); 160 | check_error(cudaPeekAtLastError()); 161 | } 162 | __global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y) 163 | { 164 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 165 | int i = id % s; 166 | int b = id / s; 167 | float x1 = x[b*s + i]; 168 | float x2 = x[b*s + s/2 + i]; 169 | if(id < n) y[id] = x1*x2; 170 | } 171 | 172 | extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y) 173 | { 174 | binary_activate_array_kernel<<>>(x, n/2, size, a, y); 175 | check_error(cudaPeekAtLastError()); 176 | } 177 | 178 | __global__ void activate_array_kernel(float *x, int n, ACTIVATION a) 179 | { 180 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 181 | if(i < n) x[i] = activate_kernel(x[i], a); 182 | } 183 | 184 | __global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) 185 | { 186 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 187 | if(i < n) delta[i] *= gradient_kernel(x[i], a); 
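    // Note (added): the multiply above applies the chain rule in place: delta[i] arrives
    // holding dL/d(output) and is scaled by the activation gradient. For most darknet
    // activations (logistic, tanh, loggy, ...) gradient_kernel expects x[i] to already be
    // the activated output, not the pre-activation input.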
188 | } 189 | 190 | extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a) 191 | { 192 | activate_array_kernel<<>>(x, n, a); 193 | check_error(cudaPeekAtLastError()); 194 | } 195 | 196 | extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta) 197 | { 198 | gradient_array_kernel<<>>(x, n, a, delta); 199 | check_error(cudaPeekAtLastError()); 200 | } 201 | -------------------------------------------------------------------------------- /darknet/src/activation_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "activation_layer.h" 2 | #include "utils.h" 3 | #include "cuda.h" 4 | #include "blas.h" 5 | #include "gemm.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | layer make_activation_layer(int batch, int inputs, ACTIVATION activation) 13 | { 14 | layer l; 15 | l.type = ACTIVE; 16 | 17 | l.inputs = inputs; 18 | l.outputs = inputs; 19 | l.batch = batch; 20 | 21 | l.output = (float*)calloc(batch * inputs, sizeof(float*)); 22 | l.delta = (float*)calloc(batch * inputs, sizeof(float*)); 23 | 24 | l.forward = forward_activation_layer; 25 | l.backward = backward_activation_layer; 26 | #ifdef DKGPU 27 | l.forward_gpu = forward_activation_layer_gpu; 28 | l.backward_gpu = backward_activation_layer_gpu; 29 | 30 | l.output_gpu = cuda_make_array(l.output, inputs * batch); 31 | l.delta_gpu = cuda_make_array(l.delta, inputs * batch); 32 | #endif 33 | l.activation = activation; 34 | fprintf(stderr, "Activation Layer: %d inputs\n", inputs); 35 | return l; 36 | } 37 | 38 | void forward_activation_layer(layer l, network net) 39 | { 40 | copy_cpu(l.outputs * l.batch, net.input, 1, l.output, 1); 41 | activate_array(l.output, l.outputs * l.batch, l.activation); 42 | } 43 | 44 | void backward_activation_layer(layer l, network net) 45 | { 46 | gradient_array(l.output, l.outputs * l.batch, l.activation, l.delta); 47 | copy_cpu(l.outputs * l.batch, l.delta, 1, net.delta, 1); 48 | } 49 | 50 | #ifdef DKGPU 51 | 52 | void forward_activation_layer_gpu(layer l, network net) 53 | { 54 | copy_gpu(l.outputs * l.batch, net.input_gpu, 1, l.output_gpu, 1); 55 | activate_array_gpu(l.output_gpu, l.outputs * l.batch, l.activation); 56 | } 57 | 58 | void backward_activation_layer_gpu(layer l, network net) 59 | { 60 | gradient_array_gpu( 61 | l.output_gpu, l.outputs * l.batch, l.activation, l.delta_gpu); 62 | copy_gpu(l.outputs * l.batch, l.delta_gpu, 1, net.delta_gpu, 1); 63 | } 64 | #endif 65 | -------------------------------------------------------------------------------- /darknet/src/activations.cpp: -------------------------------------------------------------------------------- 1 | #include "activations.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | char *get_activation_string(ACTIVATION a) 9 | { 10 | switch(a){ 11 | case LOGISTIC: 12 | return "logistic"; 13 | case LOGGY: 14 | return "loggy"; 15 | case RELU: 16 | return "relu"; 17 | case ELU: 18 | return "elu"; 19 | case RELIE: 20 | return "relie"; 21 | case RAMP: 22 | return "ramp"; 23 | case LINEAR: 24 | return "linear"; 25 | case TANH: 26 | return "tanh"; 27 | case PLSE: 28 | return "plse"; 29 | case LEAKY: 30 | return "leaky"; 31 | case STAIR: 32 | return "stair"; 33 | case HARDTAN: 34 | return "hardtan"; 35 | case LHTAN: 36 | return "lhtan"; 37 | default: 38 | break; 39 | } 40 | return "relu"; 41 | } 42 | 43 | ACTIVATION get_activation(char *s) 44 | { 45 | if (strcmp(s, "logistic")==0) return LOGISTIC; 46 | if (strcmp(s, "loggy")==0) return 
LOGGY; 47 | if (strcmp(s, "relu")==0) return RELU; 48 | if (strcmp(s, "elu")==0) return ELU; 49 | if (strcmp(s, "relie")==0) return RELIE; 50 | if (strcmp(s, "plse")==0) return PLSE; 51 | if (strcmp(s, "hardtan")==0) return HARDTAN; 52 | if (strcmp(s, "lhtan")==0) return LHTAN; 53 | if (strcmp(s, "linear")==0) return LINEAR; 54 | if (strcmp(s, "ramp")==0) return RAMP; 55 | if (strcmp(s, "leaky")==0) return LEAKY; 56 | if (strcmp(s, "tanh")==0) return TANH; 57 | if (strcmp(s, "stair")==0) return STAIR; 58 | fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); 59 | return RELU; 60 | } 61 | 62 | float activate(float x, ACTIVATION a) 63 | { 64 | switch(a){ 65 | case LINEAR: 66 | return linear_activate(x); 67 | case LOGISTIC: 68 | return logistic_activate(x); 69 | case LOGGY: 70 | return loggy_activate(x); 71 | case RELU: 72 | return relu_activate(x); 73 | case ELU: 74 | return elu_activate(x); 75 | case RELIE: 76 | return relie_activate(x); 77 | case RAMP: 78 | return ramp_activate(x); 79 | case LEAKY: 80 | return leaky_activate(x); 81 | case TANH: 82 | return tanh_activate(x); 83 | case PLSE: 84 | return plse_activate(x); 85 | case STAIR: 86 | return stair_activate(x); 87 | case HARDTAN: 88 | return hardtan_activate(x); 89 | case LHTAN: 90 | return lhtan_activate(x); 91 | } 92 | return 0; 93 | } 94 | 95 | void activate_array(float *x, const int n, const ACTIVATION a) 96 | { 97 | int i; 98 | for(i = 0; i < n; ++i){ 99 | x[i] = activate(x[i], a); 100 | } 101 | } 102 | 103 | float gradient(float x, ACTIVATION a) 104 | { 105 | switch(a){ 106 | case LINEAR: 107 | return linear_gradient(x); 108 | case LOGISTIC: 109 | return logistic_gradient(x); 110 | case LOGGY: 111 | return loggy_gradient(x); 112 | case RELU: 113 | return relu_gradient(x); 114 | case ELU: 115 | return elu_gradient(x); 116 | case RELIE: 117 | return relie_gradient(x); 118 | case RAMP: 119 | return ramp_gradient(x); 120 | case LEAKY: 121 | return leaky_gradient(x); 122 | case TANH: 123 | return tanh_gradient(x); 124 | case PLSE: 125 | return plse_gradient(x); 126 | case STAIR: 127 | return stair_gradient(x); 128 | case HARDTAN: 129 | return hardtan_gradient(x); 130 | case LHTAN: 131 | return lhtan_gradient(x); 132 | } 133 | return 0; 134 | } 135 | 136 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) 137 | { 138 | int i; 139 | for(i = 0; i < n; ++i){ 140 | delta[i] *= gradient(x[i], a); 141 | } 142 | } 143 | 144 | -------------------------------------------------------------------------------- /darknet/src/avgpool_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "avgpool_layer.h" 2 | #include 3 | #include "cuda.h" 4 | 5 | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) 6 | { 7 | fprintf( 8 | stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); 9 | avgpool_layer l; 10 | l.type = AVGPOOL; 11 | l.batch = batch; 12 | l.h = h; 13 | l.w = w; 14 | l.c = c; 15 | l.out_w = 1; 16 | l.out_h = 1; 17 | l.out_c = c; 18 | l.outputs = l.out_c; 19 | l.inputs = h * w * c; 20 | int output_size = l.outputs * batch; 21 | l.output = (float *)calloc(output_size, sizeof(float)); 22 | l.delta = (float *)calloc(output_size, sizeof(float)); 23 | l.forward = forward_avgpool_layer; 24 | l.backward = backward_avgpool_layer; 25 | #ifdef DKGPU 26 | l.forward_gpu = forward_avgpool_layer_gpu; 27 | l.backward_gpu = backward_avgpool_layer_gpu; 28 | l.output_gpu = cuda_make_array(l.output, output_size); 29 | l.delta_gpu = 
cuda_make_array(l.delta, output_size); 30 | #endif 31 | return l; 32 | } 33 | 34 | void resize_avgpool_layer(avgpool_layer *l, int w, int h) 35 | { 36 | l->w = w; 37 | l->h = h; 38 | l->inputs = h * w * l->c; 39 | } 40 | 41 | void forward_avgpool_layer(const avgpool_layer l, network net) 42 | { 43 | int b, i, k; 44 | 45 | for (b = 0; b < l.batch; ++b) 46 | { 47 | for (k = 0; k < l.c; ++k) 48 | { 49 | int out_index = k + b * l.c; 50 | l.output[out_index] = 0; 51 | for (i = 0; i < l.h * l.w; ++i) 52 | { 53 | int in_index = i + l.h * l.w * (k + b * l.c); 54 | l.output[out_index] += net.input[in_index]; 55 | } 56 | l.output[out_index] /= l.h * l.w; 57 | } 58 | } 59 | } 60 | 61 | void backward_avgpool_layer(const avgpool_layer l, network net) 62 | { 63 | int b, i, k; 64 | 65 | for (b = 0; b < l.batch; ++b) 66 | { 67 | for (k = 0; k < l.c; ++k) 68 | { 69 | int out_index = k + b * l.c; 70 | for (i = 0; i < l.h * l.w; ++i) 71 | { 72 | int in_index = i + l.h * l.w * (k + b * l.c); 73 | net.delta[in_index] += l.delta[out_index] / (l.h * l.w); 74 | } 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /darknet/src/avgpool_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "avgpool_layer.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) 11 | { 12 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 13 | if(id >= n) return; 14 | 15 | int k = id % c; 16 | id /= c; 17 | int b = id; 18 | 19 | int i; 20 | int out_index = (k + c*b); 21 | output[out_index] = 0; 22 | for(i = 0; i < w*h; ++i){ 23 | int in_index = i + h*w*(k + b*c); 24 | output[out_index] += input[in_index]; 25 | } 26 | output[out_index] /= w*h; 27 | } 28 | 29 | __global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) 30 | { 31 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 32 | if(id >= n) return; 33 | 34 | int k = id % c; 35 | id /= c; 36 | int b = id; 37 | 38 | int i; 39 | int out_index = (k + c*b); 40 | for(i = 0; i < w*h; ++i){ 41 | int in_index = i + h*w*(k + b*c); 42 | in_delta[in_index] += out_delta[out_index] / (w*h); 43 | } 44 | } 45 | 46 | extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net) 47 | { 48 | size_t n = layer.c*layer.batch; 49 | 50 | forward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu); 51 | check_error(cudaPeekAtLastError()); 52 | } 53 | 54 | extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network net) 55 | { 56 | size_t n = layer.c*layer.batch; 57 | 58 | backward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.delta_gpu, layer.delta_gpu); 59 | check_error(cudaPeekAtLastError()); 60 | } 61 | 62 | -------------------------------------------------------------------------------- /darknet/src/box.cpp: -------------------------------------------------------------------------------- 1 | #include "box.h" 2 | #include 3 | #include 4 | #include 5 | 6 | int nms_comparator(const void *pa, const void *pb) 7 | { 8 | detection a = *(detection *)pa; 9 | detection b = *(detection *)pb; 10 | float diff = 0; 11 | if (b.sort_class >= 0) 12 | { 13 | diff = a.prob[b.sort_class] - b.prob[b.sort_class]; 14 | } 15 | else 16 | { 17 | diff = 
a.objectness - b.objectness; 18 | } 19 | if (diff < 0) 20 | return 1; 21 | else if (diff > 0) 22 | return -1; 23 | return 0; 24 | } 25 | 26 | void do_nms_obj(detection *dets, int total, int classes, float thresh) 27 | { 28 | int i, j, k; 29 | k = total - 1; 30 | for (i = 0; i <= k; ++i) 31 | { 32 | if (dets[i].objectness == 0) 33 | { 34 | detection swap = dets[i]; 35 | dets[i] = dets[k]; 36 | dets[k] = swap; 37 | --k; 38 | --i; 39 | } 40 | } 41 | total = k + 1; 42 | 43 | for (i = 0; i < total; ++i) 44 | { 45 | dets[i].sort_class = -1; 46 | } 47 | 48 | qsort(dets, total, sizeof(detection), nms_comparator); 49 | for (i = 0; i < total; ++i) 50 | { 51 | if (dets[i].objectness == 0) continue; 52 | box a = dets[i].bbox; 53 | for (j = i + 1; j < total; ++j) 54 | { 55 | if (dets[j].objectness == 0) continue; 56 | box b = dets[j].bbox; 57 | if (box_iou(a, b) > thresh) 58 | { 59 | dets[j].objectness = 0; 60 | for (k = 0; k < classes; ++k) 61 | { 62 | dets[j].prob[k] = 0; 63 | } 64 | } 65 | } 66 | } 67 | } 68 | 69 | void do_nms_sort(detection *dets, int total, int classes, float thresh) 70 | { 71 | int i, j, k; 72 | k = total - 1; 73 | for (i = 0; i <= k; ++i) 74 | { 75 | if (dets[i].objectness == 0) 76 | { 77 | detection swap = dets[i]; 78 | dets[i] = dets[k]; 79 | dets[k] = swap; 80 | --k; 81 | --i; 82 | } 83 | } 84 | total = k + 1; 85 | 86 | for (k = 0; k < classes; ++k) 87 | { 88 | for (i = 0; i < total; ++i) 89 | { 90 | dets[i].sort_class = k; 91 | } 92 | qsort(dets, total, sizeof(detection), nms_comparator); 93 | for (i = 0; i < total; ++i) 94 | { 95 | if (dets[i].prob[k] == 0) continue; 96 | box a = dets[i].bbox; 97 | for (j = i + 1; j < total; ++j) 98 | { 99 | box b = dets[j].bbox; 100 | if (box_iou(a, b) > thresh) 101 | { 102 | dets[j].prob[k] = 0; 103 | } 104 | } 105 | } 106 | } 107 | } 108 | 109 | box float_to_box(float *f, int stride) 110 | { 111 | box b = {0}; 112 | b.x = f[0]; 113 | b.y = f[1 * stride]; 114 | b.w = f[2 * stride]; 115 | b.h = f[3 * stride]; 116 | return b; 117 | } 118 | 119 | dbox derivative(box a, box b) 120 | { 121 | dbox d; 122 | d.dx = 0; 123 | d.dw = 0; 124 | float l1 = a.x - a.w / 2; 125 | float l2 = b.x - b.w / 2; 126 | if (l1 > l2) 127 | { 128 | d.dx -= 1; 129 | d.dw += .5; 130 | } 131 | float r1 = a.x + a.w / 2; 132 | float r2 = b.x + b.w / 2; 133 | if (r1 < r2) 134 | { 135 | d.dx += 1; 136 | d.dw += .5; 137 | } 138 | if (l1 > r2) 139 | { 140 | d.dx = -1; 141 | d.dw = 0; 142 | } 143 | if (r1 < l2) 144 | { 145 | d.dx = 1; 146 | d.dw = 0; 147 | } 148 | 149 | d.dy = 0; 150 | d.dh = 0; 151 | float t1 = a.y - a.h / 2; 152 | float t2 = b.y - b.h / 2; 153 | if (t1 > t2) 154 | { 155 | d.dy -= 1; 156 | d.dh += .5; 157 | } 158 | float b1 = a.y + a.h / 2; 159 | float b2 = b.y + b.h / 2; 160 | if (b1 < b2) 161 | { 162 | d.dy += 1; 163 | d.dh += .5; 164 | } 165 | if (t1 > b2) 166 | { 167 | d.dy = -1; 168 | d.dh = 0; 169 | } 170 | if (b1 < t2) 171 | { 172 | d.dy = 1; 173 | d.dh = 0; 174 | } 175 | return d; 176 | } 177 | 178 | float overlap(float x1, float w1, float x2, float w2) 179 | { 180 | float l1 = x1 - w1 / 2; 181 | float l2 = x2 - w2 / 2; 182 | float left = l1 > l2 ? l1 : l2; 183 | float r1 = x1 + w1 / 2; 184 | float r2 = x2 + w2 / 2; 185 | float right = r1 < r2 ? 
r1 : r2; 186 | return right - left; 187 | } 188 | 189 | float box_intersection(box a, box b) 190 | { 191 | float w = overlap(a.x, a.w, b.x, b.w); 192 | float h = overlap(a.y, a.h, b.y, b.h); 193 | if (w < 0 || h < 0) return 0; 194 | float area = w * h; 195 | return area; 196 | } 197 | 198 | float box_union(box a, box b) 199 | { 200 | float i = box_intersection(a, b); 201 | float u = a.w * a.h + b.w * b.h - i; 202 | return u; 203 | } 204 | 205 | float box_iou(box a, box b) 206 | { 207 | return box_intersection(a, b) / box_union(a, b); 208 | } 209 | 210 | float box_rmse(box a, box b) 211 | { 212 | return sqrt(pow(a.x - b.x, 2) + pow(a.y - b.y, 2) + pow(a.w - b.w, 2) + 213 | pow(a.h - b.h, 2)); 214 | } 215 | 216 | dbox dintersect(box a, box b) 217 | { 218 | float w = overlap(a.x, a.w, b.x, b.w); 219 | float h = overlap(a.y, a.h, b.y, b.h); 220 | dbox dover = derivative(a, b); 221 | dbox di; 222 | 223 | di.dw = dover.dw * h; 224 | di.dx = dover.dx * h; 225 | di.dh = dover.dh * w; 226 | di.dy = dover.dy * w; 227 | 228 | return di; 229 | } 230 | 231 | dbox dunion(box a, box b) 232 | { 233 | dbox du; 234 | 235 | dbox di = dintersect(a, b); 236 | du.dw = a.h - di.dw; 237 | du.dh = a.w - di.dh; 238 | du.dx = -di.dx; 239 | du.dy = -di.dy; 240 | 241 | return du; 242 | } 243 | 244 | void test_dunion() 245 | { 246 | box a = {0, 0, 1, 1}; 247 | box dxa = {0 + .0001, 0, 1, 1}; 248 | box dya = {0, 0 + .0001, 1, 1}; 249 | box dwa = {0, 0, 1 + .0001, 1}; 250 | box dha = {0, 0, 1, 1 + .0001}; 251 | 252 | box b = {.5, .5, .2, .2}; 253 | dbox di = dunion(a, b); 254 | printf("Union: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); 255 | float inter = box_union(a, b); 256 | float xinter = box_union(dxa, b); 257 | float yinter = box_union(dya, b); 258 | float winter = box_union(dwa, b); 259 | float hinter = box_union(dha, b); 260 | xinter = (xinter - inter) / (.0001); 261 | yinter = (yinter - inter) / (.0001); 262 | winter = (winter - inter) / (.0001); 263 | hinter = (hinter - inter) / (.0001); 264 | printf("Union Manual %f %f %f %f\n", xinter, yinter, winter, hinter); 265 | } 266 | void test_dintersect() 267 | { 268 | box a = {0, 0, 1, 1}; 269 | box dxa = {0 + .0001, 0, 1, 1}; 270 | box dya = {0, 0 + .0001, 1, 1}; 271 | box dwa = {0, 0, 1 + .0001, 1}; 272 | box dha = {0, 0, 1, 1 + .0001}; 273 | 274 | box b = {.5, .5, .2, .2}; 275 | dbox di = dintersect(a, b); 276 | printf("Inter: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); 277 | float inter = box_intersection(a, b); 278 | float xinter = box_intersection(dxa, b); 279 | float yinter = box_intersection(dya, b); 280 | float winter = box_intersection(dwa, b); 281 | float hinter = box_intersection(dha, b); 282 | xinter = (xinter - inter) / (.0001); 283 | yinter = (yinter - inter) / (.0001); 284 | winter = (winter - inter) / (.0001); 285 | hinter = (hinter - inter) / (.0001); 286 | printf("Inter Manual %f %f %f %f\n", xinter, yinter, winter, hinter); 287 | } 288 | 289 | void test_box() 290 | { 291 | test_dintersect(); 292 | test_dunion(); 293 | box a = {0, 0, 1, 1}; 294 | box dxa = {0 + .00001, 0, 1, 1}; 295 | box dya = {0, 0 + .00001, 1, 1}; 296 | box dwa = {0, 0, 1 + .00001, 1}; 297 | box dha = {0, 0, 1, 1 + .00001}; 298 | 299 | box b = {.5, 0, .2, .2}; 300 | 301 | float iou = box_iou(a, b); 302 | iou = (1 - iou) * (1 - iou); 303 | printf("%f\n", iou); 304 | dbox d = diou(a, b); 305 | printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh); 306 | 307 | float xiou = box_iou(dxa, b); 308 | float yiou = box_iou(dya, b); 309 | float wiou = box_iou(dwa, b); 310 | float hiou = 
box_iou(dha, b); 311 | xiou = ((1 - xiou) * (1 - xiou) - iou) / (.00001); 312 | yiou = ((1 - yiou) * (1 - yiou) - iou) / (.00001); 313 | wiou = ((1 - wiou) * (1 - wiou) - iou) / (.00001); 314 | hiou = ((1 - hiou) * (1 - hiou) - iou) / (.00001); 315 | printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou); 316 | } 317 | 318 | dbox diou(box a, box b) 319 | { 320 | float u = box_union(a, b); 321 | float i = box_intersection(a, b); 322 | dbox di = dintersect(a, b); 323 | dbox du = dunion(a, b); 324 | dbox dd = {0, 0, 0, 0}; 325 | 326 | if (i <= 0 || 1) 327 | { 328 | dd.dx = b.x - a.x; 329 | dd.dy = b.y - a.y; 330 | dd.dw = b.w - a.w; 331 | dd.dh = b.h - a.h; 332 | return dd; 333 | } 334 | 335 | dd.dx = 2 * pow((1 - (i / u)), 1) * (di.dx * u - du.dx * i) / (u * u); 336 | dd.dy = 2 * pow((1 - (i / u)), 1) * (di.dy * u - du.dy * i) / (u * u); 337 | dd.dw = 2 * pow((1 - (i / u)), 1) * (di.dw * u - du.dw * i) / (u * u); 338 | dd.dh = 2 * pow((1 - (i / u)), 1) * (di.dh * u - du.dh * i) / (u * u); 339 | return dd; 340 | } 341 | 342 | void do_nms(box *boxes, float **probs, int total, int classes, float thresh) 343 | { 344 | int i, j, k; 345 | for (i = 0; i < total; ++i) 346 | { 347 | int any = 0; 348 | for (k = 0; k < classes; ++k) any = any || (probs[i][k] > 0); 349 | if (!any) 350 | { 351 | continue; 352 | } 353 | for (j = i + 1; j < total; ++j) 354 | { 355 | if (box_iou(boxes[i], boxes[j]) > thresh) 356 | { 357 | for (k = 0; k < classes; ++k) 358 | { 359 | if (probs[i][k] < probs[j][k]) 360 | probs[i][k] = 0; 361 | else 362 | probs[j][k] = 0; 363 | } 364 | } 365 | } 366 | } 367 | } 368 | 369 | box encode_box(box b, box anchor) 370 | { 371 | box encode; 372 | encode.x = (b.x - anchor.x) / anchor.w; 373 | encode.y = (b.y - anchor.y) / anchor.h; 374 | encode.w = log2(b.w / anchor.w); 375 | encode.h = log2(b.h / anchor.h); 376 | return encode; 377 | } 378 | 379 | box decode_box(box b, box anchor) 380 | { 381 | box decode; 382 | decode.x = b.x * anchor.w + anchor.x; 383 | decode.y = b.y * anchor.h + anchor.y; 384 | decode.w = pow(2., b.w) * anchor.w; 385 | decode.h = pow(2., b.h) * anchor.h; 386 | return decode; 387 | } 388 | -------------------------------------------------------------------------------- /darknet/src/col2im.cpp: -------------------------------------------------------------------------------- 1 | #include "col2im.h" 2 | #include 3 | #include 4 | 5 | void col2im_add_pixel(float *im, int height, int width, int channels, 6 | int row, int col, int channel, int pad, float val) 7 | { 8 | row -= pad; 9 | col -= pad; 10 | 11 | if (row < 0 || col < 0 || 12 | row >= height || col >= width) return; 13 | im[col + width*(row + height*channel)] += val; 14 | } 15 | //This one might be too, can't remember. 
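// Note (added): col2im_cpu below is the inverse of im2col. Every element of the column
// buffer is added back onto the image pixel it was originally sampled from; overlapping
// receptive fields therefore accumulate, which is the behaviour needed when
// back-propagating gradients through an im2col-based convolution. Padded positions that
// fall outside the image are simply skipped by col2im_add_pixel.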
16 | void col2im_cpu(float* data_col, 17 | int channels, int height, int width, 18 | int ksize, int stride, int pad, float* data_im) 19 | { 20 | int c,h,w; 21 | int height_col = (height + 2*pad - ksize) / stride + 1; 22 | int width_col = (width + 2*pad - ksize) / stride + 1; 23 | 24 | int channels_col = channels * ksize * ksize; 25 | for (c = 0; c < channels_col; ++c) { 26 | int w_offset = c % ksize; 27 | int h_offset = (c / ksize) % ksize; 28 | int c_im = c / ksize / ksize; 29 | for (h = 0; h < height_col; ++h) { 30 | for (w = 0; w < width_col; ++w) { 31 | int im_row = h_offset + h * stride; 32 | int im_col = w_offset + w * stride; 33 | int col_index = (c * height_col + h) * width_col + w; 34 | double val = data_col[col_index]; 35 | col2im_add_pixel(data_im, height, width, channels, 36 | im_row, im_col, c_im, pad, val); 37 | } 38 | } 39 | } 40 | } 41 | 42 | -------------------------------------------------------------------------------- /darknet/src/col2im_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "col2im.h" 7 | #include "cuda.h" 8 | } 9 | 10 | // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu 11 | // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE 12 | 13 | __global__ void col2im_gpu_kernel(const int n, const float* data_col, 14 | const int height, const int width, const int ksize, 15 | const int pad, 16 | const int stride, 17 | const int height_col, const int width_col, 18 | float *data_im) { 19 | int index = blockIdx.x*blockDim.x+threadIdx.x; 20 | for(; index < n; index += blockDim.x*gridDim.x){ 21 | float val = 0; 22 | int w = index % width + pad; 23 | int h = (index / width) % height + pad; 24 | int c = index / (width * height); 25 | // compute the start and end of the output 26 | int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; 27 | int w_col_end = min(w / stride + 1, width_col); 28 | int h_col_start = (h < ksize) ? 0 : (h - ksize) / stride + 1; 29 | int h_col_end = min(h / stride + 1, height_col); 30 | // equivalent implementation 31 | int offset = 32 | (c * ksize * ksize + h * ksize + w) * height_col * width_col; 33 | int coeff_h_col = (1 - stride * ksize * height_col) * width_col; 34 | int coeff_w_col = (1 - stride * height_col * width_col); 35 | for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { 36 | for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { 37 | val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; 38 | } 39 | } 40 | data_im[index] += val; 41 | } 42 | } 43 | 44 | void col2im_gpu(float *data_col, 45 | int channels, int height, int width, 46 | int ksize, int stride, int pad, float *data_im){ 47 | // We are going to launch channels * height_col * width_col kernels, each 48 | // kernel responsible for copying a single-channel grid. 
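    // Note (added): despite the comment above, num_kernels below is channels * height * width,
    // i.e. one thread per *image* element. Each thread gathers every column entry that maps
    // onto its pixel and accumulates the sum into data_im, so no atomics are required.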
49 | int height_col = (height + 2 * pad - ksize) / stride + 1; 50 | int width_col = (width + 2 * pad - ksize) / stride + 1; 51 | int num_kernels = channels * height * width; 52 | col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, 53 | BLOCK>>>( 54 | num_kernels, data_col, height, width, ksize, pad, 55 | stride, height_col, 56 | width_col, data_im); 57 | } 58 | 59 | -------------------------------------------------------------------------------- /darknet/src/cost_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "cost_layer.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "blas.h" 7 | #include "cuda.h" 8 | #include "utils.h" 9 | 10 | COST_TYPE get_cost_type(char *s) 11 | { 12 | if (strcmp(s, "seg") == 0) return SEG; 13 | if (strcmp(s, "sse") == 0) return SSE; 14 | if (strcmp(s, "masked") == 0) return MASKED; 15 | if (strcmp(s, "smooth") == 0) return SMOOTH; 16 | if (strcmp(s, "L1") == 0) return L1; 17 | if (strcmp(s, "wgan") == 0) return WGAN; 18 | fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); 19 | return SSE; 20 | } 21 | 22 | char *get_cost_string(COST_TYPE a) 23 | { 24 | switch (a) 25 | { 26 | case SEG: return "seg"; 27 | case SSE: return "sse"; 28 | case MASKED: return "masked"; 29 | case SMOOTH: return "smooth"; 30 | case L1: return "L1"; 31 | case WGAN: return "wgan"; 32 | } 33 | return "sse"; 34 | } 35 | 36 | cost_layer make_cost_layer(int batch, 37 | int inputs, 38 | COST_TYPE cost_type, 39 | float scale) 40 | { 41 | fprintf( 42 | stderr, "cost %4d\n", inputs); 43 | cost_layer l; 44 | l.type = COST; 45 | 46 | l.scale = scale; 47 | l.batch = batch; 48 | l.inputs = inputs; 49 | l.outputs = inputs; 50 | l.cost_type = cost_type; 51 | l.delta = (float *)calloc(inputs * batch, sizeof(float)); 52 | l.output = (float *)calloc(inputs * batch, sizeof(float)); 53 | l.cost = (float *)calloc(1, sizeof(float)); 54 | 55 | l.forward = forward_cost_layer; 56 | l.backward = backward_cost_layer; 57 | #ifdef DKGPU 58 | l.forward_gpu = forward_cost_layer_gpu; 59 | l.backward_gpu = backward_cost_layer_gpu; 60 | 61 | l.delta_gpu = cuda_make_array(l.output, inputs * batch); 62 | l.output_gpu = cuda_make_array(l.delta, inputs * batch); 63 | #endif 64 | return l; 65 | } 66 | 67 | void resize_cost_layer(cost_layer *l, int inputs) 68 | { 69 | l->inputs = inputs; 70 | l->outputs = inputs; 71 | l->delta = (float *)realloc(l->delta, inputs * l->batch * sizeof(float)); 72 | l->output = (float *)realloc(l->output, inputs * l->batch * sizeof(float)); 73 | #ifdef DKGPU 74 | cuda_free(l->delta_gpu); 75 | cuda_free(l->output_gpu); 76 | l->delta_gpu = cuda_make_array(l->delta, inputs * l->batch); 77 | l->output_gpu = cuda_make_array(l->output, inputs * l->batch); 78 | #endif 79 | } 80 | 81 | void forward_cost_layer(cost_layer l, network net) 82 | { 83 | if (!net.truth) return; 84 | if (l.cost_type == MASKED) 85 | { 86 | int i; 87 | for (i = 0; i < l.batch * l.inputs; ++i) 88 | { 89 | if (net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM; 90 | } 91 | } 92 | if (l.cost_type == SMOOTH) 93 | { 94 | smooth_l1_cpu(l.batch * l.inputs, net.input, net.truth, l.delta, l.output); 95 | } 96 | else if (l.cost_type == L1) 97 | { 98 | l1_cpu(l.batch * l.inputs, net.input, net.truth, l.delta, l.output); 99 | } 100 | else 101 | { 102 | l2_cpu(l.batch * l.inputs, net.input, net.truth, l.delta, l.output); 103 | } 104 | l.cost[0] = sum_array(l.output, l.batch * l.inputs); 105 | } 106 | 107 | void backward_cost_layer(const cost_layer l, network 
net) 108 | { 109 | axpy_cpu(l.batch * l.inputs, l.scale, l.delta, 1, net.delta, 1); 110 | } 111 | 112 | #ifdef DKGPU 113 | 114 | void pull_cost_layer(cost_layer l) 115 | { 116 | cuda_pull_array(l.delta_gpu, l.delta, l.batch * l.inputs); 117 | } 118 | 119 | void push_cost_layer(cost_layer l) 120 | { 121 | cuda_push_array(l.delta_gpu, l.delta, l.batch * l.inputs); 122 | } 123 | 124 | int float_abs_compare(const void *a, const void *b) 125 | { 126 | float fa = *(const float *)a; 127 | if (fa < 0) fa = -fa; 128 | float fb = *(const float *)b; 129 | if (fb < 0) fb = -fb; 130 | return (fa > fb) - (fa < fb); 131 | } 132 | 133 | void forward_cost_layer_gpu(cost_layer l, network net) 134 | { 135 | if (!net.truth) return; 136 | if (l.smooth) 137 | { 138 | scal_gpu(l.batch * l.inputs, (1 - l.smooth), net.truth_gpu, 1); 139 | add_gpu(l.batch * l.inputs, l.smooth * 1. / l.inputs, net.truth_gpu, 1); 140 | } 141 | 142 | if (l.cost_type == SMOOTH) 143 | { 144 | smooth_l1_gpu(l.batch * l.inputs, 145 | net.input_gpu, 146 | net.truth_gpu, 147 | l.delta_gpu, 148 | l.output_gpu); 149 | } 150 | else if (l.cost_type == L1) 151 | { 152 | l1_gpu(l.batch * l.inputs, 153 | net.input_gpu, 154 | net.truth_gpu, 155 | l.delta_gpu, 156 | l.output_gpu); 157 | } 158 | else if (l.cost_type == WGAN) 159 | { 160 | wgan_gpu(l.batch * l.inputs, 161 | net.input_gpu, 162 | net.truth_gpu, 163 | l.delta_gpu, 164 | l.output_gpu); 165 | } 166 | else 167 | { 168 | l2_gpu(l.batch * l.inputs, 169 | net.input_gpu, 170 | net.truth_gpu, 171 | l.delta_gpu, 172 | l.output_gpu); 173 | } 174 | 175 | if (l.cost_type == SEG && l.noobject_scale != 1) 176 | { 177 | scale_mask_gpu( 178 | l.batch * l.inputs, l.delta_gpu, 0, net.truth_gpu, l.noobject_scale); 179 | scale_mask_gpu( 180 | l.batch * l.inputs, l.output_gpu, 0, net.truth_gpu, l.noobject_scale); 181 | } 182 | if (l.cost_type == MASKED) 183 | { 184 | mask_gpu(l.batch * l.inputs, net.delta_gpu, SECRET_NUM, net.truth_gpu, 0); 185 | } 186 | 187 | if (l.ratio) 188 | { 189 | cuda_pull_array(l.delta_gpu, l.delta, l.batch * l.inputs); 190 | qsort(l.delta, l.batch * l.inputs, sizeof(float), float_abs_compare); 191 | int n = (1 - l.ratio) * l.batch * l.inputs; 192 | float thresh = l.delta[n]; 193 | thresh = 0; 194 | printf("%f\n", thresh); 195 | supp_gpu(l.batch * l.inputs, thresh, l.delta_gpu, 1); 196 | } 197 | 198 | if (l.thresh) 199 | { 200 | supp_gpu(l.batch * l.inputs, l.thresh * 1. 
/ l.inputs, l.delta_gpu, 1); 201 | } 202 | 203 | cuda_pull_array(l.output_gpu, l.output, l.batch * l.inputs); 204 | l.cost[0] = sum_array(l.output, l.batch * l.inputs); 205 | } 206 | 207 | void backward_cost_layer_gpu(const cost_layer l, network net) 208 | { 209 | axpy_gpu(l.batch * l.inputs, l.scale, l.delta_gpu, 1, net.delta_gpu, 1); 210 | } 211 | #endif 212 | -------------------------------------------------------------------------------- /darknet/src/crnn_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "crnn_layer.h" 2 | #include "convolutional_layer.h" 3 | #include "utils.h" 4 | #include "cuda.h" 5 | #include "blas.h" 6 | #include "gemm.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | static void increment_layer(layer *l, int steps) 14 | { 15 | int num = l->outputs*l->batch*steps; 16 | l->output += num; 17 | l->delta += num; 18 | l->x += num; 19 | l->x_norm += num; 20 | 21 | #ifdef DKGPU 22 | l->output_gpu += num; 23 | l->delta_gpu += num; 24 | l->x_gpu += num; 25 | l->x_norm_gpu += num; 26 | #endif 27 | } 28 | 29 | layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize) 30 | { 31 | fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters); 32 | batch = batch / steps; 33 | layer l; 34 | l.batch = batch; 35 | l.type = CRNN; 36 | l.steps = steps; 37 | l.h = h; 38 | l.w = w; 39 | l.c = c; 40 | l.out_h = h; 41 | l.out_w = w; 42 | l.out_c = output_filters; 43 | l.inputs = h*w*c; 44 | l.hidden = h * w * hidden_filters; 45 | l.outputs = l.out_h * l.out_w * l.out_c; 46 | 47 | l.state = (float *)calloc(l.hidden * batch * (steps + 1), sizeof(float)); 48 | 49 | l.input_layer = (layer*)malloc(sizeof(layer)); 50 | fprintf(stderr, "\t\t"); 51 | *(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); 52 | l.input_layer->batch = batch; 53 | 54 | l.self_layer = (layer*)malloc(sizeof(layer)); 55 | fprintf(stderr, "\t\t"); 56 | *(l.self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); 57 | l.self_layer->batch = batch; 58 | 59 | l.output_layer = (layer*)malloc(sizeof(layer)); 60 | fprintf(stderr, "\t\t"); 61 | *(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); 62 | l.output_layer->batch = batch; 63 | 64 | l.output = l.output_layer->output; 65 | l.delta = l.output_layer->delta; 66 | 67 | l.forward = forward_crnn_layer; 68 | l.backward = backward_crnn_layer; 69 | l.update = update_crnn_layer; 70 | 71 | #ifdef DKGPU 72 | l.forward_gpu = forward_crnn_layer_gpu; 73 | l.backward_gpu = backward_crnn_layer_gpu; 74 | l.update_gpu = update_crnn_layer_gpu; 75 | 76 | l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1)); 77 | l.output_gpu = l.output_layer->output_gpu; 78 | l.delta_gpu = l.output_layer->delta_gpu; 79 | #endif 80 | 81 | return l; 82 | } 83 | 84 | void update_crnn_layer(layer l, update_args a) 85 | { 86 | update_convolutional_layer(*(l.input_layer), a); 87 | update_convolutional_layer(*(l.self_layer), a); 88 | update_convolutional_layer(*(l.output_layer), a); 89 | } 90 | 91 | void forward_crnn_layer(layer l, network net) 92 | { 93 | network s = net; 94 | s.train = net.train; 95 | int i; 96 | layer input_layer = *(l.input_layer); 97 | layer 
self_layer = *(l.self_layer); 98 | layer output_layer = *(l.output_layer); 99 | 100 | fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); 101 | fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1); 102 | fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1); 103 | if(net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1); 104 | 105 | for (i = 0; i < l.steps; ++i) { 106 | s.input = net.input; 107 | forward_convolutional_layer(input_layer, s); 108 | 109 | s.input = l.state; 110 | forward_convolutional_layer(self_layer, s); 111 | 112 | float *old_state = l.state; 113 | if(net.train) l.state += l.hidden*l.batch; 114 | if(l.shortcut){ 115 | copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1); 116 | }else{ 117 | fill_cpu(l.hidden * l.batch, 0, l.state, 1); 118 | } 119 | axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1); 120 | axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); 121 | 122 | s.input = l.state; 123 | forward_convolutional_layer(output_layer, s); 124 | 125 | net.input += l.inputs*l.batch; 126 | increment_layer(&input_layer, 1); 127 | increment_layer(&self_layer, 1); 128 | increment_layer(&output_layer, 1); 129 | } 130 | } 131 | 132 | void backward_crnn_layer(layer l, network net) 133 | { 134 | network s = net; 135 | int i; 136 | layer input_layer = *(l.input_layer); 137 | layer self_layer = *(l.self_layer); 138 | layer output_layer = *(l.output_layer); 139 | 140 | increment_layer(&input_layer, l.steps-1); 141 | increment_layer(&self_layer, l.steps-1); 142 | increment_layer(&output_layer, l.steps-1); 143 | 144 | l.state += l.hidden*l.batch*l.steps; 145 | for (i = l.steps-1; i >= 0; --i) { 146 | copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1); 147 | axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); 148 | 149 | s.input = l.state; 150 | s.delta = self_layer.delta; 151 | backward_convolutional_layer(output_layer, s); 152 | 153 | l.state -= l.hidden*l.batch; 154 | /* 155 | if(i > 0){ 156 | copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1); 157 | axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1); 158 | }else{ 159 | fill_cpu(l.hidden * l.batch, 0, l.state, 1); 160 | } 161 | */ 162 | 163 | s.input = l.state; 164 | s.delta = self_layer.delta - l.hidden*l.batch; 165 | if (i == 0) s.delta = 0; 166 | backward_convolutional_layer(self_layer, s); 167 | 168 | copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1); 169 | if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1); 170 | s.input = net.input + i*l.inputs*l.batch; 171 | if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; 172 | else s.delta = 0; 173 | backward_convolutional_layer(input_layer, s); 174 | 175 | increment_layer(&input_layer, -1); 176 | increment_layer(&self_layer, -1); 177 | increment_layer(&output_layer, -1); 178 | } 179 | } 180 | 181 | #ifdef DKGPU 182 | 183 | void pull_crnn_layer(layer l) 184 | { 185 | pull_convolutional_layer(*(l.input_layer)); 186 | pull_convolutional_layer(*(l.self_layer)); 187 | pull_convolutional_layer(*(l.output_layer)); 188 | } 189 | 190 | void push_crnn_layer(layer l) 191 | { 192 | push_convolutional_layer(*(l.input_layer)); 193 | push_convolutional_layer(*(l.self_layer)); 194 | push_convolutional_layer(*(l.output_layer)); 195 | } 196 | 197 | void update_crnn_layer_gpu(layer l, update_args a) 198 | { 199 | update_convolutional_layer_gpu(*(l.input_layer), 
a); 200 | update_convolutional_layer_gpu(*(l.self_layer), a); 201 | update_convolutional_layer_gpu(*(l.output_layer), a); 202 | } 203 | 204 | void forward_crnn_layer_gpu(layer l, network net) 205 | { 206 | network s = net; 207 | int i; 208 | layer input_layer = *(l.input_layer); 209 | layer self_layer = *(l.self_layer); 210 | layer output_layer = *(l.output_layer); 211 | 212 | fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); 213 | fill_gpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1); 214 | fill_gpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1); 215 | if(net.train) fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); 216 | 217 | for (i = 0; i < l.steps; ++i) { 218 | s.input_gpu = net.input_gpu; 219 | forward_convolutional_layer_gpu(input_layer, s); 220 | 221 | s.input_gpu = l.state_gpu; 222 | forward_convolutional_layer_gpu(self_layer, s); 223 | 224 | float *old_state = l.state_gpu; 225 | if(net.train) l.state_gpu += l.hidden*l.batch; 226 | if(l.shortcut){ 227 | copy_gpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1); 228 | }else{ 229 | fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); 230 | } 231 | axpy_gpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); 232 | axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); 233 | 234 | s.input_gpu = l.state_gpu; 235 | forward_convolutional_layer_gpu(output_layer, s); 236 | 237 | net.input_gpu += l.inputs*l.batch; 238 | increment_layer(&input_layer, 1); 239 | increment_layer(&self_layer, 1); 240 | increment_layer(&output_layer, 1); 241 | } 242 | } 243 | 244 | void backward_crnn_layer_gpu(layer l, network net) 245 | { 246 | network s = net; 247 | s.train = net.train; 248 | int i; 249 | layer input_layer = *(l.input_layer); 250 | layer self_layer = *(l.self_layer); 251 | layer output_layer = *(l.output_layer); 252 | increment_layer(&input_layer, l.steps - 1); 253 | increment_layer(&self_layer, l.steps - 1); 254 | increment_layer(&output_layer, l.steps - 1); 255 | l.state_gpu += l.hidden*l.batch*l.steps; 256 | for (i = l.steps-1; i >= 0; --i) { 257 | copy_gpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1); 258 | axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); 259 | 260 | s.input_gpu = l.state_gpu; 261 | s.delta_gpu = self_layer.delta_gpu; 262 | backward_convolutional_layer_gpu(output_layer, s); 263 | 264 | l.state_gpu -= l.hidden*l.batch; 265 | 266 | s.input_gpu = l.state_gpu; 267 | s.delta_gpu = self_layer.delta_gpu - l.hidden*l.batch; 268 | if (i == 0) s.delta_gpu = 0; 269 | backward_convolutional_layer_gpu(self_layer, s); 270 | 271 | copy_gpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); 272 | if (i > 0 && l.shortcut) axpy_gpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1); 273 | s.input_gpu = net.input_gpu + i*l.inputs*l.batch; 274 | if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; 275 | else s.delta_gpu = 0; 276 | backward_convolutional_layer_gpu(input_layer, s); 277 | 278 | increment_layer(&input_layer, -1); 279 | increment_layer(&self_layer, -1); 280 | increment_layer(&output_layer, -1); 281 | } 282 | } 283 | #endif 284 | -------------------------------------------------------------------------------- /darknet/src/crop_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "crop_layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | image get_crop_image(crop_layer l) 6 | { 7 | int h = 
l.out_h; 8 | int w = l.out_w; 9 | int c = l.out_c; 10 | return float_to_image(w,h,c,l.output); 11 | } 12 | 13 | void backward_crop_layer(const crop_layer l, network net){} 14 | void backward_crop_layer_gpu(const crop_layer l, network net){} 15 | 16 | crop_layer make_crop_layer(int batch, 17 | int h, 18 | int w, 19 | int c, 20 | int crop_height, 21 | int crop_width, 22 | int flip, 23 | float angle, 24 | float saturation, 25 | float exposure) 26 | { 27 | fprintf(stderr, 28 | "Crop Layer: %d x %d -> %d x %d x %d image\n", 29 | h, 30 | w, 31 | crop_height, 32 | crop_width, 33 | c); 34 | crop_layer l; 35 | l.type = CROP; 36 | l.batch = batch; 37 | l.h = h; 38 | l.w = w; 39 | l.c = c; 40 | l.scale = (float)crop_height / h; 41 | l.flip = flip; 42 | l.angle = angle; 43 | l.saturation = saturation; 44 | l.exposure = exposure; 45 | l.out_w = crop_width; 46 | l.out_h = crop_height; 47 | l.out_c = c; 48 | l.inputs = l.w * l.h * l.c; 49 | l.outputs = l.out_w * l.out_h * l.out_c; 50 | l.output = (float *)calloc(l.outputs * batch, sizeof(float)); 51 | l.forward = forward_crop_layer; 52 | l.backward = backward_crop_layer; 53 | 54 | #ifdef DKGPU 55 | l.forward_gpu = forward_crop_layer_gpu; 56 | l.backward_gpu = backward_crop_layer_gpu; 57 | l.output_gpu = cuda_make_array(l.output, l.outputs*batch); 58 | l.rand_gpu = cuda_make_array(0, l.batch*8); 59 | #endif 60 | return l; 61 | } 62 | 63 | void resize_crop_layer(layer *l, int w, int h) 64 | { 65 | l->w = w; 66 | l->h = h; 67 | 68 | l->out_w = l->scale*w; 69 | l->out_h = l->scale*h; 70 | 71 | l->inputs = l->w * l->h * l->c; 72 | l->outputs = l->out_h * l->out_w * l->out_c; 73 | 74 | l->output = (float*)realloc(l->output, l->batch*l->outputs*sizeof(float)); 75 | #ifdef DKGPU 76 | cuda_free(l->output_gpu); 77 | l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); 78 | #endif 79 | } 80 | 81 | 82 | void forward_crop_layer(const crop_layer l, network net) 83 | { 84 | int i,j,c,b,row,col; 85 | int index; 86 | int count = 0; 87 | int flip = (l.flip && rand()%2); 88 | int dh = rand()%(l.h - l.out_h + 1); 89 | int dw = rand()%(l.w - l.out_w + 1); 90 | float scale = 2; 91 | float trans = -1; 92 | if(l.noadjust){ 93 | scale = 1; 94 | trans = 0; 95 | } 96 | if(!net.train){ 97 | flip = 0; 98 | dh = (l.h - l.out_h)/2; 99 | dw = (l.w - l.out_w)/2; 100 | } 101 | for(b = 0; b < l.batch; ++b){ 102 | for(c = 0; c < l.c; ++c){ 103 | for(i = 0; i < l.out_h; ++i){ 104 | for(j = 0; j < l.out_w; ++j){ 105 | if(flip){ 106 | col = l.w - dw - j - 1; 107 | }else{ 108 | col = j + dw; 109 | } 110 | row = i + dh; 111 | index = col+l.w*(row+l.h*(c + l.c*b)); 112 | l.output[count++] = net.input[index]*scale + trans; 113 | } 114 | } 115 | } 116 | } 117 | } 118 | 119 | -------------------------------------------------------------------------------- /darknet/src/crop_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "crop_layer.h" 7 | #include "utils.h" 8 | #include "cuda.h" 9 | #include "image.h" 10 | } 11 | 12 | __device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) 13 | { 14 | if(x < 0 || x >= w || y < 0 || y >= h) return 0; 15 | return image[x + w*(y + c*h)]; 16 | } 17 | 18 | __device__ float3 rgb_to_hsv_kernel(float3 rgb) 19 | { 20 | float r = rgb.x; 21 | float g = rgb.y; 22 | float b = rgb.z; 23 | 24 | float h, s, v; 25 | float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? 
g : b); 26 | float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? g : b); 27 | float delta = max - min; 28 | v = max; 29 | if(max == 0){ 30 | s = 0; 31 | h = -1; 32 | }else{ 33 | s = delta/max; 34 | if(r == max){ 35 | h = (g - b) / delta; 36 | } else if (g == max) { 37 | h = 2 + (b - r) / delta; 38 | } else { 39 | h = 4 + (r - g) / delta; 40 | } 41 | if (h < 0) h += 6; 42 | } 43 | return make_float3(h, s, v); 44 | } 45 | 46 | __device__ float3 hsv_to_rgb_kernel(float3 hsv) 47 | { 48 | float h = hsv.x; 49 | float s = hsv.y; 50 | float v = hsv.z; 51 | 52 | float r, g, b; 53 | float f, p, q, t; 54 | 55 | if (s == 0) { 56 | r = g = b = v; 57 | } else { 58 | int index = (int) floorf(h); 59 | f = h - index; 60 | p = v*(1-s); 61 | q = v*(1-s*f); 62 | t = v*(1-s*(1-f)); 63 | if(index == 0){ 64 | r = v; g = t; b = p; 65 | } else if(index == 1){ 66 | r = q; g = v; b = p; 67 | } else if(index == 2){ 68 | r = p; g = v; b = t; 69 | } else if(index == 3){ 70 | r = p; g = q; b = v; 71 | } else if(index == 4){ 72 | r = t; g = p; b = v; 73 | } else { 74 | r = v; g = p; b = q; 75 | } 76 | } 77 | r = (r < 0) ? 0 : ((r > 1) ? 1 : r); 78 | g = (g < 0) ? 0 : ((g > 1) ? 1 : g); 79 | b = (b < 0) ? 0 : ((b > 1) ? 1 : b); 80 | return make_float3(r, g, b); 81 | } 82 | 83 | __device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c) 84 | { 85 | int ix = (int) floorf(x); 86 | int iy = (int) floorf(y); 87 | 88 | float dx = x - ix; 89 | float dy = y - iy; 90 | 91 | float val = (1-dy) * (1-dx) * get_pixel_kernel(image, w, h, ix, iy, c) + 92 | dy * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) + 93 | (1-dy) * dx * get_pixel_kernel(image, w, h, ix+1, iy, c) + 94 | dy * dx * get_pixel_kernel(image, w, h, ix+1, iy+1, c); 95 | return val; 96 | } 97 | 98 | __global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift) 99 | { 100 | int size = batch * w * h; 101 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 102 | if(id >= size) return; 103 | int x = id % w; 104 | id /= w; 105 | int y = id % h; 106 | id /= h; 107 | float rshift = rand[0]; 108 | float gshift = rand[1]; 109 | float bshift = rand[2]; 110 | float r0 = rand[8*id + 0]; 111 | float r1 = rand[8*id + 1]; 112 | float r2 = rand[8*id + 2]; 113 | float r3 = rand[8*id + 3]; 114 | 115 | saturation = r0*(saturation - 1) + 1; 116 | saturation = (r1 > .5f) ? 1.f/saturation : saturation; 117 | exposure = r2*(exposure - 1) + 1; 118 | exposure = (r3 > .5f) ? 
1.f/exposure : exposure; 119 | 120 | size_t offset = id * h * w * 3; 121 | image += offset; 122 | float r = image[x + w*(y + h*0)]; 123 | float g = image[x + w*(y + h*1)]; 124 | float b = image[x + w*(y + h*2)]; 125 | float3 rgb = make_float3(r,g,b); 126 | if(train){ 127 | float3 hsv = rgb_to_hsv_kernel(rgb); 128 | hsv.y *= saturation; 129 | hsv.z *= exposure; 130 | rgb = hsv_to_rgb_kernel(hsv); 131 | } else { 132 | shift = 0; 133 | } 134 | image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift; 135 | image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift; 136 | image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift; 137 | } 138 | 139 | __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output) 140 | { 141 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 142 | if(id >= size) return; 143 | 144 | float cx = w/2.f; 145 | float cy = h/2.f; 146 | 147 | int count = id; 148 | int j = id % crop_width; 149 | id /= crop_width; 150 | int i = id % crop_height; 151 | id /= crop_height; 152 | int k = id % c; 153 | id /= c; 154 | int b = id; 155 | 156 | float r4 = rand[8*b + 4]; 157 | float r5 = rand[8*b + 5]; 158 | float r6 = rand[8*b + 6]; 159 | float r7 = rand[8*b + 7]; 160 | 161 | float dw = (w - crop_width)*r4; 162 | float dh = (h - crop_height)*r5; 163 | flip = (flip && (r6 > .5f)); 164 | angle = 2*angle*r7 - angle; 165 | if(!train){ 166 | dw = (w - crop_width)/2.f; 167 | dh = (h - crop_height)/2.f; 168 | flip = 0; 169 | angle = 0; 170 | } 171 | 172 | input += w*h*c*b; 173 | 174 | float x = (flip) ? w - dw - j - 1 : j + dw; 175 | float y = i + dh; 176 | 177 | float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx; 178 | float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy; 179 | 180 | output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k); 181 | } 182 | 183 | extern "C" void forward_crop_layer_gpu(crop_layer layer, network net) 184 | { 185 | cuda_random(layer.rand_gpu, layer.batch*8); 186 | 187 | float radians = layer.angle*3.14159265f/180.f; 188 | 189 | float scale = 2; 190 | float translate = -1; 191 | if(layer.noadjust){ 192 | scale = 1; 193 | translate = 0; 194 | } 195 | 196 | int size = layer.batch * layer.w * layer.h; 197 | 198 | levels_image_kernel<<>>(net.input_gpu, layer.rand_gpu, layer.batch, layer.w, layer.h, net.train, layer.saturation, layer.exposure, translate, scale, layer.shift); 199 | check_error(cudaPeekAtLastError()); 200 | 201 | size = layer.batch*layer.c*layer.out_w*layer.out_h; 202 | 203 | forward_crop_layer_kernel<<>>(net.input_gpu, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, net.train, layer.flip, radians, layer.output_gpu); 204 | check_error(cudaPeekAtLastError()); 205 | 206 | /* 207 | cuda_pull_array(layer.output_gpu, layer.output, size); 208 | image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch)); 209 | image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch)); 210 | image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch)); 211 | 212 | translate_image(im, -translate); 213 | scale_image(im, 1/scale); 214 | translate_image(im2, -translate); 215 | scale_image(im2, 1/scale); 216 | translate_image(im3, -translate); 217 | scale_image(im3, 1/scale); 218 | 219 | show_image(im, 
"cropped"); 220 | show_image(im2, "cropped2"); 221 | show_image(im3, "cropped3"); 222 | cvWaitKey(0); 223 | */ 224 | } 225 | 226 | -------------------------------------------------------------------------------- /darknet/src/cuda.cpp: -------------------------------------------------------------------------------- 1 | int gpu_index = 0; 2 | 3 | #ifdef DKGPU 4 | 5 | #include "cuda.h" 6 | #include 7 | #include 8 | #include 9 | #include "blas.h" 10 | #include "utils.h" 11 | 12 | void cuda_set_device(int n) 13 | { 14 | gpu_index = n; 15 | cudaError_t status = cudaSetDevice(n); 16 | check_error(status); 17 | } 18 | 19 | int cuda_get_device() 20 | { 21 | int n = 0; 22 | cudaError_t status = cudaGetDevice(&n); 23 | check_error(status); 24 | return n; 25 | } 26 | 27 | void check_error(cudaError_t status) 28 | { 29 | // cudaDeviceSynchronize(); 30 | cudaError_t status2 = cudaGetLastError(); 31 | if (status != cudaSuccess) 32 | { 33 | const char *s = cudaGetErrorString(status); 34 | char buffer[256]; 35 | printf("CUDA Error: %s\n", s); 36 | assert(0); 37 | snprintf(buffer, 256, "CUDA Error: %s", s); 38 | error(buffer); 39 | } 40 | if (status2 != cudaSuccess) 41 | { 42 | const char *s = cudaGetErrorString(status); 43 | char buffer[256]; 44 | printf("CUDA Error Prev: %s\n", s); 45 | assert(0); 46 | snprintf(buffer, 256, "CUDA Error Prev: %s", s); 47 | error(buffer); 48 | } 49 | } 50 | 51 | dim3 cuda_gridsize(size_t n) 52 | { 53 | size_t k = (n - 1) / BLOCK + 1; 54 | size_t x = k; 55 | size_t y = 1; 56 | if (x > 65535) 57 | { 58 | x = ceil(sqrt(k)); 59 | y = (n - 1) / (x * BLOCK) + 1; 60 | } 61 | dim3 d = {x, y, 1}; 62 | // printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); 63 | return d; 64 | } 65 | 66 | #ifdef CUDNN 67 | cudnnHandle_t cudnn_handle() 68 | { 69 | static int init[16] = {0}; 70 | static cudnnHandle_t handle[16]; 71 | int i = cuda_get_device(); 72 | if (!init[i]) 73 | { 74 | cudnnCreate(&handle[i]); 75 | init[i] = 1; 76 | } 77 | return handle[i]; 78 | } 79 | #endif 80 | 81 | cublasHandle_t blas_handle() 82 | { 83 | static int init[16] = {0}; 84 | static cublasHandle_t handle[16]; 85 | int i = cuda_get_device(); 86 | if (!init[i]) 87 | { 88 | cublasCreate(&handle[i]); 89 | init[i] = 1; 90 | } 91 | return handle[i]; 92 | } 93 | 94 | float *cuda_make_array(float *x, size_t n) 95 | { 96 | float *x_gpu; 97 | size_t size = sizeof(float) * n; 98 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 99 | check_error(status); 100 | if (x) 101 | { 102 | status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 103 | check_error(status); 104 | } 105 | else 106 | { 107 | fill_gpu(n, 0, x_gpu, 1); 108 | } 109 | if (!x_gpu) error("Cuda malloc failed\n"); 110 | return x_gpu; 111 | } 112 | 113 | void cuda_random(float *x_gpu, size_t n) 114 | { 115 | static curandGenerator_t gen[16]; 116 | static int init[16] = {0}; 117 | int i = cuda_get_device(); 118 | if (!init[i]) 119 | { 120 | curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); 121 | curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); 122 | init[i] = 1; 123 | } 124 | curandGenerateUniform(gen[i], x_gpu, n); 125 | check_error(cudaPeekAtLastError()); 126 | } 127 | 128 | float cuda_compare(float *x_gpu, float *x, size_t n, char *s) 129 | { 130 | float *tmp = (float *)calloc(n, sizeof(float)); 131 | cuda_pull_array(x_gpu, tmp, n); 132 | // int i; 133 | // for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); 134 | axpy_cpu(n, -1, x, 1, tmp, 1); 135 | float err = dot_cpu(n, tmp, 1, tmp, 1); 136 | printf("Error %s: %f\n", s, sqrt(err / n)); 
137 | free(tmp); 138 | return err; 139 | } 140 | 141 | int *cuda_make_int_array(int *x, size_t n) 142 | { 143 | int *x_gpu; 144 | size_t size = sizeof(int) * n; 145 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 146 | check_error(status); 147 | if (x) 148 | { 149 | status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 150 | check_error(status); 151 | } 152 | if (!x_gpu) error("Cuda malloc failed\n"); 153 | return x_gpu; 154 | } 155 | 156 | void cuda_free(float *x_gpu) 157 | { 158 | cudaError_t status = cudaFree(x_gpu); 159 | check_error(status); 160 | } 161 | 162 | void cuda_push_array(float *x_gpu, float *x, size_t n) 163 | { 164 | size_t size = sizeof(float) * n; 165 | cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 166 | check_error(status); 167 | } 168 | 169 | void cuda_pull_array(float *x_gpu, float *x, size_t n) 170 | { 171 | size_t size = sizeof(float) * n; 172 | cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); 173 | check_error(status); 174 | } 175 | 176 | float cuda_mag_array(float *x_gpu, size_t n) 177 | { 178 | float *temp = (float *)calloc(n, sizeof(float)); 179 | cuda_pull_array(x_gpu, temp, n); 180 | float m = mag_array(temp, n); 181 | free(temp); 182 | return m; 183 | } 184 | #else 185 | void cuda_set_device(int n) 186 | { 187 | } 188 | 189 | #endif 190 | -------------------------------------------------------------------------------- /darknet/src/deconvolutional_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "convolutional_layer.h" 7 | #include "deconvolutional_layer.h" 8 | #include "batchnorm_layer.h" 9 | #include "gemm.h" 10 | #include "blas.h" 11 | #include "im2col.h" 12 | #include "col2im.h" 13 | #include "utils.h" 14 | #include "cuda.h" 15 | } 16 | 17 | extern "C" void forward_deconvolutional_layer_gpu(layer l, network net) 18 | { 19 | int i; 20 | 21 | int m = l.size*l.size*l.n; 22 | int n = l.h*l.w; 23 | int k = l.c; 24 | 25 | fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); 26 | 27 | for(i = 0; i < l.batch; ++i){ 28 | float *a = l.weights_gpu; 29 | float *b = net.input_gpu + i*l.c*l.h*l.w; 30 | float *c = net.workspace; 31 | 32 | gemm_gpu(1,0,m,n,k,1,a,m,b,n,0,c,n); 33 | 34 | col2im_gpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.outputs); 35 | } 36 | if (l.batch_normalize) { 37 | forward_batchnorm_layer_gpu(l, net); 38 | } else { 39 | add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); 40 | } 41 | activate_array_gpu(l.output_gpu, l.batch*l.n*l.out_w*l.out_h, l.activation); 42 | } 43 | 44 | extern "C" void backward_deconvolutional_layer_gpu(layer l, network net) 45 | { 46 | int i; 47 | 48 | //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); 49 | gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); 50 | 51 | if(l.batch_normalize){ 52 | backward_batchnorm_layer_gpu(l, net); 53 | } else { 54 | backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); 55 | } 56 | 57 | //if(net.delta_gpu) memset(net.delta_gpu, 0, l.batch*l.h*l.w*l.c*sizeof(float)); 58 | 59 | for(i = 0; i < l.batch; ++i){ 60 | int m = l.c; 61 | int n = l.size*l.size*l.n; 62 | int k = l.h*l.w; 63 | 64 | float *a = net.input_gpu + i*m*k; 65 | float *b = net.workspace; 66 | float *c = l.weight_updates_gpu; 67 | 68 | im2col_gpu(l.delta_gpu + i*l.outputs, l.out_c, l.out_h, 
l.out_w, 69 | l.size, l.stride, l.pad, b); 70 | gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); 71 | 72 | if(net.delta_gpu){ 73 | int m = l.c; 74 | int n = l.h*l.w; 75 | int k = l.size*l.size*l.n; 76 | 77 | float *a = l.weights_gpu; 78 | float *b = net.workspace; 79 | float *c = net.delta_gpu + i*n*m; 80 | 81 | gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); 82 | } 83 | } 84 | } 85 | 86 | extern "C" void pull_deconvolutional_layer(layer l) 87 | { 88 | cuda_pull_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size); 89 | cuda_pull_array(l.biases_gpu, l.biases, l.n); 90 | cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); 91 | cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); 92 | if (l.batch_normalize){ 93 | cuda_pull_array(l.scales_gpu, l.scales, l.n); 94 | cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n); 95 | cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n); 96 | } 97 | } 98 | 99 | extern "C" void push_deconvolutional_layer(layer l) 100 | { 101 | cuda_push_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size); 102 | cuda_push_array(l.biases_gpu, l.biases, l.n); 103 | cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); 104 | cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); 105 | if (l.batch_normalize){ 106 | cuda_push_array(l.scales_gpu, l.scales, l.n); 107 | cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); 108 | cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); 109 | } 110 | } 111 | 112 | void update_deconvolutional_layer_gpu(layer l, update_args a) 113 | { 114 | float learning_rate = a.learning_rate*l.learning_rate_scale; 115 | float momentum = a.momentum; 116 | float decay = a.decay; 117 | int batch = a.batch; 118 | 119 | if(a.adam){ 120 | adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); 121 | adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); 122 | if(l.scales_gpu){ 123 | adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); 124 | } 125 | }else{ 126 | axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); 127 | axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); 128 | scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); 129 | 130 | axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); 131 | scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); 132 | 133 | if(l.scales_gpu){ 134 | axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); 135 | scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); 136 | } 137 | } 138 | } 139 | 140 | -------------------------------------------------------------------------------- /darknet/src/demo.cpp: -------------------------------------------------------------------------------- 1 | #include "network.h" 2 | #include "detection_layer.h" 3 | #include "region_layer.h" 4 | #include "cost_layer.h" 5 | #include "utils.h" 6 | #include "parser.h" 7 | #include "box.h" 8 | #include "image.h" 9 | #include "demo.h" 10 | #include 11 | 12 | #define FRAMES 3 13 | 14 | #ifdef OPENCV 15 | #include "opencv2/highgui/highgui_c.h" 16 | #include "opencv2/imgproc/imgproc_c.h" 17 | image get_image_from_stream(CvCapture *cap); 18 | 19 | static char **demo_names; 20 | static image **demo_alphabet; 21 | static int demo_classes; 22 | 
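/* File-scope state shared between fetch_in_thread() and detect_in_thread().
 * demo() below runs each of them on its own pthread per frame and joins both
 * threads before reusing these buffers, which is the only synchronization
 * protecting them. */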
23 | static float **probs; 24 | static box *boxes; 25 | static network net; 26 | static image in ; 27 | static image in_s ; 28 | static image det ; 29 | static image det_s; 30 | static image disp = {0}; 31 | static CvCapture * cap; 32 | static float fps = 0; 33 | static float demo_thresh = 0; 34 | static float demo_hier_thresh = .5; 35 | 36 | static float *predictions[FRAMES]; 37 | static int demo_index = 0; 38 | static image images[FRAMES]; 39 | static float *avg; 40 | 41 | void *fetch_in_thread(void *ptr) 42 | { 43 | in = get_image_from_stream(cap); 44 | if(!in.data){ 45 | error("Stream closed."); 46 | } 47 | in_s = resize_image(in, net.w, net.h); 48 | return 0; 49 | } 50 | 51 | void *detect_in_thread(void *ptr) 52 | { 53 | float nms = .4; 54 | 55 | layer l = net.layers[net.n-1]; 56 | float *X = det_s.data; 57 | float *prediction = network_predict(net, X); 58 | 59 | memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float)); 60 | mean_arrays(predictions, FRAMES, l.outputs, avg); 61 | l.output = avg; 62 | 63 | free_image(det_s); 64 | if(l.type == DETECTION){ 65 | get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); 66 | } else if (l.type == REGION){ 67 | get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0, 0, demo_hier_thresh); 68 | } else { 69 | error("Last layer must produce detections\n"); 70 | } 71 | if (nms > 0) do_nms(boxes, probs, l.w*l.h*l.n, l.classes, nms); 72 | printf("\033[2J"); 73 | printf("\033[1;1H"); 74 | printf("\nFPS:%.1f\n",fps); 75 | printf("Objects:\n\n"); 76 | 77 | images[demo_index] = det; 78 | det = images[(demo_index + FRAMES/2 + 1)%FRAMES]; 79 | demo_index = (demo_index + 1)%FRAMES; 80 | 81 | draw_detections(det, l.w*l.h*l.n, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes); 82 | 83 | return 0; 84 | } 85 | 86 | double get_wall_time() 87 | { 88 | struct timeval time; 89 | if (gettimeofday(&time,NULL)){ 90 | return 0; 91 | } 92 | return (double)time.tv_sec + (double)time.tv_usec * .000001; 93 | } 94 | 95 | void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh) 96 | { 97 | //skip = frame_skip; 98 | image **alphabet = load_alphabet(); 99 | int delay = frame_skip; 100 | demo_names = names; 101 | demo_alphabet = alphabet; 102 | demo_classes = classes; 103 | demo_thresh = thresh; 104 | demo_hier_thresh = hier_thresh; 105 | printf("Demo\n"); 106 | net = parse_network_cfg(cfgfile); 107 | if(weightfile){ 108 | load_weights(&net, weightfile); 109 | } 110 | set_batch_network(&net, 1); 111 | 112 | srand(2222222); 113 | 114 | if(filename){ 115 | printf("video file: %s\n", filename); 116 | cap = cvCaptureFromFile(filename); 117 | }else{ 118 | cap = cvCaptureFromCAM(cam_index); 119 | } 120 | 121 | if(!cap) error("Couldn't connect to webcam.\n"); 122 | 123 | layer l = net.layers[net.n-1]; 124 | int j; 125 | 126 | avg = (float *) calloc(l.outputs, sizeof(float)); 127 | for(j = 0; j < FRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); 128 | for(j = 0; j < FRAMES; ++j) images[j] = make_image(1,1,3); 129 | 130 | boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); 131 | probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); 132 | for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float)); 133 | 134 | pthread_t fetch_thread; 135 | pthread_t detect_thread; 136 | 137 | fetch_in_thread(0); 138 | det = in; 139 | det_s = in_s; 140 | 141 | fetch_in_thread(0); 142 | detect_in_thread(0); 143 | 
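    /* Prime the detection pipeline: detect_in_thread() averages the last FRAMES
     * network outputs with mean_arrays() before decoding boxes, so a few
     * fetch/detect rounds are run here and in the short loop just below to fill
     * predictions[] and images[] before the main display loop starts. */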
disp = det; 144 | det = in; 145 | det_s = in_s; 146 | 147 | for(j = 0; j < FRAMES/2; ++j){ 148 | fetch_in_thread(0); 149 | detect_in_thread(0); 150 | disp = det; 151 | det = in; 152 | det_s = in_s; 153 | } 154 | 155 | int count = 0; 156 | if(!prefix){ 157 | cvNamedWindow("Demo", CV_WINDOW_NORMAL); 158 | cvMoveWindow("Demo", 0, 0); 159 | cvResizeWindow("Demo", 1352, 1013); 160 | } 161 | 162 | double before = get_wall_time(); 163 | 164 | while(1){ 165 | ++count; 166 | if(1){ 167 | if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); 168 | if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); 169 | 170 | if(!prefix){ 171 | show_image(disp, "Demo"); 172 | int c = cvWaitKey(1); 173 | if (c == 10){ 174 | if(frame_skip == 0) frame_skip = 60; 175 | else if(frame_skip == 4) frame_skip = 0; 176 | else if(frame_skip == 60) frame_skip = 4; 177 | else frame_skip = 0; 178 | } 179 | }else{ 180 | char buff[256]; 181 | sprintf(buff, "%s_%08d", prefix, count); 182 | save_image(disp, buff); 183 | } 184 | 185 | pthread_join(fetch_thread, 0); 186 | pthread_join(detect_thread, 0); 187 | 188 | if(delay == 0){ 189 | free_image(disp); 190 | disp = det; 191 | } 192 | det = in; 193 | det_s = in_s; 194 | }else { 195 | fetch_in_thread(0); 196 | det = in; 197 | det_s = in_s; 198 | detect_in_thread(0); 199 | if(delay == 0) { 200 | free_image(disp); 201 | disp = det; 202 | } 203 | show_image(disp, "Demo"); 204 | cvWaitKey(1); 205 | } 206 | --delay; 207 | if(delay < 0){ 208 | delay = frame_skip; 209 | 210 | double after = get_wall_time(); 211 | float curr = 1./(after - before); 212 | fps = curr; 213 | before = after; 214 | } 215 | } 216 | } 217 | #else 218 | void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh) 219 | { 220 | fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); 221 | } 222 | #endif 223 | 224 | -------------------------------------------------------------------------------- /darknet/src/dropout_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "dropout_layer.h" 2 | #include 3 | #include 4 | #include "cuda.h" 5 | #include "utils.h" 6 | 7 | dropout_layer make_dropout_layer(int batch, int inputs, float probability) 8 | { 9 | dropout_layer l; 10 | l.type = DROPOUT; 11 | l.probability = probability; 12 | l.inputs = inputs; 13 | l.outputs = inputs; 14 | l.batch = batch; 15 | l.rand = (float *)calloc(inputs * batch, sizeof(float)); 16 | l.scale = 1. / (1. 
- probability); 17 | l.forward = forward_dropout_layer; 18 | l.backward = backward_dropout_layer; 19 | #ifdef DKGPU 20 | l.forward_gpu = forward_dropout_layer_gpu; 21 | l.backward_gpu = backward_dropout_layer_gpu; 22 | l.rand_gpu = cuda_make_array(l.rand, inputs * batch); 23 | #endif 24 | fprintf(stderr, 25 | "dropout p = %.2f %4d -> %4d\n", 26 | probability, 27 | inputs, 28 | inputs); 29 | return l; 30 | } 31 | 32 | void resize_dropout_layer(dropout_layer *l, int inputs) 33 | { 34 | l->rand = (float *)realloc(l->rand, l->inputs * l->batch * sizeof(float)); 35 | #ifdef DKGPU 36 | cuda_free(l->rand_gpu); 37 | 38 | l->rand_gpu = cuda_make_array(l->rand, inputs * l->batch); 39 | #endif 40 | } 41 | 42 | void forward_dropout_layer(dropout_layer l, network net) 43 | { 44 | int i; 45 | if (!net.train) return; 46 | for (i = 0; i < l.batch * l.inputs; ++i) 47 | { 48 | float r = rand_uniform(0, 1); 49 | l.rand[i] = r; 50 | if (r < l.probability) 51 | net.input[i] = 0; 52 | else 53 | net.input[i] *= l.scale; 54 | } 55 | } 56 | 57 | void backward_dropout_layer(dropout_layer l, network net) 58 | { 59 | int i; 60 | if (!net.delta) return; 61 | for (i = 0; i < l.batch * l.inputs; ++i) 62 | { 63 | float r = l.rand[i]; 64 | if (r < l.probability) 65 | net.delta[i] = 0; 66 | else 67 | net.delta[i] *= l.scale; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /darknet/src/dropout_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "dropout_layer.h" 7 | #include "cuda.h" 8 | #include "utils.h" 9 | } 10 | 11 | __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) 12 | { 13 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 14 | if(id < size) input[id] = (rand[id] < prob) ? 0 : input[id]*scale; 15 | } 16 | 17 | void forward_dropout_layer_gpu(dropout_layer layer, network net) 18 | { 19 | if (!net.train) return; 20 | int size = layer.inputs*layer.batch; 21 | cuda_random(layer.rand_gpu, size); 22 | /* 23 | int i; 24 | for(i = 0; i < size; ++i){ 25 | layer.rand[i] = rand_uniform(); 26 | } 27 | cuda_push_array(layer.rand_gpu, layer.rand, size); 28 | */ 29 | 30 | yoloswag420blazeit360noscope<<>>(net.input_gpu, size, layer.rand_gpu, layer.probability, layer.scale); 31 | check_error(cudaPeekAtLastError()); 32 | } 33 | 34 | void backward_dropout_layer_gpu(dropout_layer layer, network net) 35 | { 36 | if(!net.delta_gpu) return; 37 | int size = layer.inputs*layer.batch; 38 | 39 | yoloswag420blazeit360noscope<<>>(net.delta_gpu, size, layer.rand_gpu, layer.probability, layer.scale); 40 | check_error(cudaPeekAtLastError()); 41 | } 42 | -------------------------------------------------------------------------------- /darknet/src/im2col.cpp: -------------------------------------------------------------------------------- 1 | #include "im2col.h" 2 | #include 3 | float im2col_get_pixel(float *im, int height, int width, int channels, 4 | int row, int col, int channel, int pad) 5 | { 6 | row -= pad; 7 | col -= pad; 8 | 9 | if (row < 0 || col < 0 || 10 | row >= height || col >= width) return 0; 11 | return im[col + width*(row + height*channel)]; 12 | } 13 | 14 | //From Berkeley Vision's Caffe! 
15 | //https://github.com/BVLC/caffe/blob/master/LICENSE 16 | void im2col_cpu(float* data_im, 17 | int channels, int height, int width, 18 | int ksize, int stride, int pad, float* data_col) 19 | { 20 | int c,h,w; 21 | int height_col = (height + 2*pad - ksize) / stride + 1; 22 | int width_col = (width + 2*pad - ksize) / stride + 1; 23 | 24 | int channels_col = channels * ksize * ksize; 25 | for (c = 0; c < channels_col; ++c) { 26 | int w_offset = c % ksize; 27 | int h_offset = (c / ksize) % ksize; 28 | int c_im = c / ksize / ksize; 29 | for (h = 0; h < height_col; ++h) { 30 | for (w = 0; w < width_col; ++w) { 31 | int im_row = h_offset + h * stride; 32 | int im_col = w_offset + w * stride; 33 | int col_index = (c * height_col + h) * width_col + w; 34 | data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, 35 | im_row, im_col, c_im, pad); 36 | } 37 | } 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /darknet/src/im2col_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "im2col.h" 7 | #include "cuda.h" 8 | } 9 | 10 | // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu 11 | // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE 12 | 13 | __global__ void im2col_gpu_kernel(const int n, const float* data_im, 14 | const int height, const int width, const int ksize, 15 | const int pad, 16 | const int stride, 17 | const int height_col, const int width_col, 18 | float *data_col) { 19 | int index = blockIdx.x*blockDim.x+threadIdx.x; 20 | for(; index < n; index += blockDim.x*gridDim.x){ 21 | int w_out = index % width_col; 22 | int h_index = index / width_col; 23 | int h_out = h_index % height_col; 24 | int channel_in = h_index / height_col; 25 | int channel_out = channel_in * ksize * ksize; 26 | int h_in = h_out * stride - pad; 27 | int w_in = w_out * stride - pad; 28 | float* data_col_ptr = data_col; 29 | data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; 30 | const float* data_im_ptr = data_im; 31 | data_im_ptr += (channel_in * height + h_in) * width + w_in; 32 | for (int i = 0; i < ksize; ++i) { 33 | for (int j = 0; j < ksize; ++j) { 34 | int h = h_in + i; 35 | int w = w_in + j; 36 | 37 | *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? 38 | data_im_ptr[i * width + j] : 0; 39 | 40 | //*data_col_ptr = data_im_ptr[ii * width + jj]; 41 | 42 | data_col_ptr += height_col * width_col; 43 | } 44 | } 45 | } 46 | } 47 | 48 | void im2col_gpu(float *im, 49 | int channels, int height, int width, 50 | int ksize, int stride, int pad, float *data_col){ 51 | // We are going to launch channels * height_col * width_col kernels, each 52 | // kernel responsible for copying a single-channel grid. 
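    // Worked example of the shape bookkeeping below (hypothetical sizes, not
    // taken from any particular network config):
    //   height_col = (height + 2*pad - ksize)/stride + 1, and likewise width_col.
    //   With a 3 x 416 x 416 input, ksize = 3, stride = 1, pad = 1:
    //     height_col = (416 + 2 - 3)/1 + 1 = 416, width_col = 416,
    //     num_kernels = channels * height_col * width_col = 3*416*416 = 519168,
    //   so the launch needs (num_kernels + BLOCK - 1)/BLOCK thread blocks.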
53 | int height_col = (height + 2 * pad - ksize) / stride + 1; 54 | int width_col = (width + 2 * pad - ksize) / stride + 1; 55 | int num_kernels = channels * height_col * width_col; 56 | im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, 57 | BLOCK>>>( 58 | num_kernels, im, height, width, ksize, pad, 59 | stride, height_col, 60 | width_col, data_col); 61 | } 62 | -------------------------------------------------------------------------------- /darknet/src/l2norm_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "l2norm_layer.h" 2 | #include "activations.h" 3 | #include "blas.h" 4 | #include "cuda.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | layer make_l2norm_layer(int batch, int inputs) 13 | { 14 | fprintf( 15 | stderr, "l2norm %4d\n", inputs); 16 | layer l; 17 | l.type = L2NORM; 18 | l.batch = batch; 19 | l.inputs = inputs; 20 | l.outputs = inputs; 21 | l.output = (float*)calloc(inputs * batch, sizeof(float)); 22 | l.scales = (float*)calloc(inputs * batch, sizeof(float)); 23 | l.delta = (float*)calloc(inputs * batch, sizeof(float)); 24 | 25 | l.forward = forward_l2norm_layer; 26 | l.backward = backward_l2norm_layer; 27 | #ifdef DKGPU 28 | l.forward_gpu = forward_l2norm_layer_gpu; 29 | l.backward_gpu = backward_l2norm_layer_gpu; 30 | 31 | l.output_gpu = cuda_make_array(l.output, inputs * batch); 32 | l.scales_gpu = cuda_make_array(l.output, inputs * batch); 33 | l.delta_gpu = cuda_make_array(l.delta, inputs * batch); 34 | #endif 35 | return l; 36 | } 37 | 38 | void forward_l2norm_layer(const layer l, network net) 39 | { 40 | copy_cpu(l.outputs * l.batch, net.input, 1, l.output, 1); 41 | l2normalize_cpu(l.output, l.scales, l.batch, l.out_c, l.out_w * l.out_h); 42 | } 43 | 44 | void backward_l2norm_layer(const layer l, network net) 45 | { 46 | // axpy_cpu(l.inputs*l.batch, 1, l.scales, 1, l.delta, 1); 47 | axpy_cpu(l.inputs * l.batch, 1, l.delta, 1, net.delta, 1); 48 | } 49 | 50 | #ifdef DKGPU 51 | 52 | void forward_l2norm_layer_gpu(const layer l, network net) 53 | { 54 | copy_gpu(l.outputs * l.batch, net.input_gpu, 1, l.output_gpu, 1); 55 | l2normalize_gpu( 56 | l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_w * l.out_h); 57 | } 58 | 59 | void backward_l2norm_layer_gpu(const layer l, network net) 60 | { 61 | axpy_gpu(l.batch * l.inputs, 1, l.scales_gpu, 1, l.delta_gpu, 1); 62 | axpy_gpu(l.batch * l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); 63 | } 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /darknet/src/layer.cpp: -------------------------------------------------------------------------------- 1 | #include "layer.h" 2 | #include "cuda.h" 3 | 4 | #include 5 | 6 | void free_layer(layer l) 7 | { 8 | if (l.type == DROPOUT) 9 | { 10 | if (l.rand) free(l.rand); 11 | #ifdef DKGPU 12 | if (l.rand_gpu) cuda_free(l.rand_gpu); 13 | #endif 14 | return; 15 | } 16 | if (l.cweights) free(l.cweights); 17 | if (l.indexes) free(l.indexes); 18 | if (l.input_layers) free(l.input_layers); 19 | if (l.input_sizes) free(l.input_sizes); 20 | if (l.map) free(l.map); 21 | if (l.rand) free(l.rand); 22 | if (l.cost) free(l.cost); 23 | if (l.state) free(l.state); 24 | if (l.prev_state) free(l.prev_state); 25 | if (l.forgot_state) free(l.forgot_state); 26 | if (l.forgot_delta) free(l.forgot_delta); 27 | if (l.state_delta) free(l.state_delta); 28 | if (l.concat) free(l.concat); 29 | if (l.concat_delta) free(l.concat_delta); 30 | if (l.binary_weights) 
free(l.binary_weights); 31 | if (l.biases) free(l.biases); 32 | if (l.bias_updates) free(l.bias_updates); 33 | if (l.scales) free(l.scales); 34 | if (l.scale_updates) free(l.scale_updates); 35 | if (l.weights) free(l.weights); 36 | if (l.weight_updates) free(l.weight_updates); 37 | if (l.delta) free(l.delta); 38 | if (l.output) free(l.output); 39 | if (l.squared) free(l.squared); 40 | if (l.norms) free(l.norms); 41 | if (l.spatial_mean) free(l.spatial_mean); 42 | if (l.mean) free(l.mean); 43 | if (l.variance) free(l.variance); 44 | if (l.mean_delta) free(l.mean_delta); 45 | if (l.variance_delta) free(l.variance_delta); 46 | if (l.rolling_mean) free(l.rolling_mean); 47 | if (l.rolling_variance) free(l.rolling_variance); 48 | if (l.x) free(l.x); 49 | if (l.x_norm) free(l.x_norm); 50 | if (l.m) free(l.m); 51 | if (l.v) free(l.v); 52 | if (l.z_cpu) free(l.z_cpu); 53 | if (l.r_cpu) free(l.r_cpu); 54 | if (l.h_cpu) free(l.h_cpu); 55 | if (l.binary_input) free(l.binary_input); 56 | 57 | #ifdef DKGPU 58 | if (l.indexes_gpu) cuda_free((float *)l.indexes_gpu); 59 | 60 | if (l.z_gpu) cuda_free(l.z_gpu); 61 | if (l.r_gpu) cuda_free(l.r_gpu); 62 | if (l.h_gpu) cuda_free(l.h_gpu); 63 | if (l.m_gpu) cuda_free(l.m_gpu); 64 | if (l.v_gpu) cuda_free(l.v_gpu); 65 | if (l.prev_state_gpu) cuda_free(l.prev_state_gpu); 66 | if (l.forgot_state_gpu) cuda_free(l.forgot_state_gpu); 67 | if (l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu); 68 | if (l.state_gpu) cuda_free(l.state_gpu); 69 | if (l.state_delta_gpu) cuda_free(l.state_delta_gpu); 70 | if (l.gate_gpu) cuda_free(l.gate_gpu); 71 | if (l.gate_delta_gpu) cuda_free(l.gate_delta_gpu); 72 | if (l.save_gpu) cuda_free(l.save_gpu); 73 | if (l.save_delta_gpu) cuda_free(l.save_delta_gpu); 74 | if (l.concat_gpu) cuda_free(l.concat_gpu); 75 | if (l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); 76 | if (l.binary_input_gpu) cuda_free(l.binary_input_gpu); 77 | if (l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); 78 | if (l.mean_gpu) cuda_free(l.mean_gpu); 79 | if (l.variance_gpu) cuda_free(l.variance_gpu); 80 | if (l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu); 81 | if (l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu); 82 | if (l.variance_delta_gpu) cuda_free(l.variance_delta_gpu); 83 | if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); 84 | if (l.x_gpu) cuda_free(l.x_gpu); 85 | if (l.x_norm_gpu) cuda_free(l.x_norm_gpu); 86 | if (l.weights_gpu) cuda_free(l.weights_gpu); 87 | if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); 88 | if (l.biases_gpu) cuda_free(l.biases_gpu); 89 | if (l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); 90 | if (l.scales_gpu) cuda_free(l.scales_gpu); 91 | if (l.scale_updates_gpu) cuda_free(l.scale_updates_gpu); 92 | if (l.output_gpu) cuda_free(l.output_gpu); 93 | if (l.delta_gpu) cuda_free(l.delta_gpu); 94 | if (l.rand_gpu) cuda_free(l.rand_gpu); 95 | if (l.squared_gpu) cuda_free(l.squared_gpu); 96 | if (l.norms_gpu) cuda_free(l.norms_gpu); 97 | #endif 98 | } 99 | -------------------------------------------------------------------------------- /darknet/src/list.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "list.h" 4 | 5 | list *make_list() 6 | { 7 | list *l = (list*)malloc(sizeof(list)); 8 | l->size = 0; 9 | l->front = 0; 10 | l->back = 0; 11 | return l; 12 | } 13 | 14 | /* 15 | void transfer_node(list *s, list *d, node *n) 16 | { 17 | node *prev, *next; 18 | prev = n->prev; 19 | next = n->next; 20 | if(prev) prev->next = next; 21 | 
if(next) next->prev = prev; 22 | --s->size; 23 | if(s->front == n) s->front = next; 24 | if(s->back == n) s->back = prev; 25 | } 26 | */ 27 | 28 | void *list_pop(list *l){ 29 | if(!l->back) return 0; 30 | node *b = l->back; 31 | void *val = b->val; 32 | l->back = b->prev; 33 | if(l->back) l->back->next = 0; 34 | free(b); 35 | --l->size; 36 | 37 | return val; 38 | } 39 | 40 | void list_insert(list *l, void *val) 41 | { 42 | node *new_ = (node*)malloc(sizeof(node)); 43 | new_->val = val; 44 | new_->next = 0; 45 | 46 | if(!l->back){ 47 | l->front = new_; 48 | new_->prev = 0; 49 | }else{ 50 | l->back->next = new_; 51 | new_->prev = l->back; 52 | } 53 | l->back = new_; 54 | ++l->size; 55 | } 56 | 57 | void free_node(node *n) 58 | { 59 | node *next; 60 | while(n) { 61 | next = n->next; 62 | free(n); 63 | n = next; 64 | } 65 | } 66 | 67 | void free_list(list *l) 68 | { 69 | free_node(l->front); 70 | free(l); 71 | } 72 | 73 | void free_list_contents(list *l) 74 | { 75 | node *n = l->front; 76 | while(n){ 77 | free(n->val); 78 | n = n->next; 79 | } 80 | } 81 | 82 | void **list_to_array(list *l) 83 | { 84 | void **a = (void**)calloc(l->size, sizeof(void*)); 85 | int count = 0; 86 | node *n = l->front; 87 | while(n){ 88 | a[count++] = n->val; 89 | n = n->next; 90 | } 91 | return a; 92 | } 93 | -------------------------------------------------------------------------------- /darknet/src/local_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "local_layer.h" 2 | #include 3 | #include 4 | #include "blas.h" 5 | #include "col2im.h" 6 | #include "gemm.h" 7 | #include "im2col.h" 8 | #include "utils.h" 9 | 10 | int local_out_height(local_layer l) 11 | { 12 | int h = l.h; 13 | if (!l.pad) 14 | h -= l.size; 15 | else 16 | h -= 1; 17 | return h / l.stride + 1; 18 | } 19 | 20 | int local_out_width(local_layer l) 21 | { 22 | int w = l.w; 23 | if (!l.pad) 24 | w -= l.size; 25 | else 26 | w -= 1; 27 | return w / l.stride + 1; 28 | } 29 | 30 | local_layer make_local_layer(int batch, 31 | int h, 32 | int w, 33 | int c, 34 | int n, 35 | int size, 36 | int stride, 37 | int pad, 38 | ACTIVATION activation) 39 | { 40 | int i; 41 | local_layer l; 42 | l.type = LOCAL; 43 | 44 | l.h = h; 45 | l.w = w; 46 | l.c = c; 47 | l.n = n; 48 | l.batch = batch; 49 | l.stride = stride; 50 | l.size = size; 51 | l.pad = pad; 52 | 53 | int out_h = local_out_height(l); 54 | int out_w = local_out_width(l); 55 | int locations = out_h * out_w; 56 | l.out_h = out_h; 57 | l.out_w = out_w; 58 | l.out_c = n; 59 | l.outputs = l.out_h * l.out_w * l.out_c; 60 | l.inputs = l.w * l.h * l.c; 61 | 62 | l.weights = (float *)calloc(c * n * size * size * locations, sizeof(float)); 63 | l.weight_updates = 64 | (float *)calloc(c * n * size * size * locations, sizeof(float)); 65 | 66 | l.biases = (float *)calloc(l.outputs, sizeof(float)); 67 | l.bias_updates = (float *)calloc(l.outputs, sizeof(float)); 68 | 69 | // float scale = 1./sqrt(size*size*c); 70 | float scale = sqrt(2. 
/ (size * size * c)); 71 | for (i = 0; i < c * n * size * size; ++i) 72 | l.weights[i] = scale * rand_uniform(-1, 1); 73 | 74 | l.output = (float *)calloc(l.batch * out_h * out_w * n, sizeof(float)); 75 | l.delta = (float *)calloc(l.batch * out_h * out_w * n, sizeof(float)); 76 | 77 | l.workspace_size = out_h * out_w * size * size * c; 78 | 79 | l.forward = forward_local_layer; 80 | l.backward = backward_local_layer; 81 | l.update = update_local_layer; 82 | 83 | #ifdef DKGPU 84 | l.forward_gpu = forward_local_layer_gpu; 85 | l.backward_gpu = backward_local_layer_gpu; 86 | l.update_gpu = update_local_layer_gpu; 87 | 88 | l.weights_gpu = cuda_make_array(l.weights, c * n * size * size * locations); 89 | l.weight_updates_gpu = 90 | cuda_make_array(l.weight_updates, c * n * size * size * locations); 91 | 92 | l.biases_gpu = cuda_make_array(l.biases, l.outputs); 93 | l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs); 94 | 95 | l.delta_gpu = cuda_make_array(l.delta, l.batch * out_h * out_w * n); 96 | l.output_gpu = cuda_make_array(l.output, l.batch * out_h * out_w * n); 97 | 98 | #endif 99 | l.activation = activation; 100 | 101 | fprintf(stderr, 102 | "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", 103 | h, 104 | w, 105 | c, 106 | n, 107 | out_h, 108 | out_w, 109 | n); 110 | 111 | return l; 112 | } 113 | 114 | void forward_local_layer(const local_layer l, network net) 115 | { 116 | int out_h = local_out_height(l); 117 | int out_w = local_out_width(l); 118 | int i, j; 119 | int locations = out_h * out_w; 120 | 121 | for (i = 0; i < l.batch; ++i) 122 | { 123 | copy_cpu(l.outputs, l.biases, 1, l.output + i * l.outputs, 1); 124 | } 125 | 126 | for (i = 0; i < l.batch; ++i) 127 | { 128 | float *input = net.input + i * l.w * l.h * l.c; 129 | im2col_cpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace); 130 | float *output = l.output + i * l.outputs; 131 | for (j = 0; j < locations; ++j) 132 | { 133 | float *a = l.weights + j * l.size * l.size * l.c * l.n; 134 | float *b = net.workspace + j; 135 | float *c = output + j; 136 | 137 | int m = l.n; 138 | int n = 1; 139 | int k = l.size * l.size * l.c; 140 | 141 | gemm(0, 0, m, n, k, 1, a, k, b, locations, 1, c, locations); 142 | } 143 | } 144 | activate_array(l.output, l.outputs * l.batch, l.activation); 145 | } 146 | 147 | void backward_local_layer(local_layer l, network net) 148 | { 149 | int i, j; 150 | int locations = l.out_w * l.out_h; 151 | 152 | gradient_array(l.output, l.outputs * l.batch, l.activation, l.delta); 153 | 154 | for (i = 0; i < l.batch; ++i) 155 | { 156 | axpy_cpu(l.outputs, 1, l.delta + i * l.outputs, 1, l.bias_updates, 1); 157 | } 158 | 159 | for (i = 0; i < l.batch; ++i) 160 | { 161 | float *input = net.input + i * l.w * l.h * l.c; 162 | im2col_cpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace); 163 | 164 | for (j = 0; j < locations; ++j) 165 | { 166 | float *a = l.delta + i * l.outputs + j; 167 | float *b = net.workspace + j; 168 | float *c = l.weight_updates + j * l.size * l.size * l.c * l.n; 169 | int m = l.n; 170 | int n = l.size * l.size * l.c; 171 | int k = 1; 172 | 173 | gemm(0, 1, m, n, k, 1, a, locations, b, locations, 1, c, n); 174 | } 175 | 176 | if (net.delta) 177 | { 178 | for (j = 0; j < locations; ++j) 179 | { 180 | float *a = l.weights + j * l.size * l.size * l.c * l.n; 181 | float *b = l.delta + i * l.outputs + j; 182 | float *c = net.workspace + j; 183 | 184 | int m = l.size * l.size * l.c; 185 | int n = 1; 186 | int k = l.n; 187 | 188 | gemm(1, 0, m, 
n, k, 1, a, m, b, locations, 0, c, locations); 189 | } 190 | 191 | col2im_cpu(net.workspace, 192 | l.c, 193 | l.h, 194 | l.w, 195 | l.size, 196 | l.stride, 197 | l.pad, 198 | net.delta + i * l.c * l.h * l.w); 199 | } 200 | } 201 | } 202 | 203 | void update_local_layer(local_layer l, update_args a) 204 | { 205 | float learning_rate = a.learning_rate * l.learning_rate_scale; 206 | float momentum = a.momentum; 207 | float decay = a.decay; 208 | int batch = a.batch; 209 | 210 | int locations = l.out_w * l.out_h; 211 | int size = l.size * l.size * l.c * l.n * locations; 212 | axpy_cpu(l.outputs, learning_rate / batch, l.bias_updates, 1, l.biases, 1); 213 | scal_cpu(l.outputs, momentum, l.bias_updates, 1); 214 | 215 | axpy_cpu(size, -decay * batch, l.weights, 1, l.weight_updates, 1); 216 | axpy_cpu(size, learning_rate / batch, l.weight_updates, 1, l.weights, 1); 217 | scal_cpu(size, momentum, l.weight_updates, 1); 218 | } 219 | 220 | #ifdef DKGPU 221 | 222 | void forward_local_layer_gpu(const local_layer l, network net) 223 | { 224 | int out_h = local_out_height(l); 225 | int out_w = local_out_width(l); 226 | int i, j; 227 | int locations = out_h * out_w; 228 | 229 | for (i = 0; i < l.batch; ++i) 230 | { 231 | copy_gpu(l.outputs, l.biases_gpu, 1, l.output_gpu + i * l.outputs, 1); 232 | } 233 | 234 | for (i = 0; i < l.batch; ++i) 235 | { 236 | float *input = net.input_gpu + i * l.w * l.h * l.c; 237 | im2col_gpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace); 238 | float *output = l.output_gpu + i * l.outputs; 239 | for (j = 0; j < locations; ++j) 240 | { 241 | float *a = l.weights_gpu + j * l.size * l.size * l.c * l.n; 242 | float *b = net.workspace + j; 243 | float *c = output + j; 244 | 245 | int m = l.n; 246 | int n = 1; 247 | int k = l.size * l.size * l.c; 248 | 249 | gemm_gpu(0, 0, m, n, k, 1, a, k, b, locations, 1, c, locations); 250 | } 251 | } 252 | activate_array_gpu(l.output_gpu, l.outputs * l.batch, l.activation); 253 | } 254 | 255 | void backward_local_layer_gpu(local_layer l, network net) 256 | { 257 | int i, j; 258 | int locations = l.out_w * l.out_h; 259 | 260 | gradient_array_gpu( 261 | l.output_gpu, l.outputs * l.batch, l.activation, l.delta_gpu); 262 | for (i = 0; i < l.batch; ++i) 263 | { 264 | axpy_gpu( 265 | l.outputs, 1, l.delta_gpu + i * l.outputs, 1, l.bias_updates_gpu, 1); 266 | } 267 | 268 | for (i = 0; i < l.batch; ++i) 269 | { 270 | float *input = net.input_gpu + i * l.w * l.h * l.c; 271 | im2col_gpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace); 272 | 273 | for (j = 0; j < locations; ++j) 274 | { 275 | float *a = l.delta_gpu + i * l.outputs + j; 276 | float *b = net.workspace + j; 277 | float *c = l.weight_updates_gpu + j * l.size * l.size * l.c * l.n; 278 | int m = l.n; 279 | int n = l.size * l.size * l.c; 280 | int k = 1; 281 | 282 | gemm_gpu(0, 1, m, n, k, 1, a, locations, b, locations, 1, c, n); 283 | } 284 | 285 | if (net.delta_gpu) 286 | { 287 | for (j = 0; j < locations; ++j) 288 | { 289 | float *a = l.weights_gpu + j * l.size * l.size * l.c * l.n; 290 | float *b = l.delta_gpu + i * l.outputs + j; 291 | float *c = net.workspace + j; 292 | 293 | int m = l.size * l.size * l.c; 294 | int n = 1; 295 | int k = l.n; 296 | 297 | gemm_gpu(1, 0, m, n, k, 1, a, m, b, locations, 0, c, locations); 298 | } 299 | 300 | col2im_gpu(net.workspace, 301 | l.c, 302 | l.h, 303 | l.w, 304 | l.size, 305 | l.stride, 306 | l.pad, 307 | net.delta_gpu + i * l.c * l.h * l.w); 308 | } 309 | } 310 | } 311 | 312 | void update_local_layer_gpu(local_layer l, 
update_args a) 313 | { 314 | float learning_rate = a.learning_rate * l.learning_rate_scale; 315 | float momentum = a.momentum; 316 | float decay = a.decay; 317 | int batch = a.batch; 318 | 319 | int locations = l.out_w * l.out_h; 320 | int size = l.size * l.size * l.c * l.n * locations; 321 | axpy_gpu( 322 | l.outputs, learning_rate / batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); 323 | scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); 324 | 325 | axpy_gpu(size, -decay * batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); 326 | axpy_gpu( 327 | size, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); 328 | scal_gpu(size, momentum, l.weight_updates_gpu, 1); 329 | } 330 | 331 | void pull_local_layer(local_layer l) 332 | { 333 | int locations = l.out_w * l.out_h; 334 | int size = l.size * l.size * l.c * l.n * locations; 335 | cuda_pull_array(l.weights_gpu, l.weights, size); 336 | cuda_pull_array(l.biases_gpu, l.biases, l.outputs); 337 | } 338 | 339 | void push_local_layer(local_layer l) 340 | { 341 | int locations = l.out_w * l.out_h; 342 | int size = l.size * l.size * l.c * l.n * locations; 343 | cuda_push_array(l.weights_gpu, l.weights, size); 344 | cuda_push_array(l.biases_gpu, l.biases, l.outputs); 345 | } 346 | #endif 347 | -------------------------------------------------------------------------------- /darknet/src/logistic_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "logistic_layer.h" 2 | #include "activations.h" 3 | #include "blas.h" 4 | #include "cuda.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | layer make_logistic_layer(int batch, int inputs) 13 | { 14 | fprintf( 15 | stderr, "logistic x entropy %4d\n", inputs); 16 | layer l; 17 | l.type = LOGXENT; 18 | l.batch = batch; 19 | l.inputs = inputs; 20 | l.outputs = inputs; 21 | l.loss = (float*)calloc(inputs * batch, sizeof(float)); 22 | l.output = (float*)calloc(inputs * batch, sizeof(float)); 23 | l.delta = (float*)calloc(inputs * batch, sizeof(float)); 24 | l.cost = (float*)calloc(1, sizeof(float)); 25 | 26 | l.forward = forward_logistic_layer; 27 | l.backward = backward_logistic_layer; 28 | #ifdef DKGPU 29 | l.forward_gpu = forward_logistic_layer_gpu; 30 | l.backward_gpu = backward_logistic_layer_gpu; 31 | 32 | l.output_gpu = cuda_make_array(l.output, inputs * batch); 33 | l.loss_gpu = cuda_make_array(l.loss, inputs * batch); 34 | l.delta_gpu = cuda_make_array(l.delta, inputs * batch); 35 | #endif 36 | return l; 37 | } 38 | 39 | void forward_logistic_layer(const layer l, network net) 40 | { 41 | copy_cpu(l.outputs * l.batch, net.input, 1, l.output, 1); 42 | activate_array(l.output, l.outputs * l.batch, LOGISTIC); 43 | if (net.truth) 44 | { 45 | logistic_x_ent_cpu( 46 | l.batch * l.inputs, l.output, net.truth, l.delta, l.loss); 47 | l.cost[0] = sum_array(l.loss, l.batch * l.inputs); 48 | } 49 | } 50 | 51 | void backward_logistic_layer(const layer l, network net) 52 | { 53 | axpy_cpu(l.inputs * l.batch, 1, l.delta, 1, net.delta, 1); 54 | } 55 | 56 | #ifdef DKGPU 57 | 58 | void forward_logistic_layer_gpu(const layer l, network net) 59 | { 60 | copy_gpu(l.outputs * l.batch, net.input_gpu, 1, l.output_gpu, 1); 61 | activate_array_gpu(l.output_gpu, l.outputs * l.batch, LOGISTIC); 62 | if (net.truth) 63 | { 64 | logistic_x_ent_gpu(l.batch * l.inputs, 65 | l.output_gpu, 66 | net.truth_gpu, 67 | l.delta_gpu, 68 | l.loss_gpu); 69 | cuda_pull_array(l.loss_gpu, l.loss, l.batch * l.inputs); 70 | l.cost[0] = sum_array(l.loss, 
l.batch * l.inputs); 71 | } 72 | } 73 | 74 | void backward_logistic_layer_gpu(const layer l, network net) 75 | { 76 | axpy_gpu(l.batch * l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); 77 | } 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /darknet/src/matrix.cpp: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "blas.h" 8 | #include "utils.h" 9 | 10 | void free_matrix(matrix m) 11 | { 12 | int i; 13 | for (i = 0; i < m.rows; ++i) free(m.vals[i]); 14 | free(m.vals); 15 | } 16 | 17 | float matrix_topk_accuracy(matrix truth, matrix guess, int k) 18 | { 19 | int *indexes = (int *)calloc(k, sizeof(int)); 20 | int n = truth.cols; 21 | int i, j; 22 | int correct = 0; 23 | for (i = 0; i < truth.rows; ++i) 24 | { 25 | top_k(guess.vals[i], n, k, indexes); 26 | for (j = 0; j < k; ++j) 27 | { 28 | int class_ = indexes[j]; 29 | if (truth.vals[i][class_]) 30 | { 31 | ++correct; 32 | break; 33 | } 34 | } 35 | } 36 | free(indexes); 37 | return (float)correct / truth.rows; 38 | } 39 | 40 | void scale_matrix(matrix m, float scale) 41 | { 42 | int i, j; 43 | for (i = 0; i < m.rows; ++i) 44 | { 45 | for (j = 0; j < m.cols; ++j) 46 | { 47 | m.vals[i][j] *= scale; 48 | } 49 | } 50 | } 51 | 52 | matrix resize_matrix(matrix m, int size) 53 | { 54 | int i; 55 | if (m.rows == size) return m; 56 | if (m.rows < size) 57 | { 58 | m.vals = (float **)realloc(m.vals, size * sizeof(float *)); 59 | for (i = m.rows; i < size; ++i) 60 | { 61 | m.vals[i] = (float *)calloc(m.cols, sizeof(float)); 62 | } 63 | } 64 | else if (m.rows > size) 65 | { 66 | for (i = size; i < m.rows; ++i) 67 | { 68 | free(m.vals[i]); 69 | } 70 | m.vals = (float **)realloc(m.vals, size * sizeof(float *)); 71 | } 72 | m.rows = size; 73 | return m; 74 | } 75 | 76 | void matrix_add_matrix(matrix from, matrix to) 77 | { 78 | assert(from.rows == to.rows && from.cols == to.cols); 79 | int i, j; 80 | for (i = 0; i < from.rows; ++i) 81 | { 82 | for (j = 0; j < from.cols; ++j) 83 | { 84 | to.vals[i][j] += from.vals[i][j]; 85 | } 86 | } 87 | } 88 | 89 | matrix copy_matrix(matrix m) 90 | { 91 | matrix c = {0}; 92 | c.rows = m.rows; 93 | c.cols = m.cols; 94 | c.vals = (float **)calloc(c.rows, sizeof(float *)); 95 | int i; 96 | for (i = 0; i < c.rows; ++i) 97 | { 98 | c.vals[i] = (float *)calloc(c.cols, sizeof(float)); 99 | copy_cpu(c.cols, m.vals[i], 1, c.vals[i], 1); 100 | } 101 | return c; 102 | } 103 | 104 | matrix make_matrix(int rows, int cols) 105 | { 106 | int i; 107 | matrix m; 108 | m.rows = rows; 109 | m.cols = cols; 110 | m.vals = (float **)calloc(m.rows, sizeof(float *)); 111 | for (i = 0; i < m.rows; ++i) 112 | { 113 | m.vals[i] = (float *)calloc(m.cols, sizeof(float)); 114 | } 115 | return m; 116 | } 117 | 118 | matrix hold_out_matrix(matrix *m, int n) 119 | { 120 | int i; 121 | matrix h; 122 | h.rows = n; 123 | h.cols = m->cols; 124 | h.vals = (float **)calloc(h.rows, sizeof(float *)); 125 | for (i = 0; i < n; ++i) 126 | { 127 | int index = rand() % m->rows; 128 | h.vals[i] = m->vals[index]; 129 | m->vals[index] = m->vals[--(m->rows)]; 130 | } 131 | return h; 132 | } 133 | 134 | float *pop_column(matrix *m, int c) 135 | { 136 | float *col = (float *)calloc(m->rows, sizeof(float)); 137 | int i, j; 138 | for (i = 0; i < m->rows; ++i) 139 | { 140 | col[i] = m->vals[i][c]; 141 | for (j = c; j < m->cols - 1; ++j) 142 | { 143 | m->vals[i][j] = m->vals[i][j + 1]; 144 | 
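      /* the assignment above shifts the remaining entries of this row one slot
         left, overwriting the popped column; m->cols is decremented once after
         both loops finish */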
} 145 | } 146 | --m->cols; 147 | return col; 148 | } 149 | 150 | matrix csv_to_matrix(char *filename) 151 | { 152 | FILE *fp = fopen(filename, "r"); 153 | if (!fp) file_error(filename); 154 | 155 | matrix m; 156 | m.cols = -1; 157 | 158 | char *line; 159 | 160 | int n = 0; 161 | int size = 1024; 162 | m.vals = (float **)calloc(size, sizeof(float *)); 163 | while ((line = fgetl(fp))) 164 | { 165 | if (m.cols == -1) m.cols = count_fields(line); 166 | if (n == size) 167 | { 168 | size *= 2; 169 | m.vals = (float **)realloc(m.vals, size * sizeof(float *)); 170 | } 171 | m.vals[n] = parse_fields(line, m.cols); 172 | free(line); 173 | ++n; 174 | } 175 | m.vals = (float **)realloc(m.vals, n * sizeof(float *)); 176 | m.rows = n; 177 | return m; 178 | } 179 | 180 | void matrix_to_csv(matrix m) 181 | { 182 | int i, j; 183 | 184 | for (i = 0; i < m.rows; ++i) 185 | { 186 | for (j = 0; j < m.cols; ++j) 187 | { 188 | if (j > 0) printf(","); 189 | printf("%.17g", m.vals[i][j]); 190 | } 191 | printf("\n"); 192 | } 193 | } 194 | 195 | void print_matrix(matrix m) 196 | { 197 | int i, j; 198 | printf("%d X %d Matrix:\n", m.rows, m.cols); 199 | printf(" __"); 200 | for (j = 0; j < 16 * m.cols - 1; ++j) printf(" "); 201 | printf("__ \n"); 202 | 203 | printf("| "); 204 | for (j = 0; j < 16 * m.cols - 1; ++j) printf(" "); 205 | printf(" |\n"); 206 | 207 | for (i = 0; i < m.rows; ++i) 208 | { 209 | printf("| "); 210 | for (j = 0; j < m.cols; ++j) 211 | { 212 | printf("%15.7f ", m.vals[i][j]); 213 | } 214 | printf(" |\n"); 215 | } 216 | printf("|__"); 217 | for (j = 0; j < 16 * m.cols - 1; ++j) printf(" "); 218 | printf("__|\n"); 219 | } 220 | -------------------------------------------------------------------------------- /darknet/src/maxpool_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "maxpool_layer.h" 2 | #include 3 | #include "cuda.h" 4 | 5 | image get_maxpool_image(maxpool_layer l) 6 | { 7 | int h = l.out_h; 8 | int w = l.out_w; 9 | int c = l.c; 10 | return float_to_image(w, h, c, l.output); 11 | } 12 | 13 | image get_maxpool_delta(maxpool_layer l) 14 | { 15 | int h = l.out_h; 16 | int w = l.out_w; 17 | int c = l.c; 18 | return float_to_image(w, h, c, l.delta); 19 | } 20 | 21 | maxpool_layer make_maxpool_layer( 22 | int batch, int h, int w, int c, int size, int stride, int padding) 23 | { 24 | maxpool_layer l; 25 | l.type = MAXPOOL; 26 | l.batch = batch; 27 | l.h = h; 28 | l.w = w; 29 | l.c = c; 30 | l.pad = padding; 31 | l.out_w = (w + 2 * padding) / stride; 32 | l.out_h = (h + 2 * padding) / stride; 33 | l.out_c = c; 34 | l.outputs = l.out_h * l.out_w * l.out_c; 35 | l.inputs = h * w * c; 36 | l.size = size; 37 | l.stride = stride; 38 | int output_size = l.out_h * l.out_w * l.out_c * batch; 39 | l.indexes = (int *)calloc(output_size, sizeof(int)); 40 | l.output = (float *)calloc(output_size, sizeof(float)); 41 | l.delta = (float *)calloc(output_size, sizeof(float)); 42 | l.forward = forward_maxpool_layer; 43 | l.backward = backward_maxpool_layer; 44 | #ifdef DKGPU 45 | l.forward_gpu = forward_maxpool_layer_gpu; 46 | l.backward_gpu = backward_maxpool_layer_gpu; 47 | l.indexes_gpu = cuda_make_int_array(0, output_size); 48 | l.output_gpu = cuda_make_array(l.output, output_size); 49 | l.delta_gpu = cuda_make_array(l.delta, output_size); 50 | #endif 51 | // fprintf(stderr, 52 | // "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", 53 | // size, 54 | // size, 55 | // stride, 56 | // w, 57 | // h, 58 | // c, 59 | // l.out_w, 60 | // l.out_h, 61 | // 
l.out_c); 62 | return l; 63 | } 64 | 65 | void resize_maxpool_layer(maxpool_layer *l, int w, int h) 66 | { 67 | l->h = h; 68 | l->w = w; 69 | l->inputs = h * w * l->c; 70 | 71 | l->out_w = (w + 2 * l->pad) / l->stride; 72 | l->out_h = (h + 2 * l->pad) / l->stride; 73 | l->outputs = l->out_w * l->out_h * l->c; 74 | int output_size = l->outputs * l->batch; 75 | 76 | l->indexes = (int *)realloc(l->indexes, output_size * sizeof(int)); 77 | l->output = (float *)realloc(l->output, output_size * sizeof(float)); 78 | l->delta = (float *)realloc(l->delta, output_size * sizeof(float)); 79 | 80 | #ifdef DKGPU 81 | cuda_free((float *)l->indexes_gpu); 82 | cuda_free(l->output_gpu); 83 | cuda_free(l->delta_gpu); 84 | l->indexes_gpu = cuda_make_int_array(0, output_size); 85 | l->output_gpu = cuda_make_array(l->output, output_size); 86 | l->delta_gpu = cuda_make_array(l->delta, output_size); 87 | #endif 88 | } 89 | 90 | void forward_maxpool_layer(const maxpool_layer l, network net) 91 | { 92 | int b, i, j, k, m, n; 93 | int w_offset = -l.pad; 94 | int h_offset = -l.pad; 95 | 96 | int h = l.out_h; 97 | int w = l.out_w; 98 | int c = l.c; 99 | 100 | for (b = 0; b < l.batch; ++b) 101 | { 102 | for (k = 0; k < c; ++k) 103 | { 104 | for (i = 0; i < h; ++i) 105 | { 106 | for (j = 0; j < w; ++j) 107 | { 108 | int out_index = j + w * (i + h * (k + c * b)); 109 | float max = -FLT_MAX; 110 | int max_i = -1; 111 | for (n = 0; n < l.size; ++n) 112 | { 113 | for (m = 0; m < l.size; ++m) 114 | { 115 | int cur_h = h_offset + i * l.stride + n; 116 | int cur_w = w_offset + j * l.stride + m; 117 | int index = cur_w + l.w * (cur_h + l.h * (k + b * l.c)); 118 | int valid = 119 | (cur_h >= 0 && cur_h < l.h && cur_w >= 0 && cur_w < l.w); 120 | float val = (valid != 0) ? net.input[index] : -FLT_MAX; 121 | max_i = (val > max) ? index : max_i; 122 | max = (val > max) ? 
val : max; 123 | } 124 | } 125 | l.output[out_index] = max; 126 | l.indexes[out_index] = max_i; 127 | } 128 | } 129 | } 130 | } 131 | } 132 | 133 | void backward_maxpool_layer(const maxpool_layer l, network net) 134 | { 135 | int i; 136 | int h = l.out_h; 137 | int w = l.out_w; 138 | int c = l.c; 139 | for (i = 0; i < h * w * c * l.batch; ++i) 140 | { 141 | int index = l.indexes[i]; 142 | net.delta[index] += l.delta[i]; 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /darknet/src/maxpool_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "maxpool_layer.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) 11 | { 12 | int h = (in_h + 2*pad)/stride; 13 | int w = (in_w + 2*pad)/stride; 14 | int c = in_c; 15 | 16 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 17 | if(id >= n) return; 18 | 19 | int j = id % w; 20 | id /= w; 21 | int i = id % h; 22 | id /= h; 23 | int k = id % c; 24 | id /= c; 25 | int b = id; 26 | 27 | int w_offset = -pad; 28 | int h_offset = -pad; 29 | 30 | int out_index = j + w*(i + h*(k + c*b)); 31 | float max = -INFINITY; 32 | int max_i = -1; 33 | int l, m; 34 | for(l = 0; l < size; ++l){ 35 | for(m = 0; m < size; ++m){ 36 | int cur_h = h_offset + i*stride + l; 37 | int cur_w = w_offset + j*stride + m; 38 | int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); 39 | int valid = (cur_h >= 0 && cur_h < in_h && 40 | cur_w >= 0 && cur_w < in_w); 41 | float val = (valid != 0) ? input[index] : -INFINITY; 42 | max_i = (val > max) ? index : max_i; 43 | max = (val > max) ? val : max; 44 | } 45 | } 46 | output[out_index] = max; 47 | indexes[out_index] = max_i; 48 | } 49 | 50 | __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) 51 | { 52 | int h = (in_h + 2*pad)/stride; 53 | int w = (in_w + 2*pad)/stride; 54 | int c = in_c; 55 | int area = (size-1)/stride; 56 | 57 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 58 | if(id >= n) return; 59 | 60 | int index = id; 61 | int j = id % in_w; 62 | id /= in_w; 63 | int i = id % in_h; 64 | id /= in_h; 65 | int k = id % in_c; 66 | id /= in_c; 67 | int b = id; 68 | 69 | int w_offset = -pad; 70 | int h_offset = -pad; 71 | 72 | float d = 0; 73 | int l, m; 74 | for(l = -area; l < area+1; ++l){ 75 | for(m = -area; m < area+1; ++m){ 76 | int out_w = (j-w_offset)/stride + m; 77 | int out_h = (i-h_offset)/stride + l; 78 | int out_index = out_w + w*(out_h + h*(k + c*b)); 79 | int valid = (out_w >= 0 && out_w < w && 80 | out_h >= 0 && out_h < h); 81 | d += (valid && indexes[out_index] == index) ? 
delta[out_index] : 0; 82 | } 83 | } 84 | prev_delta[index] += d; 85 | } 86 | 87 | extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network net) 88 | { 89 | int h = layer.out_h; 90 | int w = layer.out_w; 91 | int c = layer.c; 92 | 93 | size_t n = h*w*c*layer.batch; 94 | 95 | forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, net.input_gpu, layer.output_gpu, layer.indexes_gpu); 96 | check_error(cudaPeekAtLastError()); 97 | } 98 | 99 | extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network net) 100 | { 101 | size_t n = layer.h*layer.w*layer.c*layer.batch; 102 | 103 | backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, net.delta_gpu, layer.indexes_gpu); 104 | check_error(cudaPeekAtLastError()); 105 | } 106 | 107 | -------------------------------------------------------------------------------- /darknet/src/normalization_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "normalization_layer.h" 2 | #include "blas.h" 3 | 4 | #include 5 | 6 | layer make_normalization_layer(int batch, 7 | int w, 8 | int h, 9 | int c, 10 | int size, 11 | float alpha, 12 | float beta, 13 | float kappa) 14 | { 15 | fprintf(stderr, 16 | "Local Response Normalization Layer: %d x %d x %d image, %d size\n", 17 | w, 18 | h, 19 | c, 20 | size); 21 | layer layer; 22 | layer.type = NORMALIZATION; 23 | layer.batch = batch; 24 | layer.h = layer.out_h = h; 25 | layer.w = layer.out_w = w; 26 | layer.c = layer.out_c = c; 27 | layer.kappa = kappa; 28 | layer.size = size; 29 | layer.alpha = alpha; 30 | layer.beta = beta; 31 | layer.output = (float *)calloc(h * w * c * batch, sizeof(float)); 32 | layer.delta = (float *)calloc(h * w * c * batch, sizeof(float)); 33 | layer.squared = (float *)calloc(h * w * c * batch, sizeof(float)); 34 | layer.norms = (float *)calloc(h * w * c * batch, sizeof(float)); 35 | layer.inputs = w * h * c; 36 | layer.outputs = layer.inputs; 37 | 38 | layer.forward = forward_normalization_layer; 39 | layer.backward = backward_normalization_layer; 40 | #ifdef DKGPU 41 | layer.forward_gpu = forward_normalization_layer_gpu; 42 | layer.backward_gpu = backward_normalization_layer_gpu; 43 | 44 | layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); 45 | layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); 46 | layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); 47 | layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch); 48 | #endif 49 | return layer; 50 | } 51 | 52 | void resize_normalization_layer(layer *layer, int w, int h) 53 | { 54 | int c = layer->c; 55 | int batch = layer->batch; 56 | layer->h = h; 57 | layer->w = w; 58 | layer->out_h = h; 59 | layer->out_w = w; 60 | layer->inputs = w * h * c; 61 | layer->outputs = layer->inputs; 62 | layer->output = 63 | (float *)realloc(layer->output, h * w * c * batch * sizeof(float)); 64 | layer->delta = 65 | (float *)realloc(layer->delta, h * w * c * batch * sizeof(float)); 66 | layer->squared = 67 | (float *)realloc(layer->squared, h * w * c * batch * sizeof(float)); 68 | layer->norms = 69 | (float *)realloc(layer->norms, h * w * c * batch * sizeof(float)); 70 | #ifdef DKGPU 71 | cuda_free(layer->output_gpu); 72 | cuda_free(layer->delta_gpu); 73 | cuda_free(layer->squared_gpu); 74 | cuda_free(layer->norms_gpu); 75 | layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); 76 | layer->delta_gpu = 
cuda_make_array(layer->delta, h * w * c * batch); 77 | layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); 78 | layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch); 79 | #endif 80 | } 81 | 82 | void forward_normalization_layer(const layer layer, network net) 83 | { 84 | int k, b; 85 | int w = layer.w; 86 | int h = layer.h; 87 | int c = layer.c; 88 | scal_cpu(w * h * c * layer.batch, 0, layer.squared, 1); 89 | 90 | for (b = 0; b < layer.batch; ++b) 91 | { 92 | float *squared = layer.squared + w * h * c * b; 93 | float *norms = layer.norms + w * h * c * b; 94 | float *input = net.input + w * h * c * b; 95 | pow_cpu(w * h * c, 2, input, 1, squared, 1); 96 | 97 | const_cpu(w * h, layer.kappa, norms, 1); 98 | for (k = 0; k < layer.size / 2; ++k) 99 | { 100 | axpy_cpu(w * h, layer.alpha, squared + w * h * k, 1, norms, 1); 101 | } 102 | 103 | for (k = 1; k < layer.c; ++k) 104 | { 105 | copy_cpu(w * h, norms + w * h * (k - 1), 1, norms + w * h * k, 1); 106 | int prev = k - ((layer.size - 1) / 2) - 1; 107 | int next = k + (layer.size / 2); 108 | if (prev >= 0) 109 | axpy_cpu(w * h, 110 | -layer.alpha, 111 | squared + w * h * prev, 112 | 1, 113 | norms + w * h * k, 114 | 1); 115 | if (next < layer.c) 116 | axpy_cpu(w * h, 117 | layer.alpha, 118 | squared + w * h * next, 119 | 1, 120 | norms + w * h * k, 121 | 1); 122 | } 123 | } 124 | pow_cpu( 125 | w * h * c * layer.batch, -layer.beta, layer.norms, 1, layer.output, 1); 126 | mul_cpu(w * h * c * layer.batch, net.input, 1, layer.output, 1); 127 | } 128 | 129 | void backward_normalization_layer(const layer layer, network net) 130 | { 131 | // TODO This is approximate ;-) 132 | // Also this should add in to delta instead of overwritting. 133 | 134 | int w = layer.w; 135 | int h = layer.h; 136 | int c = layer.c; 137 | pow_cpu(w * h * c * layer.batch, -layer.beta, layer.norms, 1, net.delta, 1); 138 | mul_cpu(w * h * c * layer.batch, layer.delta, 1, net.delta, 1); 139 | } 140 | 141 | #ifdef DKGPU 142 | void forward_normalization_layer_gpu(const layer layer, network net) 143 | { 144 | int k, b; 145 | int w = layer.w; 146 | int h = layer.h; 147 | int c = layer.c; 148 | scal_gpu(w * h * c * layer.batch, 0, layer.squared_gpu, 1); 149 | 150 | for (b = 0; b < layer.batch; ++b) 151 | { 152 | float *squared = layer.squared_gpu + w * h * c * b; 153 | float *norms = layer.norms_gpu + w * h * c * b; 154 | float *input = net.input_gpu + w * h * c * b; 155 | pow_gpu(w * h * c, 2, input, 1, squared, 1); 156 | 157 | const_gpu(w * h, layer.kappa, norms, 1); 158 | for (k = 0; k < layer.size / 2; ++k) 159 | { 160 | axpy_gpu(w * h, layer.alpha, squared + w * h * k, 1, norms, 1); 161 | } 162 | 163 | for (k = 1; k < layer.c; ++k) 164 | { 165 | copy_gpu(w * h, norms + w * h * (k - 1), 1, norms + w * h * k, 1); 166 | int prev = k - ((layer.size - 1) / 2) - 1; 167 | int next = k + (layer.size / 2); 168 | if (prev >= 0) 169 | axpy_gpu(w * h, 170 | -layer.alpha, 171 | squared + w * h * prev, 172 | 1, 173 | norms + w * h * k, 174 | 1); 175 | if (next < layer.c) 176 | axpy_gpu(w * h, 177 | layer.alpha, 178 | squared + w * h * next, 179 | 1, 180 | norms + w * h * k, 181 | 1); 182 | } 183 | } 184 | pow_gpu(w * h * c * layer.batch, 185 | -layer.beta, 186 | layer.norms_gpu, 187 | 1, 188 | layer.output_gpu, 189 | 1); 190 | mul_gpu(w * h * c * layer.batch, net.input_gpu, 1, layer.output_gpu, 1); 191 | } 192 | 193 | void backward_normalization_layer_gpu(const layer layer, network net) 194 | { 195 | // TODO This is approximate ;-) 196 | 197 | int w = 
layer.w; 198 | int h = layer.h; 199 | int c = layer.c; 200 | pow_gpu(w * h * c * layer.batch, 201 | -layer.beta, 202 | layer.norms_gpu, 203 | 1, 204 | net.delta_gpu, 205 | 1); 206 | mul_gpu(w * h * c * layer.batch, layer.delta_gpu, 1, net.delta_gpu, 1); 207 | } 208 | #endif 209 | -------------------------------------------------------------------------------- /darknet/src/option_list.cpp: -------------------------------------------------------------------------------- 1 | #include "option_list.h" 2 | #include <stdio.h> 3 | #include <stdlib.h> 4 | #include <string.h> 5 | #include "utils.h" 6 | 7 | list *read_data_cfg(char *filename) 8 | { 9 | FILE *file = fopen(filename, "r"); 10 | if (file == 0) file_error(filename); 11 | char *line; 12 | int nu = 0; 13 | list *options = make_list(); 14 | while ((line = fgetl(file)) != 0) 15 | { 16 | ++nu; 17 | strip(line); 18 | switch (line[0]) 19 | { 20 | case '\0': 21 | case '#': 22 | case ';': free(line); break; 23 | default: 24 | if (!read_option(line, options)) 25 | { 26 | fprintf( 27 | stderr, "Config file error line %d, could not parse: %s\n", nu, line); 28 | free(line); 29 | } 30 | break; 31 | } 32 | } 33 | fclose(file); 34 | return options; 35 | } 36 | 37 | int read_option(char *s, list *options) 38 | { 39 | size_t i; 40 | size_t len = strlen(s); 41 | char *val = 0; 42 | for (i = 0; i < len; ++i) 43 | { 44 | if (s[i] == '=') 45 | { 46 | s[i] = '\0'; 47 | val = s + i + 1; 48 | break; 49 | } 50 | } 51 | if (i == len - 1) return 0; 52 | char *key = s; 53 | option_insert(options, key, val); 54 | return 1; 55 | } 56 | 57 | void option_insert(list *l, char *key, char *val) 58 | { 59 | kvp *p = (kvp *)malloc(sizeof(kvp)); 60 | p->key = key; 61 | p->val = val; 62 | p->used = 0; 63 | list_insert(l, p); 64 | } 65 | 66 | void option_unused(list *l) 67 | { 68 | node *n = l->front; 69 | while (n) 70 | { 71 | kvp *p = (kvp *)n->val; 72 | if (!p->used) 73 | { 74 | fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); 75 | } 76 | n = n->next; 77 | } 78 | } 79 | 80 | char *option_find(list *l, char *key) 81 | { 82 | node *n = l->front; 83 | while (n) 84 | { 85 | kvp *p = (kvp *)n->val; 86 | if (strcmp(p->key, key) == 0) 87 | { 88 | p->used = 1; 89 | return p->val; 90 | } 91 | n = n->next; 92 | } 93 | return 0; 94 | } 95 | char *option_find_str(list *l, char *key, char *def) 96 | { 97 | char *v = option_find(l, key); 98 | if (v) return v; 99 | // if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); 100 | return def; 101 | } 102 | 103 | int option_find_int(list *l, char *key, int def) 104 | { 105 | char *v = option_find(l, key); 106 | if (v) return atoi(v); 107 | // fprintf(stderr, "%s: Using default '%d'\n", key, def); 108 | return def; 109 | } 110 | 111 | int option_find_int_quiet(list *l, char *key, int def) 112 | { 113 | char *v = option_find(l, key); 114 | if (v) return atoi(v); 115 | return def; 116 | } 117 | 118 | float option_find_float_quiet(list *l, char *key, float def) 119 | { 120 | char *v = option_find(l, key); 121 | if (v) return atof(v); 122 | return def; 123 | } 124 | 125 | float option_find_float(list *l, char *key, float def) 126 | { 127 | char *v = option_find(l, key); 128 | if (v) return atof(v); 129 | // fprintf(stderr, "%s: Using default '%lf'\n", key, def); 130 | return def; 131 | } 132 | -------------------------------------------------------------------------------- /darknet/src/reorg_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "reorg_layer.h" 2 | #include "blas.h" 3 | #include "cuda.h" 4 | 5 | 
#include <stdio.h> 6 | 7 | layer make_reorg_layer(int batch, 8 | int w, 9 | int h, 10 | int c, 11 | int stride, 12 | int reverse, 13 | int flatten, 14 | int extra) 15 | { 16 | layer l; 17 | l.type = REORG; 18 | l.batch = batch; 19 | l.stride = stride; 20 | l.extra = extra; 21 | l.h = h; 22 | l.w = w; 23 | l.c = c; 24 | l.flatten = flatten; 25 | if (reverse) 26 | { 27 | l.out_w = w * stride; 28 | l.out_h = h * stride; 29 | l.out_c = c / (stride * stride); 30 | } 31 | else 32 | { 33 | l.out_w = w / stride; 34 | l.out_h = h / stride; 35 | l.out_c = c * (stride * stride); 36 | } 37 | l.reverse = reverse; 38 | 39 | l.outputs = l.out_h * l.out_w * l.out_c; 40 | l.inputs = h * w * c; 41 | if (l.extra) 42 | { 43 | l.out_w = l.out_h = l.out_c = 0; 44 | l.outputs = l.inputs + l.extra; 45 | } 46 | 47 | if (extra) 48 | { 49 | // fprintf(stderr, "reorg %4d -> %4d\n", l.inputs, 50 | // l.outputs); 51 | } 52 | else 53 | { 54 | // fprintf(stderr, 55 | // "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", 56 | // stride, 57 | // w, 58 | // h, 59 | // c, 60 | // l.out_w, 61 | // l.out_h, 62 | // l.out_c); 63 | } 64 | int output_size = l.outputs * batch; 65 | l.output = (float *)calloc(output_size, sizeof(float)); 66 | l.delta = (float *)calloc(output_size, sizeof(float)); 67 | 68 | l.forward = forward_reorg_layer; 69 | l.backward = backward_reorg_layer; 70 | #ifdef DKGPU 71 | l.forward_gpu = forward_reorg_layer_gpu; 72 | l.backward_gpu = backward_reorg_layer_gpu; 73 | 74 | l.output_gpu = cuda_make_array(l.output, output_size); 75 | l.delta_gpu = cuda_make_array(l.delta, output_size); 76 | #endif 77 | return l; 78 | } 79 | 80 | void resize_reorg_layer(layer *l, int w, int h) 81 | { 82 | int stride = l->stride; 83 | int c = l->c; 84 | 85 | l->h = h; 86 | l->w = w; 87 | 88 | if (l->reverse) 89 | { 90 | l->out_w = w * stride; 91 | l->out_h = h * stride; 92 | l->out_c = c / (stride * stride); 93 | } 94 | else 95 | { 96 | l->out_w = w / stride; 97 | l->out_h = h / stride; 98 | l->out_c = c * (stride * stride); 99 | } 100 | 101 | l->outputs = l->out_h * l->out_w * l->out_c; 102 | l->inputs = l->outputs; 103 | int output_size = l->outputs * l->batch; 104 | 105 | l->output = (float *)realloc(l->output, output_size * sizeof(float)); 106 | l->delta = (float *)realloc(l->delta, output_size * sizeof(float)); 107 | 108 | #ifdef DKGPU 109 | cuda_free(l->output_gpu); 110 | cuda_free(l->delta_gpu); 111 | l->output_gpu = cuda_make_array(l->output, output_size); 112 | l->delta_gpu = cuda_make_array(l->delta, output_size); 113 | #endif 114 | } 115 | 116 | void forward_reorg_layer(const layer l, network net) 117 | { 118 | int i; 119 | if (l.flatten) 120 | { 121 | memcpy(l.output, net.input, l.outputs * l.batch * sizeof(float)); 122 | if (l.reverse) 123 | { 124 | flatten(l.output, l.w * l.h, l.c, l.batch, 0); 125 | } 126 | else 127 | { 128 | flatten(l.output, l.w * l.h, l.c, l.batch, 1); 129 | } 130 | } 131 | else if (l.extra) 132 | { 133 | for (i = 0; i < l.batch; ++i) 134 | { 135 | copy_cpu( 136 | l.inputs, net.input + i * l.inputs, 1, l.output + i * l.outputs, 1); 137 | } 138 | } 139 | else if (l.reverse) 140 | { 141 | reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); 142 | } 143 | else 144 | { 145 | reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); 146 | } 147 | } 148 | 149 | void backward_reorg_layer(const layer l, network net) 150 | { 151 | int i; 152 | if (l.flatten) 153 | { 154 | memcpy(net.delta, l.delta, l.outputs * l.batch * sizeof(float)); 155 | if (l.reverse) 156 | { 157 | flatten(net.delta, 
l.w * l.h, l.c, l.batch, 1); 158 | } 159 | else 160 | { 161 | flatten(net.delta, l.w * l.h, l.c, l.batch, 0); 162 | } 163 | } 164 | else if (l.reverse) 165 | { 166 | reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta); 167 | } 168 | else if (l.extra) 169 | { 170 | for (i = 0; i < l.batch; ++i) 171 | { 172 | copy_cpu( 173 | l.inputs, l.delta + i * l.outputs, 1, net.delta + i * l.inputs, 1); 174 | } 175 | } 176 | else 177 | { 178 | reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta); 179 | } 180 | } 181 | 182 | #ifdef DKGPU 183 | void forward_reorg_layer_gpu(layer l, network net) 184 | { 185 | int i; 186 | if (l.flatten) 187 | { 188 | if (l.reverse) 189 | { 190 | flatten_gpu(net.input_gpu, l.w * l.h, l.c, l.batch, 0, l.output_gpu); 191 | } 192 | else 193 | { 194 | flatten_gpu(net.input_gpu, l.w * l.h, l.c, l.batch, 1, l.output_gpu); 195 | } 196 | } 197 | else if (l.extra) 198 | { 199 | for (i = 0; i < l.batch; ++i) 200 | { 201 | copy_gpu(l.inputs, 202 | net.input_gpu + i * l.inputs, 203 | 1, 204 | l.output_gpu + i * l.outputs, 205 | 1); 206 | } 207 | } 208 | else if (l.reverse) 209 | { 210 | reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); 211 | } 212 | else 213 | { 214 | reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); 215 | } 216 | } 217 | 218 | void backward_reorg_layer_gpu(layer l, network net) 219 | { 220 | if (l.flatten) 221 | { 222 | if (l.reverse) 223 | { 224 | flatten_gpu(l.delta_gpu, l.w * l.h, l.c, l.batch, 1, net.delta_gpu); 225 | } 226 | else 227 | { 228 | flatten_gpu(l.delta_gpu, l.w * l.h, l.c, l.batch, 0, net.delta_gpu); 229 | } 230 | } 231 | else if (l.extra) 232 | { 233 | int i; 234 | for (i = 0; i < l.batch; ++i) 235 | { 236 | copy_gpu(l.inputs, 237 | l.delta_gpu + i * l.outputs, 238 | 1, 239 | net.delta_gpu + i * l.inputs, 240 | 1); 241 | } 242 | } 243 | else if (l.reverse) 244 | { 245 | reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta_gpu); 246 | } 247 | else 248 | { 249 | reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta_gpu); 250 | } 251 | } 252 | #endif 253 | -------------------------------------------------------------------------------- /darknet/src/route_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "route_layer.h" 2 | #include "blas.h" 3 | #include "cuda.h" 4 | 5 | #include <stdio.h> 6 | 7 | route_layer make_route_layer(int batch, 8 | int n, 9 | int *input_layers, 10 | int *input_sizes) 11 | { 12 | // fprintf(stderr, "route "); 13 | route_layer l; 14 | l.type = ROUTE; 15 | l.batch = batch; 16 | l.n = n; 17 | l.input_layers = input_layers; 18 | l.input_sizes = input_sizes; 19 | int i; 20 | int outputs = 0; 21 | for (i = 0; i < n; ++i) 22 | { 23 | // fprintf(stderr, " %d", input_layers[i]); 24 | outputs += input_sizes[i]; 25 | } 26 | // fprintf(stderr, "\n"); 27 | l.outputs = outputs; 28 | l.inputs = outputs; 29 | l.delta = (float *)calloc(outputs * batch, sizeof(float)); 30 | l.output = (float *)calloc(outputs * batch, sizeof(float)); 31 | ; 32 | 33 | l.forward = forward_route_layer; 34 | l.backward = backward_route_layer; 35 | #ifdef DKGPU 36 | l.forward_gpu = forward_route_layer_gpu; 37 | l.backward_gpu = backward_route_layer_gpu; 38 | 39 | l.delta_gpu = cuda_make_array(l.delta, outputs * batch); 40 | l.output_gpu = cuda_make_array(l.output, outputs * batch); 41 | #endif 42 | return l; 43 | } 44 | 45 | void resize_route_layer(route_layer *l, network *net) 46 | { 47 | int i; 48 | layer first 
= net->layers[l->input_layers[0]]; 49 | l->out_w = first.out_w; 50 | l->out_h = first.out_h; 51 | l->out_c = first.out_c; 52 | l->outputs = first.outputs; 53 | l->input_sizes[0] = first.outputs; 54 | for (i = 1; i < l->n; ++i) 55 | { 56 | int index = l->input_layers[i]; 57 | layer next = net->layers[index]; 58 | l->outputs += next.outputs; 59 | l->input_sizes[i] = next.outputs; 60 | if (next.out_w == first.out_w && next.out_h == first.out_h) 61 | { 62 | l->out_c += next.out_c; 63 | } 64 | else 65 | { 66 | // printf( 67 | // "%d %d, %d %d\n", next.out_w, next.out_h, first.out_w, 68 | // first.out_h); 69 | l->out_h = l->out_w = l->out_c = 0; 70 | } 71 | } 72 | l->inputs = l->outputs; 73 | l->delta = (float *)realloc(l->delta, l->outputs * l->batch * sizeof(float)); 74 | l->output = 75 | (float *)realloc(l->output, l->outputs * l->batch * sizeof(float)); 76 | 77 | #ifdef DKGPU 78 | cuda_free(l->output_gpu); 79 | cuda_free(l->delta_gpu); 80 | l->output_gpu = cuda_make_array(l->output, l->outputs * l->batch); 81 | l->delta_gpu = cuda_make_array(l->delta, l->outputs * l->batch); 82 | #endif 83 | } 84 | 85 | void forward_route_layer(const route_layer l, network net) 86 | { 87 | int i, j; 88 | int offset = 0; 89 | for (i = 0; i < l.n; ++i) 90 | { 91 | int index = l.input_layers[i]; 92 | float *input = net.layers[index].output; 93 | int input_size = l.input_sizes[i]; 94 | for (j = 0; j < l.batch; ++j) 95 | { 96 | copy_cpu(input_size, 97 | input + j * input_size, 98 | 1, 99 | l.output + offset + j * l.outputs, 100 | 1); 101 | } 102 | offset += input_size; 103 | } 104 | } 105 | 106 | void backward_route_layer(const route_layer l, network net) 107 | { 108 | int i, j; 109 | int offset = 0; 110 | for (i = 0; i < l.n; ++i) 111 | { 112 | int index = l.input_layers[i]; 113 | float *delta = net.layers[index].delta; 114 | int input_size = l.input_sizes[i]; 115 | for (j = 0; j < l.batch; ++j) 116 | { 117 | axpy_cpu(input_size, 118 | 1, 119 | l.delta + offset + j * l.outputs, 120 | 1, 121 | delta + j * input_size, 122 | 1); 123 | } 124 | offset += input_size; 125 | } 126 | } 127 | 128 | #ifdef DKGPU 129 | void forward_route_layer_gpu(const route_layer l, network net) 130 | { 131 | int i, j; 132 | int offset = 0; 133 | for (i = 0; i < l.n; ++i) 134 | { 135 | int index = l.input_layers[i]; 136 | float *input = net.layers[index].output_gpu; 137 | int input_size = l.input_sizes[i]; 138 | for (j = 0; j < l.batch; ++j) 139 | { 140 | copy_gpu(input_size, 141 | input + j * input_size, 142 | 1, 143 | l.output_gpu + offset + j * l.outputs, 144 | 1); 145 | } 146 | offset += input_size; 147 | } 148 | } 149 | 150 | void backward_route_layer_gpu(const route_layer l, network net) 151 | { 152 | int i, j; 153 | int offset = 0; 154 | for (i = 0; i < l.n; ++i) 155 | { 156 | int index = l.input_layers[i]; 157 | float *delta = net.layers[index].delta_gpu; 158 | int input_size = l.input_sizes[i]; 159 | for (j = 0; j < l.batch; ++j) 160 | { 161 | axpy_gpu(input_size, 162 | 1, 163 | l.delta_gpu + offset + j * l.outputs, 164 | 1, 165 | delta + j * input_size, 166 | 1); 167 | } 168 | offset += input_size; 169 | } 170 | } 171 | #endif 172 | -------------------------------------------------------------------------------- /darknet/src/shortcut_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "shortcut_layer.h" 2 | #include "activations.h" 3 | #include "blas.h" 4 | #include "cuda.h" 5 | 6 | #include <assert.h> 7 | #include <stdio.h> 8 | 9 | layer make_shortcut_layer( 10 | int batch, int index, int w, int 
h, int c, int w2, int h2, int c2) 11 | { 12 | fprintf(stderr, 13 | "res %3d %4d x%4d x%4d -> %4d x%4d x%4d\n", 14 | index, 15 | w2, 16 | h2, 17 | c2, 18 | w, 19 | h, 20 | c); 21 | layer l; 22 | l.type = SHORTCUT; 23 | l.batch = batch; 24 | l.w = w2; 25 | l.h = h2; 26 | l.c = c2; 27 | l.out_w = w; 28 | l.out_h = h; 29 | l.out_c = c; 30 | l.outputs = w * h * c; 31 | l.inputs = l.outputs; 32 | 33 | l.index = index; 34 | 35 | l.delta = (float *)calloc(l.outputs * batch, sizeof(float)); 36 | l.output = (float *)calloc(l.outputs * batch, sizeof(float)); 37 | ; 38 | 39 | l.forward = forward_shortcut_layer; 40 | l.backward = backward_shortcut_layer; 41 | #ifdef DKGPU 42 | l.forward_gpu = forward_shortcut_layer_gpu; 43 | l.backward_gpu = backward_shortcut_layer_gpu; 44 | 45 | l.delta_gpu = cuda_make_array(l.delta, l.outputs * batch); 46 | l.output_gpu = cuda_make_array(l.output, l.outputs * batch); 47 | #endif 48 | return l; 49 | } 50 | 51 | void resize_shortcut_layer(layer *l, int w, int h) 52 | { 53 | assert(l->w == l->out_w); 54 | assert(l->h == l->out_h); 55 | l->w = l->out_w = w; 56 | l->h = l->out_h = h; 57 | l->outputs = w * h * l->out_c; 58 | l->inputs = l->outputs; 59 | l->delta = (float *)realloc(l->delta, l->outputs * l->batch * sizeof(float)); 60 | l->output = 61 | (float *)realloc(l->output, l->outputs * l->batch * sizeof(float)); 62 | 63 | #ifdef DKGPU 64 | cuda_free(l->output_gpu); 65 | cuda_free(l->delta_gpu); 66 | l->output_gpu = cuda_make_array(l->output, l->outputs * l->batch); 67 | l->delta_gpu = cuda_make_array(l->delta, l->outputs * l->batch); 68 | #endif 69 | } 70 | 71 | void forward_shortcut_layer(const layer l, network net) 72 | { 73 | copy_cpu(l.outputs * l.batch, net.input, 1, l.output, 1); 74 | shortcut_cpu(l.batch, 75 | l.w, 76 | l.h, 77 | l.c, 78 | net.layers[l.index].output, 79 | l.out_w, 80 | l.out_h, 81 | l.out_c, 82 | l.alpha, 83 | l.beta, 84 | l.output); 85 | activate_array(l.output, l.outputs * l.batch, l.activation); 86 | } 87 | 88 | void backward_shortcut_layer(const layer l, network net) 89 | { 90 | gradient_array(l.output, l.outputs * l.batch, l.activation, l.delta); 91 | axpy_cpu(l.outputs * l.batch, l.alpha, l.delta, 1, net.delta, 1); 92 | shortcut_cpu(l.batch, 93 | l.out_w, 94 | l.out_h, 95 | l.out_c, 96 | l.delta, 97 | l.w, 98 | l.h, 99 | l.c, 100 | 1, 101 | l.beta, 102 | net.layers[l.index].delta); 103 | } 104 | 105 | #ifdef DKGPU 106 | void forward_shortcut_layer_gpu(const layer l, network net) 107 | { 108 | copy_gpu(l.outputs * l.batch, net.input_gpu, 1, l.output_gpu, 1); 109 | shortcut_gpu(l.batch, 110 | l.w, 111 | l.h, 112 | l.c, 113 | net.layers[l.index].output_gpu, 114 | l.out_w, 115 | l.out_h, 116 | l.out_c, 117 | l.alpha, 118 | l.beta, 119 | l.output_gpu); 120 | activate_array_gpu(l.output_gpu, l.outputs * l.batch, l.activation); 121 | } 122 | 123 | void backward_shortcut_layer_gpu(const layer l, network net) 124 | { 125 | gradient_array_gpu( 126 | l.output_gpu, l.outputs * l.batch, l.activation, l.delta_gpu); 127 | axpy_gpu(l.outputs * l.batch, l.alpha, l.delta_gpu, 1, net.delta_gpu, 1); 128 | shortcut_gpu(l.batch, 129 | l.out_w, 130 | l.out_h, 131 | l.out_c, 132 | l.delta_gpu, 133 | l.w, 134 | l.h, 135 | l.c, 136 | 1, 137 | l.beta, 138 | net.layers[l.index].delta_gpu); 139 | } 140 | #endif 141 | -------------------------------------------------------------------------------- /darknet/src/softmax_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "softmax_layer.h" 2 | #include "blas.h" 3 | 
#include "cuda.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | softmax_layer make_softmax_layer(int batch, int inputs, int groups) 12 | { 13 | assert(inputs % groups == 0); 14 | fprintf( 15 | stderr, "softmax %4d\n", inputs); 16 | softmax_layer l; 17 | l.type = SOFTMAX; 18 | l.batch = batch; 19 | l.groups = groups; 20 | l.inputs = inputs; 21 | l.outputs = inputs; 22 | l.loss = (float*)calloc(inputs * batch, sizeof(float)); 23 | l.output = (float*)calloc(inputs * batch, sizeof(float)); 24 | l.delta = (float*)calloc(inputs * batch, sizeof(float)); 25 | l.cost = (float*)calloc(1, sizeof(float)); 26 | 27 | l.forward = forward_softmax_layer; 28 | l.backward = backward_softmax_layer; 29 | #ifdef DKGPU 30 | l.forward_gpu = forward_softmax_layer_gpu; 31 | l.backward_gpu = backward_softmax_layer_gpu; 32 | 33 | l.output_gpu = cuda_make_array(l.output, inputs * batch); 34 | l.loss_gpu = cuda_make_array(l.loss, inputs * batch); 35 | l.delta_gpu = cuda_make_array(l.delta, inputs * batch); 36 | #endif 37 | return l; 38 | } 39 | 40 | void forward_softmax_layer(const softmax_layer l, network net) 41 | { 42 | if (l.softmax_tree) 43 | { 44 | int i; 45 | int count = 0; 46 | for (i = 0; i < l.softmax_tree->groups; ++i) 47 | { 48 | int group_size = l.softmax_tree->group_size[i]; 49 | softmax_cpu(net.input + count, 50 | group_size, 51 | l.batch, 52 | l.inputs, 53 | 1, 54 | 0, 55 | 1, 56 | l.temperature, 57 | l.output + count); 58 | count += group_size; 59 | } 60 | } 61 | else 62 | { 63 | softmax_cpu(net.input, 64 | l.inputs / l.groups, 65 | l.batch, 66 | l.inputs, 67 | l.groups, 68 | l.inputs / l.groups, 69 | 1, 70 | l.temperature, 71 | l.output); 72 | } 73 | 74 | if (net.truth) 75 | { 76 | softmax_x_ent_cpu(l.batch * l.inputs, l.output, net.truth, l.delta, l.loss); 77 | l.cost[0] = sum_array(l.loss, l.batch * l.inputs); 78 | } 79 | } 80 | 81 | void backward_softmax_layer(const softmax_layer l, network net) 82 | { 83 | axpy_cpu(l.inputs * l.batch, 1, l.delta, 1, net.delta, 1); 84 | } 85 | 86 | #ifdef DKGPU 87 | 88 | void pull_softmax_layer_output(const softmax_layer layer) 89 | { 90 | cuda_pull_array(layer.output_gpu, layer.output, layer.inputs * layer.batch); 91 | } 92 | 93 | void forward_softmax_layer_gpu(const softmax_layer l, network net) 94 | { 95 | if (l.softmax_tree) 96 | { 97 | softmax_tree(net.input_gpu, 98 | 1, 99 | l.batch, 100 | l.inputs, 101 | l.temperature, 102 | l.output_gpu, 103 | *l.softmax_tree); 104 | /* 105 | int i; 106 | int count = 0; 107 | for (i = 0; i < l.softmax_tree->groups; ++i) { 108 | int group_size = l.softmax_tree->group_size[i]; 109 | softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 110 | 1, l.temperature, l.output_gpu + count); count += group_size; 111 | } 112 | */ 113 | } 114 | else 115 | { 116 | if (l.spatial) 117 | { 118 | softmax_gpu(net.input_gpu, 119 | l.c, 120 | l.batch * l.c, 121 | l.inputs / l.c, 122 | l.w * l.h, 123 | 1, 124 | l.w * l.h, 125 | 1, 126 | l.output_gpu); 127 | } 128 | else 129 | { 130 | softmax_gpu(net.input_gpu, 131 | l.inputs / l.groups, 132 | l.batch, 133 | l.inputs, 134 | l.groups, 135 | l.inputs / l.groups, 136 | 1, 137 | l.temperature, 138 | l.output_gpu); 139 | } 140 | } 141 | if (net.truth) 142 | { 143 | softmax_x_ent_gpu(l.batch * l.inputs, 144 | l.output_gpu, 145 | net.truth_gpu, 146 | l.delta_gpu, 147 | l.loss_gpu); 148 | if (l.softmax_tree) 149 | { 150 | mask_gpu(l.batch * l.inputs, l.delta_gpu, SECRET_NUM, net.truth_gpu, 0); 151 | mask_gpu(l.batch * l.inputs, l.loss_gpu, SECRET_NUM, 
net.truth_gpu, 0); 152 | } 153 | cuda_pull_array(l.loss_gpu, l.loss, l.batch * l.inputs); 154 | l.cost[0] = sum_array(l.loss, l.batch * l.inputs); 155 | } 156 | } 157 | 158 | void backward_softmax_layer_gpu(const softmax_layer layer, network net) 159 | { 160 | axpy_gpu(layer.batch * layer.inputs, 1, layer.delta_gpu, 1, net.delta_gpu, 1); 161 | } 162 | 163 | #endif 164 | -------------------------------------------------------------------------------- /darknet/src/tree.cpp: -------------------------------------------------------------------------------- 1 | #include "tree.h" 2 | #include <stdio.h> 3 | #include <stdlib.h> 4 | #include "data.h" 5 | #include "utils.h" 6 | 7 | void change_leaves(tree *t, char *leaf_list) 8 | { 9 | list *llist = get_paths(leaf_list); 10 | char **leaves = (char **)list_to_array(llist); 11 | int n = llist->size; 12 | int i, j; 13 | int found = 0; 14 | for (i = 0; i < t->n; ++i) 15 | { 16 | t->leaf[i] = 0; 17 | for (j = 0; j < n; ++j) 18 | { 19 | if (0 == strcmp(t->name[i], leaves[j])) 20 | { 21 | t->leaf[i] = 1; 22 | ++found; 23 | break; 24 | } 25 | } 26 | } 27 | fprintf(stderr, "Found %d leaves.\n", found); 28 | } 29 | 30 | float get_hierarchy_probability(float *x, tree *hier, int c, int stride) 31 | { 32 | float p = 1; 33 | while (c >= 0) 34 | { 35 | p = p * x[c * stride]; 36 | c = hier->parent[c]; 37 | } 38 | return p; 39 | } 40 | 41 | void hierarchy_predictions( 42 | float *predictions, int n, tree *hier, int only_leaves, int stride) 43 | { 44 | int j; 45 | for (j = 0; j < n; ++j) 46 | { 47 | int parent = hier->parent[j]; 48 | if (parent >= 0) 49 | { 50 | predictions[j * stride] *= predictions[parent * stride]; 51 | } 52 | } 53 | if (only_leaves) 54 | { 55 | for (j = 0; j < n; ++j) 56 | { 57 | if (!hier->leaf[j]) predictions[j * stride] = 0; 58 | } 59 | } 60 | } 61 | 62 | int hierarchy_top_prediction(float *predictions, 63 | tree *hier, 64 | float thresh, 65 | int stride) 66 | { 67 | float p = 1; 68 | int group = 0; 69 | int i; 70 | while (1) 71 | { 72 | float max = 0; 73 | int max_i = 0; 74 | 75 | for (i = 0; i < hier->group_size[group]; ++i) 76 | { 77 | int index = i + hier->group_offset[group]; 78 | float val = predictions[(i + hier->group_offset[group]) * stride]; 79 | if (val > max) 80 | { 81 | max_i = index; 82 | max = val; 83 | } 84 | } 85 | if (p * max > thresh) 86 | { 87 | p = p * max; 88 | group = hier->child[max_i]; 89 | if (hier->child[max_i] < 0) return max_i; 90 | } 91 | else if (group == 0) 92 | { 93 | return max_i; 94 | } 95 | else 96 | { 97 | return hier->parent[hier->group_offset[group]]; 98 | } 99 | } 100 | return 0; 101 | } 102 | 103 | tree *read_tree(char *filename) 104 | { 105 | tree t = {0}; 106 | FILE *fp = fopen(filename, "r"); 107 | 108 | char *line; 109 | int last_parent = -1; 110 | int group_size = 0; 111 | int groups = 0; 112 | int n = 0; 113 | while ((line = fgetl(fp)) != 0) 114 | { 115 | char *id = (char *)calloc(256, sizeof(char)); 116 | int parent = -1; 117 | sscanf(line, "%s %d", id, &parent); 118 | t.parent = (int *)realloc(t.parent, (n + 1) * sizeof(int)); 119 | t.parent[n] = parent; 120 | 121 | t.child = (int *)realloc(t.child, (n + 1) * sizeof(int)); 122 | t.child[n] = -1; 123 | 124 | t.name = (char **)realloc(t.name, (n + 1) * sizeof(char *)); 125 | t.name[n] = id; 126 | if (parent != last_parent) 127 | { 128 | ++groups; 129 | t.group_offset = (int *)realloc(t.group_offset, groups * sizeof(int)); 130 | t.group_offset[groups - 1] = n - group_size; 131 | t.group_size = (int *)realloc(t.group_size, groups * sizeof(int)); 132 | t.group_size[groups 
- 1] = group_size; 133 | group_size = 0; 134 | last_parent = parent; 135 | } 136 | t.group = (int *)realloc(t.group, (n + 1) * sizeof(int)); 137 | t.group[n] = groups; 138 | if (parent >= 0) 139 | { 140 | t.child[parent] = groups; 141 | } 142 | ++n; 143 | ++group_size; 144 | } 145 | ++groups; 146 | t.group_offset = (int *)realloc(t.group_offset, groups * sizeof(int)); 147 | t.group_offset[groups - 1] = n - group_size; 148 | t.group_size = (int *)realloc(t.group_size, groups * sizeof(int)); 149 | t.group_size[groups - 1] = group_size; 150 | t.n = n; 151 | t.groups = groups; 152 | t.leaf = (int *)calloc(n, sizeof(int)); 153 | int i; 154 | for (i = 0; i < n; ++i) t.leaf[i] = 1; 155 | for (i = 0; i < n; ++i) 156 | if (t.parent[i] >= 0) t.leaf[t.parent[i]] = 0; 157 | 158 | fclose(fp); 159 | tree *tree_ptr = (tree *)calloc(1, sizeof(tree)); 160 | *tree_ptr = t; 161 | // error(0); 162 | return tree_ptr; 163 | } 164 | -------------------------------------------------------------------------------- /darknet/src/upsample_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "upsample_layer.h" 2 | #include "blas.h" 3 | #include "cuda.h" 4 | 5 | #include <stdio.h> 6 | 7 | layer make_upsample_layer(int batch, int w, int h, int c, int stride) 8 | { 9 | layer l; 10 | l.type = UPSAMPLE; 11 | l.batch = batch; 12 | l.w = w; 13 | l.h = h; 14 | l.c = c; 15 | l.out_w = w * stride; 16 | l.out_h = h * stride; 17 | l.out_c = c; 18 | if (stride < 0) 19 | { 20 | stride = -stride; 21 | l.reverse = 1; 22 | l.out_w = w / stride; 23 | l.out_h = h / stride; 24 | } 25 | l.stride = stride; 26 | l.outputs = l.out_w * l.out_h * l.out_c; 27 | l.inputs = l.w * l.h * l.c; 28 | l.delta = (float *)calloc(l.outputs * batch, sizeof(float)); 29 | l.output = (float *)calloc(l.outputs * batch, sizeof(float)); 30 | ; 31 | 32 | l.forward = forward_upsample_layer; 33 | l.backward = backward_upsample_layer; 34 | #ifdef DKGPU 35 | l.forward_gpu = forward_upsample_layer_gpu; 36 | l.backward_gpu = backward_upsample_layer_gpu; 37 | 38 | l.delta_gpu = cuda_make_array(l.delta, l.outputs * batch); 39 | l.output_gpu = cuda_make_array(l.output, l.outputs * batch); 40 | #endif 41 | if (l.reverse) 42 | fprintf(stderr, 43 | "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", 44 | stride, 45 | w, 46 | h, 47 | c, 48 | l.out_w, 49 | l.out_h, 50 | l.out_c); 51 | else 52 | fprintf(stderr, 53 | "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", 54 | stride, 55 | w, 56 | h, 57 | c, 58 | l.out_w, 59 | l.out_h, 60 | l.out_c); 61 | return l; 62 | } 63 | 64 | void resize_upsample_layer(layer *l, int w, int h) 65 | { 66 | l->w = w; 67 | l->h = h; 68 | l->out_w = w * l->stride; 69 | l->out_h = h * l->stride; 70 | if (l->reverse) 71 | { 72 | l->out_w = w / l->stride; 73 | l->out_h = h / l->stride; 74 | } 75 | l->outputs = l->out_w * l->out_h * l->out_c; 76 | l->inputs = l->h * l->w * l->c; 77 | l->delta = (float *)realloc(l->delta, l->outputs * l->batch * sizeof(float)); 78 | l->output = 79 | (float *)realloc(l->output, l->outputs * l->batch * sizeof(float)); 80 | 81 | #ifdef DKGPU 82 | cuda_free(l->output_gpu); 83 | cuda_free(l->delta_gpu); 84 | l->output_gpu = cuda_make_array(l->output, l->outputs * l->batch); 85 | l->delta_gpu = cuda_make_array(l->delta, l->outputs * l->batch); 86 | #endif 87 | } 88 | 89 | void forward_upsample_layer(const layer l, network net) 90 | { 91 | fill_cpu(l.outputs * l.batch, 0, l.output, 1); 92 | if (l.reverse) 93 | { 94 | upsample_cpu(l.output, 95 | l.out_w, 96 | l.out_h, 97 | l.c, 98 | 
l.batch, 99 | l.stride, 100 | 0, 101 | l.scale, 102 | net.input); 103 | } 104 | else 105 | { 106 | upsample_cpu( 107 | net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output); 108 | } 109 | } 110 | 111 | void backward_upsample_layer(const layer l, network net) 112 | { 113 | if (l.reverse) 114 | { 115 | upsample_cpu(l.delta, 116 | l.out_w, 117 | l.out_h, 118 | l.c, 119 | l.batch, 120 | l.stride, 121 | 1, 122 | l.scale, 123 | net.delta); 124 | } 125 | else 126 | { 127 | upsample_cpu( 128 | net.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta); 129 | } 130 | } 131 | 132 | #ifdef DKGPU 133 | void forward_upsample_layer_gpu(const layer l, network net) 134 | { 135 | fill_gpu(l.outputs * l.batch, 0, l.output_gpu, 1); 136 | if (l.reverse) 137 | { 138 | upsample_gpu(l.output_gpu, 139 | l.out_w, 140 | l.out_h, 141 | l.c, 142 | l.batch, 143 | l.stride, 144 | 0, 145 | l.scale, 146 | net.input_gpu); 147 | } 148 | else 149 | { 150 | upsample_gpu(net.input_gpu, 151 | l.w, 152 | l.h, 153 | l.c, 154 | l.batch, 155 | l.stride, 156 | 1, 157 | l.scale, 158 | l.output_gpu); 159 | } 160 | } 161 | 162 | void backward_upsample_layer_gpu(const layer l, network net) 163 | { 164 | if (l.reverse) 165 | { 166 | upsample_gpu(l.delta_gpu, 167 | l.out_w, 168 | l.out_h, 169 | l.c, 170 | l.batch, 171 | l.stride, 172 | 1, 173 | l.scale, 174 | net.delta_gpu); 175 | } 176 | else 177 | { 178 | upsample_gpu(net.delta_gpu, 179 | l.w, 180 | l.h, 181 | l.c, 182 | l.batch, 183 | l.stride, 184 | 0, 185 | l.scale, 186 | l.delta_gpu); 187 | } 188 | } 189 | #endif 190 | --------------------------------------------------------------------------------