├── .gitignore ├── CMakeLists.txt ├── README.md ├── cmake_uninstall.cmake.in └── darknet ├── CMakeLists.txt ├── include ├── activation_layer.h ├── activations.h ├── avgpool_layer.h ├── batchnorm_layer.h ├── blas.h ├── box.h ├── classifier.h ├── col2im.h ├── connected_layer.h ├── convolutional_layer.h ├── cost_layer.h ├── crnn_layer.h ├── crop_layer.h ├── cuda.h ├── darknet.h ├── data.h ├── deconvolutional_layer.h ├── demo.h ├── detection_layer.h ├── dropout_layer.h ├── gemm.h ├── gru_layer.h ├── im2col.h ├── image.h ├── l2norm_layer.h ├── layer.h ├── list.h ├── local_layer.h ├── logistic_layer.h ├── lstm_layer.h ├── matrix.h ├── maxpool_layer.h ├── network.h ├── normalization_layer.h ├── option_list.h ├── parser.h ├── region_layer.h ├── reorg_layer.h ├── rnn_layer.h ├── route_layer.h ├── shortcut_layer.h ├── softmax_layer.h ├── stb_image.h ├── stb_image_write.h ├── tree.h ├── upsample_layer.h ├── utils.h └── yolo_layer.h └── src ├── CMakeLists.txt ├── activation_kernels.cu ├── activation_layer.cpp ├── activations.cpp ├── avgpool_layer.cpp ├── avgpool_layer_kernels.cu ├── batchnorm_layer.cpp ├── blas.cpp ├── blas_kernels.cu ├── box.cpp ├── col2im.cpp ├── col2im_kernels.cu ├── connected_layer.cpp ├── convolutional_kernels.cu ├── convolutional_layer.cpp ├── cost_layer.cpp ├── crnn_layer.cpp ├── crop_layer.cpp ├── crop_layer_kernels.cu ├── cuda.cpp ├── data.cpp ├── deconvolutional_kernels.cu ├── deconvolutional_layer.cpp ├── demo.cpp ├── detection_layer.cpp ├── dropout_layer.cpp ├── dropout_layer_kernels.cu ├── gemm.cpp ├── gru_layer.cpp ├── im2col.cpp ├── im2col_kernels.cu ├── image.cpp ├── l2norm_layer.cpp ├── layer.cpp ├── list.cpp ├── local_layer.cpp ├── logistic_layer.cpp ├── lstm_layer.cpp ├── matrix.cpp ├── maxpool_layer.cpp ├── maxpool_layer_kernels.cu ├── network.cpp ├── normalization_layer.cpp ├── option_list.cpp ├── parser.cpp ├── region_layer.cpp ├── reorg_layer.cpp ├── rnn_layer.cpp ├── route_layer.cpp ├── shortcut_layer.cpp ├── softmax_layer.cpp ├── tree.cpp ├── upsample_layer.cpp ├── utils.cpp └── yolo_layer.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9) 2 | 3 | project(darknet_cpp LANGUAGES CXX CUDA) 4 | if(CMAKE_BUILD_TYPE STREQUAL "DEBUG") 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Winline") 6 | else() 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG") 8 | endif() 9 | message(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 10 | 11 | set(CUDA_PROPAGATE_HOST_FLAGS OFF) 12 | set(CUDA_WARNING "cross-execution-space-call") 13 | # new flags introduced in CUDA 9 set(CUDA_WARNING "reorder,cross-execution- 14 | # space-call,deprecated-declarations") 15 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror ${CUDA_WARNING} -restrict") 16 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_61,code=sm_61") 17 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_52,code=sm_52") 18 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_50,code=sm_50") 19 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_35,code=sm_35") 20 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_30,code=sm_30") 21 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_75,code=sm_75") 22 | set(CMAKE_CUDA_FLAGS 23 | "${CMAKE_CUDA_FLAGS} -gencode 
arch=compute_75,code=compute_75") 24 | 25 | message(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}") 26 | 27 | if(${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR}) 28 | export(TARGETS FILE ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Targets.cmake) 29 | endif() 30 | 31 | file( 32 | WRITE 33 | ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Config.cmake 34 | "include(CMakeFindDependencyMacro)\n" 35 | "include(\${CMAKE_CURRENT_LIST_DIR}/${CMAKE_PROJECT_NAME}Targets.cmake)\n") 36 | 37 | if(NOT TARGET uninstall) 38 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in 39 | ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake 40 | IMMEDIATE 41 | @ONLY) 42 | 43 | add_custom_target(uninstall 44 | COMMAND ${CMAKE_COMMAND} -P 45 | ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) 46 | endif() 47 | 48 | add_subdirectory(darknet) 49 | 50 | install(FILES ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Config.cmake 51 | DESTINATION lib/cmake/${CMAKE_PROJECT_NAME}) 52 | 53 | install(EXPORT ${CMAKE_PROJECT_NAME}Targets 54 | DESTINATION lib/cmake/${CMAKE_PROJECT_NAME}) 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # darknet-cpp 2 | 3 | Darknet is well known for yolo implementation. However, darknet is built in c and it makes it little painful to integrate with other c++ projects. In this project, I will update darknet c library to c++. 4 | 5 | Current Status: 6 | - Cmake files added 7 | - Yolo v3 added 8 | - Network detection memory leak fix after darknet update 9 | 10 | Future work: 11 | - Replace c style raw pointers to smart pointers 12 | - Improve modularity 13 | -------------------------------------------------------------------------------- /cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 2 | message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 3 | endif(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 4 | 5 | if (NOT DEFINED CMAKE_INSTALL_PREFIX) 6 | set (CMAKE_INSTALL_PREFIX "@CMAKE_INSTALL_PREFIX@") 7 | endif () 8 | message(${CMAKE_INSTALL_PREFIX}) 9 | 10 | file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) 11 | string(REGEX REPLACE "\n" ";" files "${files}") 12 | foreach(file ${files}) 13 | message(STATUS "Uninstalling $ENV{DESTDIR}${file}") 14 | if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 15 | exec_program( 16 | "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" 17 | OUTPUT_VARIABLE rm_out 18 | RETURN_VALUE rm_retval 19 | ) 20 | if(NOT "${rm_retval}" STREQUAL 0) 21 | message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") 22 | endif(NOT "${rm_retval}" STREQUAL 0) 23 | else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 24 | message(STATUS "File $ENV{DESTDIR}${file} does not exist.") 25 | endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 26 | endforeach(file) -------------------------------------------------------------------------------- /darknet/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9) 2 | 3 | project(darknet_cpp) 4 | 5 | file(GLOB YOLO_SOURCE_FILES src/*.cpp) 6 | file(GLOB CUDA_SOURCE_FILES src/*.cu) 7 | file(GLOB DARKNET_INCS include/*.h) 8 | 9 | set(PROJECT_SRCS 10 | 
${YOLO_SOURCE_FILES} 11 | ${CUDA_SOURCE_FILES} 12 | ) 13 | 14 | set(PROJECT_INCS 15 | ${DARKNET_INCS} 16 | ) 17 | 18 | find_package( OpenCV REQUIRED ) 19 | find_package( CUDA REQUIRED ) 20 | 21 | add_library(${PROJECT_NAME} SHARED ${PROJECT_SRCS} ${PROJECT_INCS}) 22 | target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_14) 23 | 24 | target_compile_definitions(${PROJECT_NAME} PUBLIC 25 | DKGPU) 26 | 27 | target_include_directories(${PROJECT_NAME} PUBLIC 28 | $ 29 | $) 30 | 31 | target_include_directories(${PROJECT_NAME} SYSTEM PUBLIC 32 | ${OpenCV_INCLUDE_DIRS} 33 | ${CUDA_INCLUDE_DIRS}) 34 | 35 | target_link_libraries(${PROJECT_NAME} 36 | ${OpenCV_LIBS} 37 | ${CUDA_LIBRARIES} 38 | ${CUDA_curand_LIBRARY} 39 | ${CUDA_CUBLAS_LIBRARIES}) 40 | 41 | install(TARGETS ${PROJECT_NAME} 42 | EXPORT ${CMAKE_PROJECT_NAME}Targets 43 | ARCHIVE DESTINATION lib 44 | LIBRARY DESTINATION lib 45 | RUNTIME DESTINATION bin) 46 | 47 | install(FILES ${DARKNET_INCS} 48 | DESTINATION include/Darknet/) 49 | 50 | export(TARGETS ${PROJECT_NAME} 51 | APPEND FILE ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Targets.cmake) 52 | 53 | file(APPEND ${CMAKE_BINARY_DIR}/${CMAKE_PROJECT_NAME}Config.cmake 54 | "find_dependency(OpenCV REQUIRED)\n" 55 | "find_dependency(CUDA REQUIRED)\n") 56 | -------------------------------------------------------------------------------- /darknet/include/activation_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTIVATION_LAYER_H 2 | #define ACTIVATION_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | layer make_activation_layer(int batch, int inputs, ACTIVATION activation); 14 | 15 | void forward_activation_layer(layer l, network net); 16 | void backward_activation_layer(layer l, network net); 17 | 18 | #ifdef DKGPU 19 | void forward_activation_layer_gpu(layer l, network net); 20 | void backward_activation_layer_gpu(layer l, network net); 21 | #endif 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /darknet/include/activations.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTIVATIONS_H 2 | #define ACTIVATIONS_H 3 | #include "cuda.h" 4 | #include "darknet.h" 5 | #include "math.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | 12 | ACTIVATION get_activation(char *s); 13 | 14 | char *get_activation_string(ACTIVATION a); 15 | float activate(float x, ACTIVATION a); 16 | float gradient(float x, ACTIVATION a); 17 | void gradient_array(const float *x, 18 | const int n, 19 | const ACTIVATION a, 20 | float *delta); 21 | void activate_array(float *x, const int n, const ACTIVATION a); 22 | #ifdef DKGPU 23 | void activate_array_gpu(float *x, int n, ACTIVATION a); 24 | void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta); 25 | #endif 26 | 27 | static inline float stair_activate(float x) 28 | { 29 | int n = floor(x); 30 | if (n % 2 == 0) 31 | return floor(x / 2.); 32 | else 33 | return (x - n) + floor(x / 2.); 34 | } 35 | static inline float hardtan_activate(float x) 36 | { 37 | if (x < -1) return -1; 38 | if (x > 1) return 1; 39 | return x; 40 | } 41 | static inline float linear_activate(float x) 42 | { 43 | return x; 44 | } 45 | static inline float logistic_activate(float x) 46 | { 47 | return 1. / (1. 
+ exp(-x)); 48 | } 49 | static inline float loggy_activate(float x) 50 | { 51 | return 2. / (1. + exp(-x)) - 1; 52 | } 53 | static inline float relu_activate(float x) 54 | { 55 | return x * (x > 0); 56 | } 57 | static inline float elu_activate(float x) 58 | { 59 | return (x >= 0) * x + (x < 0) * (exp(x) - 1); 60 | } 61 | static inline float relie_activate(float x) 62 | { 63 | return (x > 0) ? x : .01 * x; 64 | } 65 | static inline float ramp_activate(float x) 66 | { 67 | return x * (x > 0) + .1 * x; 68 | } 69 | static inline float leaky_activate(float x) 70 | { 71 | return (x > 0) ? x : .1 * x; 72 | } 73 | static inline float tanh_activate(float x) 74 | { 75 | return (exp(2 * x) - 1) / (exp(2 * x) + 1); 76 | } 77 | static inline float plse_activate(float x) 78 | { 79 | if (x < -4) return .01 * (x + 4); 80 | if (x > 4) return .01 * (x - 4) + 1; 81 | return .125 * x + .5; 82 | } 83 | 84 | static inline float lhtan_activate(float x) 85 | { 86 | if (x < 0) return .001 * x; 87 | if (x > 1) return .001 * (x - 1) + 1; 88 | return x; 89 | } 90 | static inline float lhtan_gradient(float x) 91 | { 92 | if (x > 0 && x < 1) return 1; 93 | return .001; 94 | } 95 | 96 | static inline float hardtan_gradient(float x) 97 | { 98 | if (x > -1 && x < 1) return 1; 99 | return 0; 100 | } 101 | static inline float linear_gradient(float x) 102 | { 103 | return 1; 104 | } 105 | static inline float logistic_gradient(float x) 106 | { 107 | return (1 - x) * x; 108 | } 109 | static inline float loggy_gradient(float x) 110 | { 111 | float y = (x + 1.) / 2.; 112 | return 2 * (1 - y) * y; 113 | } 114 | static inline float stair_gradient(float x) 115 | { 116 | if (floor(x) == x) return 0; 117 | return 1; 118 | } 119 | static inline float relu_gradient(float x) 120 | { 121 | return (x > 0); 122 | } 123 | static inline float elu_gradient(float x) 124 | { 125 | return (x >= 0) + (x < 0) * (x + 1); 126 | } 127 | static inline float relie_gradient(float x) 128 | { 129 | return (x > 0) ? 1 : .01; 130 | } 131 | static inline float ramp_gradient(float x) 132 | { 133 | return (x > 0) + .1; 134 | } 135 | static inline float leaky_gradient(float x) 136 | { 137 | return (x > 0) ? 1 : .1; 138 | } 139 | static inline float tanh_gradient(float x) 140 | { 141 | return 1 - x * x; 142 | } 143 | static inline float plse_gradient(float x) 144 | { 145 | return (x < 0 || x > 1) ? 
.01 : .125; 146 | } 147 | 148 | #ifdef __cplusplus 149 | } 150 | #endif 151 | 152 | #endif 153 | -------------------------------------------------------------------------------- /darknet/include/avgpool_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef AVGPOOL_LAYER_H 2 | #define AVGPOOL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | #ifdef __cplusplus 10 | extern "C" 11 | { 12 | #endif 13 | 14 | typedef layer avgpool_layer; 15 | 16 | image get_avgpool_image(avgpool_layer l); 17 | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); 18 | void resize_avgpool_layer(avgpool_layer *l, int w, int h); 19 | void forward_avgpool_layer(const avgpool_layer l, network net); 20 | void backward_avgpool_layer(const avgpool_layer l, network net); 21 | 22 | #ifdef DKGPU 23 | void forward_avgpool_layer_gpu(avgpool_layer l, network net); 24 | void backward_avgpool_layer_gpu(avgpool_layer l, network net); 25 | #endif 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /darknet/include/batchnorm_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCHNORM_LAYER_H 2 | #define BATCHNORM_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | layer make_batchnorm_layer(int batch, int w, int h, int c); 14 | void forward_batchnorm_layer(layer l, network net); 15 | void backward_batchnorm_layer(layer l, network net); 16 | 17 | #ifdef DKGPU 18 | void forward_batchnorm_layer_gpu(layer l, network net); 19 | void backward_batchnorm_layer_gpu(layer l, network net); 20 | void pull_batchnorm_layer(layer l); 21 | void push_batchnorm_layer(layer l); 22 | #endif 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /darknet/include/box.h: -------------------------------------------------------------------------------- 1 | #ifndef BOX_H 2 | #define BOX_H 3 | #include "darknet.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" 7 | { 8 | #endif 9 | 10 | typedef struct 11 | { 12 | float dx, dy, dw, dh; 13 | } dbox; 14 | 15 | float box_rmse(box a, box b); 16 | dbox diou(box a, box b); 17 | box decode_box(box b, box anchor); 18 | box encode_box(box b, box anchor); 19 | #ifdef __cplusplus 20 | } 21 | #endif 22 | #endif 23 | -------------------------------------------------------------------------------- /darknet/include/classifier.h: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /darknet/include/col2im.h: -------------------------------------------------------------------------------- 1 | #ifndef COL2IM_H 2 | #define COL2IM_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | 9 | void col2im_cpu(float* data_col, 10 | int channels, 11 | int height, 12 | int width, 13 | int ksize, 14 | int stride, 15 | int pad, 16 | float* data_im); 17 | 18 | #ifdef DKGPU 19 | void col2im_gpu(float* data_col, 20 | int channels, 21 | int height, 22 | int width, 23 | int ksize, 24 | int stride, 25 | int pad, 26 | float* data_im); 27 | #endif 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 
-------------------------------------------------------------------------------- /darknet/include/connected_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CONNECTED_LAYER_H 2 | #define CONNECTED_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | layer make_connected_layer(int batch, 14 | int inputs, 15 | int outputs, 16 | ACTIVATION activation, 17 | int batch_normalize, 18 | int adam); 19 | 20 | void forward_connected_layer(layer l, network net); 21 | void backward_connected_layer(layer l, network net); 22 | void update_connected_layer(layer l, update_args a); 23 | 24 | #ifdef DKGPU 25 | void forward_connected_layer_gpu(layer l, network net); 26 | void backward_connected_layer_gpu(layer l, network net); 27 | void update_connected_layer_gpu(layer l, update_args a); 28 | void push_connected_layer(layer l); 29 | void pull_connected_layer(layer l); 30 | #endif 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /darknet/include/convolutional_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CONVOLUTIONAL_LAYER_H 2 | #define CONVOLUTIONAL_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "cuda.h" 6 | #include "image.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | #ifdef __cplusplus 11 | extern "C" 12 | { 13 | #endif 14 | 15 | typedef layer convolutional_layer; 16 | 17 | #ifdef DKGPU 18 | void forward_convolutional_layer_gpu(convolutional_layer layer, network net); 19 | void backward_convolutional_layer_gpu(convolutional_layer layer, network net); 20 | void update_convolutional_layer_gpu(convolutional_layer layer, update_args a); 21 | 22 | void push_convolutional_layer(convolutional_layer layer); 23 | void pull_convolutional_layer(convolutional_layer layer); 24 | 25 | void add_bias_gpu(float *output, float *biases, int batch, int n, int size); 26 | void backward_bias_gpu( 27 | float *bias_updates, float *delta, int batch, int n, int size); 28 | void adam_update_gpu(float *w, 29 | float *d, 30 | float *m, 31 | float *v, 32 | float B1, 33 | float B2, 34 | float eps, 35 | float decay, 36 | float rate, 37 | int n, 38 | int batch, 39 | int t); 40 | #ifdef CUDNN 41 | void cudnn_convolutional_setup(layer *l); 42 | #endif 43 | #endif 44 | 45 | convolutional_layer make_convolutional_layer(int batch, 46 | int h, 47 | int w, 48 | int c, 49 | int n, 50 | int groups, 51 | int size, 52 | int stride, 53 | int padding, 54 | ACTIVATION activation, 55 | int batch_normalize, 56 | int binary, 57 | int xnor, 58 | int adam); 59 | void resize_convolutional_layer(convolutional_layer *layer, int w, int h); 60 | void forward_convolutional_layer(const convolutional_layer layer, 61 | network net); 62 | void update_convolutional_layer(convolutional_layer layer, update_args a); 63 | image *visualize_convolutional_layer(convolutional_layer layer, 64 | char *window, 65 | image *prev_weights); 66 | void binarize_weights(float *weights, int n, int size, float *binary); 67 | void swap_binary(convolutional_layer *l); 68 | void binarize_weights2( 69 | float *weights, int n, int size, char *binary, float *scales); 70 | 71 | void backward_convolutional_layer(convolutional_layer layer, network net); 72 | 73 | void add_bias(float *output, float *biases, int batch, int n, int size); 74 | void backward_bias( 75 | 
float *bias_updates, float *delta, int batch, int n, int size); 76 | 77 | image get_convolutional_image(convolutional_layer layer); 78 | image get_convolutional_delta(convolutional_layer layer); 79 | image get_convolutional_weight(convolutional_layer layer, int i); 80 | 81 | int convolutional_out_height(convolutional_layer layer); 82 | int convolutional_out_width(convolutional_layer layer); 83 | 84 | #ifdef __cplusplus 85 | } 86 | #endif 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /darknet/include/cost_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef COST_LAYER_H 2 | #define COST_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" 8 | { 9 | #endif 10 | 11 | typedef layer cost_layer; 12 | 13 | COST_TYPE get_cost_type(char *s); 14 | char *get_cost_string(COST_TYPE a); 15 | cost_layer make_cost_layer(int batch, 16 | int inputs, 17 | COST_TYPE type, 18 | float scale); 19 | void forward_cost_layer(const cost_layer l, network net); 20 | void backward_cost_layer(const cost_layer l, network net); 21 | void resize_cost_layer(cost_layer *l, int inputs); 22 | 23 | #ifdef DKGPU 24 | void forward_cost_layer_gpu(cost_layer l, network net); 25 | void backward_cost_layer_gpu(const cost_layer l, network net); 26 | #endif 27 | 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /darknet/include/crnn_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CRNN_LAYER_H 2 | #define CRNN_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | layer make_crnn_layer(int batch, 14 | int h, 15 | int w, 16 | int c, 17 | int hidden_filters, 18 | int output_filters, 19 | int steps, 20 | ACTIVATION activation, 21 | int batch_normalize); 22 | 23 | void forward_crnn_layer(layer l, network net); 24 | void backward_crnn_layer(layer l, network net); 25 | void update_crnn_layer(layer l, update_args a); 26 | 27 | #ifdef DKGPU 28 | void forward_crnn_layer_gpu(layer l, network net); 29 | void backward_crnn_layer_gpu(layer l, network net); 30 | void update_crnn_layer_gpu(layer l, update_args a); 31 | void push_crnn_layer(layer l); 32 | void pull_crnn_layer(layer l); 33 | #endif 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /darknet/include/crop_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CROP_LAYER_H 2 | #define CROP_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | typedef layer crop_layer; 14 | 15 | image get_crop_image(crop_layer l); 16 | crop_layer make_crop_layer(int batch, 17 | int h, 18 | int w, 19 | int c, 20 | int crop_height, 21 | int crop_width, 22 | int flip, 23 | float angle, 24 | float saturation, 25 | float exposure); 26 | void forward_crop_layer(const crop_layer l, network net); 27 | void resize_crop_layer(layer *l, int w, int h); 28 | 29 | #ifdef DKGPU 30 | void forward_crop_layer_gpu(crop_layer l, network net); 31 | #endif 32 | 33 | #ifdef __cplusplus 34 | } 35 | #endif 36 | 37 | #endif 38 | 
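These layer headers all follow the same convention: a CPU entry point (forward_*_layer, backward_*_layer) plus *_gpu variants that are only declared when DKGPU is defined, which darknet/CMakeLists.txt above sets as a PUBLIC compile definition. Below is a minimal sketch of what that compile-time gating looks like from calling code, using the avgpool declarations already shown; the wrapper function itself is illustrative and not part of the library.

#include "avgpool_layer.h"

/* Illustrative wrapper: the GPU entry point exists only when the library
 * (and, via the PUBLIC definition, the consumer) was compiled with DKGPU. */
static void run_avgpool(avgpool_layer l, network net)
{
#ifdef DKGPU
    forward_avgpool_layer_gpu(l, net);  /* declared only under #ifdef DKGPU */
#else
    forward_avgpool_layer(l, net);      /* plain CPU path */
#endif
}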
-------------------------------------------------------------------------------- /darknet/include/cuda.h: -------------------------------------------------------------------------------- 1 | #ifndef CUDA_H 2 | #define CUDA_H 3 | 4 | #include "darknet.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" 8 | { 9 | #endif 10 | 11 | #ifdef DKGPU 12 | 13 | void check_error(cudaError_t status); 14 | cublasHandle_t blas_handle(); 15 | int *cuda_make_int_array(int *x, size_t n); 16 | void cuda_random(float *x_gpu, size_t n); 17 | float cuda_compare(float *x_gpu, float *x, size_t n, char *s); 18 | dim3 cuda_gridsize(size_t n); 19 | 20 | #ifdef CUDNN 21 | cudnnHandle_t cudnn_handle(); 22 | #endif 23 | 24 | #endif 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /darknet/include/data.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_H 2 | #define DATA_H 3 | #include 4 | 5 | #include "darknet.h" 6 | #include "image.h" 7 | #include "list.h" 8 | #include "matrix.h" 9 | #include "tree.h" 10 | 11 | #ifdef __cplusplus 12 | extern "C" 13 | { 14 | #endif 15 | 16 | static inline float distance_from_edge(int x, int max) 17 | { 18 | int dx = (max / 2) - x; 19 | if (dx < 0) dx = -dx; 20 | dx = (max / 2) + 1 - dx; 21 | dx *= 2; 22 | float dist = (float)dx / max; 23 | if (dist > 1) dist = 1; 24 | return dist; 25 | } 26 | void load_data_blocking(load_args args); 27 | 28 | void print_letters(float *pred, int n); 29 | data load_data_captcha(char **paths, int n, int m, int k, int w, int h); 30 | data load_data_captcha_encode(char **paths, int n, int m, int w, int h); 31 | data load_data_detection(int n, 32 | char **paths, 33 | int m, 34 | int w, 35 | int h, 36 | int boxes, 37 | int classes, 38 | float jitter, 39 | float hue, 40 | float saturation, 41 | float exposure); 42 | data load_data_tag(char **paths, 43 | int n, 44 | int m, 45 | int k, 46 | int min, 47 | int max, 48 | int size, 49 | float angle, 50 | float aspect, 51 | float hue, 52 | float saturation, 53 | float exposure); 54 | matrix load_image_augment_paths(char **paths, 55 | int n, 56 | int min, 57 | int max, 58 | int size, 59 | float angle, 60 | float aspect, 61 | float hue, 62 | float saturation, 63 | float exposure, 64 | int center); 65 | data load_data_super(char **paths, int n, int m, int w, int h, int scale); 66 | data load_data_augment(char **paths, 67 | int n, 68 | int m, 69 | char **labels, 70 | int k, 71 | tree *hierarchy, 72 | int min, 73 | int max, 74 | int size, 75 | float angle, 76 | float aspect, 77 | float hue, 78 | float saturation, 79 | float exposure, 80 | int center); 81 | data load_data_regression(char **paths, 82 | int n, 83 | int m, 84 | int classes, 85 | int min, 86 | int max, 87 | int size, 88 | float angle, 89 | float aspect, 90 | float hue, 91 | float saturation, 92 | float exposure); 93 | data load_go(char *filename); 94 | 95 | data load_data_writing( 96 | char **paths, int n, int m, int w, int h, int out_w, int out_h); 97 | 98 | void get_random_batch(data d, int n, float *X, float *y); 99 | data get_data_part(data d, int part, int total); 100 | data get_random_data(data d, int num); 101 | data load_categorical_data_csv(char *filename, int target, int k); 102 | void normalize_data_rows(data d); 103 | void scale_data_rows(data d, float s); 104 | void translate_data_rows(data d, float s); 105 | void randomize_data(data d); 106 | data *split_data(data d, int part, int total); 107 | data 
concat_datas(data *d, int n); 108 | void fill_truth(char *path, char **labels, int k, float *truth); 109 | 110 | #ifdef __cplusplus 111 | } 112 | #endif 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /darknet/include/deconvolutional_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DECONVOLUTIONAL_LAYER_H 2 | #define DECONVOLUTIONAL_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "cuda.h" 6 | #include "image.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | #ifdef __cplusplus 11 | extern "C" 12 | { 13 | #endif 14 | 15 | #ifdef DKGPU 16 | void forward_deconvolutional_layer_gpu(layer l, network net); 17 | void backward_deconvolutional_layer_gpu(layer l, network net); 18 | void update_deconvolutional_layer_gpu(layer l, update_args a); 19 | void push_deconvolutional_layer(layer l); 20 | void pull_deconvolutional_layer(layer l); 21 | #endif 22 | 23 | layer make_deconvolutional_layer(int batch, 24 | int h, 25 | int w, 26 | int c, 27 | int n, 28 | int size, 29 | int stride, 30 | int padding, 31 | ACTIVATION activation, 32 | int batch_normalize, 33 | int adam); 34 | void resize_deconvolutional_layer(layer *l, int h, int w); 35 | void forward_deconvolutional_layer(const layer l, network net); 36 | void update_deconvolutional_layer(layer l, update_args a); 37 | void backward_deconvolutional_layer(layer l, network net); 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /darknet/include/demo.h: -------------------------------------------------------------------------------- 1 | #ifndef DEMO_H 2 | #define DEMO_H 3 | 4 | #include "image.h" 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /darknet/include/detection_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DETECTION_LAYER_H 2 | #define DETECTION_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | 12 | typedef layer detection_layer; 13 | 14 | detection_layer make_detection_layer(int batch, 15 | int inputs, 16 | int n, 17 | int size, 18 | int classes, 19 | int coords, 20 | int rescore); 21 | void forward_detection_layer(const detection_layer l, network net); 22 | void backward_detection_layer(const detection_layer l, network net); 23 | 24 | #ifdef DKGPU 25 | void forward_detection_layer_gpu(const detection_layer l, network net); 26 | void backward_detection_layer_gpu(detection_layer l, network net); 27 | #endif 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /darknet/include/dropout_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DROPOUT_LAYER_H 2 | #define DROPOUT_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | 12 | typedef layer dropout_layer; 13 | 14 | dropout_layer make_dropout_layer(int batch, int inputs, float probability); 15 | 16 | void forward_dropout_layer(dropout_layer l, network net); 17 | void backward_dropout_layer(dropout_layer l, network net); 18 | void resize_dropout_layer(dropout_layer *l, int inputs); 19 | 20 | #ifdef DKGPU 21 | void forward_dropout_layer_gpu(dropout_layer l, network net); 22 | void 
backward_dropout_layer_gpu(dropout_layer l, network net); 23 | 24 | #endif 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /darknet/include/gemm.h: -------------------------------------------------------------------------------- 1 | #ifndef GEMM_H 2 | #define GEMM_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | 9 | void gemm_bin(int M, 10 | int N, 11 | int K, 12 | float ALPHA, 13 | char *A, 14 | int lda, 15 | float *B, 16 | int ldb, 17 | float *C, 18 | int ldc); 19 | 20 | void gemm(int TA, 21 | int TB, 22 | int M, 23 | int N, 24 | int K, 25 | float ALPHA, 26 | float *A, 27 | int lda, 28 | float *B, 29 | int ldb, 30 | float BETA, 31 | float *C, 32 | int ldc); 33 | 34 | void gemm_cpu(int TA, 35 | int TB, 36 | int M, 37 | int N, 38 | int K, 39 | float ALPHA, 40 | float *A, 41 | int lda, 42 | float *B, 43 | int ldb, 44 | float BETA, 45 | float *C, 46 | int ldc); 47 | 48 | #ifdef DKGPU 49 | void gemm_gpu(int TA, 50 | int TB, 51 | int M, 52 | int N, 53 | int K, 54 | float ALPHA, 55 | float *A_gpu, 56 | int lda, 57 | float *B_gpu, 58 | int ldb, 59 | float BETA, 60 | float *C_gpu, 61 | int ldc); 62 | 63 | void gemm_gpu(int TA, 64 | int TB, 65 | int M, 66 | int N, 67 | int K, 68 | float ALPHA, 69 | float *A, 70 | int lda, 71 | float *B, 72 | int ldb, 73 | float BETA, 74 | float *C, 75 | int ldc); 76 | #endif 77 | #ifdef __cplusplus 78 | } 79 | #endif 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /darknet/include/gru_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef GRU_LAYER_H 3 | #define GRU_LAYER_H 4 | 5 | #include "activations.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | #ifdef __cplusplus 10 | extern "C" 11 | { 12 | #endif 13 | 14 | layer make_gru_layer(int batch, 15 | int inputs, 16 | int outputs, 17 | int steps, 18 | int batch_normalize, 19 | int adam); 20 | 21 | void forward_gru_layer(layer l, network state); 22 | void backward_gru_layer(layer l, network state); 23 | void update_gru_layer(layer l, update_args a); 24 | 25 | #ifdef DKGPU 26 | void forward_gru_layer_gpu(layer l, network state); 27 | void backward_gru_layer_gpu(layer l, network state); 28 | void update_gru_layer_gpu(layer l, update_args a); 29 | void push_gru_layer(layer l); 30 | void pull_gru_layer(layer l); 31 | #endif 32 | 33 | #ifdef __cplusplus 34 | } 35 | #endif 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /darknet/include/im2col.h: -------------------------------------------------------------------------------- 1 | #ifndef IM2COL_H 2 | #define IM2COL_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | 9 | void im2col_cpu(float* data_im, 10 | int channels, 11 | int height, 12 | int width, 13 | int ksize, 14 | int stride, 15 | int pad, 16 | float* data_col); 17 | 18 | #ifdef DKGPU 19 | 20 | void im2col_gpu(float* im, 21 | int channels, 22 | int height, 23 | int width, 24 | int ksize, 25 | int stride, 26 | int pad, 27 | float* data_col); 28 | 29 | #endif 30 | 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | #endif 35 | -------------------------------------------------------------------------------- /darknet/include/image.h: -------------------------------------------------------------------------------- 1 | #ifndef IMAGE_H 2 | #define IMAGE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 
#include "box.h" 10 | #include "darknet.h" 11 | 12 | #ifdef __cplusplus 13 | extern "C" 14 | { 15 | #endif 16 | 17 | #ifndef __cplusplus 18 | #ifdef OPENCV 19 | int fill_image_from_stream(CvCapture *cap, image im); 20 | image ipl_to_image(IplImage *src); 21 | void ipl_into_image(IplImage *src, image im); 22 | void flush_stream_buffer(CvCapture *cap, int n); 23 | void show_image_cv(image p, const char *name, IplImage *disp); 24 | #endif 25 | #endif 26 | 27 | float get_color(int c, int x, int max); 28 | void draw_box( 29 | image a, int x1, int y1, int x2, int y2, float r, float g, float b); 30 | void draw_bbox(image a, box bbox, int w, float r, float g, float b); 31 | void write_label( 32 | image a, int r, int c, image *characters, char *string, float *rgb); 33 | image image_distance(image a, image b); 34 | void scale_image(image m, float s); 35 | image rotate_crop_image(image im, 36 | float rad, 37 | float s, 38 | int w, 39 | int h, 40 | float dx, 41 | float dy, 42 | float aspect); 43 | image random_crop_image(image im, int w, int h); 44 | image random_augment_image( 45 | image im, float angle, float aspect, int low, int high, int w, int h); 46 | augment_args random_augment_args( 47 | image im, float angle, float aspect, int low, int high, int w, int h); 48 | void letterbox_image_into(image im, int w, int h, image boxed); 49 | image resize_max(image im, int max); 50 | void translate_image(image m, float s); 51 | void embed_image(image source, image dest, int dx, int dy); 52 | void place_image(image im, int w, int h, int dx, int dy, image canvas); 53 | void saturate_image(image im, float sat); 54 | void exposure_image(image im, float sat); 55 | void distort_image(image im, float hue, float sat, float val); 56 | void saturate_exposure_image(image im, float sat, float exposure); 57 | void rgb_to_hsv(image im); 58 | void hsv_to_rgb(image im); 59 | void yuv_to_rgb(image im); 60 | void rgb_to_yuv(image im); 61 | 62 | image collapse_image_layers(image source, int border); 63 | image collapse_images_horz(image *ims, int n); 64 | image collapse_images_vert(image *ims, int n); 65 | 66 | void show_image_normalized(image im, const char *name); 67 | void show_images(image *ims, int n, char *window); 68 | void show_image_layers(image p, char *name); 69 | void show_image_collapsed(image p, char *name); 70 | 71 | void print_image(image m); 72 | 73 | image make_empty_image(int w, int h, int c); 74 | void copy_image_into(image src, image dest); 75 | 76 | image get_image_layer(image m, int l); 77 | 78 | #ifdef __cplusplus 79 | } 80 | #endif 81 | #endif 82 | -------------------------------------------------------------------------------- /darknet/include/l2norm_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef L2NORM_LAYER_H 2 | #define L2NORM_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" 8 | { 9 | #endif 10 | 11 | layer make_l2norm_layer(int batch, int inputs); 12 | void forward_l2norm_layer(const layer l, network net); 13 | void backward_l2norm_layer(const layer l, network net); 14 | 15 | #ifdef DKGPU 16 | void forward_l2norm_layer_gpu(const layer l, network net); 17 | void backward_l2norm_layer_gpu(const layer l, network net); 18 | #endif 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /darknet/include/layer.h: -------------------------------------------------------------------------------- 1 | #include 
"darknet.h" 2 | -------------------------------------------------------------------------------- /darknet/include/list.h: -------------------------------------------------------------------------------- 1 | #ifndef LIST_H 2 | #define LIST_H 3 | #include "darknet.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" 7 | { 8 | #endif 9 | 10 | list *make_list(); 11 | int list_find(list *l, void *val); 12 | 13 | void list_insert(list *, void *); 14 | 15 | void free_list_contents(list *l); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /darknet/include/local_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LOCAL_LAYER_H 2 | #define LOCAL_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "cuda.h" 6 | #include "image.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | #ifdef __cplusplus 11 | extern "C" 12 | { 13 | #endif 14 | 15 | typedef layer local_layer; 16 | 17 | #ifdef DKGPU 18 | void forward_local_layer_gpu(local_layer layer, network net); 19 | void backward_local_layer_gpu(local_layer layer, network net); 20 | void update_local_layer_gpu(local_layer layer, update_args a); 21 | 22 | void push_local_layer(local_layer layer); 23 | void pull_local_layer(local_layer layer); 24 | #endif 25 | 26 | local_layer make_local_layer(int batch, 27 | int h, 28 | int w, 29 | int c, 30 | int n, 31 | int size, 32 | int stride, 33 | int pad, 34 | ACTIVATION activation); 35 | 36 | void forward_local_layer(const local_layer layer, network net); 37 | void backward_local_layer(local_layer layer, network net); 38 | void update_local_layer(local_layer layer, update_args a); 39 | 40 | void bias_output(float *output, float *biases, int batch, int n, int size); 41 | void backward_bias( 42 | float *bias_updates, float *delta, int batch, int n, int size); 43 | 44 | #ifdef __cplusplus 45 | } 46 | #endif 47 | #endif 48 | -------------------------------------------------------------------------------- /darknet/include/logistic_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGISTIC_LAYER_H 2 | #define LOGISTIC_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" 8 | { 9 | #endif 10 | 11 | layer make_logistic_layer(int batch, int inputs); 12 | void forward_logistic_layer(const layer l, network net); 13 | void backward_logistic_layer(const layer l, network net); 14 | 15 | #ifdef DKGPU 16 | void forward_logistic_layer_gpu(const layer l, network net); 17 | void backward_logistic_layer_gpu(const layer l, network net); 18 | #endif 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /darknet/include/lstm_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LSTM_LAYER_H 2 | #define LSTM_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | #define USET 8 | 9 | #ifdef __cplusplus 10 | extern "C" 11 | { 12 | #endif 13 | 14 | layer make_lstm_layer(int batch, 15 | int inputs, 16 | int outputs, 17 | int steps, 18 | int batch_normalize, 19 | int adam); 20 | 21 | void forward_lstm_layer(layer l, network net); 22 | void update_lstm_layer(layer l, update_args a); 23 | 24 | #ifdef DKGPU 25 | void forward_lstm_layer_gpu(layer l, network net); 26 | void backward_lstm_layer_gpu(layer l, network net); 27 | void 
update_lstm_layer_gpu(layer l, update_args a); 28 | 29 | #endif 30 | #ifdef __cplusplus 31 | } 32 | #endif 33 | #endif 34 | -------------------------------------------------------------------------------- /darknet/include/matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_H 2 | #define MATRIX_H 3 | #include "darknet.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" 7 | { 8 | #endif 9 | matrix copy_matrix(matrix m); 10 | void print_matrix(matrix m); 11 | 12 | matrix hold_out_matrix(matrix *m, int n); 13 | matrix resize_matrix(matrix m, int size); 14 | 15 | float *pop_column(matrix *m, int c); 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | #endif 20 | -------------------------------------------------------------------------------- /darknet/include/maxpool_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef MAXPOOL_LAYER_H 2 | #define MAXPOOL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | typedef layer maxpool_layer; 13 | 14 | image get_maxpool_image(maxpool_layer l); 15 | maxpool_layer make_maxpool_layer( 16 | int batch, int h, int w, int c, int size, int stride, int padding); 17 | void resize_maxpool_layer(maxpool_layer *l, int w, int h); 18 | void forward_maxpool_layer(const maxpool_layer l, network net); 19 | void backward_maxpool_layer(const maxpool_layer l, network net); 20 | 21 | #ifdef DKGPU 22 | void forward_maxpool_layer_gpu(maxpool_layer l, network net); 23 | void backward_maxpool_layer_gpu(maxpool_layer l, network net); 24 | #endif 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | #endif 29 | -------------------------------------------------------------------------------- /darknet/include/network.h: -------------------------------------------------------------------------------- 1 | // Oh boy, why am I about to do this.... 
2 | #ifndef NETWORK_H 3 | #define NETWORK_H 4 | #include "darknet.h" 5 | 6 | #include "data.h" 7 | #include "image.h" 8 | #include "layer.h" 9 | #include "tree.h" 10 | #ifdef __cplusplus 11 | extern "C" 12 | { 13 | #endif 14 | #ifdef DKGPU 15 | void pull_network_output(network *net); 16 | #endif 17 | 18 | void compare_networks(network *n1, network *n2, data d); 19 | char *get_layer_string(LAYER_TYPE a); 20 | 21 | network *make_network(int n); 22 | 23 | float network_accuracy_multi(network *net, data d, int n); 24 | int get_predicted_class_network(network *net); 25 | void print_network(network *net); 26 | int resize_network(network *net, int w, int h); 27 | void calc_network_cost(network *net); 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | #endif 32 | -------------------------------------------------------------------------------- /darknet/include/normalization_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef NORMALIZATION_LAYER_H 2 | #define NORMALIZATION_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | layer make_normalization_layer(int batch, 12 | int w, 13 | int h, 14 | int c, 15 | int size, 16 | float alpha, 17 | float beta, 18 | float kappa); 19 | void resize_normalization_layer(layer *layer, int h, int w); 20 | void forward_normalization_layer(const layer layer, network net); 21 | void backward_normalization_layer(const layer layer, network net); 22 | void visualize_normalization_layer(layer layer, char *window); 23 | 24 | #ifdef DKGPU 25 | void forward_normalization_layer_gpu(const layer layer, network net); 26 | void backward_normalization_layer_gpu(const layer layer, network net); 27 | #endif 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | #endif 32 | -------------------------------------------------------------------------------- /darknet/include/option_list.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTION_LIST_H 2 | #define OPTION_LIST_H 3 | #include "list.h" 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | typedef struct 9 | { 10 | char *key; 11 | char *val; 12 | int used; 13 | } kvp; 14 | 15 | int read_option(char *s, list *options); 16 | void option_insert(list *l, char *key, char *val); 17 | char *option_find(list *l, char *key); 18 | float option_find_float(list *l, char *key, float def); 19 | float option_find_float_quiet(list *l, char *key, float def); 20 | void option_unused(list *l); 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /darknet/include/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_H 2 | #define PARSER_H 3 | #include "darknet.h" 4 | #include "network.h" 5 | #ifdef __cplusplus 6 | extern "C" 7 | { 8 | #endif 9 | void save_network(network net, char *filename); 10 | void save_weights_double(network net, char *filename); 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | #endif 15 | -------------------------------------------------------------------------------- /darknet/include/region_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef REGION_LAYER_H 2 | #define REGION_LAYER_H 3 | 4 | #include "darknet.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | layer make_region_layer( 12 | int batch, 
int w, int h, int n, int classes, int coords); 13 | void forward_region_layer(const layer l, network net); 14 | void backward_region_layer(const layer l, network net); 15 | void resize_region_layer(layer *l, int w, int h); 16 | 17 | #ifdef DKGPU 18 | void forward_region_layer_gpu(const layer l, network net); 19 | void backward_region_layer_gpu(layer l, network net); 20 | #endif 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /darknet/include/reorg_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef REORG_LAYER_H 2 | #define REORG_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | layer make_reorg_layer(int batch, 13 | int w, 14 | int h, 15 | int c, 16 | int stride, 17 | int reverse, 18 | int flatten, 19 | int extra); 20 | void resize_reorg_layer(layer *l, int w, int h); 21 | void forward_reorg_layer(const layer l, network net); 22 | void backward_reorg_layer(const layer l, network net); 23 | 24 | #ifdef DKGPU 25 | void forward_reorg_layer_gpu(layer l, network net); 26 | void backward_reorg_layer_gpu(layer l, network net); 27 | #endif 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | #endif 32 | -------------------------------------------------------------------------------- /darknet/include/rnn_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef RNN_LAYER_H 3 | #define RNN_LAYER_H 4 | 5 | #include "activations.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | #define USET 9 | #ifdef __cplusplus 10 | extern "C" 11 | { 12 | #endif 13 | layer make_rnn_layer(int batch, 14 | int inputs, 15 | int outputs, 16 | int steps, 17 | ACTIVATION activation, 18 | int batch_normalize, 19 | int adam); 20 | 21 | void forward_rnn_layer(layer l, network net); 22 | void backward_rnn_layer(layer l, network net); 23 | void update_rnn_layer(layer l, update_args a); 24 | 25 | #ifdef DKGPU 26 | void forward_rnn_layer_gpu(layer l, network net); 27 | void backward_rnn_layer_gpu(layer l, network net); 28 | void update_rnn_layer_gpu(layer l, update_args a); 29 | void push_rnn_layer(layer l); 30 | void pull_rnn_layer(layer l); 31 | #endif 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | #endif 36 | -------------------------------------------------------------------------------- /darknet/include/route_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef ROUTE_LAYER_H 2 | #define ROUTE_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | #ifdef __cplusplus 6 | extern "C" 7 | { 8 | #endif 9 | typedef layer route_layer; 10 | 11 | route_layer make_route_layer(int batch, 12 | int n, 13 | int *input_layers, 14 | int *input_size); 15 | void forward_route_layer(const route_layer l, network net); 16 | void backward_route_layer(const route_layer l, network net); 17 | void resize_route_layer(route_layer *l, network *net); 18 | 19 | #ifdef DKGPU 20 | void forward_route_layer_gpu(const route_layer l, network net); 21 | void backward_route_layer_gpu(const route_layer l, network net); 22 | #endif 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /darknet/include/shortcut_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef 
SHORTCUT_LAYER_H 2 | #define SHORTCUT_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | #ifdef __cplusplus 7 | extern "C" 8 | { 9 | #endif 10 | layer make_shortcut_layer( 11 | int batch, int index, int w, int h, int c, int w2, int h2, int c2); 12 | void forward_shortcut_layer(const layer l, network net); 13 | void backward_shortcut_layer(const layer l, network net); 14 | void resize_shortcut_layer(layer *l, int w, int h); 15 | 16 | #ifdef DKGPU 17 | void forward_shortcut_layer_gpu(const layer l, network net); 18 | void backward_shortcut_layer_gpu(const layer l, network net); 19 | #endif 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /darknet/include/softmax_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef SOFTMAX_LAYER_H 2 | #define SOFTMAX_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | #ifdef __cplusplus 6 | extern "C" 7 | { 8 | #endif 9 | typedef layer softmax_layer; 10 | 11 | void softmax_array(float *input, int n, float temp, float *output); 12 | softmax_layer make_softmax_layer(int batch, int inputs, int groups); 13 | void forward_softmax_layer(const softmax_layer l, network net); 14 | void backward_softmax_layer(const softmax_layer l, network net); 15 | 16 | #ifdef DKGPU 17 | void pull_softmax_layer_output(const softmax_layer l); 18 | void forward_softmax_layer_gpu(const softmax_layer l, network net); 19 | void backward_softmax_layer_gpu(const softmax_layer l, network net); 20 | #endif 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /darknet/include/tree.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_H 2 | #define TREE_H 3 | #include "darknet.h" 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | int hierarchy_top_prediction(float *predictions, 9 | tree *hier, 10 | float thresh, 11 | int stride); 12 | float get_hierarchy_probability(float *x, tree *hier, int c, int stride); 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | #endif 17 | -------------------------------------------------------------------------------- /darknet/include/upsample_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef UPSAMPLE_LAYER_H 2 | #define UPSAMPLE_LAYER_H 3 | #include "darknet.h" 4 | #ifdef __cplusplus 5 | extern "C" 6 | { 7 | #endif 8 | layer make_upsample_layer(int batch, int w, int h, int c, int stride); 9 | void forward_upsample_layer(const layer l, network net); 10 | void backward_upsample_layer(const layer l, network net); 11 | void resize_upsample_layer(layer *l, int w, int h); 12 | 13 | #ifdef DKGPU 14 | void forward_upsample_layer_gpu(const layer l, network net); 15 | void backward_upsample_layer_gpu(const layer l, network net); 16 | #endif 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /darknet/include/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | #include 4 | #include 5 | #include "darknet.h" 6 | #include "list.h" 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | #define TIME(a) \ 12 | do \ 13 | { \ 14 | double start = what_time_is_it_now(); \ 15 | a; \ 16 | printf("%s took: %f seconds\n", #a, what_time_is_it_now() - start); \ 17 | } 
while (0) 18 | 19 | #define TWO_PI 6.2831853071795864769252866f 20 | 21 | double what_time_is_it_now(); 22 | void shuffle(void *arr, size_t n, size_t size); 23 | void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); 24 | void free_ptrs(void **ptrs, int n); 25 | int alphanum_to_int(char c); 26 | char int_to_alphanum(int i); 27 | int read_int(int fd); 28 | void write_int(int fd, int n); 29 | void read_all(int fd, char *buffer, size_t bytes); 30 | void write_all(int fd, char *buffer, size_t bytes); 31 | int read_all_fail(int fd, char *buffer, size_t bytes); 32 | int write_all_fail(int fd, char *buffer, size_t bytes); 33 | void find_replace(char *str, char *orig, char *rep, char *output); 34 | void malloc_error(); 35 | void file_error(char *s); 36 | void strip(char *s); 37 | void strip_char(char *s, char bad); 38 | list *split_str(char *s, char delim); 39 | char *fgetl(FILE *fp); 40 | list *parse_csv_line(char *line); 41 | char *copy_string(char *s); 42 | int count_fields(char *line); 43 | float *parse_fields(char *line, int n); 44 | void translate_array(float *a, int n, float s); 45 | float constrain(float min, float max, float a); 46 | int constrain_int(int a, int min, int max); 47 | float rand_scale(float s); 48 | int rand_int(int min, int max); 49 | void mean_arrays(float **a, int n, int els, float *avg); 50 | float dist_array(float *a, float *b, int n, int sub); 51 | float **one_hot_encode(float *a, int n, int k); 52 | float sec(clock_t clocks); 53 | void print_statistics(float *a, int n); 54 | int int_index(int *a, int val, int n); 55 | #ifdef __cplusplus 56 | } 57 | #endif 58 | #endif 59 | -------------------------------------------------------------------------------- /darknet/include/yolo_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef YOLO_LAYER_H 2 | #define YOLO_LAYER_H 3 | 4 | #include "darknet.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | layer make_yolo_layer( 14 | int batch, int w, int h, int n, int total, int *mask, int classes); 15 | void forward_yolo_layer(const layer l, network net); 16 | void backward_yolo_layer(const layer l, network net); 17 | void resize_yolo_layer(layer *l, int w, int h); 18 | int yolo_num_detections(layer l, float thresh); 19 | 20 | #ifdef DKGPU 21 | void forward_yolo_layer_gpu(const layer l, network net); 22 | void backward_yolo_layer_gpu(layer l, network net); 23 | #endif 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /darknet/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.11) 2 | 3 | set(MODULENAME ${PROJECT_NAME}_core) 4 | 5 | file(GLOB YOLO_SOURCE_FILES *.cpp) 6 | file(GLOB CUDA_SOURCE_FILES *.cu) 7 | 8 | set(DARKNET_CORE_LIBNAME ${MODULENAME} CACHE INTERNAL "${MODULENAME}: core library" FORCE) 9 | set(DARKNET_CUDA_LIBNAME ${MODULENAME}_cuda CACHE INTERNAL "${MODULENAME}: cuda library" FORCE) 10 | set(YOLO_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR} CACHE INTERNAL "${MODULENAME}: Include Directories" FORCE) 11 | 12 | CUDA_ADD_LIBRARY( 13 | ${DARKNET_CUDA_LIBNAME} SHARED 14 | ${CUDA_SOURCE_FILES}) 15 | 16 | add_library(${DARKNET_CORE_LIBNAME} SHARED ${YOLO_SOURCE_FILES}) 17 | target_link_libraries( 18 | ${DARKNET_CORE_LIBNAME} 19 | ${DARKNET_CUDA_LIBNAME} 20 | ${OpenCV_LIBRARIES} 21 | ${CUDA_LIBRARIES} 22 | 
${CUDA_CUBLAS_LIBRARIES} 23 | ${CUDA_curand_LIBRARY} 24 | ${CMAKE_THREAD_LIBS_INIT}) 25 | -------------------------------------------------------------------------------- /darknet/src/activation_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "activations.h" 7 | #include "cuda.h" 8 | } 9 | 10 | 11 | __device__ float lhtan_activate_kernel(float x) 12 | { 13 | if(x < 0) return .001f*x; 14 | if(x > 1) return .001f*(x-1.f) + 1.f; 15 | return x; 16 | } 17 | __device__ float lhtan_gradient_kernel(float x) 18 | { 19 | if(x > 0 && x < 1) return 1; 20 | return .001; 21 | } 22 | 23 | __device__ float hardtan_activate_kernel(float x) 24 | { 25 | if (x < -1) return -1; 26 | if (x > 1) return 1; 27 | return x; 28 | } 29 | __device__ float linear_activate_kernel(float x){return x;} 30 | __device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} 31 | __device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;} 32 | __device__ float relu_activate_kernel(float x){return x*(x>0);} 33 | __device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} 34 | __device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} 35 | __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} 36 | __device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;} 37 | __device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);} 38 | __device__ float plse_activate_kernel(float x) 39 | { 40 | if(x < -4) return .01f * (x + 4); 41 | if(x > 4) return .01f * (x - 4) + 1; 42 | return .125f*x + .5f; 43 | } 44 | __device__ float stair_activate_kernel(float x) 45 | { 46 | int n = floorf(x); 47 | if (n%2 == 0) return floorf(x/2); 48 | else return (x - n) + floorf(x/2); 49 | } 50 | 51 | 52 | __device__ float hardtan_gradient_kernel(float x) 53 | { 54 | if (x > -1 && x < 1) return 1; 55 | return 0; 56 | } 57 | __device__ float linear_gradient_kernel(float x){return 1;} 58 | __device__ float logistic_gradient_kernel(float x){return (1-x)*x;} 59 | __device__ float loggy_gradient_kernel(float x) 60 | { 61 | float y = (x+1)/2; 62 | return 2*(1-y)*y; 63 | } 64 | __device__ float relu_gradient_kernel(float x){return (x>0);} 65 | __device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} 66 | __device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;} 67 | __device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;} 68 | __device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;} 69 | __device__ float tanh_gradient_kernel(float x){return 1-x*x;} 70 | __device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
.01f : .125f;} 71 | __device__ float stair_gradient_kernel(float x) 72 | { 73 | if (floorf(x) == x) return 0; 74 | return 1; 75 | } 76 | 77 | __device__ float activate_kernel(float x, ACTIVATION a) 78 | { 79 | switch(a){ 80 | case LINEAR: 81 | return linear_activate_kernel(x); 82 | case LOGISTIC: 83 | return logistic_activate_kernel(x); 84 | case LOGGY: 85 | return loggy_activate_kernel(x); 86 | case RELU: 87 | return relu_activate_kernel(x); 88 | case ELU: 89 | return elu_activate_kernel(x); 90 | case RELIE: 91 | return relie_activate_kernel(x); 92 | case RAMP: 93 | return ramp_activate_kernel(x); 94 | case LEAKY: 95 | return leaky_activate_kernel(x); 96 | case TANH: 97 | return tanh_activate_kernel(x); 98 | case PLSE: 99 | return plse_activate_kernel(x); 100 | case STAIR: 101 | return stair_activate_kernel(x); 102 | case HARDTAN: 103 | return hardtan_activate_kernel(x); 104 | case LHTAN: 105 | return lhtan_activate_kernel(x); 106 | } 107 | return 0; 108 | } 109 | 110 | __device__ float gradient_kernel(float x, ACTIVATION a) 111 | { 112 | switch(a){ 113 | case LINEAR: 114 | return linear_gradient_kernel(x); 115 | case LOGISTIC: 116 | return logistic_gradient_kernel(x); 117 | case LOGGY: 118 | return loggy_gradient_kernel(x); 119 | case RELU: 120 | return relu_gradient_kernel(x); 121 | case ELU: 122 | return elu_gradient_kernel(x); 123 | case RELIE: 124 | return relie_gradient_kernel(x); 125 | case RAMP: 126 | return ramp_gradient_kernel(x); 127 | case LEAKY: 128 | return leaky_gradient_kernel(x); 129 | case TANH: 130 | return tanh_gradient_kernel(x); 131 | case PLSE: 132 | return plse_gradient_kernel(x); 133 | case STAIR: 134 | return stair_gradient_kernel(x); 135 | case HARDTAN: 136 | return hardtan_gradient_kernel(x); 137 | case LHTAN: 138 | return lhtan_gradient_kernel(x); 139 | } 140 | return 0; 141 | } 142 | 143 | __global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx) 144 | { 145 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 146 | int i = id % s; 147 | int b = id / s; 148 | float x1 = x[b*s + i]; 149 | float x2 = x[b*s + s/2 + i]; 150 | if(id < n) { 151 | float de = dy[id]; 152 | dx[b*s + i] = x2*de; 153 | dx[b*s + s/2 + i] = x1*de; 154 | } 155 | } 156 | 157 | extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y) 158 | { 159 | binary_gradient_array_kernel<<>>(x, dx, n/2, size, a, y); 160 | check_error(cudaPeekAtLastError()); 161 | } 162 | __global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y) 163 | { 164 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 165 | int i = id % s; 166 | int b = id / s; 167 | float x1 = x[b*s + i]; 168 | float x2 = x[b*s + s/2 + i]; 169 | if(id < n) y[id] = x1*x2; 170 | } 171 | 172 | extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y) 173 | { 174 | binary_activate_array_kernel<<>>(x, n/2, size, a, y); 175 | check_error(cudaPeekAtLastError()); 176 | } 177 | 178 | __global__ void activate_array_kernel(float *x, int n, ACTIVATION a) 179 | { 180 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 181 | if(i < n) x[i] = activate_kernel(x[i], a); 182 | } 183 | 184 | __global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) 185 | { 186 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 187 | if(i < n) delta[i] *= gradient_kernel(x[i], a); 
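    // Note (added): the multiply above applies the chain rule in place: delta[i] arrives
    // holding dL/d(output) and is scaled by the activation gradient. For most darknet
    // activations (logistic, tanh, loggy, ...) gradient_kernel expects x[i] to already be
    // the activated output, not the pre-activation input.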
188 | } 189 | 190 | extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a) 191 | { 192 | activate_array_kernel<<>>(x, n, a); 193 | check_error(cudaPeekAtLastError()); 194 | } 195 | 196 | extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta) 197 | { 198 | gradient_array_kernel<<>>(x, n, a, delta); 199 | check_error(cudaPeekAtLastError()); 200 | } 201 | -------------------------------------------------------------------------------- /darknet/src/activation_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "activation_layer.h" 2 | #include "utils.h" 3 | #include "cuda.h" 4 | #include "blas.h" 5 | #include "gemm.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | layer make_activation_layer(int batch, int inputs, ACTIVATION activation) 13 | { 14 | layer l; 15 | l.type = ACTIVE; 16 | 17 | l.inputs = inputs; 18 | l.outputs = inputs; 19 | l.batch = batch; 20 | 21 | l.output = (float*)calloc(batch * inputs, sizeof(float*)); 22 | l.delta = (float*)calloc(batch * inputs, sizeof(float*)); 23 | 24 | l.forward = forward_activation_layer; 25 | l.backward = backward_activation_layer; 26 | #ifdef DKGPU 27 | l.forward_gpu = forward_activation_layer_gpu; 28 | l.backward_gpu = backward_activation_layer_gpu; 29 | 30 | l.output_gpu = cuda_make_array(l.output, inputs * batch); 31 | l.delta_gpu = cuda_make_array(l.delta, inputs * batch); 32 | #endif 33 | l.activation = activation; 34 | fprintf(stderr, "Activation Layer: %d inputs\n", inputs); 35 | return l; 36 | } 37 | 38 | void forward_activation_layer(layer l, network net) 39 | { 40 | copy_cpu(l.outputs * l.batch, net.input, 1, l.output, 1); 41 | activate_array(l.output, l.outputs * l.batch, l.activation); 42 | } 43 | 44 | void backward_activation_layer(layer l, network net) 45 | { 46 | gradient_array(l.output, l.outputs * l.batch, l.activation, l.delta); 47 | copy_cpu(l.outputs * l.batch, l.delta, 1, net.delta, 1); 48 | } 49 | 50 | #ifdef DKGPU 51 | 52 | void forward_activation_layer_gpu(layer l, network net) 53 | { 54 | copy_gpu(l.outputs * l.batch, net.input_gpu, 1, l.output_gpu, 1); 55 | activate_array_gpu(l.output_gpu, l.outputs * l.batch, l.activation); 56 | } 57 | 58 | void backward_activation_layer_gpu(layer l, network net) 59 | { 60 | gradient_array_gpu( 61 | l.output_gpu, l.outputs * l.batch, l.activation, l.delta_gpu); 62 | copy_gpu(l.outputs * l.batch, l.delta_gpu, 1, net.delta_gpu, 1); 63 | } 64 | #endif 65 | -------------------------------------------------------------------------------- /darknet/src/activations.cpp: -------------------------------------------------------------------------------- 1 | #include "activations.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | char *get_activation_string(ACTIVATION a) 9 | { 10 | switch(a){ 11 | case LOGISTIC: 12 | return "logistic"; 13 | case LOGGY: 14 | return "loggy"; 15 | case RELU: 16 | return "relu"; 17 | case ELU: 18 | return "elu"; 19 | case RELIE: 20 | return "relie"; 21 | case RAMP: 22 | return "ramp"; 23 | case LINEAR: 24 | return "linear"; 25 | case TANH: 26 | return "tanh"; 27 | case PLSE: 28 | return "plse"; 29 | case LEAKY: 30 | return "leaky"; 31 | case STAIR: 32 | return "stair"; 33 | case HARDTAN: 34 | return "hardtan"; 35 | case LHTAN: 36 | return "lhtan"; 37 | default: 38 | break; 39 | } 40 | return "relu"; 41 | } 42 | 43 | ACTIVATION get_activation(char *s) 44 | { 45 | if (strcmp(s, "logistic")==0) return LOGISTIC; 46 | if (strcmp(s, "loggy")==0) return 
LOGGY; 47 | if (strcmp(s, "relu")==0) return RELU; 48 | if (strcmp(s, "elu")==0) return ELU; 49 | if (strcmp(s, "relie")==0) return RELIE; 50 | if (strcmp(s, "plse")==0) return PLSE; 51 | if (strcmp(s, "hardtan")==0) return HARDTAN; 52 | if (strcmp(s, "lhtan")==0) return LHTAN; 53 | if (strcmp(s, "linear")==0) return LINEAR; 54 | if (strcmp(s, "ramp")==0) return RAMP; 55 | if (strcmp(s, "leaky")==0) return LEAKY; 56 | if (strcmp(s, "tanh")==0) return TANH; 57 | if (strcmp(s, "stair")==0) return STAIR; 58 | fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); 59 | return RELU; 60 | } 61 | 62 | float activate(float x, ACTIVATION a) 63 | { 64 | switch(a){ 65 | case LINEAR: 66 | return linear_activate(x); 67 | case LOGISTIC: 68 | return logistic_activate(x); 69 | case LOGGY: 70 | return loggy_activate(x); 71 | case RELU: 72 | return relu_activate(x); 73 | case ELU: 74 | return elu_activate(x); 75 | case RELIE: 76 | return relie_activate(x); 77 | case RAMP: 78 | return ramp_activate(x); 79 | case LEAKY: 80 | return leaky_activate(x); 81 | case TANH: 82 | return tanh_activate(x); 83 | case PLSE: 84 | return plse_activate(x); 85 | case STAIR: 86 | return stair_activate(x); 87 | case HARDTAN: 88 | return hardtan_activate(x); 89 | case LHTAN: 90 | return lhtan_activate(x); 91 | } 92 | return 0; 93 | } 94 | 95 | void activate_array(float *x, const int n, const ACTIVATION a) 96 | { 97 | int i; 98 | for(i = 0; i < n; ++i){ 99 | x[i] = activate(x[i], a); 100 | } 101 | } 102 | 103 | float gradient(float x, ACTIVATION a) 104 | { 105 | switch(a){ 106 | case LINEAR: 107 | return linear_gradient(x); 108 | case LOGISTIC: 109 | return logistic_gradient(x); 110 | case LOGGY: 111 | return loggy_gradient(x); 112 | case RELU: 113 | return relu_gradient(x); 114 | case ELU: 115 | return elu_gradient(x); 116 | case RELIE: 117 | return relie_gradient(x); 118 | case RAMP: 119 | return ramp_gradient(x); 120 | case LEAKY: 121 | return leaky_gradient(x); 122 | case TANH: 123 | return tanh_gradient(x); 124 | case PLSE: 125 | return plse_gradient(x); 126 | case STAIR: 127 | return stair_gradient(x); 128 | case HARDTAN: 129 | return hardtan_gradient(x); 130 | case LHTAN: 131 | return lhtan_gradient(x); 132 | } 133 | return 0; 134 | } 135 | 136 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) 137 | { 138 | int i; 139 | for(i = 0; i < n; ++i){ 140 | delta[i] *= gradient(x[i], a); 141 | } 142 | } 143 | 144 | -------------------------------------------------------------------------------- /darknet/src/avgpool_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "avgpool_layer.h" 2 | #include 3 | #include "cuda.h" 4 | 5 | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) 6 | { 7 | fprintf( 8 | stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); 9 | avgpool_layer l; 10 | l.type = AVGPOOL; 11 | l.batch = batch; 12 | l.h = h; 13 | l.w = w; 14 | l.c = c; 15 | l.out_w = 1; 16 | l.out_h = 1; 17 | l.out_c = c; 18 | l.outputs = l.out_c; 19 | l.inputs = h * w * c; 20 | int output_size = l.outputs * batch; 21 | l.output = (float *)calloc(output_size, sizeof(float)); 22 | l.delta = (float *)calloc(output_size, sizeof(float)); 23 | l.forward = forward_avgpool_layer; 24 | l.backward = backward_avgpool_layer; 25 | #ifdef DKGPU 26 | l.forward_gpu = forward_avgpool_layer_gpu; 27 | l.backward_gpu = backward_avgpool_layer_gpu; 28 | l.output_gpu = cuda_make_array(l.output, output_size); 29 | l.delta_gpu = 
cuda_make_array(l.delta, output_size); 30 | #endif 31 | return l; 32 | } 33 | 34 | void resize_avgpool_layer(avgpool_layer *l, int w, int h) 35 | { 36 | l->w = w; 37 | l->h = h; 38 | l->inputs = h * w * l->c; 39 | } 40 | 41 | void forward_avgpool_layer(const avgpool_layer l, network net) 42 | { 43 | int b, i, k; 44 | 45 | for (b = 0; b < l.batch; ++b) 46 | { 47 | for (k = 0; k < l.c; ++k) 48 | { 49 | int out_index = k + b * l.c; 50 | l.output[out_index] = 0; 51 | for (i = 0; i < l.h * l.w; ++i) 52 | { 53 | int in_index = i + l.h * l.w * (k + b * l.c); 54 | l.output[out_index] += net.input[in_index]; 55 | } 56 | l.output[out_index] /= l.h * l.w; 57 | } 58 | } 59 | } 60 | 61 | void backward_avgpool_layer(const avgpool_layer l, network net) 62 | { 63 | int b, i, k; 64 | 65 | for (b = 0; b < l.batch; ++b) 66 | { 67 | for (k = 0; k < l.c; ++k) 68 | { 69 | int out_index = k + b * l.c; 70 | for (i = 0; i < l.h * l.w; ++i) 71 | { 72 | int in_index = i + l.h * l.w * (k + b * l.c); 73 | net.delta[in_index] += l.delta[out_index] / (l.h * l.w); 74 | } 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /darknet/src/avgpool_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "avgpool_layer.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) 11 | { 12 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 13 | if(id >= n) return; 14 | 15 | int k = id % c; 16 | id /= c; 17 | int b = id; 18 | 19 | int i; 20 | int out_index = (k + c*b); 21 | output[out_index] = 0; 22 | for(i = 0; i < w*h; ++i){ 23 | int in_index = i + h*w*(k + b*c); 24 | output[out_index] += input[in_index]; 25 | } 26 | output[out_index] /= w*h; 27 | } 28 | 29 | __global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) 30 | { 31 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 32 | if(id >= n) return; 33 | 34 | int k = id % c; 35 | id /= c; 36 | int b = id; 37 | 38 | int i; 39 | int out_index = (k + c*b); 40 | for(i = 0; i < w*h; ++i){ 41 | int in_index = i + h*w*(k + b*c); 42 | in_delta[in_index] += out_delta[out_index] / (w*h); 43 | } 44 | } 45 | 46 | extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net) 47 | { 48 | size_t n = layer.c*layer.batch; 49 | 50 | forward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu); 51 | check_error(cudaPeekAtLastError()); 52 | } 53 | 54 | extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network net) 55 | { 56 | size_t n = layer.c*layer.batch; 57 | 58 | backward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, net.delta_gpu, layer.delta_gpu); 59 | check_error(cudaPeekAtLastError()); 60 | } 61 | 62 | -------------------------------------------------------------------------------- /darknet/src/box.cpp: -------------------------------------------------------------------------------- 1 | #include "box.h" 2 | #include 3 | #include 4 | #include 5 | 6 | int nms_comparator(const void *pa, const void *pb) 7 | { 8 | detection a = *(detection *)pa; 9 | detection b = *(detection *)pb; 10 | float diff = 0; 11 | if (b.sort_class >= 0) 12 | { 13 | diff = a.prob[b.sort_class] - b.prob[b.sort_class]; 14 | } 15 | else 16 | { 17 | diff = 
a.objectness - b.objectness; 18 | } 19 | if (diff < 0) 20 | return 1; 21 | else if (diff > 0) 22 | return -1; 23 | return 0; 24 | } 25 | 26 | void do_nms_obj(detection *dets, int total, int classes, float thresh) 27 | { 28 | int i, j, k; 29 | k = total - 1; 30 | for (i = 0; i <= k; ++i) 31 | { 32 | if (dets[i].objectness == 0) 33 | { 34 | detection swap = dets[i]; 35 | dets[i] = dets[k]; 36 | dets[k] = swap; 37 | --k; 38 | --i; 39 | } 40 | } 41 | total = k + 1; 42 | 43 | for (i = 0; i < total; ++i) 44 | { 45 | dets[i].sort_class = -1; 46 | } 47 | 48 | qsort(dets, total, sizeof(detection), nms_comparator); 49 | for (i = 0; i < total; ++i) 50 | { 51 | if (dets[i].objectness == 0) continue; 52 | box a = dets[i].bbox; 53 | for (j = i + 1; j < total; ++j) 54 | { 55 | if (dets[j].objectness == 0) continue; 56 | box b = dets[j].bbox; 57 | if (box_iou(a, b) > thresh) 58 | { 59 | dets[j].objectness = 0; 60 | for (k = 0; k < classes; ++k) 61 | { 62 | dets[j].prob[k] = 0; 63 | } 64 | } 65 | } 66 | } 67 | } 68 | 69 | void do_nms_sort(detection *dets, int total, int classes, float thresh) 70 | { 71 | int i, j, k; 72 | k = total - 1; 73 | for (i = 0; i <= k; ++i) 74 | { 75 | if (dets[i].objectness == 0) 76 | { 77 | detection swap = dets[i]; 78 | dets[i] = dets[k]; 79 | dets[k] = swap; 80 | --k; 81 | --i; 82 | } 83 | } 84 | total = k + 1; 85 | 86 | for (k = 0; k < classes; ++k) 87 | { 88 | for (i = 0; i < total; ++i) 89 | { 90 | dets[i].sort_class = k; 91 | } 92 | qsort(dets, total, sizeof(detection), nms_comparator); 93 | for (i = 0; i < total; ++i) 94 | { 95 | if (dets[i].prob[k] == 0) continue; 96 | box a = dets[i].bbox; 97 | for (j = i + 1; j < total; ++j) 98 | { 99 | box b = dets[j].bbox; 100 | if (box_iou(a, b) > thresh) 101 | { 102 | dets[j].prob[k] = 0; 103 | } 104 | } 105 | } 106 | } 107 | } 108 | 109 | box float_to_box(float *f, int stride) 110 | { 111 | box b = {0}; 112 | b.x = f[0]; 113 | b.y = f[1 * stride]; 114 | b.w = f[2 * stride]; 115 | b.h = f[3 * stride]; 116 | return b; 117 | } 118 | 119 | dbox derivative(box a, box b) 120 | { 121 | dbox d; 122 | d.dx = 0; 123 | d.dw = 0; 124 | float l1 = a.x - a.w / 2; 125 | float l2 = b.x - b.w / 2; 126 | if (l1 > l2) 127 | { 128 | d.dx -= 1; 129 | d.dw += .5; 130 | } 131 | float r1 = a.x + a.w / 2; 132 | float r2 = b.x + b.w / 2; 133 | if (r1 < r2) 134 | { 135 | d.dx += 1; 136 | d.dw += .5; 137 | } 138 | if (l1 > r2) 139 | { 140 | d.dx = -1; 141 | d.dw = 0; 142 | } 143 | if (r1 < l2) 144 | { 145 | d.dx = 1; 146 | d.dw = 0; 147 | } 148 | 149 | d.dy = 0; 150 | d.dh = 0; 151 | float t1 = a.y - a.h / 2; 152 | float t2 = b.y - b.h / 2; 153 | if (t1 > t2) 154 | { 155 | d.dy -= 1; 156 | d.dh += .5; 157 | } 158 | float b1 = a.y + a.h / 2; 159 | float b2 = b.y + b.h / 2; 160 | if (b1 < b2) 161 | { 162 | d.dy += 1; 163 | d.dh += .5; 164 | } 165 | if (t1 > b2) 166 | { 167 | d.dy = -1; 168 | d.dh = 0; 169 | } 170 | if (b1 < t2) 171 | { 172 | d.dy = 1; 173 | d.dh = 0; 174 | } 175 | return d; 176 | } 177 | 178 | float overlap(float x1, float w1, float x2, float w2) 179 | { 180 | float l1 = x1 - w1 / 2; 181 | float l2 = x2 - w2 / 2; 182 | float left = l1 > l2 ? l1 : l2; 183 | float r1 = x1 + w1 / 2; 184 | float r2 = x2 + w2 / 2; 185 | float right = r1 < r2 ? 
r1 : r2; 186 | return right - left; 187 | } 188 | 189 | float box_intersection(box a, box b) 190 | { 191 | float w = overlap(a.x, a.w, b.x, b.w); 192 | float h = overlap(a.y, a.h, b.y, b.h); 193 | if (w < 0 || h < 0) return 0; 194 | float area = w * h; 195 | return area; 196 | } 197 | 198 | float box_union(box a, box b) 199 | { 200 | float i = box_intersection(a, b); 201 | float u = a.w * a.h + b.w * b.h - i; 202 | return u; 203 | } 204 | 205 | float box_iou(box a, box b) 206 | { 207 | return box_intersection(a, b) / box_union(a, b); 208 | } 209 | 210 | float box_rmse(box a, box b) 211 | { 212 | return sqrt(pow(a.x - b.x, 2) + pow(a.y - b.y, 2) + pow(a.w - b.w, 2) + 213 | pow(a.h - b.h, 2)); 214 | } 215 | 216 | dbox dintersect(box a, box b) 217 | { 218 | float w = overlap(a.x, a.w, b.x, b.w); 219 | float h = overlap(a.y, a.h, b.y, b.h); 220 | dbox dover = derivative(a, b); 221 | dbox di; 222 | 223 | di.dw = dover.dw * h; 224 | di.dx = dover.dx * h; 225 | di.dh = dover.dh * w; 226 | di.dy = dover.dy * w; 227 | 228 | return di; 229 | } 230 | 231 | dbox dunion(box a, box b) 232 | { 233 | dbox du; 234 | 235 | dbox di = dintersect(a, b); 236 | du.dw = a.h - di.dw; 237 | du.dh = a.w - di.dh; 238 | du.dx = -di.dx; 239 | du.dy = -di.dy; 240 | 241 | return du; 242 | } 243 | 244 | void test_dunion() 245 | { 246 | box a = {0, 0, 1, 1}; 247 | box dxa = {0 + .0001, 0, 1, 1}; 248 | box dya = {0, 0 + .0001, 1, 1}; 249 | box dwa = {0, 0, 1 + .0001, 1}; 250 | box dha = {0, 0, 1, 1 + .0001}; 251 | 252 | box b = {.5, .5, .2, .2}; 253 | dbox di = dunion(a, b); 254 | printf("Union: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); 255 | float inter = box_union(a, b); 256 | float xinter = box_union(dxa, b); 257 | float yinter = box_union(dya, b); 258 | float winter = box_union(dwa, b); 259 | float hinter = box_union(dha, b); 260 | xinter = (xinter - inter) / (.0001); 261 | yinter = (yinter - inter) / (.0001); 262 | winter = (winter - inter) / (.0001); 263 | hinter = (hinter - inter) / (.0001); 264 | printf("Union Manual %f %f %f %f\n", xinter, yinter, winter, hinter); 265 | } 266 | void test_dintersect() 267 | { 268 | box a = {0, 0, 1, 1}; 269 | box dxa = {0 + .0001, 0, 1, 1}; 270 | box dya = {0, 0 + .0001, 1, 1}; 271 | box dwa = {0, 0, 1 + .0001, 1}; 272 | box dha = {0, 0, 1, 1 + .0001}; 273 | 274 | box b = {.5, .5, .2, .2}; 275 | dbox di = dintersect(a, b); 276 | printf("Inter: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); 277 | float inter = box_intersection(a, b); 278 | float xinter = box_intersection(dxa, b); 279 | float yinter = box_intersection(dya, b); 280 | float winter = box_intersection(dwa, b); 281 | float hinter = box_intersection(dha, b); 282 | xinter = (xinter - inter) / (.0001); 283 | yinter = (yinter - inter) / (.0001); 284 | winter = (winter - inter) / (.0001); 285 | hinter = (hinter - inter) / (.0001); 286 | printf("Inter Manual %f %f %f %f\n", xinter, yinter, winter, hinter); 287 | } 288 | 289 | void test_box() 290 | { 291 | test_dintersect(); 292 | test_dunion(); 293 | box a = {0, 0, 1, 1}; 294 | box dxa = {0 + .00001, 0, 1, 1}; 295 | box dya = {0, 0 + .00001, 1, 1}; 296 | box dwa = {0, 0, 1 + .00001, 1}; 297 | box dha = {0, 0, 1, 1 + .00001}; 298 | 299 | box b = {.5, 0, .2, .2}; 300 | 301 | float iou = box_iou(a, b); 302 | iou = (1 - iou) * (1 - iou); 303 | printf("%f\n", iou); 304 | dbox d = diou(a, b); 305 | printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh); 306 | 307 | float xiou = box_iou(dxa, b); 308 | float yiou = box_iou(dya, b); 309 | float wiou = box_iou(dwa, b); 310 | float hiou = 
box_iou(dha, b); 311 | xiou = ((1 - xiou) * (1 - xiou) - iou) / (.00001); 312 | yiou = ((1 - yiou) * (1 - yiou) - iou) / (.00001); 313 | wiou = ((1 - wiou) * (1 - wiou) - iou) / (.00001); 314 | hiou = ((1 - hiou) * (1 - hiou) - iou) / (.00001); 315 | printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou); 316 | } 317 | 318 | dbox diou(box a, box b) 319 | { 320 | float u = box_union(a, b); 321 | float i = box_intersection(a, b); 322 | dbox di = dintersect(a, b); 323 | dbox du = dunion(a, b); 324 | dbox dd = {0, 0, 0, 0}; 325 | 326 | if (i <= 0 || 1) 327 | { 328 | dd.dx = b.x - a.x; 329 | dd.dy = b.y - a.y; 330 | dd.dw = b.w - a.w; 331 | dd.dh = b.h - a.h; 332 | return dd; 333 | } 334 | 335 | dd.dx = 2 * pow((1 - (i / u)), 1) * (di.dx * u - du.dx * i) / (u * u); 336 | dd.dy = 2 * pow((1 - (i / u)), 1) * (di.dy * u - du.dy * i) / (u * u); 337 | dd.dw = 2 * pow((1 - (i / u)), 1) * (di.dw * u - du.dw * i) / (u * u); 338 | dd.dh = 2 * pow((1 - (i / u)), 1) * (di.dh * u - du.dh * i) / (u * u); 339 | return dd; 340 | } 341 | 342 | void do_nms(box *boxes, float **probs, int total, int classes, float thresh) 343 | { 344 | int i, j, k; 345 | for (i = 0; i < total; ++i) 346 | { 347 | int any = 0; 348 | for (k = 0; k < classes; ++k) any = any || (probs[i][k] > 0); 349 | if (!any) 350 | { 351 | continue; 352 | } 353 | for (j = i + 1; j < total; ++j) 354 | { 355 | if (box_iou(boxes[i], boxes[j]) > thresh) 356 | { 357 | for (k = 0; k < classes; ++k) 358 | { 359 | if (probs[i][k] < probs[j][k]) 360 | probs[i][k] = 0; 361 | else 362 | probs[j][k] = 0; 363 | } 364 | } 365 | } 366 | } 367 | } 368 | 369 | box encode_box(box b, box anchor) 370 | { 371 | box encode; 372 | encode.x = (b.x - anchor.x) / anchor.w; 373 | encode.y = (b.y - anchor.y) / anchor.h; 374 | encode.w = log2(b.w / anchor.w); 375 | encode.h = log2(b.h / anchor.h); 376 | return encode; 377 | } 378 | 379 | box decode_box(box b, box anchor) 380 | { 381 | box decode; 382 | decode.x = b.x * anchor.w + anchor.x; 383 | decode.y = b.y * anchor.h + anchor.y; 384 | decode.w = pow(2., b.w) * anchor.w; 385 | decode.h = pow(2., b.h) * anchor.h; 386 | return decode; 387 | } 388 | -------------------------------------------------------------------------------- /darknet/src/col2im.cpp: -------------------------------------------------------------------------------- 1 | #include "col2im.h" 2 | #include 3 | #include 4 | 5 | void col2im_add_pixel(float *im, int height, int width, int channels, 6 | int row, int col, int channel, int pad, float val) 7 | { 8 | row -= pad; 9 | col -= pad; 10 | 11 | if (row < 0 || col < 0 || 12 | row >= height || col >= width) return; 13 | im[col + width*(row + height*channel)] += val; 14 | } 15 | //This one might be too, can't remember. 
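// Note (added): col2im_cpu below is the inverse of im2col. Every element of the column
// buffer is added back onto the image pixel it was originally sampled from; overlapping
// receptive fields therefore accumulate, which is the behaviour needed when
// back-propagating gradients through an im2col-based convolution. Padded positions that
// fall outside the image are simply skipped by col2im_add_pixel.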
16 | void col2im_cpu(float* data_col, 17 | int channels, int height, int width, 18 | int ksize, int stride, int pad, float* data_im) 19 | { 20 | int c,h,w; 21 | int height_col = (height + 2*pad - ksize) / stride + 1; 22 | int width_col = (width + 2*pad - ksize) / stride + 1; 23 | 24 | int channels_col = channels * ksize * ksize; 25 | for (c = 0; c < channels_col; ++c) { 26 | int w_offset = c % ksize; 27 | int h_offset = (c / ksize) % ksize; 28 | int c_im = c / ksize / ksize; 29 | for (h = 0; h < height_col; ++h) { 30 | for (w = 0; w < width_col; ++w) { 31 | int im_row = h_offset + h * stride; 32 | int im_col = w_offset + w * stride; 33 | int col_index = (c * height_col + h) * width_col + w; 34 | double val = data_col[col_index]; 35 | col2im_add_pixel(data_im, height, width, channels, 36 | im_row, im_col, c_im, pad, val); 37 | } 38 | } 39 | } 40 | } 41 | 42 | -------------------------------------------------------------------------------- /darknet/src/col2im_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "col2im.h" 7 | #include "cuda.h" 8 | } 9 | 10 | // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu 11 | // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE 12 | 13 | __global__ void col2im_gpu_kernel(const int n, const float* data_col, 14 | const int height, const int width, const int ksize, 15 | const int pad, 16 | const int stride, 17 | const int height_col, const int width_col, 18 | float *data_im) { 19 | int index = blockIdx.x*blockDim.x+threadIdx.x; 20 | for(; index < n; index += blockDim.x*gridDim.x){ 21 | float val = 0; 22 | int w = index % width + pad; 23 | int h = (index / width) % height + pad; 24 | int c = index / (width * height); 25 | // compute the start and end of the output 26 | int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; 27 | int w_col_end = min(w / stride + 1, width_col); 28 | int h_col_start = (h < ksize) ? 0 : (h - ksize) / stride + 1; 29 | int h_col_end = min(h / stride + 1, height_col); 30 | // equivalent implementation 31 | int offset = 32 | (c * ksize * ksize + h * ksize + w) * height_col * width_col; 33 | int coeff_h_col = (1 - stride * ksize * height_col) * width_col; 34 | int coeff_w_col = (1 - stride * height_col * width_col); 35 | for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { 36 | for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { 37 | val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; 38 | } 39 | } 40 | data_im[index] += val; 41 | } 42 | } 43 | 44 | void col2im_gpu(float *data_col, 45 | int channels, int height, int width, 46 | int ksize, int stride, int pad, float *data_im){ 47 | // We are going to launch channels * height_col * width_col kernels, each 48 | // kernel responsible for copying a single-channel grid. 
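    // Note (added): despite the comment above, num_kernels below is channels * height * width,
    // i.e. one thread per *image* element. Each thread gathers every column entry that maps
    // onto its pixel and accumulates the sum into data_im, so no atomics are required.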
49 | int height_col = (height + 2 * pad - ksize) / stride + 1; 50 | int width_col = (width + 2 * pad - ksize) / stride + 1; 51 | int num_kernels = channels * height * width; 52 | col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, 53 | BLOCK>>>( 54 | num_kernels, data_col, height, width, ksize, pad, 55 | stride, height_col, 56 | width_col, data_im); 57 | } 58 | 59 | -------------------------------------------------------------------------------- /darknet/src/cost_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "cost_layer.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "blas.h" 7 | #include "cuda.h" 8 | #include "utils.h" 9 | 10 | COST_TYPE get_cost_type(char *s) 11 | { 12 | if (strcmp(s, "seg") == 0) return SEG; 13 | if (strcmp(s, "sse") == 0) return SSE; 14 | if (strcmp(s, "masked") == 0) return MASKED; 15 | if (strcmp(s, "smooth") == 0) return SMOOTH; 16 | if (strcmp(s, "L1") == 0) return L1; 17 | if (strcmp(s, "wgan") == 0) return WGAN; 18 | fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); 19 | return SSE; 20 | } 21 | 22 | char *get_cost_string(COST_TYPE a) 23 | { 24 | switch (a) 25 | { 26 | case SEG: return "seg"; 27 | case SSE: return "sse"; 28 | case MASKED: return "masked"; 29 | case SMOOTH: return "smooth"; 30 | case L1: return "L1"; 31 | case WGAN: return "wgan"; 32 | } 33 | return "sse"; 34 | } 35 | 36 | cost_layer make_cost_layer(int batch, 37 | int inputs, 38 | COST_TYPE cost_type, 39 | float scale) 40 | { 41 | fprintf( 42 | stderr, "cost %4d\n", inputs); 43 | cost_layer l; 44 | l.type = COST; 45 | 46 | l.scale = scale; 47 | l.batch = batch; 48 | l.inputs = inputs; 49 | l.outputs = inputs; 50 | l.cost_type = cost_type; 51 | l.delta = (float *)calloc(inputs * batch, sizeof(float)); 52 | l.output = (float *)calloc(inputs * batch, sizeof(float)); 53 | l.cost = (float *)calloc(1, sizeof(float)); 54 | 55 | l.forward = forward_cost_layer; 56 | l.backward = backward_cost_layer; 57 | #ifdef DKGPU 58 | l.forward_gpu = forward_cost_layer_gpu; 59 | l.backward_gpu = backward_cost_layer_gpu; 60 | 61 | l.delta_gpu = cuda_make_array(l.output, inputs * batch); 62 | l.output_gpu = cuda_make_array(l.delta, inputs * batch); 63 | #endif 64 | return l; 65 | } 66 | 67 | void resize_cost_layer(cost_layer *l, int inputs) 68 | { 69 | l->inputs = inputs; 70 | l->outputs = inputs; 71 | l->delta = (float *)realloc(l->delta, inputs * l->batch * sizeof(float)); 72 | l->output = (float *)realloc(l->output, inputs * l->batch * sizeof(float)); 73 | #ifdef DKGPU 74 | cuda_free(l->delta_gpu); 75 | cuda_free(l->output_gpu); 76 | l->delta_gpu = cuda_make_array(l->delta, inputs * l->batch); 77 | l->output_gpu = cuda_make_array(l->output, inputs * l->batch); 78 | #endif 79 | } 80 | 81 | void forward_cost_layer(cost_layer l, network net) 82 | { 83 | if (!net.truth) return; 84 | if (l.cost_type == MASKED) 85 | { 86 | int i; 87 | for (i = 0; i < l.batch * l.inputs; ++i) 88 | { 89 | if (net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM; 90 | } 91 | } 92 | if (l.cost_type == SMOOTH) 93 | { 94 | smooth_l1_cpu(l.batch * l.inputs, net.input, net.truth, l.delta, l.output); 95 | } 96 | else if (l.cost_type == L1) 97 | { 98 | l1_cpu(l.batch * l.inputs, net.input, net.truth, l.delta, l.output); 99 | } 100 | else 101 | { 102 | l2_cpu(l.batch * l.inputs, net.input, net.truth, l.delta, l.output); 103 | } 104 | l.cost[0] = sum_array(l.output, l.batch * l.inputs); 105 | } 106 | 107 | void backward_cost_layer(const cost_layer l, network 
net) 108 | { 109 | axpy_cpu(l.batch * l.inputs, l.scale, l.delta, 1, net.delta, 1); 110 | } 111 | 112 | #ifdef DKGPU 113 | 114 | void pull_cost_layer(cost_layer l) 115 | { 116 | cuda_pull_array(l.delta_gpu, l.delta, l.batch * l.inputs); 117 | } 118 | 119 | void push_cost_layer(cost_layer l) 120 | { 121 | cuda_push_array(l.delta_gpu, l.delta, l.batch * l.inputs); 122 | } 123 | 124 | int float_abs_compare(const void *a, const void *b) 125 | { 126 | float fa = *(const float *)a; 127 | if (fa < 0) fa = -fa; 128 | float fb = *(const float *)b; 129 | if (fb < 0) fb = -fb; 130 | return (fa > fb) - (fa < fb); 131 | } 132 | 133 | void forward_cost_layer_gpu(cost_layer l, network net) 134 | { 135 | if (!net.truth) return; 136 | if (l.smooth) 137 | { 138 | scal_gpu(l.batch * l.inputs, (1 - l.smooth), net.truth_gpu, 1); 139 | add_gpu(l.batch * l.inputs, l.smooth * 1. / l.inputs, net.truth_gpu, 1); 140 | } 141 | 142 | if (l.cost_type == SMOOTH) 143 | { 144 | smooth_l1_gpu(l.batch * l.inputs, 145 | net.input_gpu, 146 | net.truth_gpu, 147 | l.delta_gpu, 148 | l.output_gpu); 149 | } 150 | else if (l.cost_type == L1) 151 | { 152 | l1_gpu(l.batch * l.inputs, 153 | net.input_gpu, 154 | net.truth_gpu, 155 | l.delta_gpu, 156 | l.output_gpu); 157 | } 158 | else if (l.cost_type == WGAN) 159 | { 160 | wgan_gpu(l.batch * l.inputs, 161 | net.input_gpu, 162 | net.truth_gpu, 163 | l.delta_gpu, 164 | l.output_gpu); 165 | } 166 | else 167 | { 168 | l2_gpu(l.batch * l.inputs, 169 | net.input_gpu, 170 | net.truth_gpu, 171 | l.delta_gpu, 172 | l.output_gpu); 173 | } 174 | 175 | if (l.cost_type == SEG && l.noobject_scale != 1) 176 | { 177 | scale_mask_gpu( 178 | l.batch * l.inputs, l.delta_gpu, 0, net.truth_gpu, l.noobject_scale); 179 | scale_mask_gpu( 180 | l.batch * l.inputs, l.output_gpu, 0, net.truth_gpu, l.noobject_scale); 181 | } 182 | if (l.cost_type == MASKED) 183 | { 184 | mask_gpu(l.batch * l.inputs, net.delta_gpu, SECRET_NUM, net.truth_gpu, 0); 185 | } 186 | 187 | if (l.ratio) 188 | { 189 | cuda_pull_array(l.delta_gpu, l.delta, l.batch * l.inputs); 190 | qsort(l.delta, l.batch * l.inputs, sizeof(float), float_abs_compare); 191 | int n = (1 - l.ratio) * l.batch * l.inputs; 192 | float thresh = l.delta[n]; 193 | thresh = 0; 194 | printf("%f\n", thresh); 195 | supp_gpu(l.batch * l.inputs, thresh, l.delta_gpu, 1); 196 | } 197 | 198 | if (l.thresh) 199 | { 200 | supp_gpu(l.batch * l.inputs, l.thresh * 1. 
/ l.inputs, l.delta_gpu, 1); 201 | } 202 | 203 | cuda_pull_array(l.output_gpu, l.output, l.batch * l.inputs); 204 | l.cost[0] = sum_array(l.output, l.batch * l.inputs); 205 | } 206 | 207 | void backward_cost_layer_gpu(const cost_layer l, network net) 208 | { 209 | axpy_gpu(l.batch * l.inputs, l.scale, l.delta_gpu, 1, net.delta_gpu, 1); 210 | } 211 | #endif 212 | -------------------------------------------------------------------------------- /darknet/src/crnn_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "crnn_layer.h" 2 | #include "convolutional_layer.h" 3 | #include "utils.h" 4 | #include "cuda.h" 5 | #include "blas.h" 6 | #include "gemm.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | static void increment_layer(layer *l, int steps) 14 | { 15 | int num = l->outputs*l->batch*steps; 16 | l->output += num; 17 | l->delta += num; 18 | l->x += num; 19 | l->x_norm += num; 20 | 21 | #ifdef DKGPU 22 | l->output_gpu += num; 23 | l->delta_gpu += num; 24 | l->x_gpu += num; 25 | l->x_norm_gpu += num; 26 | #endif 27 | } 28 | 29 | layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize) 30 | { 31 | fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters); 32 | batch = batch / steps; 33 | layer l; 34 | l.batch = batch; 35 | l.type = CRNN; 36 | l.steps = steps; 37 | l.h = h; 38 | l.w = w; 39 | l.c = c; 40 | l.out_h = h; 41 | l.out_w = w; 42 | l.out_c = output_filters; 43 | l.inputs = h*w*c; 44 | l.hidden = h * w * hidden_filters; 45 | l.outputs = l.out_h * l.out_w * l.out_c; 46 | 47 | l.state = (float *)calloc(l.hidden * batch * (steps + 1), sizeof(float)); 48 | 49 | l.input_layer = (layer*)malloc(sizeof(layer)); 50 | fprintf(stderr, "\t\t"); 51 | *(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); 52 | l.input_layer->batch = batch; 53 | 54 | l.self_layer = (layer*)malloc(sizeof(layer)); 55 | fprintf(stderr, "\t\t"); 56 | *(l.self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); 57 | l.self_layer->batch = batch; 58 | 59 | l.output_layer = (layer*)malloc(sizeof(layer)); 60 | fprintf(stderr, "\t\t"); 61 | *(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0); 62 | l.output_layer->batch = batch; 63 | 64 | l.output = l.output_layer->output; 65 | l.delta = l.output_layer->delta; 66 | 67 | l.forward = forward_crnn_layer; 68 | l.backward = backward_crnn_layer; 69 | l.update = update_crnn_layer; 70 | 71 | #ifdef DKGPU 72 | l.forward_gpu = forward_crnn_layer_gpu; 73 | l.backward_gpu = backward_crnn_layer_gpu; 74 | l.update_gpu = update_crnn_layer_gpu; 75 | 76 | l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1)); 77 | l.output_gpu = l.output_layer->output_gpu; 78 | l.delta_gpu = l.output_layer->delta_gpu; 79 | #endif 80 | 81 | return l; 82 | } 83 | 84 | void update_crnn_layer(layer l, update_args a) 85 | { 86 | update_convolutional_layer(*(l.input_layer), a); 87 | update_convolutional_layer(*(l.self_layer), a); 88 | update_convolutional_layer(*(l.output_layer), a); 89 | } 90 | 91 | void forward_crnn_layer(layer l, network net) 92 | { 93 | network s = net; 94 | s.train = net.train; 95 | int i; 96 | layer input_layer = *(l.input_layer); 97 | layer 
self_layer = *(l.self_layer); 98 | layer output_layer = *(l.output_layer); 99 | 100 | fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); 101 | fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1); 102 | fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1); 103 | if(net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1); 104 | 105 | for (i = 0; i < l.steps; ++i) { 106 | s.input = net.input; 107 | forward_convolutional_layer(input_layer, s); 108 | 109 | s.input = l.state; 110 | forward_convolutional_layer(self_layer, s); 111 | 112 | float *old_state = l.state; 113 | if(net.train) l.state += l.hidden*l.batch; 114 | if(l.shortcut){ 115 | copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1); 116 | }else{ 117 | fill_cpu(l.hidden * l.batch, 0, l.state, 1); 118 | } 119 | axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1); 120 | axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); 121 | 122 | s.input = l.state; 123 | forward_convolutional_layer(output_layer, s); 124 | 125 | net.input += l.inputs*l.batch; 126 | increment_layer(&input_layer, 1); 127 | increment_layer(&self_layer, 1); 128 | increment_layer(&output_layer, 1); 129 | } 130 | } 131 | 132 | void backward_crnn_layer(layer l, network net) 133 | { 134 | network s = net; 135 | int i; 136 | layer input_layer = *(l.input_layer); 137 | layer self_layer = *(l.self_layer); 138 | layer output_layer = *(l.output_layer); 139 | 140 | increment_layer(&input_layer, l.steps-1); 141 | increment_layer(&self_layer, l.steps-1); 142 | increment_layer(&output_layer, l.steps-1); 143 | 144 | l.state += l.hidden*l.batch*l.steps; 145 | for (i = l.steps-1; i >= 0; --i) { 146 | copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1); 147 | axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); 148 | 149 | s.input = l.state; 150 | s.delta = self_layer.delta; 151 | backward_convolutional_layer(output_layer, s); 152 | 153 | l.state -= l.hidden*l.batch; 154 | /* 155 | if(i > 0){ 156 | copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1); 157 | axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1); 158 | }else{ 159 | fill_cpu(l.hidden * l.batch, 0, l.state, 1); 160 | } 161 | */ 162 | 163 | s.input = l.state; 164 | s.delta = self_layer.delta - l.hidden*l.batch; 165 | if (i == 0) s.delta = 0; 166 | backward_convolutional_layer(self_layer, s); 167 | 168 | copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1); 169 | if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1); 170 | s.input = net.input + i*l.inputs*l.batch; 171 | if(net.delta) s.delta = net.delta + i*l.inputs*l.batch; 172 | else s.delta = 0; 173 | backward_convolutional_layer(input_layer, s); 174 | 175 | increment_layer(&input_layer, -1); 176 | increment_layer(&self_layer, -1); 177 | increment_layer(&output_layer, -1); 178 | } 179 | } 180 | 181 | #ifdef DKGPU 182 | 183 | void pull_crnn_layer(layer l) 184 | { 185 | pull_convolutional_layer(*(l.input_layer)); 186 | pull_convolutional_layer(*(l.self_layer)); 187 | pull_convolutional_layer(*(l.output_layer)); 188 | } 189 | 190 | void push_crnn_layer(layer l) 191 | { 192 | push_convolutional_layer(*(l.input_layer)); 193 | push_convolutional_layer(*(l.self_layer)); 194 | push_convolutional_layer(*(l.output_layer)); 195 | } 196 | 197 | void update_crnn_layer_gpu(layer l, update_args a) 198 | { 199 | update_convolutional_layer_gpu(*(l.input_layer), 
a); 200 | update_convolutional_layer_gpu(*(l.self_layer), a); 201 | update_convolutional_layer_gpu(*(l.output_layer), a); 202 | } 203 | 204 | void forward_crnn_layer_gpu(layer l, network net) 205 | { 206 | network s = net; 207 | int i; 208 | layer input_layer = *(l.input_layer); 209 | layer self_layer = *(l.self_layer); 210 | layer output_layer = *(l.output_layer); 211 | 212 | fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); 213 | fill_gpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1); 214 | fill_gpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1); 215 | if(net.train) fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); 216 | 217 | for (i = 0; i < l.steps; ++i) { 218 | s.input_gpu = net.input_gpu; 219 | forward_convolutional_layer_gpu(input_layer, s); 220 | 221 | s.input_gpu = l.state_gpu; 222 | forward_convolutional_layer_gpu(self_layer, s); 223 | 224 | float *old_state = l.state_gpu; 225 | if(net.train) l.state_gpu += l.hidden*l.batch; 226 | if(l.shortcut){ 227 | copy_gpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1); 228 | }else{ 229 | fill_gpu(l.hidden * l.batch, 0, l.state_gpu, 1); 230 | } 231 | axpy_gpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); 232 | axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); 233 | 234 | s.input_gpu = l.state_gpu; 235 | forward_convolutional_layer_gpu(output_layer, s); 236 | 237 | net.input_gpu += l.inputs*l.batch; 238 | increment_layer(&input_layer, 1); 239 | increment_layer(&self_layer, 1); 240 | increment_layer(&output_layer, 1); 241 | } 242 | } 243 | 244 | void backward_crnn_layer_gpu(layer l, network net) 245 | { 246 | network s = net; 247 | s.train = net.train; 248 | int i; 249 | layer input_layer = *(l.input_layer); 250 | layer self_layer = *(l.self_layer); 251 | layer output_layer = *(l.output_layer); 252 | increment_layer(&input_layer, l.steps - 1); 253 | increment_layer(&self_layer, l.steps - 1); 254 | increment_layer(&output_layer, l.steps - 1); 255 | l.state_gpu += l.hidden*l.batch*l.steps; 256 | for (i = l.steps-1; i >= 0; --i) { 257 | copy_gpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1); 258 | axpy_gpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); 259 | 260 | s.input_gpu = l.state_gpu; 261 | s.delta_gpu = self_layer.delta_gpu; 262 | backward_convolutional_layer_gpu(output_layer, s); 263 | 264 | l.state_gpu -= l.hidden*l.batch; 265 | 266 | s.input_gpu = l.state_gpu; 267 | s.delta_gpu = self_layer.delta_gpu - l.hidden*l.batch; 268 | if (i == 0) s.delta_gpu = 0; 269 | backward_convolutional_layer_gpu(self_layer, s); 270 | 271 | copy_gpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); 272 | if (i > 0 && l.shortcut) axpy_gpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1); 273 | s.input_gpu = net.input_gpu + i*l.inputs*l.batch; 274 | if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch; 275 | else s.delta_gpu = 0; 276 | backward_convolutional_layer_gpu(input_layer, s); 277 | 278 | increment_layer(&input_layer, -1); 279 | increment_layer(&self_layer, -1); 280 | increment_layer(&output_layer, -1); 281 | } 282 | } 283 | #endif 284 | -------------------------------------------------------------------------------- /darknet/src/crop_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "crop_layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | image get_crop_image(crop_layer l) 6 | { 7 | int h = 
l.out_h; 8 | int w = l.out_w; 9 | int c = l.out_c; 10 | return float_to_image(w,h,c,l.output); 11 | } 12 | 13 | void backward_crop_layer(const crop_layer l, network net){} 14 | void backward_crop_layer_gpu(const crop_layer l, network net){} 15 | 16 | crop_layer make_crop_layer(int batch, 17 | int h, 18 | int w, 19 | int c, 20 | int crop_height, 21 | int crop_width, 22 | int flip, 23 | float angle, 24 | float saturation, 25 | float exposure) 26 | { 27 | fprintf(stderr, 28 | "Crop Layer: %d x %d -> %d x %d x %d image\n", 29 | h, 30 | w, 31 | crop_height, 32 | crop_width, 33 | c); 34 | crop_layer l; 35 | l.type = CROP; 36 | l.batch = batch; 37 | l.h = h; 38 | l.w = w; 39 | l.c = c; 40 | l.scale = (float)crop_height / h; 41 | l.flip = flip; 42 | l.angle = angle; 43 | l.saturation = saturation; 44 | l.exposure = exposure; 45 | l.out_w = crop_width; 46 | l.out_h = crop_height; 47 | l.out_c = c; 48 | l.inputs = l.w * l.h * l.c; 49 | l.outputs = l.out_w * l.out_h * l.out_c; 50 | l.output = (float *)calloc(l.outputs * batch, sizeof(float)); 51 | l.forward = forward_crop_layer; 52 | l.backward = backward_crop_layer; 53 | 54 | #ifdef DKGPU 55 | l.forward_gpu = forward_crop_layer_gpu; 56 | l.backward_gpu = backward_crop_layer_gpu; 57 | l.output_gpu = cuda_make_array(l.output, l.outputs*batch); 58 | l.rand_gpu = cuda_make_array(0, l.batch*8); 59 | #endif 60 | return l; 61 | } 62 | 63 | void resize_crop_layer(layer *l, int w, int h) 64 | { 65 | l->w = w; 66 | l->h = h; 67 | 68 | l->out_w = l->scale*w; 69 | l->out_h = l->scale*h; 70 | 71 | l->inputs = l->w * l->h * l->c; 72 | l->outputs = l->out_h * l->out_w * l->out_c; 73 | 74 | l->output = (float*)realloc(l->output, l->batch*l->outputs*sizeof(float)); 75 | #ifdef DKGPU 76 | cuda_free(l->output_gpu); 77 | l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); 78 | #endif 79 | } 80 | 81 | 82 | void forward_crop_layer(const crop_layer l, network net) 83 | { 84 | int i,j,c,b,row,col; 85 | int index; 86 | int count = 0; 87 | int flip = (l.flip && rand()%2); 88 | int dh = rand()%(l.h - l.out_h + 1); 89 | int dw = rand()%(l.w - l.out_w + 1); 90 | float scale = 2; 91 | float trans = -1; 92 | if(l.noadjust){ 93 | scale = 1; 94 | trans = 0; 95 | } 96 | if(!net.train){ 97 | flip = 0; 98 | dh = (l.h - l.out_h)/2; 99 | dw = (l.w - l.out_w)/2; 100 | } 101 | for(b = 0; b < l.batch; ++b){ 102 | for(c = 0; c < l.c; ++c){ 103 | for(i = 0; i < l.out_h; ++i){ 104 | for(j = 0; j < l.out_w; ++j){ 105 | if(flip){ 106 | col = l.w - dw - j - 1; 107 | }else{ 108 | col = j + dw; 109 | } 110 | row = i + dh; 111 | index = col+l.w*(row+l.h*(c + l.c*b)); 112 | l.output[count++] = net.input[index]*scale + trans; 113 | } 114 | } 115 | } 116 | } 117 | } 118 | 119 | -------------------------------------------------------------------------------- /darknet/src/crop_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "crop_layer.h" 7 | #include "utils.h" 8 | #include "cuda.h" 9 | #include "image.h" 10 | } 11 | 12 | __device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) 13 | { 14 | if(x < 0 || x >= w || y < 0 || y >= h) return 0; 15 | return image[x + w*(y + c*h)]; 16 | } 17 | 18 | __device__ float3 rgb_to_hsv_kernel(float3 rgb) 19 | { 20 | float r = rgb.x; 21 | float g = rgb.y; 22 | float b = rgb.z; 23 | 24 | float h, s, v; 25 | float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? 
g : b); 26 | float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? g : b); 27 | float delta = max - min; 28 | v = max; 29 | if(max == 0){ 30 | s = 0; 31 | h = -1; 32 | }else{ 33 | s = delta/max; 34 | if(r == max){ 35 | h = (g - b) / delta; 36 | } else if (g == max) { 37 | h = 2 + (b - r) / delta; 38 | } else { 39 | h = 4 + (r - g) / delta; 40 | } 41 | if (h < 0) h += 6; 42 | } 43 | return make_float3(h, s, v); 44 | } 45 | 46 | __device__ float3 hsv_to_rgb_kernel(float3 hsv) 47 | { 48 | float h = hsv.x; 49 | float s = hsv.y; 50 | float v = hsv.z; 51 | 52 | float r, g, b; 53 | float f, p, q, t; 54 | 55 | if (s == 0) { 56 | r = g = b = v; 57 | } else { 58 | int index = (int) floorf(h); 59 | f = h - index; 60 | p = v*(1-s); 61 | q = v*(1-s*f); 62 | t = v*(1-s*(1-f)); 63 | if(index == 0){ 64 | r = v; g = t; b = p; 65 | } else if(index == 1){ 66 | r = q; g = v; b = p; 67 | } else if(index == 2){ 68 | r = p; g = v; b = t; 69 | } else if(index == 3){ 70 | r = p; g = q; b = v; 71 | } else if(index == 4){ 72 | r = t; g = p; b = v; 73 | } else { 74 | r = v; g = p; b = q; 75 | } 76 | } 77 | r = (r < 0) ? 0 : ((r > 1) ? 1 : r); 78 | g = (g < 0) ? 0 : ((g > 1) ? 1 : g); 79 | b = (b < 0) ? 0 : ((b > 1) ? 1 : b); 80 | return make_float3(r, g, b); 81 | } 82 | 83 | __device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c) 84 | { 85 | int ix = (int) floorf(x); 86 | int iy = (int) floorf(y); 87 | 88 | float dx = x - ix; 89 | float dy = y - iy; 90 | 91 | float val = (1-dy) * (1-dx) * get_pixel_kernel(image, w, h, ix, iy, c) + 92 | dy * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) + 93 | (1-dy) * dx * get_pixel_kernel(image, w, h, ix+1, iy, c) + 94 | dy * dx * get_pixel_kernel(image, w, h, ix+1, iy+1, c); 95 | return val; 96 | } 97 | 98 | __global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift) 99 | { 100 | int size = batch * w * h; 101 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 102 | if(id >= size) return; 103 | int x = id % w; 104 | id /= w; 105 | int y = id % h; 106 | id /= h; 107 | float rshift = rand[0]; 108 | float gshift = rand[1]; 109 | float bshift = rand[2]; 110 | float r0 = rand[8*id + 0]; 111 | float r1 = rand[8*id + 1]; 112 | float r2 = rand[8*id + 2]; 113 | float r3 = rand[8*id + 3]; 114 | 115 | saturation = r0*(saturation - 1) + 1; 116 | saturation = (r1 > .5f) ? 1.f/saturation : saturation; 117 | exposure = r2*(exposure - 1) + 1; 118 | exposure = (r3 > .5f) ? 
1.f/exposure : exposure; 119 | 120 | size_t offset = id * h * w * 3; 121 | image += offset; 122 | float r = image[x + w*(y + h*0)]; 123 | float g = image[x + w*(y + h*1)]; 124 | float b = image[x + w*(y + h*2)]; 125 | float3 rgb = make_float3(r,g,b); 126 | if(train){ 127 | float3 hsv = rgb_to_hsv_kernel(rgb); 128 | hsv.y *= saturation; 129 | hsv.z *= exposure; 130 | rgb = hsv_to_rgb_kernel(hsv); 131 | } else { 132 | shift = 0; 133 | } 134 | image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift; 135 | image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift; 136 | image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift; 137 | } 138 | 139 | __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output) 140 | { 141 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 142 | if(id >= size) return; 143 | 144 | float cx = w/2.f; 145 | float cy = h/2.f; 146 | 147 | int count = id; 148 | int j = id % crop_width; 149 | id /= crop_width; 150 | int i = id % crop_height; 151 | id /= crop_height; 152 | int k = id % c; 153 | id /= c; 154 | int b = id; 155 | 156 | float r4 = rand[8*b + 4]; 157 | float r5 = rand[8*b + 5]; 158 | float r6 = rand[8*b + 6]; 159 | float r7 = rand[8*b + 7]; 160 | 161 | float dw = (w - crop_width)*r4; 162 | float dh = (h - crop_height)*r5; 163 | flip = (flip && (r6 > .5f)); 164 | angle = 2*angle*r7 - angle; 165 | if(!train){ 166 | dw = (w - crop_width)/2.f; 167 | dh = (h - crop_height)/2.f; 168 | flip = 0; 169 | angle = 0; 170 | } 171 | 172 | input += w*h*c*b; 173 | 174 | float x = (flip) ? w - dw - j - 1 : j + dw; 175 | float y = i + dh; 176 | 177 | float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx; 178 | float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy; 179 | 180 | output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k); 181 | } 182 | 183 | extern "C" void forward_crop_layer_gpu(crop_layer layer, network net) 184 | { 185 | cuda_random(layer.rand_gpu, layer.batch*8); 186 | 187 | float radians = layer.angle*3.14159265f/180.f; 188 | 189 | float scale = 2; 190 | float translate = -1; 191 | if(layer.noadjust){ 192 | scale = 1; 193 | translate = 0; 194 | } 195 | 196 | int size = layer.batch * layer.w * layer.h; 197 | 198 | levels_image_kernel<<>>(net.input_gpu, layer.rand_gpu, layer.batch, layer.w, layer.h, net.train, layer.saturation, layer.exposure, translate, scale, layer.shift); 199 | check_error(cudaPeekAtLastError()); 200 | 201 | size = layer.batch*layer.c*layer.out_w*layer.out_h; 202 | 203 | forward_crop_layer_kernel<<>>(net.input_gpu, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, net.train, layer.flip, radians, layer.output_gpu); 204 | check_error(cudaPeekAtLastError()); 205 | 206 | /* 207 | cuda_pull_array(layer.output_gpu, layer.output, size); 208 | image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch)); 209 | image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch)); 210 | image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch)); 211 | 212 | translate_image(im, -translate); 213 | scale_image(im, 1/scale); 214 | translate_image(im2, -translate); 215 | scale_image(im2, 1/scale); 216 | translate_image(im3, -translate); 217 | scale_image(im3, 1/scale); 218 | 219 | show_image(im, 
"cropped"); 220 | show_image(im2, "cropped2"); 221 | show_image(im3, "cropped3"); 222 | cvWaitKey(0); 223 | */ 224 | } 225 | 226 | -------------------------------------------------------------------------------- /darknet/src/cuda.cpp: -------------------------------------------------------------------------------- 1 | int gpu_index = 0; 2 | 3 | #ifdef DKGPU 4 | 5 | #include "cuda.h" 6 | #include 7 | #include 8 | #include 9 | #include "blas.h" 10 | #include "utils.h" 11 | 12 | void cuda_set_device(int n) 13 | { 14 | gpu_index = n; 15 | cudaError_t status = cudaSetDevice(n); 16 | check_error(status); 17 | } 18 | 19 | int cuda_get_device() 20 | { 21 | int n = 0; 22 | cudaError_t status = cudaGetDevice(&n); 23 | check_error(status); 24 | return n; 25 | } 26 | 27 | void check_error(cudaError_t status) 28 | { 29 | // cudaDeviceSynchronize(); 30 | cudaError_t status2 = cudaGetLastError(); 31 | if (status != cudaSuccess) 32 | { 33 | const char *s = cudaGetErrorString(status); 34 | char buffer[256]; 35 | printf("CUDA Error: %s\n", s); 36 | assert(0); 37 | snprintf(buffer, 256, "CUDA Error: %s", s); 38 | error(buffer); 39 | } 40 | if (status2 != cudaSuccess) 41 | { 42 | const char *s = cudaGetErrorString(status); 43 | char buffer[256]; 44 | printf("CUDA Error Prev: %s\n", s); 45 | assert(0); 46 | snprintf(buffer, 256, "CUDA Error Prev: %s", s); 47 | error(buffer); 48 | } 49 | } 50 | 51 | dim3 cuda_gridsize(size_t n) 52 | { 53 | size_t k = (n - 1) / BLOCK + 1; 54 | size_t x = k; 55 | size_t y = 1; 56 | if (x > 65535) 57 | { 58 | x = ceil(sqrt(k)); 59 | y = (n - 1) / (x * BLOCK) + 1; 60 | } 61 | dim3 d = {x, y, 1}; 62 | // printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); 63 | return d; 64 | } 65 | 66 | #ifdef CUDNN 67 | cudnnHandle_t cudnn_handle() 68 | { 69 | static int init[16] = {0}; 70 | static cudnnHandle_t handle[16]; 71 | int i = cuda_get_device(); 72 | if (!init[i]) 73 | { 74 | cudnnCreate(&handle[i]); 75 | init[i] = 1; 76 | } 77 | return handle[i]; 78 | } 79 | #endif 80 | 81 | cublasHandle_t blas_handle() 82 | { 83 | static int init[16] = {0}; 84 | static cublasHandle_t handle[16]; 85 | int i = cuda_get_device(); 86 | if (!init[i]) 87 | { 88 | cublasCreate(&handle[i]); 89 | init[i] = 1; 90 | } 91 | return handle[i]; 92 | } 93 | 94 | float *cuda_make_array(float *x, size_t n) 95 | { 96 | float *x_gpu; 97 | size_t size = sizeof(float) * n; 98 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 99 | check_error(status); 100 | if (x) 101 | { 102 | status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 103 | check_error(status); 104 | } 105 | else 106 | { 107 | fill_gpu(n, 0, x_gpu, 1); 108 | } 109 | if (!x_gpu) error("Cuda malloc failed\n"); 110 | return x_gpu; 111 | } 112 | 113 | void cuda_random(float *x_gpu, size_t n) 114 | { 115 | static curandGenerator_t gen[16]; 116 | static int init[16] = {0}; 117 | int i = cuda_get_device(); 118 | if (!init[i]) 119 | { 120 | curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); 121 | curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); 122 | init[i] = 1; 123 | } 124 | curandGenerateUniform(gen[i], x_gpu, n); 125 | check_error(cudaPeekAtLastError()); 126 | } 127 | 128 | float cuda_compare(float *x_gpu, float *x, size_t n, char *s) 129 | { 130 | float *tmp = (float *)calloc(n, sizeof(float)); 131 | cuda_pull_array(x_gpu, tmp, n); 132 | // int i; 133 | // for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); 134 | axpy_cpu(n, -1, x, 1, tmp, 1); 135 | float err = dot_cpu(n, tmp, 1, tmp, 1); 136 | printf("Error %s: %f\n", s, sqrt(err / n)); 
137 | free(tmp); 138 | return err; 139 | } 140 | 141 | int *cuda_make_int_array(int *x, size_t n) 142 | { 143 | int *x_gpu; 144 | size_t size = sizeof(int) * n; 145 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 146 | check_error(status); 147 | if (x) 148 | { 149 | status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 150 | check_error(status); 151 | } 152 | if (!x_gpu) error("Cuda malloc failed\n"); 153 | return x_gpu; 154 | } 155 | 156 | void cuda_free(float *x_gpu) 157 | { 158 | cudaError_t status = cudaFree(x_gpu); 159 | check_error(status); 160 | } 161 | 162 | void cuda_push_array(float *x_gpu, float *x, size_t n) 163 | { 164 | size_t size = sizeof(float) * n; 165 | cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 166 | check_error(status); 167 | } 168 | 169 | void cuda_pull_array(float *x_gpu, float *x, size_t n) 170 | { 171 | size_t size = sizeof(float) * n; 172 | cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); 173 | check_error(status); 174 | } 175 | 176 | float cuda_mag_array(float *x_gpu, size_t n) 177 | { 178 | float *temp = (float *)calloc(n, sizeof(float)); 179 | cuda_pull_array(x_gpu, temp, n); 180 | float m = mag_array(temp, n); 181 | free(temp); 182 | return m; 183 | } 184 | #else 185 | void cuda_set_device(int n) 186 | { 187 | } 188 | 189 | #endif 190 | -------------------------------------------------------------------------------- /darknet/src/deconvolutional_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "convolutional_layer.h" 7 | #include "deconvolutional_layer.h" 8 | #include "batchnorm_layer.h" 9 | #include "gemm.h" 10 | #include "blas.h" 11 | #include "im2col.h" 12 | #include "col2im.h" 13 | #include "utils.h" 14 | #include "cuda.h" 15 | } 16 | 17 | extern "C" void forward_deconvolutional_layer_gpu(layer l, network net) 18 | { 19 | int i; 20 | 21 | int m = l.size*l.size*l.n; 22 | int n = l.h*l.w; 23 | int k = l.c; 24 | 25 | fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1); 26 | 27 | for(i = 0; i < l.batch; ++i){ 28 | float *a = l.weights_gpu; 29 | float *b = net.input_gpu + i*l.c*l.h*l.w; 30 | float *c = net.workspace; 31 | 32 | gemm_gpu(1,0,m,n,k,1,a,m,b,n,0,c,n); 33 | 34 | col2im_gpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.outputs); 35 | } 36 | if (l.batch_normalize) { 37 | forward_batchnorm_layer_gpu(l, net); 38 | } else { 39 | add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); 40 | } 41 | activate_array_gpu(l.output_gpu, l.batch*l.n*l.out_w*l.out_h, l.activation); 42 | } 43 | 44 | extern "C" void backward_deconvolutional_layer_gpu(layer l, network net) 45 | { 46 | int i; 47 | 48 | //constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1); 49 | gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); 50 | 51 | if(l.batch_normalize){ 52 | backward_batchnorm_layer_gpu(l, net); 53 | } else { 54 | backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); 55 | } 56 | 57 | //if(net.delta_gpu) memset(net.delta_gpu, 0, l.batch*l.h*l.w*l.c*sizeof(float)); 58 | 59 | for(i = 0; i < l.batch; ++i){ 60 | int m = l.c; 61 | int n = l.size*l.size*l.n; 62 | int k = l.h*l.w; 63 | 64 | float *a = net.input_gpu + i*m*k; 65 | float *b = net.workspace; 66 | float *c = l.weight_updates_gpu; 67 | 68 | im2col_gpu(l.delta_gpu + i*l.outputs, l.out_c, l.out_h, 
l.out_w, 69 | l.size, l.stride, l.pad, b); 70 | gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n); 71 | 72 | if(net.delta_gpu){ 73 | int m = l.c; 74 | int n = l.h*l.w; 75 | int k = l.size*l.size*l.n; 76 | 77 | float *a = l.weights_gpu; 78 | float *b = net.workspace; 79 | float *c = net.delta_gpu + i*n*m; 80 | 81 | gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n); 82 | } 83 | } 84 | } 85 | 86 | extern "C" void pull_deconvolutional_layer(layer l) 87 | { 88 | cuda_pull_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size); 89 | cuda_pull_array(l.biases_gpu, l.biases, l.n); 90 | cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); 91 | cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); 92 | if (l.batch_normalize){ 93 | cuda_pull_array(l.scales_gpu, l.scales, l.n); 94 | cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n); 95 | cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n); 96 | } 97 | } 98 | 99 | extern "C" void push_deconvolutional_layer(layer l) 100 | { 101 | cuda_push_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size); 102 | cuda_push_array(l.biases_gpu, l.biases, l.n); 103 | cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size); 104 | cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); 105 | if (l.batch_normalize){ 106 | cuda_push_array(l.scales_gpu, l.scales, l.n); 107 | cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); 108 | cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); 109 | } 110 | } 111 | 112 | void update_deconvolutional_layer_gpu(layer l, update_args a) 113 | { 114 | float learning_rate = a.learning_rate*l.learning_rate_scale; 115 | float momentum = a.momentum; 116 | float decay = a.decay; 117 | int batch = a.batch; 118 | 119 | if(a.adam){ 120 | adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); 121 | adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); 122 | if(l.scales_gpu){ 123 | adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t); 124 | } 125 | }else{ 126 | axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); 127 | axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); 128 | scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1); 129 | 130 | axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); 131 | scal_gpu(l.n, momentum, l.bias_updates_gpu, 1); 132 | 133 | if(l.scales_gpu){ 134 | axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); 135 | scal_gpu(l.n, momentum, l.scale_updates_gpu, 1); 136 | } 137 | } 138 | } 139 | 140 | -------------------------------------------------------------------------------- /darknet/src/demo.cpp: -------------------------------------------------------------------------------- 1 | #include "network.h" 2 | #include "detection_layer.h" 3 | #include "region_layer.h" 4 | #include "cost_layer.h" 5 | #include "utils.h" 6 | #include "parser.h" 7 | #include "box.h" 8 | #include "image.h" 9 | #include "demo.h" 10 | #include 11 | 12 | #define FRAMES 3 13 | 14 | #ifdef OPENCV 15 | #include "opencv2/highgui/highgui_c.h" 16 | #include "opencv2/imgproc/imgproc_c.h" 17 | image get_image_from_stream(CvCapture *cap); 18 | 19 | static char **demo_names; 20 | static image **demo_alphabet; 21 | static int demo_classes; 22 | 
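/* File-scope state shared between fetch_in_thread() and detect_in_thread().
 * demo() below runs each of them on its own pthread per frame and joins both
 * threads before reusing these buffers, which is the only synchronization
 * protecting them. */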
23 | static float **probs; 24 | static box *boxes; 25 | static network net; 26 | static image in ; 27 | static image in_s ; 28 | static image det ; 29 | static image det_s; 30 | static image disp = {0}; 31 | static CvCapture * cap; 32 | static float fps = 0; 33 | static float demo_thresh = 0; 34 | static float demo_hier_thresh = .5; 35 | 36 | static float *predictions[FRAMES]; 37 | static int demo_index = 0; 38 | static image images[FRAMES]; 39 | static float *avg; 40 | 41 | void *fetch_in_thread(void *ptr) 42 | { 43 | in = get_image_from_stream(cap); 44 | if(!in.data){ 45 | error("Stream closed."); 46 | } 47 | in_s = resize_image(in, net.w, net.h); 48 | return 0; 49 | } 50 | 51 | void *detect_in_thread(void *ptr) 52 | { 53 | float nms = .4; 54 | 55 | layer l = net.layers[net.n-1]; 56 | float *X = det_s.data; 57 | float *prediction = network_predict(net, X); 58 | 59 | memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float)); 60 | mean_arrays(predictions, FRAMES, l.outputs, avg); 61 | l.output = avg; 62 | 63 | free_image(det_s); 64 | if(l.type == DETECTION){ 65 | get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); 66 | } else if (l.type == REGION){ 67 | get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0, 0, demo_hier_thresh); 68 | } else { 69 | error("Last layer must produce detections\n"); 70 | } 71 | if (nms > 0) do_nms(boxes, probs, l.w*l.h*l.n, l.classes, nms); 72 | printf("\033[2J"); 73 | printf("\033[1;1H"); 74 | printf("\nFPS:%.1f\n",fps); 75 | printf("Objects:\n\n"); 76 | 77 | images[demo_index] = det; 78 | det = images[(demo_index + FRAMES/2 + 1)%FRAMES]; 79 | demo_index = (demo_index + 1)%FRAMES; 80 | 81 | draw_detections(det, l.w*l.h*l.n, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes); 82 | 83 | return 0; 84 | } 85 | 86 | double get_wall_time() 87 | { 88 | struct timeval time; 89 | if (gettimeofday(&time,NULL)){ 90 | return 0; 91 | } 92 | return (double)time.tv_sec + (double)time.tv_usec * .000001; 93 | } 94 | 95 | void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh) 96 | { 97 | //skip = frame_skip; 98 | image **alphabet = load_alphabet(); 99 | int delay = frame_skip; 100 | demo_names = names; 101 | demo_alphabet = alphabet; 102 | demo_classes = classes; 103 | demo_thresh = thresh; 104 | demo_hier_thresh = hier_thresh; 105 | printf("Demo\n"); 106 | net = parse_network_cfg(cfgfile); 107 | if(weightfile){ 108 | load_weights(&net, weightfile); 109 | } 110 | set_batch_network(&net, 1); 111 | 112 | srand(2222222); 113 | 114 | if(filename){ 115 | printf("video file: %s\n", filename); 116 | cap = cvCaptureFromFile(filename); 117 | }else{ 118 | cap = cvCaptureFromCAM(cam_index); 119 | } 120 | 121 | if(!cap) error("Couldn't connect to webcam.\n"); 122 | 123 | layer l = net.layers[net.n-1]; 124 | int j; 125 | 126 | avg = (float *) calloc(l.outputs, sizeof(float)); 127 | for(j = 0; j < FRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float)); 128 | for(j = 0; j < FRAMES; ++j) images[j] = make_image(1,1,3); 129 | 130 | boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); 131 | probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); 132 | for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float)); 133 | 134 | pthread_t fetch_thread; 135 | pthread_t detect_thread; 136 | 137 | fetch_in_thread(0); 138 | det = in; 139 | det_s = in_s; 140 | 141 | fetch_in_thread(0); 142 | detect_in_thread(0); 143 | 
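    /* Prime the detection pipeline: detect_in_thread() averages the last FRAMES
     * network outputs with mean_arrays() before decoding boxes, so a few
     * fetch/detect rounds are run here and in the short loop just below to fill
     * predictions[] and images[] before the main display loop starts. */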
disp = det; 144 | det = in; 145 | det_s = in_s; 146 | 147 | for(j = 0; j < FRAMES/2; ++j){ 148 | fetch_in_thread(0); 149 | detect_in_thread(0); 150 | disp = det; 151 | det = in; 152 | det_s = in_s; 153 | } 154 | 155 | int count = 0; 156 | if(!prefix){ 157 | cvNamedWindow("Demo", CV_WINDOW_NORMAL); 158 | cvMoveWindow("Demo", 0, 0); 159 | cvResizeWindow("Demo", 1352, 1013); 160 | } 161 | 162 | double before = get_wall_time(); 163 | 164 | while(1){ 165 | ++count; 166 | if(1){ 167 | if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); 168 | if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); 169 | 170 | if(!prefix){ 171 | show_image(disp, "Demo"); 172 | int c = cvWaitKey(1); 173 | if (c == 10){ 174 | if(frame_skip == 0) frame_skip = 60; 175 | else if(frame_skip == 4) frame_skip = 0; 176 | else if(frame_skip == 60) frame_skip = 4; 177 | else frame_skip = 0; 178 | } 179 | }else{ 180 | char buff[256]; 181 | sprintf(buff, "%s_%08d", prefix, count); 182 | save_image(disp, buff); 183 | } 184 | 185 | pthread_join(fetch_thread, 0); 186 | pthread_join(detect_thread, 0); 187 | 188 | if(delay == 0){ 189 | free_image(disp); 190 | disp = det; 191 | } 192 | det = in; 193 | det_s = in_s; 194 | }else { 195 | fetch_in_thread(0); 196 | det = in; 197 | det_s = in_s; 198 | detect_in_thread(0); 199 | if(delay == 0) { 200 | free_image(disp); 201 | disp = det; 202 | } 203 | show_image(disp, "Demo"); 204 | cvWaitKey(1); 205 | } 206 | --delay; 207 | if(delay < 0){ 208 | delay = frame_skip; 209 | 210 | double after = get_wall_time(); 211 | float curr = 1./(after - before); 212 | fps = curr; 213 | before = after; 214 | } 215 | } 216 | } 217 | #else 218 | void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, float hier_thresh) 219 | { 220 | fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); 221 | } 222 | #endif 223 | 224 | -------------------------------------------------------------------------------- /darknet/src/dropout_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "dropout_layer.h" 2 | #include 3 | #include 4 | #include "cuda.h" 5 | #include "utils.h" 6 | 7 | dropout_layer make_dropout_layer(int batch, int inputs, float probability) 8 | { 9 | dropout_layer l; 10 | l.type = DROPOUT; 11 | l.probability = probability; 12 | l.inputs = inputs; 13 | l.outputs = inputs; 14 | l.batch = batch; 15 | l.rand = (float *)calloc(inputs * batch, sizeof(float)); 16 | l.scale = 1. / (1. 
- probability); 17 | l.forward = forward_dropout_layer; 18 | l.backward = backward_dropout_layer; 19 | #ifdef DKGPU 20 | l.forward_gpu = forward_dropout_layer_gpu; 21 | l.backward_gpu = backward_dropout_layer_gpu; 22 | l.rand_gpu = cuda_make_array(l.rand, inputs * batch); 23 | #endif 24 | fprintf(stderr, 25 | "dropout p = %.2f %4d -> %4d\n", 26 | probability, 27 | inputs, 28 | inputs); 29 | return l; 30 | } 31 | 32 | void resize_dropout_layer(dropout_layer *l, int inputs) 33 | { 34 | l->rand = (float *)realloc(l->rand, l->inputs * l->batch * sizeof(float)); 35 | #ifdef DKGPU 36 | cuda_free(l->rand_gpu); 37 | 38 | l->rand_gpu = cuda_make_array(l->rand, inputs * l->batch); 39 | #endif 40 | } 41 | 42 | void forward_dropout_layer(dropout_layer l, network net) 43 | { 44 | int i; 45 | if (!net.train) return; 46 | for (i = 0; i < l.batch * l.inputs; ++i) 47 | { 48 | float r = rand_uniform(0, 1); 49 | l.rand[i] = r; 50 | if (r < l.probability) 51 | net.input[i] = 0; 52 | else 53 | net.input[i] *= l.scale; 54 | } 55 | } 56 | 57 | void backward_dropout_layer(dropout_layer l, network net) 58 | { 59 | int i; 60 | if (!net.delta) return; 61 | for (i = 0; i < l.batch * l.inputs; ++i) 62 | { 63 | float r = l.rand[i]; 64 | if (r < l.probability) 65 | net.delta[i] = 0; 66 | else 67 | net.delta[i] *= l.scale; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /darknet/src/dropout_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "dropout_layer.h" 7 | #include "cuda.h" 8 | #include "utils.h" 9 | } 10 | 11 | __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) 12 | { 13 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 14 | if(id < size) input[id] = (rand[id] < prob) ? 0 : input[id]*scale; 15 | } 16 | 17 | void forward_dropout_layer_gpu(dropout_layer layer, network net) 18 | { 19 | if (!net.train) return; 20 | int size = layer.inputs*layer.batch; 21 | cuda_random(layer.rand_gpu, size); 22 | /* 23 | int i; 24 | for(i = 0; i < size; ++i){ 25 | layer.rand[i] = rand_uniform(); 26 | } 27 | cuda_push_array(layer.rand_gpu, layer.rand, size); 28 | */ 29 | 30 | yoloswag420blazeit360noscope<<>>(net.input_gpu, size, layer.rand_gpu, layer.probability, layer.scale); 31 | check_error(cudaPeekAtLastError()); 32 | } 33 | 34 | void backward_dropout_layer_gpu(dropout_layer layer, network net) 35 | { 36 | if(!net.delta_gpu) return; 37 | int size = layer.inputs*layer.batch; 38 | 39 | yoloswag420blazeit360noscope<<>>(net.delta_gpu, size, layer.rand_gpu, layer.probability, layer.scale); 40 | check_error(cudaPeekAtLastError()); 41 | } 42 | -------------------------------------------------------------------------------- /darknet/src/im2col.cpp: -------------------------------------------------------------------------------- 1 | #include "im2col.h" 2 | #include 3 | float im2col_get_pixel(float *im, int height, int width, int channels, 4 | int row, int col, int channel, int pad) 5 | { 6 | row -= pad; 7 | col -= pad; 8 | 9 | if (row < 0 || col < 0 || 10 | row >= height || col >= width) return 0; 11 | return im[col + width*(row + height*channel)]; 12 | } 13 | 14 | //From Berkeley Vision's Caffe! 
15 | //https://github.com/BVLC/caffe/blob/master/LICENSE 16 | void im2col_cpu(float* data_im, 17 | int channels, int height, int width, 18 | int ksize, int stride, int pad, float* data_col) 19 | { 20 | int c,h,w; 21 | int height_col = (height + 2*pad - ksize) / stride + 1; 22 | int width_col = (width + 2*pad - ksize) / stride + 1; 23 | 24 | int channels_col = channels * ksize * ksize; 25 | for (c = 0; c < channels_col; ++c) { 26 | int w_offset = c % ksize; 27 | int h_offset = (c / ksize) % ksize; 28 | int c_im = c / ksize / ksize; 29 | for (h = 0; h < height_col; ++h) { 30 | for (w = 0; w < width_col; ++w) { 31 | int im_row = h_offset + h * stride; 32 | int im_col = w_offset + w * stride; 33 | int col_index = (c * height_col + h) * width_col + w; 34 | data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, 35 | im_row, im_col, c_im, pad); 36 | } 37 | } 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /darknet/src/im2col_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "im2col.h" 7 | #include "cuda.h" 8 | } 9 | 10 | // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu 11 | // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE 12 | 13 | __global__ void im2col_gpu_kernel(const int n, const float* data_im, 14 | const int height, const int width, const int ksize, 15 | const int pad, 16 | const int stride, 17 | const int height_col, const int width_col, 18 | float *data_col) { 19 | int index = blockIdx.x*blockDim.x+threadIdx.x; 20 | for(; index < n; index += blockDim.x*gridDim.x){ 21 | int w_out = index % width_col; 22 | int h_index = index / width_col; 23 | int h_out = h_index % height_col; 24 | int channel_in = h_index / height_col; 25 | int channel_out = channel_in * ksize * ksize; 26 | int h_in = h_out * stride - pad; 27 | int w_in = w_out * stride - pad; 28 | float* data_col_ptr = data_col; 29 | data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; 30 | const float* data_im_ptr = data_im; 31 | data_im_ptr += (channel_in * height + h_in) * width + w_in; 32 | for (int i = 0; i < ksize; ++i) { 33 | for (int j = 0; j < ksize; ++j) { 34 | int h = h_in + i; 35 | int w = w_in + j; 36 | 37 | *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? 38 | data_im_ptr[i * width + j] : 0; 39 | 40 | //*data_col_ptr = data_im_ptr[ii * width + jj]; 41 | 42 | data_col_ptr += height_col * width_col; 43 | } 44 | } 45 | } 46 | } 47 | 48 | void im2col_gpu(float *im, 49 | int channels, int height, int width, 50 | int ksize, int stride, int pad, float *data_col){ 51 | // We are going to launch channels * height_col * width_col kernels, each 52 | // kernel responsible for copying a single-channel grid. 
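    // Worked example of the shape bookkeeping below (hypothetical sizes, not
    // taken from any particular network config):
    //   height_col = (height + 2*pad - ksize)/stride + 1, and likewise width_col.
    //   With a 3 x 416 x 416 input, ksize = 3, stride = 1, pad = 1:
    //     height_col = (416 + 2 - 3)/1 + 1 = 416, width_col = 416,
    //     num_kernels = channels * height_col * width_col = 3*416*416 = 519168,
    //   so the launch needs (num_kernels + BLOCK - 1)/BLOCK thread blocks.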
53 | int height_col = (height + 2 * pad - ksize) / stride + 1; 54 | int width_col = (width + 2 * pad - ksize) / stride + 1; 55 | int num_kernels = channels * height_col * width_col; 56 | im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, 57 | BLOCK>>>( 58 | num_kernels, im, height, width, ksize, pad, 59 | stride, height_col, 60 | width_col, data_col); 61 | } 62 | -------------------------------------------------------------------------------- /darknet/src/l2norm_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "l2norm_layer.h" 2 | #include "activations.h" 3 | #include "blas.h" 4 | #include "cuda.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | layer make_l2norm_layer(int batch, int inputs) 13 | { 14 | fprintf( 15 | stderr, "l2norm %4d\n", inputs); 16 | layer l; 17 | l.type = L2NORM; 18 | l.batch = batch; 19 | l.inputs = inputs; 20 | l.outputs = inputs; 21 | l.output = (float*)calloc(inputs * batch, sizeof(float)); 22 | l.scales = (float*)calloc(inputs * batch, sizeof(float)); 23 | l.delta = (float*)calloc(inputs * batch, sizeof(float)); 24 | 25 | l.forward = forward_l2norm_layer; 26 | l.backward = backward_l2norm_layer; 27 | #ifdef DKGPU 28 | l.forward_gpu = forward_l2norm_layer_gpu; 29 | l.backward_gpu = backward_l2norm_layer_gpu; 30 | 31 | l.output_gpu = cuda_make_array(l.output, inputs * batch); 32 | l.scales_gpu = cuda_make_array(l.output, inputs * batch); 33 | l.delta_gpu = cuda_make_array(l.delta, inputs * batch); 34 | #endif 35 | return l; 36 | } 37 | 38 | void forward_l2norm_layer(const layer l, network net) 39 | { 40 | copy_cpu(l.outputs * l.batch, net.input, 1, l.output, 1); 41 | l2normalize_cpu(l.output, l.scales, l.batch, l.out_c, l.out_w * l.out_h); 42 | } 43 | 44 | void backward_l2norm_layer(const layer l, network net) 45 | { 46 | // axpy_cpu(l.inputs*l.batch, 1, l.scales, 1, l.delta, 1); 47 | axpy_cpu(l.inputs * l.batch, 1, l.delta, 1, net.delta, 1); 48 | } 49 | 50 | #ifdef DKGPU 51 | 52 | void forward_l2norm_layer_gpu(const layer l, network net) 53 | { 54 | copy_gpu(l.outputs * l.batch, net.input_gpu, 1, l.output_gpu, 1); 55 | l2normalize_gpu( 56 | l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_w * l.out_h); 57 | } 58 | 59 | void backward_l2norm_layer_gpu(const layer l, network net) 60 | { 61 | axpy_gpu(l.batch * l.inputs, 1, l.scales_gpu, 1, l.delta_gpu, 1); 62 | axpy_gpu(l.batch * l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); 63 | } 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /darknet/src/layer.cpp: -------------------------------------------------------------------------------- 1 | #include "layer.h" 2 | #include "cuda.h" 3 | 4 | #include 5 | 6 | void free_layer(layer l) 7 | { 8 | if (l.type == DROPOUT) 9 | { 10 | if (l.rand) free(l.rand); 11 | #ifdef DKGPU 12 | if (l.rand_gpu) cuda_free(l.rand_gpu); 13 | #endif 14 | return; 15 | } 16 | if (l.cweights) free(l.cweights); 17 | if (l.indexes) free(l.indexes); 18 | if (l.input_layers) free(l.input_layers); 19 | if (l.input_sizes) free(l.input_sizes); 20 | if (l.map) free(l.map); 21 | if (l.rand) free(l.rand); 22 | if (l.cost) free(l.cost); 23 | if (l.state) free(l.state); 24 | if (l.prev_state) free(l.prev_state); 25 | if (l.forgot_state) free(l.forgot_state); 26 | if (l.forgot_delta) free(l.forgot_delta); 27 | if (l.state_delta) free(l.state_delta); 28 | if (l.concat) free(l.concat); 29 | if (l.concat_delta) free(l.concat_delta); 30 | if (l.binary_weights) 
free(l.binary_weights); 31 | if (l.biases) free(l.biases); 32 | if (l.bias_updates) free(l.bias_updates); 33 | if (l.scales) free(l.scales); 34 | if (l.scale_updates) free(l.scale_updates); 35 | if (l.weights) free(l.weights); 36 | if (l.weight_updates) free(l.weight_updates); 37 | if (l.delta) free(l.delta); 38 | if (l.output) free(l.output); 39 | if (l.squared) free(l.squared); 40 | if (l.norms) free(l.norms); 41 | if (l.spatial_mean) free(l.spatial_mean); 42 | if (l.mean) free(l.mean); 43 | if (l.variance) free(l.variance); 44 | if (l.mean_delta) free(l.mean_delta); 45 | if (l.variance_delta) free(l.variance_delta); 46 | if (l.rolling_mean) free(l.rolling_mean); 47 | if (l.rolling_variance) free(l.rolling_variance); 48 | if (l.x) free(l.x); 49 | if (l.x_norm) free(l.x_norm); 50 | if (l.m) free(l.m); 51 | if (l.v) free(l.v); 52 | if (l.z_cpu) free(l.z_cpu); 53 | if (l.r_cpu) free(l.r_cpu); 54 | if (l.h_cpu) free(l.h_cpu); 55 | if (l.binary_input) free(l.binary_input); 56 | 57 | #ifdef DKGPU 58 | if (l.indexes_gpu) cuda_free((float *)l.indexes_gpu); 59 | 60 | if (l.z_gpu) cuda_free(l.z_gpu); 61 | if (l.r_gpu) cuda_free(l.r_gpu); 62 | if (l.h_gpu) cuda_free(l.h_gpu); 63 | if (l.m_gpu) cuda_free(l.m_gpu); 64 | if (l.v_gpu) cuda_free(l.v_gpu); 65 | if (l.prev_state_gpu) cuda_free(l.prev_state_gpu); 66 | if (l.forgot_state_gpu) cuda_free(l.forgot_state_gpu); 67 | if (l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu); 68 | if (l.state_gpu) cuda_free(l.state_gpu); 69 | if (l.state_delta_gpu) cuda_free(l.state_delta_gpu); 70 | if (l.gate_gpu) cuda_free(l.gate_gpu); 71 | if (l.gate_delta_gpu) cuda_free(l.gate_delta_gpu); 72 | if (l.save_gpu) cuda_free(l.save_gpu); 73 | if (l.save_delta_gpu) cuda_free(l.save_delta_gpu); 74 | if (l.concat_gpu) cuda_free(l.concat_gpu); 75 | if (l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); 76 | if (l.binary_input_gpu) cuda_free(l.binary_input_gpu); 77 | if (l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); 78 | if (l.mean_gpu) cuda_free(l.mean_gpu); 79 | if (l.variance_gpu) cuda_free(l.variance_gpu); 80 | if (l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu); 81 | if (l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu); 82 | if (l.variance_delta_gpu) cuda_free(l.variance_delta_gpu); 83 | if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); 84 | if (l.x_gpu) cuda_free(l.x_gpu); 85 | if (l.x_norm_gpu) cuda_free(l.x_norm_gpu); 86 | if (l.weights_gpu) cuda_free(l.weights_gpu); 87 | if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); 88 | if (l.biases_gpu) cuda_free(l.biases_gpu); 89 | if (l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); 90 | if (l.scales_gpu) cuda_free(l.scales_gpu); 91 | if (l.scale_updates_gpu) cuda_free(l.scale_updates_gpu); 92 | if (l.output_gpu) cuda_free(l.output_gpu); 93 | if (l.delta_gpu) cuda_free(l.delta_gpu); 94 | if (l.rand_gpu) cuda_free(l.rand_gpu); 95 | if (l.squared_gpu) cuda_free(l.squared_gpu); 96 | if (l.norms_gpu) cuda_free(l.norms_gpu); 97 | #endif 98 | } 99 | -------------------------------------------------------------------------------- /darknet/src/list.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "list.h" 4 | 5 | list *make_list() 6 | { 7 | list *l = (list*)malloc(sizeof(list)); 8 | l->size = 0; 9 | l->front = 0; 10 | l->back = 0; 11 | return l; 12 | } 13 | 14 | /* 15 | void transfer_node(list *s, list *d, node *n) 16 | { 17 | node *prev, *next; 18 | prev = n->prev; 19 | next = n->next; 20 | if(prev) prev->next = next; 21 | 
if(next) next->prev = prev; 22 | --s->size; 23 | if(s->front == n) s->front = next; 24 | if(s->back == n) s->back = prev; 25 | } 26 | */ 27 | 28 | void *list_pop(list *l){ 29 | if(!l->back) return 0; 30 | node *b = l->back; 31 | void *val = b->val; 32 | l->back = b->prev; 33 | if(l->back) l->back->next = 0; 34 | free(b); 35 | --l->size; 36 | 37 | return val; 38 | } 39 | 40 | void list_insert(list *l, void *val) 41 | { 42 | node *new_ = (node*)malloc(sizeof(node)); 43 | new_->val = val; 44 | new_->next = 0; 45 | 46 | if(!l->back){ 47 | l->front = new_; 48 | new_->prev = 0; 49 | }else{ 50 | l->back->next = new_; 51 | new_->prev = l->back; 52 | } 53 | l->back = new_; 54 | ++l->size; 55 | } 56 | 57 | void free_node(node *n) 58 | { 59 | node *next; 60 | while(n) { 61 | next = n->next; 62 | free(n); 63 | n = next; 64 | } 65 | } 66 | 67 | void free_list(list *l) 68 | { 69 | free_node(l->front); 70 | free(l); 71 | } 72 | 73 | void free_list_contents(list *l) 74 | { 75 | node *n = l->front; 76 | while(n){ 77 | free(n->val); 78 | n = n->next; 79 | } 80 | } 81 | 82 | void **list_to_array(list *l) 83 | { 84 | void **a = (void**)calloc(l->size, sizeof(void*)); 85 | int count = 0; 86 | node *n = l->front; 87 | while(n){ 88 | a[count++] = n->val; 89 | n = n->next; 90 | } 91 | return a; 92 | } 93 | -------------------------------------------------------------------------------- /darknet/src/local_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "local_layer.h" 2 | #include 3 | #include 4 | #include "blas.h" 5 | #include "col2im.h" 6 | #include "gemm.h" 7 | #include "im2col.h" 8 | #include "utils.h" 9 | 10 | int local_out_height(local_layer l) 11 | { 12 | int h = l.h; 13 | if (!l.pad) 14 | h -= l.size; 15 | else 16 | h -= 1; 17 | return h / l.stride + 1; 18 | } 19 | 20 | int local_out_width(local_layer l) 21 | { 22 | int w = l.w; 23 | if (!l.pad) 24 | w -= l.size; 25 | else 26 | w -= 1; 27 | return w / l.stride + 1; 28 | } 29 | 30 | local_layer make_local_layer(int batch, 31 | int h, 32 | int w, 33 | int c, 34 | int n, 35 | int size, 36 | int stride, 37 | int pad, 38 | ACTIVATION activation) 39 | { 40 | int i; 41 | local_layer l; 42 | l.type = LOCAL; 43 | 44 | l.h = h; 45 | l.w = w; 46 | l.c = c; 47 | l.n = n; 48 | l.batch = batch; 49 | l.stride = stride; 50 | l.size = size; 51 | l.pad = pad; 52 | 53 | int out_h = local_out_height(l); 54 | int out_w = local_out_width(l); 55 | int locations = out_h * out_w; 56 | l.out_h = out_h; 57 | l.out_w = out_w; 58 | l.out_c = n; 59 | l.outputs = l.out_h * l.out_w * l.out_c; 60 | l.inputs = l.w * l.h * l.c; 61 | 62 | l.weights = (float *)calloc(c * n * size * size * locations, sizeof(float)); 63 | l.weight_updates = 64 | (float *)calloc(c * n * size * size * locations, sizeof(float)); 65 | 66 | l.biases = (float *)calloc(l.outputs, sizeof(float)); 67 | l.bias_updates = (float *)calloc(l.outputs, sizeof(float)); 68 | 69 | // float scale = 1./sqrt(size*size*c); 70 | float scale = sqrt(2. 
/ (size * size * c)); 71 | for (i = 0; i < c * n * size * size; ++i) 72 | l.weights[i] = scale * rand_uniform(-1, 1); 73 | 74 | l.output = (float *)calloc(l.batch * out_h * out_w * n, sizeof(float)); 75 | l.delta = (float *)calloc(l.batch * out_h * out_w * n, sizeof(float)); 76 | 77 | l.workspace_size = out_h * out_w * size * size * c; 78 | 79 | l.forward = forward_local_layer; 80 | l.backward = backward_local_layer; 81 | l.update = update_local_layer; 82 | 83 | #ifdef DKGPU 84 | l.forward_gpu = forward_local_layer_gpu; 85 | l.backward_gpu = backward_local_layer_gpu; 86 | l.update_gpu = update_local_layer_gpu; 87 | 88 | l.weights_gpu = cuda_make_array(l.weights, c * n * size * size * locations); 89 | l.weight_updates_gpu = 90 | cuda_make_array(l.weight_updates, c * n * size * size * locations); 91 | 92 | l.biases_gpu = cuda_make_array(l.biases, l.outputs); 93 | l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs); 94 | 95 | l.delta_gpu = cuda_make_array(l.delta, l.batch * out_h * out_w * n); 96 | l.output_gpu = cuda_make_array(l.output, l.batch * out_h * out_w * n); 97 | 98 | #endif 99 | l.activation = activation; 100 | 101 | fprintf(stderr, 102 | "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", 103 | h, 104 | w, 105 | c, 106 | n, 107 | out_h, 108 | out_w, 109 | n); 110 | 111 | return l; 112 | } 113 | 114 | void forward_local_layer(const local_layer l, network net) 115 | { 116 | int out_h = local_out_height(l); 117 | int out_w = local_out_width(l); 118 | int i, j; 119 | int locations = out_h * out_w; 120 | 121 | for (i = 0; i < l.batch; ++i) 122 | { 123 | copy_cpu(l.outputs, l.biases, 1, l.output + i * l.outputs, 1); 124 | } 125 | 126 | for (i = 0; i < l.batch; ++i) 127 | { 128 | float *input = net.input + i * l.w * l.h * l.c; 129 | im2col_cpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace); 130 | float *output = l.output + i * l.outputs; 131 | for (j = 0; j < locations; ++j) 132 | { 133 | float *a = l.weights + j * l.size * l.size * l.c * l.n; 134 | float *b = net.workspace + j; 135 | float *c = output + j; 136 | 137 | int m = l.n; 138 | int n = 1; 139 | int k = l.size * l.size * l.c; 140 | 141 | gemm(0, 0, m, n, k, 1, a, k, b, locations, 1, c, locations); 142 | } 143 | } 144 | activate_array(l.output, l.outputs * l.batch, l.activation); 145 | } 146 | 147 | void backward_local_layer(local_layer l, network net) 148 | { 149 | int i, j; 150 | int locations = l.out_w * l.out_h; 151 | 152 | gradient_array(l.output, l.outputs * l.batch, l.activation, l.delta); 153 | 154 | for (i = 0; i < l.batch; ++i) 155 | { 156 | axpy_cpu(l.outputs, 1, l.delta + i * l.outputs, 1, l.bias_updates, 1); 157 | } 158 | 159 | for (i = 0; i < l.batch; ++i) 160 | { 161 | float *input = net.input + i * l.w * l.h * l.c; 162 | im2col_cpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace); 163 | 164 | for (j = 0; j < locations; ++j) 165 | { 166 | float *a = l.delta + i * l.outputs + j; 167 | float *b = net.workspace + j; 168 | float *c = l.weight_updates + j * l.size * l.size * l.c * l.n; 169 | int m = l.n; 170 | int n = l.size * l.size * l.c; 171 | int k = 1; 172 | 173 | gemm(0, 1, m, n, k, 1, a, locations, b, locations, 1, c, n); 174 | } 175 | 176 | if (net.delta) 177 | { 178 | for (j = 0; j < locations; ++j) 179 | { 180 | float *a = l.weights + j * l.size * l.size * l.c * l.n; 181 | float *b = l.delta + i * l.outputs + j; 182 | float *c = net.workspace + j; 183 | 184 | int m = l.size * l.size * l.c; 185 | int n = 1; 186 | int k = l.n; 187 | 188 | gemm(1, 0, m, 
n, k, 1, a, m, b, locations, 0, c, locations); 189 | } 190 | 191 | col2im_cpu(net.workspace, 192 | l.c, 193 | l.h, 194 | l.w, 195 | l.size, 196 | l.stride, 197 | l.pad, 198 | net.delta + i * l.c * l.h * l.w); 199 | } 200 | } 201 | } 202 | 203 | void update_local_layer(local_layer l, update_args a) 204 | { 205 | float learning_rate = a.learning_rate * l.learning_rate_scale; 206 | float momentum = a.momentum; 207 | float decay = a.decay; 208 | int batch = a.batch; 209 | 210 | int locations = l.out_w * l.out_h; 211 | int size = l.size * l.size * l.c * l.n * locations; 212 | axpy_cpu(l.outputs, learning_rate / batch, l.bias_updates, 1, l.biases, 1); 213 | scal_cpu(l.outputs, momentum, l.bias_updates, 1); 214 | 215 | axpy_cpu(size, -decay * batch, l.weights, 1, l.weight_updates, 1); 216 | axpy_cpu(size, learning_rate / batch, l.weight_updates, 1, l.weights, 1); 217 | scal_cpu(size, momentum, l.weight_updates, 1); 218 | } 219 | 220 | #ifdef DKGPU 221 | 222 | void forward_local_layer_gpu(const local_layer l, network net) 223 | { 224 | int out_h = local_out_height(l); 225 | int out_w = local_out_width(l); 226 | int i, j; 227 | int locations = out_h * out_w; 228 | 229 | for (i = 0; i < l.batch; ++i) 230 | { 231 | copy_gpu(l.outputs, l.biases_gpu, 1, l.output_gpu + i * l.outputs, 1); 232 | } 233 | 234 | for (i = 0; i < l.batch; ++i) 235 | { 236 | float *input = net.input_gpu + i * l.w * l.h * l.c; 237 | im2col_gpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace); 238 | float *output = l.output_gpu + i * l.outputs; 239 | for (j = 0; j < locations; ++j) 240 | { 241 | float *a = l.weights_gpu + j * l.size * l.size * l.c * l.n; 242 | float *b = net.workspace + j; 243 | float *c = output + j; 244 | 245 | int m = l.n; 246 | int n = 1; 247 | int k = l.size * l.size * l.c; 248 | 249 | gemm_gpu(0, 0, m, n, k, 1, a, k, b, locations, 1, c, locations); 250 | } 251 | } 252 | activate_array_gpu(l.output_gpu, l.outputs * l.batch, l.activation); 253 | } 254 | 255 | void backward_local_layer_gpu(local_layer l, network net) 256 | { 257 | int i, j; 258 | int locations = l.out_w * l.out_h; 259 | 260 | gradient_array_gpu( 261 | l.output_gpu, l.outputs * l.batch, l.activation, l.delta_gpu); 262 | for (i = 0; i < l.batch; ++i) 263 | { 264 | axpy_gpu( 265 | l.outputs, 1, l.delta_gpu + i * l.outputs, 1, l.bias_updates_gpu, 1); 266 | } 267 | 268 | for (i = 0; i < l.batch; ++i) 269 | { 270 | float *input = net.input_gpu + i * l.w * l.h * l.c; 271 | im2col_gpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace); 272 | 273 | for (j = 0; j < locations; ++j) 274 | { 275 | float *a = l.delta_gpu + i * l.outputs + j; 276 | float *b = net.workspace + j; 277 | float *c = l.weight_updates_gpu + j * l.size * l.size * l.c * l.n; 278 | int m = l.n; 279 | int n = l.size * l.size * l.c; 280 | int k = 1; 281 | 282 | gemm_gpu(0, 1, m, n, k, 1, a, locations, b, locations, 1, c, n); 283 | } 284 | 285 | if (net.delta_gpu) 286 | { 287 | for (j = 0; j < locations; ++j) 288 | { 289 | float *a = l.weights_gpu + j * l.size * l.size * l.c * l.n; 290 | float *b = l.delta_gpu + i * l.outputs + j; 291 | float *c = net.workspace + j; 292 | 293 | int m = l.size * l.size * l.c; 294 | int n = 1; 295 | int k = l.n; 296 | 297 | gemm_gpu(1, 0, m, n, k, 1, a, m, b, locations, 0, c, locations); 298 | } 299 | 300 | col2im_gpu(net.workspace, 301 | l.c, 302 | l.h, 303 | l.w, 304 | l.size, 305 | l.stride, 306 | l.pad, 307 | net.delta_gpu + i * l.c * l.h * l.w); 308 | } 309 | } 310 | } 311 | 312 | void update_local_layer_gpu(local_layer l, 
update_args a) 313 | { 314 | float learning_rate = a.learning_rate * l.learning_rate_scale; 315 | float momentum = a.momentum; 316 | float decay = a.decay; 317 | int batch = a.batch; 318 | 319 | int locations = l.out_w * l.out_h; 320 | int size = l.size * l.size * l.c * l.n * locations; 321 | axpy_gpu( 322 | l.outputs, learning_rate / batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); 323 | scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1); 324 | 325 | axpy_gpu(size, -decay * batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); 326 | axpy_gpu( 327 | size, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); 328 | scal_gpu(size, momentum, l.weight_updates_gpu, 1); 329 | } 330 | 331 | void pull_local_layer(local_layer l) 332 | { 333 | int locations = l.out_w * l.out_h; 334 | int size = l.size * l.size * l.c * l.n * locations; 335 | cuda_pull_array(l.weights_gpu, l.weights, size); 336 | cuda_pull_array(l.biases_gpu, l.biases, l.outputs); 337 | } 338 | 339 | void push_local_layer(local_layer l) 340 | { 341 | int locations = l.out_w * l.out_h; 342 | int size = l.size * l.size * l.c * l.n * locations; 343 | cuda_push_array(l.weights_gpu, l.weights, size); 344 | cuda_push_array(l.biases_gpu, l.biases, l.outputs); 345 | } 346 | #endif 347 | -------------------------------------------------------------------------------- /darknet/src/logistic_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "logistic_layer.h" 2 | #include "activations.h" 3 | #include "blas.h" 4 | #include "cuda.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | layer make_logistic_layer(int batch, int inputs) 13 | { 14 | fprintf( 15 | stderr, "logistic x entropy %4d\n", inputs); 16 | layer l; 17 | l.type = LOGXENT; 18 | l.batch = batch; 19 | l.inputs = inputs; 20 | l.outputs = inputs; 21 | l.loss = (float*)calloc(inputs * batch, sizeof(float)); 22 | l.output = (float*)calloc(inputs * batch, sizeof(float)); 23 | l.delta = (float*)calloc(inputs * batch, sizeof(float)); 24 | l.cost = (float*)calloc(1, sizeof(float)); 25 | 26 | l.forward = forward_logistic_layer; 27 | l.backward = backward_logistic_layer; 28 | #ifdef DKGPU 29 | l.forward_gpu = forward_logistic_layer_gpu; 30 | l.backward_gpu = backward_logistic_layer_gpu; 31 | 32 | l.output_gpu = cuda_make_array(l.output, inputs * batch); 33 | l.loss_gpu = cuda_make_array(l.loss, inputs * batch); 34 | l.delta_gpu = cuda_make_array(l.delta, inputs * batch); 35 | #endif 36 | return l; 37 | } 38 | 39 | void forward_logistic_layer(const layer l, network net) 40 | { 41 | copy_cpu(l.outputs * l.batch, net.input, 1, l.output, 1); 42 | activate_array(l.output, l.outputs * l.batch, LOGISTIC); 43 | if (net.truth) 44 | { 45 | logistic_x_ent_cpu( 46 | l.batch * l.inputs, l.output, net.truth, l.delta, l.loss); 47 | l.cost[0] = sum_array(l.loss, l.batch * l.inputs); 48 | } 49 | } 50 | 51 | void backward_logistic_layer(const layer l, network net) 52 | { 53 | axpy_cpu(l.inputs * l.batch, 1, l.delta, 1, net.delta, 1); 54 | } 55 | 56 | #ifdef DKGPU 57 | 58 | void forward_logistic_layer_gpu(const layer l, network net) 59 | { 60 | copy_gpu(l.outputs * l.batch, net.input_gpu, 1, l.output_gpu, 1); 61 | activate_array_gpu(l.output_gpu, l.outputs * l.batch, LOGISTIC); 62 | if (net.truth) 63 | { 64 | logistic_x_ent_gpu(l.batch * l.inputs, 65 | l.output_gpu, 66 | net.truth_gpu, 67 | l.delta_gpu, 68 | l.loss_gpu); 69 | cuda_pull_array(l.loss_gpu, l.loss, l.batch * l.inputs); 70 | l.cost[0] = sum_array(l.loss, 
l.batch * l.inputs); 71 | } 72 | } 73 | 74 | void backward_logistic_layer_gpu(const layer l, network net) 75 | { 76 | axpy_gpu(l.batch * l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1); 77 | } 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /darknet/src/matrix.cpp: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "blas.h" 8 | #include "utils.h" 9 | 10 | void free_matrix(matrix m) 11 | { 12 | int i; 13 | for (i = 0; i < m.rows; ++i) free(m.vals[i]); 14 | free(m.vals); 15 | } 16 | 17 | float matrix_topk_accuracy(matrix truth, matrix guess, int k) 18 | { 19 | int *indexes = (int *)calloc(k, sizeof(int)); 20 | int n = truth.cols; 21 | int i, j; 22 | int correct = 0; 23 | for (i = 0; i < truth.rows; ++i) 24 | { 25 | top_k(guess.vals[i], n, k, indexes); 26 | for (j = 0; j < k; ++j) 27 | { 28 | int class_ = indexes[j]; 29 | if (truth.vals[i][class_]) 30 | { 31 | ++correct; 32 | break; 33 | } 34 | } 35 | } 36 | free(indexes); 37 | return (float)correct / truth.rows; 38 | } 39 | 40 | void scale_matrix(matrix m, float scale) 41 | { 42 | int i, j; 43 | for (i = 0; i < m.rows; ++i) 44 | { 45 | for (j = 0; j < m.cols; ++j) 46 | { 47 | m.vals[i][j] *= scale; 48 | } 49 | } 50 | } 51 | 52 | matrix resize_matrix(matrix m, int size) 53 | { 54 | int i; 55 | if (m.rows == size) return m; 56 | if (m.rows < size) 57 | { 58 | m.vals = (float **)realloc(m.vals, size * sizeof(float *)); 59 | for (i = m.rows; i < size; ++i) 60 | { 61 | m.vals[i] = (float *)calloc(m.cols, sizeof(float)); 62 | } 63 | } 64 | else if (m.rows > size) 65 | { 66 | for (i = size; i < m.rows; ++i) 67 | { 68 | free(m.vals[i]); 69 | } 70 | m.vals = (float **)realloc(m.vals, size * sizeof(float *)); 71 | } 72 | m.rows = size; 73 | return m; 74 | } 75 | 76 | void matrix_add_matrix(matrix from, matrix to) 77 | { 78 | assert(from.rows == to.rows && from.cols == to.cols); 79 | int i, j; 80 | for (i = 0; i < from.rows; ++i) 81 | { 82 | for (j = 0; j < from.cols; ++j) 83 | { 84 | to.vals[i][j] += from.vals[i][j]; 85 | } 86 | } 87 | } 88 | 89 | matrix copy_matrix(matrix m) 90 | { 91 | matrix c = {0}; 92 | c.rows = m.rows; 93 | c.cols = m.cols; 94 | c.vals = (float **)calloc(c.rows, sizeof(float *)); 95 | int i; 96 | for (i = 0; i < c.rows; ++i) 97 | { 98 | c.vals[i] = (float *)calloc(c.cols, sizeof(float)); 99 | copy_cpu(c.cols, m.vals[i], 1, c.vals[i], 1); 100 | } 101 | return c; 102 | } 103 | 104 | matrix make_matrix(int rows, int cols) 105 | { 106 | int i; 107 | matrix m; 108 | m.rows = rows; 109 | m.cols = cols; 110 | m.vals = (float **)calloc(m.rows, sizeof(float *)); 111 | for (i = 0; i < m.rows; ++i) 112 | { 113 | m.vals[i] = (float *)calloc(m.cols, sizeof(float)); 114 | } 115 | return m; 116 | } 117 | 118 | matrix hold_out_matrix(matrix *m, int n) 119 | { 120 | int i; 121 | matrix h; 122 | h.rows = n; 123 | h.cols = m->cols; 124 | h.vals = (float **)calloc(h.rows, sizeof(float *)); 125 | for (i = 0; i < n; ++i) 126 | { 127 | int index = rand() % m->rows; 128 | h.vals[i] = m->vals[index]; 129 | m->vals[index] = m->vals[--(m->rows)]; 130 | } 131 | return h; 132 | } 133 | 134 | float *pop_column(matrix *m, int c) 135 | { 136 | float *col = (float *)calloc(m->rows, sizeof(float)); 137 | int i, j; 138 | for (i = 0; i < m->rows; ++i) 139 | { 140 | col[i] = m->vals[i][c]; 141 | for (j = c; j < m->cols - 1; ++j) 142 | { 143 | m->vals[i][j] = m->vals[i][j + 1]; 144 | 
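      /* the assignment above shifts the remaining entries of this row one slot
         left, overwriting the popped column; m->cols is decremented once after
         both loops finish */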
} 145 | } 146 | --m->cols; 147 | return col; 148 | } 149 | 150 | matrix csv_to_matrix(char *filename) 151 | { 152 | FILE *fp = fopen(filename, "r"); 153 | if (!fp) file_error(filename); 154 | 155 | matrix m; 156 | m.cols = -1; 157 | 158 | char *line; 159 | 160 | int n = 0; 161 | int size = 1024; 162 | m.vals = (float **)calloc(size, sizeof(float *)); 163 | while ((line = fgetl(fp))) 164 | { 165 | if (m.cols == -1) m.cols = count_fields(line); 166 | if (n == size) 167 | { 168 | size *= 2; 169 | m.vals = (float **)realloc(m.vals, size * sizeof(float *)); 170 | } 171 | m.vals[n] = parse_fields(line, m.cols); 172 | free(line); 173 | ++n; 174 | } 175 | m.vals = (float **)realloc(m.vals, n * sizeof(float *)); 176 | m.rows = n; 177 | return m; 178 | } 179 | 180 | void matrix_to_csv(matrix m) 181 | { 182 | int i, j; 183 | 184 | for (i = 0; i < m.rows; ++i) 185 | { 186 | for (j = 0; j < m.cols; ++j) 187 | { 188 | if (j > 0) printf(","); 189 | printf("%.17g", m.vals[i][j]); 190 | } 191 | printf("\n"); 192 | } 193 | } 194 | 195 | void print_matrix(matrix m) 196 | { 197 | int i, j; 198 | printf("%d X %d Matrix:\n", m.rows, m.cols); 199 | printf(" __"); 200 | for (j = 0; j < 16 * m.cols - 1; ++j) printf(" "); 201 | printf("__ \n"); 202 | 203 | printf("| "); 204 | for (j = 0; j < 16 * m.cols - 1; ++j) printf(" "); 205 | printf(" |\n"); 206 | 207 | for (i = 0; i < m.rows; ++i) 208 | { 209 | printf("| "); 210 | for (j = 0; j < m.cols; ++j) 211 | { 212 | printf("%15.7f ", m.vals[i][j]); 213 | } 214 | printf(" |\n"); 215 | } 216 | printf("|__"); 217 | for (j = 0; j < 16 * m.cols - 1; ++j) printf(" "); 218 | printf("__|\n"); 219 | } 220 | -------------------------------------------------------------------------------- /darknet/src/maxpool_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "maxpool_layer.h" 2 | #include 3 | #include "cuda.h" 4 | 5 | image get_maxpool_image(maxpool_layer l) 6 | { 7 | int h = l.out_h; 8 | int w = l.out_w; 9 | int c = l.c; 10 | return float_to_image(w, h, c, l.output); 11 | } 12 | 13 | image get_maxpool_delta(maxpool_layer l) 14 | { 15 | int h = l.out_h; 16 | int w = l.out_w; 17 | int c = l.c; 18 | return float_to_image(w, h, c, l.delta); 19 | } 20 | 21 | maxpool_layer make_maxpool_layer( 22 | int batch, int h, int w, int c, int size, int stride, int padding) 23 | { 24 | maxpool_layer l; 25 | l.type = MAXPOOL; 26 | l.batch = batch; 27 | l.h = h; 28 | l.w = w; 29 | l.c = c; 30 | l.pad = padding; 31 | l.out_w = (w + 2 * padding) / stride; 32 | l.out_h = (h + 2 * padding) / stride; 33 | l.out_c = c; 34 | l.outputs = l.out_h * l.out_w * l.out_c; 35 | l.inputs = h * w * c; 36 | l.size = size; 37 | l.stride = stride; 38 | int output_size = l.out_h * l.out_w * l.out_c * batch; 39 | l.indexes = (int *)calloc(output_size, sizeof(int)); 40 | l.output = (float *)calloc(output_size, sizeof(float)); 41 | l.delta = (float *)calloc(output_size, sizeof(float)); 42 | l.forward = forward_maxpool_layer; 43 | l.backward = backward_maxpool_layer; 44 | #ifdef DKGPU 45 | l.forward_gpu = forward_maxpool_layer_gpu; 46 | l.backward_gpu = backward_maxpool_layer_gpu; 47 | l.indexes_gpu = cuda_make_int_array(0, output_size); 48 | l.output_gpu = cuda_make_array(l.output, output_size); 49 | l.delta_gpu = cuda_make_array(l.delta, output_size); 50 | #endif 51 | // fprintf(stderr, 52 | // "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", 53 | // size, 54 | // size, 55 | // stride, 56 | // w, 57 | // h, 58 | // c, 59 | // l.out_w, 60 | // l.out_h, 61 | // 
l.out_c); 62 | return l; 63 | } 64 | 65 | void resize_maxpool_layer(maxpool_layer *l, int w, int h) 66 | { 67 | l->h = h; 68 | l->w = w; 69 | l->inputs = h * w * l->c; 70 | 71 | l->out_w = (w + 2 * l->pad) / l->stride; 72 | l->out_h = (h + 2 * l->pad) / l->stride; 73 | l->outputs = l->out_w * l->out_h * l->c; 74 | int output_size = l->outputs * l->batch; 75 | 76 | l->indexes = (int *)realloc(l->indexes, output_size * sizeof(int)); 77 | l->output = (float *)realloc(l->output, output_size * sizeof(float)); 78 | l->delta = (float *)realloc(l->delta, output_size * sizeof(float)); 79 | 80 | #ifdef DKGPU 81 | cuda_free((float *)l->indexes_gpu); 82 | cuda_free(l->output_gpu); 83 | cuda_free(l->delta_gpu); 84 | l->indexes_gpu = cuda_make_int_array(0, output_size); 85 | l->output_gpu = cuda_make_array(l->output, output_size); 86 | l->delta_gpu = cuda_make_array(l->delta, output_size); 87 | #endif 88 | } 89 | 90 | void forward_maxpool_layer(const maxpool_layer l, network net) 91 | { 92 | int b, i, j, k, m, n; 93 | int w_offset = -l.pad; 94 | int h_offset = -l.pad; 95 | 96 | int h = l.out_h; 97 | int w = l.out_w; 98 | int c = l.c; 99 | 100 | for (b = 0; b < l.batch; ++b) 101 | { 102 | for (k = 0; k < c; ++k) 103 | { 104 | for (i = 0; i < h; ++i) 105 | { 106 | for (j = 0; j < w; ++j) 107 | { 108 | int out_index = j + w * (i + h * (k + c * b)); 109 | float max = -FLT_MAX; 110 | int max_i = -1; 111 | for (n = 0; n < l.size; ++n) 112 | { 113 | for (m = 0; m < l.size; ++m) 114 | { 115 | int cur_h = h_offset + i * l.stride + n; 116 | int cur_w = w_offset + j * l.stride + m; 117 | int index = cur_w + l.w * (cur_h + l.h * (k + b * l.c)); 118 | int valid = 119 | (cur_h >= 0 && cur_h < l.h && cur_w >= 0 && cur_w < l.w); 120 | float val = (valid != 0) ? net.input[index] : -FLT_MAX; 121 | max_i = (val > max) ? index : max_i; 122 | max = (val > max) ? 
val : max; 123 | } 124 | } 125 | l.output[out_index] = max; 126 | l.indexes[out_index] = max_i; 127 | } 128 | } 129 | } 130 | } 131 | } 132 | 133 | void backward_maxpool_layer(const maxpool_layer l, network net) 134 | { 135 | int i; 136 | int h = l.out_h; 137 | int w = l.out_w; 138 | int c = l.c; 139 | for (i = 0; i < h * w * c * l.batch; ++i) 140 | { 141 | int index = l.indexes[i]; 142 | net.delta[index] += l.delta[i]; 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /darknet/src/maxpool_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "maxpool_layer.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) 11 | { 12 | int h = (in_h + 2*pad)/stride; 13 | int w = (in_w + 2*pad)/stride; 14 | int c = in_c; 15 | 16 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 17 | if(id >= n) return; 18 | 19 | int j = id % w; 20 | id /= w; 21 | int i = id % h; 22 | id /= h; 23 | int k = id % c; 24 | id /= c; 25 | int b = id; 26 | 27 | int w_offset = -pad; 28 | int h_offset = -pad; 29 | 30 | int out_index = j + w*(i + h*(k + c*b)); 31 | float max = -INFINITY; 32 | int max_i = -1; 33 | int l, m; 34 | for(l = 0; l < size; ++l){ 35 | for(m = 0; m < size; ++m){ 36 | int cur_h = h_offset + i*stride + l; 37 | int cur_w = w_offset + j*stride + m; 38 | int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); 39 | int valid = (cur_h >= 0 && cur_h < in_h && 40 | cur_w >= 0 && cur_w < in_w); 41 | float val = (valid != 0) ? input[index] : -INFINITY; 42 | max_i = (val > max) ? index : max_i; 43 | max = (val > max) ? val : max; 44 | } 45 | } 46 | output[out_index] = max; 47 | indexes[out_index] = max_i; 48 | } 49 | 50 | __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) 51 | { 52 | int h = (in_h + 2*pad)/stride; 53 | int w = (in_w + 2*pad)/stride; 54 | int c = in_c; 55 | int area = (size-1)/stride; 56 | 57 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 58 | if(id >= n) return; 59 | 60 | int index = id; 61 | int j = id % in_w; 62 | id /= in_w; 63 | int i = id % in_h; 64 | id /= in_h; 65 | int k = id % in_c; 66 | id /= in_c; 67 | int b = id; 68 | 69 | int w_offset = -pad; 70 | int h_offset = -pad; 71 | 72 | float d = 0; 73 | int l, m; 74 | for(l = -area; l < area+1; ++l){ 75 | for(m = -area; m < area+1; ++m){ 76 | int out_w = (j-w_offset)/stride + m; 77 | int out_h = (i-h_offset)/stride + l; 78 | int out_index = out_w + w*(out_h + h*(k + c*b)); 79 | int valid = (out_w >= 0 && out_w < w && 80 | out_h >= 0 && out_h < h); 81 | d += (valid && indexes[out_index] == index) ? 
delta[out_index] : 0; 82 | } 83 | } 84 | prev_delta[index] += d; 85 | } 86 | 87 | extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network net) 88 | { 89 | int h = layer.out_h; 90 | int w = layer.out_w; 91 | int c = layer.c; 92 | 93 | size_t n = h*w*c*layer.batch; 94 | 95 | forward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, net.input_gpu, layer.output_gpu, layer.indexes_gpu); 96 | check_error(cudaPeekAtLastError()); 97 | } 98 | 99 | extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network net) 100 | { 101 | size_t n = layer.h*layer.w*layer.c*layer.batch; 102 | 103 | backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, net.delta_gpu, layer.indexes_gpu); 104 | check_error(cudaPeekAtLastError()); 105 | } 106 | 107 | -------------------------------------------------------------------------------- /darknet/src/normalization_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "normalization_layer.h" 2 | #include "blas.h" 3 | 4 | #include 5 | 6 | layer make_normalization_layer(int batch, 7 | int w, 8 | int h, 9 | int c, 10 | int size, 11 | float alpha, 12 | float beta, 13 | float kappa) 14 | { 15 | fprintf(stderr, 16 | "Local Response Normalization Layer: %d x %d x %d image, %d size\n", 17 | w, 18 | h, 19 | c, 20 | size); 21 | layer layer; 22 | layer.type = NORMALIZATION; 23 | layer.batch = batch; 24 | layer.h = layer.out_h = h; 25 | layer.w = layer.out_w = w; 26 | layer.c = layer.out_c = c; 27 | layer.kappa = kappa; 28 | layer.size = size; 29 | layer.alpha = alpha; 30 | layer.beta = beta; 31 | layer.output = (float *)calloc(h * w * c * batch, sizeof(float)); 32 | layer.delta = (float *)calloc(h * w * c * batch, sizeof(float)); 33 | layer.squared = (float *)calloc(h * w * c * batch, sizeof(float)); 34 | layer.norms = (float *)calloc(h * w * c * batch, sizeof(float)); 35 | layer.inputs = w * h * c; 36 | layer.outputs = layer.inputs; 37 | 38 | layer.forward = forward_normalization_layer; 39 | layer.backward = backward_normalization_layer; 40 | #ifdef DKGPU 41 | layer.forward_gpu = forward_normalization_layer_gpu; 42 | layer.backward_gpu = backward_normalization_layer_gpu; 43 | 44 | layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); 45 | layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); 46 | layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); 47 | layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch); 48 | #endif 49 | return layer; 50 | } 51 | 52 | void resize_normalization_layer(layer *layer, int w, int h) 53 | { 54 | int c = layer->c; 55 | int batch = layer->batch; 56 | layer->h = h; 57 | layer->w = w; 58 | layer->out_h = h; 59 | layer->out_w = w; 60 | layer->inputs = w * h * c; 61 | layer->outputs = layer->inputs; 62 | layer->output = 63 | (float *)realloc(layer->output, h * w * c * batch * sizeof(float)); 64 | layer->delta = 65 | (float *)realloc(layer->delta, h * w * c * batch * sizeof(float)); 66 | layer->squared = 67 | (float *)realloc(layer->squared, h * w * c * batch * sizeof(float)); 68 | layer->norms = 69 | (float *)realloc(layer->norms, h * w * c * batch * sizeof(float)); 70 | #ifdef DKGPU 71 | cuda_free(layer->output_gpu); 72 | cuda_free(layer->delta_gpu); 73 | cuda_free(layer->squared_gpu); 74 | cuda_free(layer->norms_gpu); 75 | layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); 76 | layer->delta_gpu = 
cuda_make_array(layer->delta, h * w * c * batch); 77 | layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); 78 | layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch); 79 | #endif 80 | } 81 | 82 | void forward_normalization_layer(const layer layer, network net) 83 | { 84 | int k, b; 85 | int w = layer.w; 86 | int h = layer.h; 87 | int c = layer.c; 88 | scal_cpu(w * h * c * layer.batch, 0, layer.squared, 1); 89 | 90 | for (b = 0; b < layer.batch; ++b) 91 | { 92 | float *squared = layer.squared + w * h * c * b; 93 | float *norms = layer.norms + w * h * c * b; 94 | float *input = net.input + w * h * c * b; 95 | pow_cpu(w * h * c, 2, input, 1, squared, 1); 96 | 97 | const_cpu(w * h, layer.kappa, norms, 1); 98 | for (k = 0; k < layer.size / 2; ++k) 99 | { 100 | axpy_cpu(w * h, layer.alpha, squared + w * h * k, 1, norms, 1); 101 | } 102 | 103 | for (k = 1; k < layer.c; ++k) 104 | { 105 | copy_cpu(w * h, norms + w * h * (k - 1), 1, norms + w * h * k, 1); 106 | int prev = k - ((layer.size - 1) / 2) - 1; 107 | int next = k + (layer.size / 2); 108 | if (prev >= 0) 109 | axpy_cpu(w * h, 110 | -layer.alpha, 111 | squared + w * h * prev, 112 | 1, 113 | norms + w * h * k, 114 | 1); 115 | if (next < layer.c) 116 | axpy_cpu(w * h, 117 | layer.alpha, 118 | squared + w * h * next, 119 | 1, 120 | norms + w * h * k, 121 | 1); 122 | } 123 | } 124 | pow_cpu( 125 | w * h * c * layer.batch, -layer.beta, layer.norms, 1, layer.output, 1); 126 | mul_cpu(w * h * c * layer.batch, net.input, 1, layer.output, 1); 127 | } 128 | 129 | void backward_normalization_layer(const layer layer, network net) 130 | { 131 | // TODO This is approximate ;-) 132 | // Also this should add in to delta instead of overwritting. 133 | 134 | int w = layer.w; 135 | int h = layer.h; 136 | int c = layer.c; 137 | pow_cpu(w * h * c * layer.batch, -layer.beta, layer.norms, 1, net.delta, 1); 138 | mul_cpu(w * h * c * layer.batch, layer.delta, 1, net.delta, 1); 139 | } 140 | 141 | #ifdef DKGPU 142 | void forward_normalization_layer_gpu(const layer layer, network net) 143 | { 144 | int k, b; 145 | int w = layer.w; 146 | int h = layer.h; 147 | int c = layer.c; 148 | scal_gpu(w * h * c * layer.batch, 0, layer.squared_gpu, 1); 149 | 150 | for (b = 0; b < layer.batch; ++b) 151 | { 152 | float *squared = layer.squared_gpu + w * h * c * b; 153 | float *norms = layer.norms_gpu + w * h * c * b; 154 | float *input = net.input_gpu + w * h * c * b; 155 | pow_gpu(w * h * c, 2, input, 1, squared, 1); 156 | 157 | const_gpu(w * h, layer.kappa, norms, 1); 158 | for (k = 0; k < layer.size / 2; ++k) 159 | { 160 | axpy_gpu(w * h, layer.alpha, squared + w * h * k, 1, norms, 1); 161 | } 162 | 163 | for (k = 1; k < layer.c; ++k) 164 | { 165 | copy_gpu(w * h, norms + w * h * (k - 1), 1, norms + w * h * k, 1); 166 | int prev = k - ((layer.size - 1) / 2) - 1; 167 | int next = k + (layer.size / 2); 168 | if (prev >= 0) 169 | axpy_gpu(w * h, 170 | -layer.alpha, 171 | squared + w * h * prev, 172 | 1, 173 | norms + w * h * k, 174 | 1); 175 | if (next < layer.c) 176 | axpy_gpu(w * h, 177 | layer.alpha, 178 | squared + w * h * next, 179 | 1, 180 | norms + w * h * k, 181 | 1); 182 | } 183 | } 184 | pow_gpu(w * h * c * layer.batch, 185 | -layer.beta, 186 | layer.norms_gpu, 187 | 1, 188 | layer.output_gpu, 189 | 1); 190 | mul_gpu(w * h * c * layer.batch, net.input_gpu, 1, layer.output_gpu, 1); 191 | } 192 | 193 | void backward_normalization_layer_gpu(const layer layer, network net) 194 | { 195 | // TODO This is approximate ;-) 196 | 197 | int w = 
layer.w; 198 | int h = layer.h; 199 | int c = layer.c; 200 | pow_gpu(w * h * c * layer.batch, 201 | -layer.beta, 202 | layer.norms_gpu, 203 | 1, 204 | net.delta_gpu, 205 | 1); 206 | mul_gpu(w * h * c * layer.batch, layer.delta_gpu, 1, net.delta_gpu, 1); 207 | } 208 | #endif 209 | -------------------------------------------------------------------------------- /darknet/src/option_list.cpp: -------------------------------------------------------------------------------- 1 | #include "option_list.h" 2 | #include <stdio.h> 3 | #include <stdlib.h> 4 | #include <string.h> 5 | #include "utils.h" 6 | 7 | list *read_data_cfg(char *filename) 8 | { 9 | FILE *file = fopen(filename, "r"); 10 | if (file == 0) file_error(filename); 11 | char *line; 12 | int nu = 0; 13 | list *options = make_list(); 14 | while ((line = fgetl(file)) != 0) 15 | { 16 | ++nu; 17 | strip(line); 18 | switch (line[0]) 19 | { 20 | case '\0': 21 | case '#': 22 | case ';': free(line); break; 23 | default: 24 | if (!read_option(line, options)) 25 | { 26 | fprintf( 27 | stderr, "Config file error line %d, could not parse: %s\n", nu, line); 28 | free(line); 29 | } 30 | break; 31 | } 32 | } 33 | fclose(file); 34 | return options; 35 | } 36 | 37 | int read_option(char *s, list *options) 38 | { 39 | size_t i; 40 | size_t len = strlen(s); 41 | char *val = 0; 42 | for (i = 0; i < len; ++i) 43 | { 44 | if (s[i] == '=') 45 | { 46 | s[i] = '\0'; 47 | val = s + i + 1; 48 | break; 49 | } 50 | } 51 | if (i == len - 1) return 0; 52 | char *key = s; 53 | option_insert(options, key, val); 54 | return 1; 55 | } 56 | 57 | void option_insert(list *l, char *key, char *val) 58 | { 59 | kvp *p = (kvp *)malloc(sizeof(kvp)); 60 | p->key = key; 61 | p->val = val; 62 | p->used = 0; 63 | list_insert(l, p); 64 | } 65 | 66 | void option_unused(list *l) 67 | { 68 | node *n = l->front; 69 | while (n) 70 | { 71 | kvp *p = (kvp *)n->val; 72 | if (!p->used) 73 | { 74 | fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); 75 | } 76 | n = n->next; 77 | } 78 | } 79 | 80 | char *option_find(list *l, char *key) 81 | { 82 | node *n = l->front; 83 | while (n) 84 | { 85 | kvp *p = (kvp *)n->val; 86 | if (strcmp(p->key, key) == 0) 87 | { 88 | p->used = 1; 89 | return p->val; 90 | } 91 | n = n->next; 92 | } 93 | return 0; 94 | } 95 | char *option_find_str(list *l, char *key, char *def) 96 | { 97 | char *v = option_find(l, key); 98 | if (v) return v; 99 | // if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); 100 | return def; 101 | } 102 | 103 | int option_find_int(list *l, char *key, int def) 104 | { 105 | char *v = option_find(l, key); 106 | if (v) return atoi(v); 107 | // fprintf(stderr, "%s: Using default '%d'\n", key, def); 108 | return def; 109 | } 110 | 111 | int option_find_int_quiet(list *l, char *key, int def) 112 | { 113 | char *v = option_find(l, key); 114 | if (v) return atoi(v); 115 | return def; 116 | } 117 | 118 | float option_find_float_quiet(list *l, char *key, float def) 119 | { 120 | char *v = option_find(l, key); 121 | if (v) return atof(v); 122 | return def; 123 | } 124 | 125 | float option_find_float(list *l, char *key, float def) 126 | { 127 | char *v = option_find(l, key); 128 | if (v) return atof(v); 129 | // fprintf(stderr, "%s: Using default '%lf'\n", key, def); 130 | return def; 131 | } 132 | -------------------------------------------------------------------------------- /darknet/src/reorg_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "reorg_layer.h" 2 | #include "blas.h" 3 | #include "cuda.h" 4 | 5 | 
#include <stdio.h> 6 | 7 | layer make_reorg_layer(int batch, 8 | int w, 9 | int h, 10 | int c, 11 | int stride, 12 | int reverse, 13 | int flatten, 14 | int extra) 15 | { 16 | layer l; 17 | l.type = REORG; 18 | l.batch = batch; 19 | l.stride = stride; 20 | l.extra = extra; 21 | l.h = h; 22 | l.w = w; 23 | l.c = c; 24 | l.flatten = flatten; 25 | if (reverse) 26 | { 27 | l.out_w = w * stride; 28 | l.out_h = h * stride; 29 | l.out_c = c / (stride * stride); 30 | } 31 | else 32 | { 33 | l.out_w = w / stride; 34 | l.out_h = h / stride; 35 | l.out_c = c * (stride * stride); 36 | } 37 | l.reverse = reverse; 38 | 39 | l.outputs = l.out_h * l.out_w * l.out_c; 40 | l.inputs = h * w * c; 41 | if (l.extra) 42 | { 43 | l.out_w = l.out_h = l.out_c = 0; 44 | l.outputs = l.inputs + l.extra; 45 | } 46 | 47 | if (extra) 48 | { 49 | // fprintf(stderr, "reorg %4d -> %4d\n", l.inputs, 50 | // l.outputs); 51 | } 52 | else 53 | { 54 | // fprintf(stderr, 55 | // "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", 56 | // stride, 57 | // w, 58 | // h, 59 | // c, 60 | // l.out_w, 61 | // l.out_h, 62 | // l.out_c); 63 | } 64 | int output_size = l.outputs * batch; 65 | l.output = (float *)calloc(output_size, sizeof(float)); 66 | l.delta = (float *)calloc(output_size, sizeof(float)); 67 | 68 | l.forward = forward_reorg_layer; 69 | l.backward = backward_reorg_layer; 70 | #ifdef DKGPU 71 | l.forward_gpu = forward_reorg_layer_gpu; 72 | l.backward_gpu = backward_reorg_layer_gpu; 73 | 74 | l.output_gpu = cuda_make_array(l.output, output_size); 75 | l.delta_gpu = cuda_make_array(l.delta, output_size); 76 | #endif 77 | return l; 78 | } 79 | 80 | void resize_reorg_layer(layer *l, int w, int h) 81 | { 82 | int stride = l->stride; 83 | int c = l->c; 84 | 85 | l->h = h; 86 | l->w = w; 87 | 88 | if (l->reverse) 89 | { 90 | l->out_w = w * stride; 91 | l->out_h = h * stride; 92 | l->out_c = c / (stride * stride); 93 | } 94 | else 95 | { 96 | l->out_w = w / stride; 97 | l->out_h = h / stride; 98 | l->out_c = c * (stride * stride); 99 | } 100 | 101 | l->outputs = l->out_h * l->out_w * l->out_c; 102 | l->inputs = l->outputs; 103 | int output_size = l->outputs * l->batch; 104 | 105 | l->output = (float *)realloc(l->output, output_size * sizeof(float)); 106 | l->delta = (float *)realloc(l->delta, output_size * sizeof(float)); 107 | 108 | #ifdef DKGPU 109 | cuda_free(l->output_gpu); 110 | cuda_free(l->delta_gpu); 111 | l->output_gpu = cuda_make_array(l->output, output_size); 112 | l->delta_gpu = cuda_make_array(l->delta, output_size); 113 | #endif 114 | } 115 | 116 | void forward_reorg_layer(const layer l, network net) 117 | { 118 | int i; 119 | if (l.flatten) 120 | { 121 | memcpy(l.output, net.input, l.outputs * l.batch * sizeof(float)); 122 | if (l.reverse) 123 | { 124 | flatten(l.output, l.w * l.h, l.c, l.batch, 0); 125 | } 126 | else 127 | { 128 | flatten(l.output, l.w * l.h, l.c, l.batch, 1); 129 | } 130 | } 131 | else if (l.extra) 132 | { 133 | for (i = 0; i < l.batch; ++i) 134 | { 135 | copy_cpu( 136 | l.inputs, net.input + i * l.inputs, 1, l.output + i * l.outputs, 1); 137 | } 138 | } 139 | else if (l.reverse) 140 | { 141 | reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); 142 | } 143 | else 144 | { 145 | reorg_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); 146 | } 147 | } 148 | 149 | void backward_reorg_layer(const layer l, network net) 150 | { 151 | int i; 152 | if (l.flatten) 153 | { 154 | memcpy(net.delta, l.delta, l.outputs * l.batch * sizeof(float)); 155 | if (l.reverse) 156 | { 157 | flatten(net.delta, 
l.w * l.h, l.c, l.batch, 1); 158 | } 159 | else 160 | { 161 | flatten(net.delta, l.w * l.h, l.c, l.batch, 0); 162 | } 163 | } 164 | else if (l.reverse) 165 | { 166 | reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta); 167 | } 168 | else if (l.extra) 169 | { 170 | for (i = 0; i < l.batch; ++i) 171 | { 172 | copy_cpu( 173 | l.inputs, l.delta + i * l.outputs, 1, net.delta + i * l.inputs, 1); 174 | } 175 | } 176 | else 177 | { 178 | reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta); 179 | } 180 | } 181 | 182 | #ifdef DKGPU 183 | void forward_reorg_layer_gpu(layer l, network net) 184 | { 185 | int i; 186 | if (l.flatten) 187 | { 188 | if (l.reverse) 189 | { 190 | flatten_gpu(net.input_gpu, l.w * l.h, l.c, l.batch, 0, l.output_gpu); 191 | } 192 | else 193 | { 194 | flatten_gpu(net.input_gpu, l.w * l.h, l.c, l.batch, 1, l.output_gpu); 195 | } 196 | } 197 | else if (l.extra) 198 | { 199 | for (i = 0; i < l.batch; ++i) 200 | { 201 | copy_gpu(l.inputs, 202 | net.input_gpu + i * l.inputs, 203 | 1, 204 | l.output_gpu + i * l.outputs, 205 | 1); 206 | } 207 | } 208 | else if (l.reverse) 209 | { 210 | reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); 211 | } 212 | else 213 | { 214 | reorg_gpu(net.input_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); 215 | } 216 | } 217 | 218 | void backward_reorg_layer_gpu(layer l, network net) 219 | { 220 | if (l.flatten) 221 | { 222 | if (l.reverse) 223 | { 224 | flatten_gpu(l.delta_gpu, l.w * l.h, l.c, l.batch, 1, net.delta_gpu); 225 | } 226 | else 227 | { 228 | flatten_gpu(l.delta_gpu, l.w * l.h, l.c, l.batch, 0, net.delta_gpu); 229 | } 230 | } 231 | else if (l.extra) 232 | { 233 | int i; 234 | for (i = 0; i < l.batch; ++i) 235 | { 236 | copy_gpu(l.inputs, 237 | l.delta_gpu + i * l.outputs, 238 | 1, 239 | net.delta_gpu + i * l.inputs, 240 | 1); 241 | } 242 | } 243 | else if (l.reverse) 244 | { 245 | reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, net.delta_gpu); 246 | } 247 | else 248 | { 249 | reorg_gpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, net.delta_gpu); 250 | } 251 | } 252 | #endif 253 | -------------------------------------------------------------------------------- /darknet/src/route_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "route_layer.h" 2 | #include "blas.h" 3 | #include "cuda.h" 4 | 5 | #include <stdio.h> 6 | 7 | route_layer make_route_layer(int batch, 8 | int n, 9 | int *input_layers, 10 | int *input_sizes) 11 | { 12 | // fprintf(stderr, "route "); 13 | route_layer l; 14 | l.type = ROUTE; 15 | l.batch = batch; 16 | l.n = n; 17 | l.input_layers = input_layers; 18 | l.input_sizes = input_sizes; 19 | int i; 20 | int outputs = 0; 21 | for (i = 0; i < n; ++i) 22 | { 23 | // fprintf(stderr, " %d", input_layers[i]); 24 | outputs += input_sizes[i]; 25 | } 26 | // fprintf(stderr, "\n"); 27 | l.outputs = outputs; 28 | l.inputs = outputs; 29 | l.delta = (float *)calloc(outputs * batch, sizeof(float)); 30 | l.output = (float *)calloc(outputs * batch, sizeof(float)); 31 | ; 32 | 33 | l.forward = forward_route_layer; 34 | l.backward = backward_route_layer; 35 | #ifdef DKGPU 36 | l.forward_gpu = forward_route_layer_gpu; 37 | l.backward_gpu = backward_route_layer_gpu; 38 | 39 | l.delta_gpu = cuda_make_array(l.delta, outputs * batch); 40 | l.output_gpu = cuda_make_array(l.output, outputs * batch); 41 | #endif 42 | return l; 43 | } 44 | 45 | void resize_route_layer(route_layer *l, network *net) 46 | { 47 | int i; 48 | layer first 
= net->layers[l->input_layers[0]]; 49 | l->out_w = first.out_w; 50 | l->out_h = first.out_h; 51 | l->out_c = first.out_c; 52 | l->outputs = first.outputs; 53 | l->input_sizes[0] = first.outputs; 54 | for (i = 1; i < l->n; ++i) 55 | { 56 | int index = l->input_layers[i]; 57 | layer next = net->layers[index]; 58 | l->outputs += next.outputs; 59 | l->input_sizes[i] = next.outputs; 60 | if (next.out_w == first.out_w && next.out_h == first.out_h) 61 | { 62 | l->out_c += next.out_c; 63 | } 64 | else 65 | { 66 | // printf( 67 | // "%d %d, %d %d\n", next.out_w, next.out_h, first.out_w, 68 | // first.out_h); 69 | l->out_h = l->out_w = l->out_c = 0; 70 | } 71 | } 72 | l->inputs = l->outputs; 73 | l->delta = (float *)realloc(l->delta, l->outputs * l->batch * sizeof(float)); 74 | l->output = 75 | (float *)realloc(l->output, l->outputs * l->batch * sizeof(float)); 76 | 77 | #ifdef DKGPU 78 | cuda_free(l->output_gpu); 79 | cuda_free(l->delta_gpu); 80 | l->output_gpu = cuda_make_array(l->output, l->outputs * l->batch); 81 | l->delta_gpu = cuda_make_array(l->delta, l->outputs * l->batch); 82 | #endif 83 | } 84 | 85 | void forward_route_layer(const route_layer l, network net) 86 | { 87 | int i, j; 88 | int offset = 0; 89 | for (i = 0; i < l.n; ++i) 90 | { 91 | int index = l.input_layers[i]; 92 | float *input = net.layers[index].output; 93 | int input_size = l.input_sizes[i]; 94 | for (j = 0; j < l.batch; ++j) 95 | { 96 | copy_cpu(input_size, 97 | input + j * input_size, 98 | 1, 99 | l.output + offset + j * l.outputs, 100 | 1); 101 | } 102 | offset += input_size; 103 | } 104 | } 105 | 106 | void backward_route_layer(const route_layer l, network net) 107 | { 108 | int i, j; 109 | int offset = 0; 110 | for (i = 0; i < l.n; ++i) 111 | { 112 | int index = l.input_layers[i]; 113 | float *delta = net.layers[index].delta; 114 | int input_size = l.input_sizes[i]; 115 | for (j = 0; j < l.batch; ++j) 116 | { 117 | axpy_cpu(input_size, 118 | 1, 119 | l.delta + offset + j * l.outputs, 120 | 1, 121 | delta + j * input_size, 122 | 1); 123 | } 124 | offset += input_size; 125 | } 126 | } 127 | 128 | #ifdef DKGPU 129 | void forward_route_layer_gpu(const route_layer l, network net) 130 | { 131 | int i, j; 132 | int offset = 0; 133 | for (i = 0; i < l.n; ++i) 134 | { 135 | int index = l.input_layers[i]; 136 | float *input = net.layers[index].output_gpu; 137 | int input_size = l.input_sizes[i]; 138 | for (j = 0; j < l.batch; ++j) 139 | { 140 | copy_gpu(input_size, 141 | input + j * input_size, 142 | 1, 143 | l.output_gpu + offset + j * l.outputs, 144 | 1); 145 | } 146 | offset += input_size; 147 | } 148 | } 149 | 150 | void backward_route_layer_gpu(const route_layer l, network net) 151 | { 152 | int i, j; 153 | int offset = 0; 154 | for (i = 0; i < l.n; ++i) 155 | { 156 | int index = l.input_layers[i]; 157 | float *delta = net.layers[index].delta_gpu; 158 | int input_size = l.input_sizes[i]; 159 | for (j = 0; j < l.batch; ++j) 160 | { 161 | axpy_gpu(input_size, 162 | 1, 163 | l.delta_gpu + offset + j * l.outputs, 164 | 1, 165 | delta + j * input_size, 166 | 1); 167 | } 168 | offset += input_size; 169 | } 170 | } 171 | #endif 172 | -------------------------------------------------------------------------------- /darknet/src/shortcut_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "shortcut_layer.h" 2 | #include "activations.h" 3 | #include "blas.h" 4 | #include "cuda.h" 5 | 6 | #include <assert.h> 7 | #include <stdio.h> 8 | 9 | layer make_shortcut_layer( 10 | int batch, int index, int w, int 
h, int c, int w2, int h2, int c2) 11 | { 12 | fprintf(stderr, 13 | "res %3d %4d x%4d x%4d -> %4d x%4d x%4d\n", 14 | index, 15 | w2, 16 | h2, 17 | c2, 18 | w, 19 | h, 20 | c); 21 | layer l; 22 | l.type = SHORTCUT; 23 | l.batch = batch; 24 | l.w = w2; 25 | l.h = h2; 26 | l.c = c2; 27 | l.out_w = w; 28 | l.out_h = h; 29 | l.out_c = c; 30 | l.outputs = w * h * c; 31 | l.inputs = l.outputs; 32 | 33 | l.index = index; 34 | 35 | l.delta = (float *)calloc(l.outputs * batch, sizeof(float)); 36 | l.output = (float *)calloc(l.outputs * batch, sizeof(float)); 37 | ; 38 | 39 | l.forward = forward_shortcut_layer; 40 | l.backward = backward_shortcut_layer; 41 | #ifdef DKGPU 42 | l.forward_gpu = forward_shortcut_layer_gpu; 43 | l.backward_gpu = backward_shortcut_layer_gpu; 44 | 45 | l.delta_gpu = cuda_make_array(l.delta, l.outputs * batch); 46 | l.output_gpu = cuda_make_array(l.output, l.outputs * batch); 47 | #endif 48 | return l; 49 | } 50 | 51 | void resize_shortcut_layer(layer *l, int w, int h) 52 | { 53 | assert(l->w == l->out_w); 54 | assert(l->h == l->out_h); 55 | l->w = l->out_w = w; 56 | l->h = l->out_h = h; 57 | l->outputs = w * h * l->out_c; 58 | l->inputs = l->outputs; 59 | l->delta = (float *)realloc(l->delta, l->outputs * l->batch * sizeof(float)); 60 | l->output = 61 | (float *)realloc(l->output, l->outputs * l->batch * sizeof(float)); 62 | 63 | #ifdef DKGPU 64 | cuda_free(l->output_gpu); 65 | cuda_free(l->delta_gpu); 66 | l->output_gpu = cuda_make_array(l->output, l->outputs * l->batch); 67 | l->delta_gpu = cuda_make_array(l->delta, l->outputs * l->batch); 68 | #endif 69 | } 70 | 71 | void forward_shortcut_layer(const layer l, network net) 72 | { 73 | copy_cpu(l.outputs * l.batch, net.input, 1, l.output, 1); 74 | shortcut_cpu(l.batch, 75 | l.w, 76 | l.h, 77 | l.c, 78 | net.layers[l.index].output, 79 | l.out_w, 80 | l.out_h, 81 | l.out_c, 82 | l.alpha, 83 | l.beta, 84 | l.output); 85 | activate_array(l.output, l.outputs * l.batch, l.activation); 86 | } 87 | 88 | void backward_shortcut_layer(const layer l, network net) 89 | { 90 | gradient_array(l.output, l.outputs * l.batch, l.activation, l.delta); 91 | axpy_cpu(l.outputs * l.batch, l.alpha, l.delta, 1, net.delta, 1); 92 | shortcut_cpu(l.batch, 93 | l.out_w, 94 | l.out_h, 95 | l.out_c, 96 | l.delta, 97 | l.w, 98 | l.h, 99 | l.c, 100 | 1, 101 | l.beta, 102 | net.layers[l.index].delta); 103 | } 104 | 105 | #ifdef DKGPU 106 | void forward_shortcut_layer_gpu(const layer l, network net) 107 | { 108 | copy_gpu(l.outputs * l.batch, net.input_gpu, 1, l.output_gpu, 1); 109 | shortcut_gpu(l.batch, 110 | l.w, 111 | l.h, 112 | l.c, 113 | net.layers[l.index].output_gpu, 114 | l.out_w, 115 | l.out_h, 116 | l.out_c, 117 | l.alpha, 118 | l.beta, 119 | l.output_gpu); 120 | activate_array_gpu(l.output_gpu, l.outputs * l.batch, l.activation); 121 | } 122 | 123 | void backward_shortcut_layer_gpu(const layer l, network net) 124 | { 125 | gradient_array_gpu( 126 | l.output_gpu, l.outputs * l.batch, l.activation, l.delta_gpu); 127 | axpy_gpu(l.outputs * l.batch, l.alpha, l.delta_gpu, 1, net.delta_gpu, 1); 128 | shortcut_gpu(l.batch, 129 | l.out_w, 130 | l.out_h, 131 | l.out_c, 132 | l.delta_gpu, 133 | l.w, 134 | l.h, 135 | l.c, 136 | 1, 137 | l.beta, 138 | net.layers[l.index].delta_gpu); 139 | } 140 | #endif 141 | -------------------------------------------------------------------------------- /darknet/src/softmax_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "softmax_layer.h" 2 | #include "blas.h" 3 | 
#include "cuda.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | softmax_layer make_softmax_layer(int batch, int inputs, int groups) 12 | { 13 | assert(inputs % groups == 0); 14 | fprintf( 15 | stderr, "softmax %4d\n", inputs); 16 | softmax_layer l; 17 | l.type = SOFTMAX; 18 | l.batch = batch; 19 | l.groups = groups; 20 | l.inputs = inputs; 21 | l.outputs = inputs; 22 | l.loss = (float*)calloc(inputs * batch, sizeof(float)); 23 | l.output = (float*)calloc(inputs * batch, sizeof(float)); 24 | l.delta = (float*)calloc(inputs * batch, sizeof(float)); 25 | l.cost = (float*)calloc(1, sizeof(float)); 26 | 27 | l.forward = forward_softmax_layer; 28 | l.backward = backward_softmax_layer; 29 | #ifdef DKGPU 30 | l.forward_gpu = forward_softmax_layer_gpu; 31 | l.backward_gpu = backward_softmax_layer_gpu; 32 | 33 | l.output_gpu = cuda_make_array(l.output, inputs * batch); 34 | l.loss_gpu = cuda_make_array(l.loss, inputs * batch); 35 | l.delta_gpu = cuda_make_array(l.delta, inputs * batch); 36 | #endif 37 | return l; 38 | } 39 | 40 | void forward_softmax_layer(const softmax_layer l, network net) 41 | { 42 | if (l.softmax_tree) 43 | { 44 | int i; 45 | int count = 0; 46 | for (i = 0; i < l.softmax_tree->groups; ++i) 47 | { 48 | int group_size = l.softmax_tree->group_size[i]; 49 | softmax_cpu(net.input + count, 50 | group_size, 51 | l.batch, 52 | l.inputs, 53 | 1, 54 | 0, 55 | 1, 56 | l.temperature, 57 | l.output + count); 58 | count += group_size; 59 | } 60 | } 61 | else 62 | { 63 | softmax_cpu(net.input, 64 | l.inputs / l.groups, 65 | l.batch, 66 | l.inputs, 67 | l.groups, 68 | l.inputs / l.groups, 69 | 1, 70 | l.temperature, 71 | l.output); 72 | } 73 | 74 | if (net.truth) 75 | { 76 | softmax_x_ent_cpu(l.batch * l.inputs, l.output, net.truth, l.delta, l.loss); 77 | l.cost[0] = sum_array(l.loss, l.batch * l.inputs); 78 | } 79 | } 80 | 81 | void backward_softmax_layer(const softmax_layer l, network net) 82 | { 83 | axpy_cpu(l.inputs * l.batch, 1, l.delta, 1, net.delta, 1); 84 | } 85 | 86 | #ifdef DKGPU 87 | 88 | void pull_softmax_layer_output(const softmax_layer layer) 89 | { 90 | cuda_pull_array(layer.output_gpu, layer.output, layer.inputs * layer.batch); 91 | } 92 | 93 | void forward_softmax_layer_gpu(const softmax_layer l, network net) 94 | { 95 | if (l.softmax_tree) 96 | { 97 | softmax_tree(net.input_gpu, 98 | 1, 99 | l.batch, 100 | l.inputs, 101 | l.temperature, 102 | l.output_gpu, 103 | *l.softmax_tree); 104 | /* 105 | int i; 106 | int count = 0; 107 | for (i = 0; i < l.softmax_tree->groups; ++i) { 108 | int group_size = l.softmax_tree->group_size[i]; 109 | softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 110 | 1, l.temperature, l.output_gpu + count); count += group_size; 111 | } 112 | */ 113 | } 114 | else 115 | { 116 | if (l.spatial) 117 | { 118 | softmax_gpu(net.input_gpu, 119 | l.c, 120 | l.batch * l.c, 121 | l.inputs / l.c, 122 | l.w * l.h, 123 | 1, 124 | l.w * l.h, 125 | 1, 126 | l.output_gpu); 127 | } 128 | else 129 | { 130 | softmax_gpu(net.input_gpu, 131 | l.inputs / l.groups, 132 | l.batch, 133 | l.inputs, 134 | l.groups, 135 | l.inputs / l.groups, 136 | 1, 137 | l.temperature, 138 | l.output_gpu); 139 | } 140 | } 141 | if (net.truth) 142 | { 143 | softmax_x_ent_gpu(l.batch * l.inputs, 144 | l.output_gpu, 145 | net.truth_gpu, 146 | l.delta_gpu, 147 | l.loss_gpu); 148 | if (l.softmax_tree) 149 | { 150 | mask_gpu(l.batch * l.inputs, l.delta_gpu, SECRET_NUM, net.truth_gpu, 0); 151 | mask_gpu(l.batch * l.inputs, l.loss_gpu, SECRET_NUM, 
net.truth_gpu, 0); 152 | } 153 | cuda_pull_array(l.loss_gpu, l.loss, l.batch * l.inputs); 154 | l.cost[0] = sum_array(l.loss, l.batch * l.inputs); 155 | } 156 | } 157 | 158 | void backward_softmax_layer_gpu(const softmax_layer layer, network net) 159 | { 160 | axpy_gpu(layer.batch * layer.inputs, 1, layer.delta_gpu, 1, net.delta_gpu, 1); 161 | } 162 | 163 | #endif 164 | -------------------------------------------------------------------------------- /darknet/src/tree.cpp: -------------------------------------------------------------------------------- 1 | #include "tree.h" 2 | #include <stdio.h> 3 | #include <stdlib.h> 4 | #include "data.h" 5 | #include "utils.h" 6 | 7 | void change_leaves(tree *t, char *leaf_list) 8 | { 9 | list *llist = get_paths(leaf_list); 10 | char **leaves = (char **)list_to_array(llist); 11 | int n = llist->size; 12 | int i, j; 13 | int found = 0; 14 | for (i = 0; i < t->n; ++i) 15 | { 16 | t->leaf[i] = 0; 17 | for (j = 0; j < n; ++j) 18 | { 19 | if (0 == strcmp(t->name[i], leaves[j])) 20 | { 21 | t->leaf[i] = 1; 22 | ++found; 23 | break; 24 | } 25 | } 26 | } 27 | fprintf(stderr, "Found %d leaves.\n", found); 28 | } 29 | 30 | float get_hierarchy_probability(float *x, tree *hier, int c, int stride) 31 | { 32 | float p = 1; 33 | while (c >= 0) 34 | { 35 | p = p * x[c * stride]; 36 | c = hier->parent[c]; 37 | } 38 | return p; 39 | } 40 | 41 | void hierarchy_predictions( 42 | float *predictions, int n, tree *hier, int only_leaves, int stride) 43 | { 44 | int j; 45 | for (j = 0; j < n; ++j) 46 | { 47 | int parent = hier->parent[j]; 48 | if (parent >= 0) 49 | { 50 | predictions[j * stride] *= predictions[parent * stride]; 51 | } 52 | } 53 | if (only_leaves) 54 | { 55 | for (j = 0; j < n; ++j) 56 | { 57 | if (!hier->leaf[j]) predictions[j * stride] = 0; 58 | } 59 | } 60 | } 61 | 62 | int hierarchy_top_prediction(float *predictions, 63 | tree *hier, 64 | float thresh, 65 | int stride) 66 | { 67 | float p = 1; 68 | int group = 0; 69 | int i; 70 | while (1) 71 | { 72 | float max = 0; 73 | int max_i = 0; 74 | 75 | for (i = 0; i < hier->group_size[group]; ++i) 76 | { 77 | int index = i + hier->group_offset[group]; 78 | float val = predictions[(i + hier->group_offset[group]) * stride]; 79 | if (val > max) 80 | { 81 | max_i = index; 82 | max = val; 83 | } 84 | } 85 | if (p * max > thresh) 86 | { 87 | p = p * max; 88 | group = hier->child[max_i]; 89 | if (hier->child[max_i] < 0) return max_i; 90 | } 91 | else if (group == 0) 92 | { 93 | return max_i; 94 | } 95 | else 96 | { 97 | return hier->parent[hier->group_offset[group]]; 98 | } 99 | } 100 | return 0; 101 | } 102 | 103 | tree *read_tree(char *filename) 104 | { 105 | tree t = {0}; 106 | FILE *fp = fopen(filename, "r"); 107 | 108 | char *line; 109 | int last_parent = -1; 110 | int group_size = 0; 111 | int groups = 0; 112 | int n = 0; 113 | while ((line = fgetl(fp)) != 0) 114 | { 115 | char *id = (char *)calloc(256, sizeof(char)); 116 | int parent = -1; 117 | sscanf(line, "%s %d", id, &parent); 118 | t.parent = (int *)realloc(t.parent, (n + 1) * sizeof(int)); 119 | t.parent[n] = parent; 120 | 121 | t.child = (int *)realloc(t.child, (n + 1) * sizeof(int)); 122 | t.child[n] = -1; 123 | 124 | t.name = (char **)realloc(t.name, (n + 1) * sizeof(char *)); 125 | t.name[n] = id; 126 | if (parent != last_parent) 127 | { 128 | ++groups; 129 | t.group_offset = (int *)realloc(t.group_offset, groups * sizeof(int)); 130 | t.group_offset[groups - 1] = n - group_size; 131 | t.group_size = (int *)realloc(t.group_size, groups * sizeof(int)); 132 | t.group_size[groups 
- 1] = group_size; 133 | group_size = 0; 134 | last_parent = parent; 135 | } 136 | t.group = (int *)realloc(t.group, (n + 1) * sizeof(int)); 137 | t.group[n] = groups; 138 | if (parent >= 0) 139 | { 140 | t.child[parent] = groups; 141 | } 142 | ++n; 143 | ++group_size; 144 | } 145 | ++groups; 146 | t.group_offset = (int *)realloc(t.group_offset, groups * sizeof(int)); 147 | t.group_offset[groups - 1] = n - group_size; 148 | t.group_size = (int *)realloc(t.group_size, groups * sizeof(int)); 149 | t.group_size[groups - 1] = group_size; 150 | t.n = n; 151 | t.groups = groups; 152 | t.leaf = (int *)calloc(n, sizeof(int)); 153 | int i; 154 | for (i = 0; i < n; ++i) t.leaf[i] = 1; 155 | for (i = 0; i < n; ++i) 156 | if (t.parent[i] >= 0) t.leaf[t.parent[i]] = 0; 157 | 158 | fclose(fp); 159 | tree *tree_ptr = (tree *)calloc(1, sizeof(tree)); 160 | *tree_ptr = t; 161 | // error(0); 162 | return tree_ptr; 163 | } 164 | -------------------------------------------------------------------------------- /darknet/src/upsample_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "upsample_layer.h" 2 | #include "blas.h" 3 | #include "cuda.h" 4 | 5 | #include <stdio.h> 6 | 7 | layer make_upsample_layer(int batch, int w, int h, int c, int stride) 8 | { 9 | layer l; 10 | l.type = UPSAMPLE; 11 | l.batch = batch; 12 | l.w = w; 13 | l.h = h; 14 | l.c = c; 15 | l.out_w = w * stride; 16 | l.out_h = h * stride; 17 | l.out_c = c; 18 | if (stride < 0) 19 | { 20 | stride = -stride; 21 | l.reverse = 1; 22 | l.out_w = w / stride; 23 | l.out_h = h / stride; 24 | } 25 | l.stride = stride; 26 | l.outputs = l.out_w * l.out_h * l.out_c; 27 | l.inputs = l.w * l.h * l.c; 28 | l.delta = (float *)calloc(l.outputs * batch, sizeof(float)); 29 | l.output = (float *)calloc(l.outputs * batch, sizeof(float)); 30 | ; 31 | 32 | l.forward = forward_upsample_layer; 33 | l.backward = backward_upsample_layer; 34 | #ifdef DKGPU 35 | l.forward_gpu = forward_upsample_layer_gpu; 36 | l.backward_gpu = backward_upsample_layer_gpu; 37 | 38 | l.delta_gpu = cuda_make_array(l.delta, l.outputs * batch); 39 | l.output_gpu = cuda_make_array(l.output, l.outputs * batch); 40 | #endif 41 | if (l.reverse) 42 | fprintf(stderr, 43 | "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", 44 | stride, 45 | w, 46 | h, 47 | c, 48 | l.out_w, 49 | l.out_h, 50 | l.out_c); 51 | else 52 | fprintf(stderr, 53 | "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", 54 | stride, 55 | w, 56 | h, 57 | c, 58 | l.out_w, 59 | l.out_h, 60 | l.out_c); 61 | return l; 62 | } 63 | 64 | void resize_upsample_layer(layer *l, int w, int h) 65 | { 66 | l->w = w; 67 | l->h = h; 68 | l->out_w = w * l->stride; 69 | l->out_h = h * l->stride; 70 | if (l->reverse) 71 | { 72 | l->out_w = w / l->stride; 73 | l->out_h = h / l->stride; 74 | } 75 | l->outputs = l->out_w * l->out_h * l->out_c; 76 | l->inputs = l->h * l->w * l->c; 77 | l->delta = (float *)realloc(l->delta, l->outputs * l->batch * sizeof(float)); 78 | l->output = 79 | (float *)realloc(l->output, l->outputs * l->batch * sizeof(float)); 80 | 81 | #ifdef DKGPU 82 | cuda_free(l->output_gpu); 83 | cuda_free(l->delta_gpu); 84 | l->output_gpu = cuda_make_array(l->output, l->outputs * l->batch); 85 | l->delta_gpu = cuda_make_array(l->delta, l->outputs * l->batch); 86 | #endif 87 | } 88 | 89 | void forward_upsample_layer(const layer l, network net) 90 | { 91 | fill_cpu(l.outputs * l.batch, 0, l.output, 1); 92 | if (l.reverse) 93 | { 94 | upsample_cpu(l.output, 95 | l.out_w, 96 | l.out_h, 97 | l.c, 98 | 
l.batch, 99 | l.stride, 100 | 0, 101 | l.scale, 102 | net.input); 103 | } 104 | else 105 | { 106 | upsample_cpu( 107 | net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output); 108 | } 109 | } 110 | 111 | void backward_upsample_layer(const layer l, network net) 112 | { 113 | if (l.reverse) 114 | { 115 | upsample_cpu(l.delta, 116 | l.out_w, 117 | l.out_h, 118 | l.c, 119 | l.batch, 120 | l.stride, 121 | 1, 122 | l.scale, 123 | net.delta); 124 | } 125 | else 126 | { 127 | upsample_cpu( 128 | net.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta); 129 | } 130 | } 131 | 132 | #ifdef DKGPU 133 | void forward_upsample_layer_gpu(const layer l, network net) 134 | { 135 | fill_gpu(l.outputs * l.batch, 0, l.output_gpu, 1); 136 | if (l.reverse) 137 | { 138 | upsample_gpu(l.output_gpu, 139 | l.out_w, 140 | l.out_h, 141 | l.c, 142 | l.batch, 143 | l.stride, 144 | 0, 145 | l.scale, 146 | net.input_gpu); 147 | } 148 | else 149 | { 150 | upsample_gpu(net.input_gpu, 151 | l.w, 152 | l.h, 153 | l.c, 154 | l.batch, 155 | l.stride, 156 | 1, 157 | l.scale, 158 | l.output_gpu); 159 | } 160 | } 161 | 162 | void backward_upsample_layer_gpu(const layer l, network net) 163 | { 164 | if (l.reverse) 165 | { 166 | upsample_gpu(l.delta_gpu, 167 | l.out_w, 168 | l.out_h, 169 | l.c, 170 | l.batch, 171 | l.stride, 172 | 1, 173 | l.scale, 174 | net.delta_gpu); 175 | } 176 | else 177 | { 178 | upsample_gpu(net.delta_gpu, 179 | l.w, 180 | l.h, 181 | l.c, 182 | l.batch, 183 | l.stride, 184 | 0, 185 | l.scale, 186 | l.delta_gpu); 187 | } 188 | } 189 | #endif 190 | --------------------------------------------------------------------------------