├── matrix.h ├── inits.h ├── typedef_tree.h ├── col2im.h ├── typedefs.h ├── im2col.h ├── yolo_detector.h ├── maxpool_layer.h ├── tree.h ├── yolo_detector_test.cpp ├── list.h ├── softmax_layer.h ├── connected_layer.h ├── batchnorm_layer.h ├── col2im.cpp ├── README.md ├── data.cpp ├── option_list.h ├── region_layer.h ├── blas.h ├── box.h ├── utilities.h ├── convolutional_layer.h ├── list.cpp ├── gemm.h ├── parser.h ├── softmax_layer.cpp ├── im2col.cpp ├── data.h ├── network.h ├── network.cpp ├── batchnorm_layer.cpp ├── timer.h ├── maxpool_layer.cpp ├── image.h ├── sysarr.cpp ├── activations.cpp ├── box.cpp ├── inits.cpp ├── activations.h ├── tree.cpp ├── blas.cpp ├── option_list.cpp ├── layer.h ├── utilities.cpp ├── yolo_detector.cpp ├── connected_layer.cpp ├── convolutional_layer.cpp ├── gemm2.cpp ├── region_layer.cpp ├── image.cpp ├── parser.cpp └── stb_image_write.h /matrix.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Matrix header file 3 | //======================================================================== 4 | // @brief: struct type definition 5 | 6 | #ifndef SRC_MATRIX_H_ 7 | #define SRC_MATRIX_H_ 8 | 9 | // row, column, and data 10 | typedef struct matrix 11 | { 12 | int row; 13 | int cols; 14 | float **vals; 15 | } matrix; 16 | 17 | #endif /* SRC_MATRIX_H_ */ 18 | -------------------------------------------------------------------------------- /inits.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Inits header file 3 | //======================================================================== 4 | // @brief: function prototype definition 5 | 6 | #ifndef SRC_INITS_H_ 7 | #define SRC_INITS_H_ 8 | 9 | #include 10 | #include 11 | 12 | #include "layer.h" 13 | #include "network.h" 14 | 15 | // layer init 16 | void init_layer(layer &l); 17 | 18 | #endif /* SRC_INITS_H_ */ 19 | -------------------------------------------------------------------------------- /typedef_tree.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Typedef_tree header file 3 | //======================================================================== 4 | // @brief: struct type definition 5 | 6 | #ifndef SRC_TYPEDEF_TREE_H_ 7 | #define SRC_TYPEDEF_TREE_H_ 8 | 9 | // tree structure 10 | typedef struct tree 11 | { 12 | int *leaf; 13 | int n; 14 | int *parent; 15 | int *child; 16 | int *group; 17 | char **name; 18 | 19 | int groups; 20 | int *group_size; 21 | int *group_offset; 22 | } tree; 23 | 24 | #endif /* SRC_TYPEDEF_TREE_H_ */ 25 | -------------------------------------------------------------------------------- /col2im.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Col2im header file 3 | //======================================================================== 4 | // @brief: function prototype definition 5 | 6 | #ifndef SRC_COL2IM_H_ 7 | #define SRC_COL2IM_H_ 8 | 9 | #include 10 | #include 11 | 12 | // column to image: filters%batch == 0 13 | void col2img(float *c_col,float *c, int m, int n, int count, int batch); 14 | // column to image: filters%batch != 0 15 | void col2img_extra(float *c_col,float *c, int m, int n, int count, int batch); 16 | 17 | #endif /* SRC_COL2IM_H_ */ 18 | -------------------------------------------------------------------------------- /typedefs.h: -------------------------------------------------------------------------------- 1 | //=========================================================================== 2 | // typedefs.h 3 | //=========================================================================== 4 | // @brief: define bitwise variables & macros 5 | 6 | #ifndef LOADATA_H 7 | #define LOADATA_H 8 | 9 | #include 10 | #include 11 | 12 | #define TOT_WIDTH_IN 32 13 | #define INT_WIDTH_IN 8 14 | #define TOT_WIDTH_OUT 64 15 | #define INT_WIDTH_OUT 16 16 | 17 | typedef ap_fixed INPUT_32; 18 | typedef ap_fixed OUTPUT_64; 19 | 20 | typedef ap_int<16> bit16; 21 | typedef ap_int<32> bit32; 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /im2col.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Im2col header file 3 | //======================================================================== 4 | // @brief: function prototype & activate type definition 5 | 6 | #ifndef SRC_IM2COL_H_ 7 | #define SRC_IM2COL_H_ 8 | 9 | #include 10 | #include 11 | 12 | // image to column : filters%batch == 0 13 | void im2col(float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col); 14 | // image to column : filters%batch != 0 15 | void im2col_extra(float *data_im,int channels, int height, int width, int ksize, int stride, int pad, float* data_col); 16 | 17 | #endif /* SRC_IM2COL_H_ */ 18 | -------------------------------------------------------------------------------- /yolo_detector.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // yolo_detector header file 3 | //======================================================================== 4 | // @brief: function prototype definition 5 | 6 | #ifndef SRC_YOLO_DETECTOR_H_ 7 | #define SRC_YOLO_DETECTOR_H_ 8 | 9 | #include 10 | #include 11 | 12 | #include "timer.h" 13 | #include "network.h" 14 | #include "region_layer.h" 15 | #include "utilities.h" 16 | #include "parser.h" 17 | #include "box.h" 18 | #include "option_list.h" 19 | #include "data.h" 20 | #include "timer.h" 21 | 22 | void detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh); 23 | 24 | #endif /* SRC_YOLO_DETECTOR_H_ */ 25 | -------------------------------------------------------------------------------- /maxpool_layer.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Maxpooling header file 3 | //======================================================================== 4 | // @brief: function prototype definition 5 | 6 | #ifndef SRC_MAXPOOL_LAYER_H_ 7 | #define SRC_MAXPOOL_LAYER_H_ 8 | 9 | #include 10 | #include 11 | 12 | #include "image.h" 13 | #include "layer.h" 14 | #include "network.h" 15 | #include "inits.h" 16 | 17 | // redefine layer 18 | typedef layer maxpool_layer; 19 | 20 | // make maxpooling layer 21 | maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); 22 | // maxpooling top function 23 | void forward_maxpool_layer(const maxpool_layer l, network_state state); 24 | 25 | #endif /* SRC_MAXPOOL_LAYER_H_ */ 26 | -------------------------------------------------------------------------------- /tree.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Tree header file 3 | //======================================================================== 4 | // @brief: function prototype definition 5 | 6 | #ifndef SRC_TREE_H_ 7 | #define SRC_TREE_H_ 8 | 9 | #include 10 | #include 11 | 12 | #include "typedef_tree.h" 13 | #include "utilities.h" 14 | #include "data.h" 15 | 16 | // update prediction tree 17 | int hierarchy_top_prediction(float *predictions, tree *hier, float thresh); 18 | // build tree hierarchy 19 | void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves); 20 | // get probabilities 21 | float get_hierarchy_probability(float *x, tree *hier, int c); 22 | // read values 23 | tree *read_tree(char *filaname); 24 | 25 | #endif /* SRC_TREE_H_ */ 26 | -------------------------------------------------------------------------------- /yolo_detector_test.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // testbench.cpp 3 | //======================================================================== 4 | // @brief: testbench for yolo detector 5 | 6 | #include 7 | #include 8 | 9 | #include "yolo_detector.h" 10 | 11 | int main (int argc, char **argv) 12 | { 13 | /* 14 | // transfer value 15 | // argv[0]: yolo_detector_test 16 | // argv[1]: detect 17 | // argv[2]: cfg/yolo.cfg 18 | // argv[3]: yolo.weights 19 | // argv[4]: data/dog.jpg 20 | */ 21 | if (strcmp(argv[1],"detect") == 0) 22 | { 23 | float thresh = 0.24; 24 | char *filename = (argc > 4) ? argv[4] : 0; 25 | detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5); 26 | } 27 | else 28 | { 29 | printf("Invalid input, program stop..."); 30 | } 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /list.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // List header file 3 | //======================================================================== 4 | // @brief: function prototype & activate type definition 5 | 6 | #ifndef SRC_LIST_H_ 7 | #define SRC_LIST_H_ 8 | 9 | #include 10 | #include 11 | 12 | // linked list 13 | typedef struct node 14 | { 15 | void *val; 16 | struct node *next; 17 | struct node *prev; 18 | } node; 19 | 20 | typedef struct list 21 | { 22 | int size; 23 | node *front; 24 | node *back; 25 | } list; 26 | 27 | // make a new list 28 | list *make_list(); 29 | // insert a node to l->back 30 | void list_insert(list *l, void *val); 31 | // convert the list to a 2D array (array of pointer) 32 | void **list_to_array(list *l); 33 | // free the space allocated for the list 34 | void free_list(list *l); 35 | // free the node 36 | void free_node(node *n); 37 | 38 | #endif /* SRC_LIST_H_ */ 39 | -------------------------------------------------------------------------------- /softmax_layer.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Softmax layer header file 3 | //======================================================================== 4 | // @brief: function prototype definition 5 | 6 | #ifndef SRC_SOFTMAX_LAYER_H_ 7 | #define SRC_SOFTMAX_LAYER_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "blas.h" 16 | #include "layer.h" 17 | #include "network.h" 18 | #include "inits.h" 19 | 20 | // redefine layer 21 | typedef layer softmax_layer; 22 | 23 | // update softmax tree 24 | void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output); 25 | // make softmax layer 26 | softmax_layer make_softmax_layer(int batch, int inputs, int groups); 27 | // softmx layer top function 28 | void forward_softmax_layer(const softmax_layer l, network_state state); 29 | // backward softmax function 30 | void backward_softmax_layer(const softmax_layer l, network_state state); 31 | 32 | 33 | #endif /* SRC_SOFTMAX_LAYER_H_ */ 34 | -------------------------------------------------------------------------------- /connected_layer.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Connected layer header file 3 | //======================================================================== 4 | // @brief: function prototype definition 5 | 6 | #ifndef SRC_CONNECTED_LAYER_H_ 7 | #define SRC_CONNECTED_LAYER_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "batchnorm_layer.h" 15 | #include "utilities.h" 16 | #include "blas.h" 17 | #include "gemm.h" 18 | #include "activations.h" 19 | #include "layer.h" 20 | #include "network.h" 21 | #include "inits.h" 22 | 23 | // redefine layer 24 | typedef layer connected_layer; 25 | 26 | connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize); 27 | void forward_connected_layer(connected_layer layer, network_state state); 28 | void backward_connected_layer(connected_layer layer, network_state state); 29 | void update_connected_layer(connected_layer l, int batch, float learning_rate, float momentum, float decay); 30 | void denormalize_connected_layer(layer l); 31 | void statistics_connected_layer(layer l); 32 | 33 | #endif /* SRC_CONNECTED_LAYER_H_ */ 34 | -------------------------------------------------------------------------------- /batchnorm_layer.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Batchnorm header file 3 | //======================================================================== 4 | // @brief: function prototype definition 5 | 6 | #ifndef SRC_BATCHNORM_LAYER_H_ 7 | #define SRC_BATCHNORM_LAYER_H_ 8 | 9 | #include 10 | #include 11 | 12 | #include "blas.h" 13 | #include "layer.h" 14 | #include "image.h" 15 | #include "network.h" 16 | #include "convolutional_layer.h" 17 | 18 | // batchnorm layer top function 19 | void forward_batchnorm_layer(layer l, network_state state); 20 | // scale calculation for backward propagation 21 | void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); 22 | // mean calculation 23 | void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); 24 | // variance calculation 25 | void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); 26 | // barchnorm with delta 27 | void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); 28 | 29 | #endif /* SRC_BATCHNORM_LAYER_H_ */ 30 | -------------------------------------------------------------------------------- /col2im.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Col2im header file 3 | //======================================================================== 4 | // @brief: post-processing image data 5 | 6 | // column to image: filters%batch == 0 7 | void col2img(float *c_col,float *c, int m, int n, int count, int batch) 8 | { 9 | for(int k = 0; k < count; k++) 10 | { 11 | for(int i = 0; i < batch; i++) 12 | { 13 | for(int j = 0; j < n; j++) 14 | { 15 | c[j+k*n*batch+i*n] = c_col[k*n*batch+i+j*batch]; 16 | } 17 | } 18 | } 19 | } 20 | 21 | // column to image: filters%batch == 0 22 | void col2img_extra(float *c_col,float *c, int m, int n, int count, int batch) 23 | { 24 | for(int k = 0; k < count-1; k++) 25 | { 26 | for(int i = 0; i < batch; i++) 27 | { 28 | for(int j = 0; j < n; j++) 29 | { 30 | c[i*n+j+k*n*batch] = c_col[i+j*batch+k*n*batch]; 31 | } 32 | } 33 | } 34 | 35 | for(int i = 0; i < m%batch; i++) 36 | { 37 | for(int j = 0; j < n; j++) 38 | { 39 | c[i*n+j+(m/batch)*n*batch] = c_col[i+j*batch+(m/batch)*n*batch]; 40 | } 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Yolo Detector Tutorial 2 | 3 | This tutorial goes over how to write, build and run the software (C/C++) and Hardware (SDSoC) yolo detector Application. 4 | The design is revised after previous YOLO designed, with systolic array structure implemented, though the performance is not satisfying currently. It can be improved by rearranging buffers on-chip. It is strongly suggested to go over the previous design referred by the current one, before any change is committed. 5 | 6 | ## Designing an application 7 | 8 | This section is a general overview of how to write an application. 9 | 10 | ### Main 11 | [main](yolo_detector_test.cpp) interfaces with the top-level [gemm2] to be instantiated on the FPGA. 12 | 13 | ## Software 14 | 15 | The software emulation runs the hardware [gemm2] on the host CPU. This is useful 16 | for functional verification. The design is complied using gcc/g++ and uses a 17 | pure software flow. 18 | 19 | 20 | ## Hardware 21 | 22 | The hardware design can be built by SDSoC. First of all, SDSoC will call Vivado HLS to synthesize the hardware [gemm2] into RTL. 23 | Then SDSoC will create datamover and wrap up the whole design. This design currently runs well on SDSoC (Vivado) 2017.1. 24 | For more details of using SDSoC, please refer to UG1028: SDSoC Environment User Guide https://forums.xilinx.com/xlnx/attachments/xlnx/sdsoc/23/2/ug1028-sdsoc-getting-started.pdf 25 | 26 | -------------------------------------------------------------------------------- /data.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Data 3 | //======================================================================== 4 | // @brief: loading data function 5 | 6 | #include "data.h" 7 | 8 | pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; 9 | 10 | // get 80 labels(classes) and store them into 2D array 11 | char **get_labels(char *filename) 12 | { 13 | // get label list 14 | list *plist = get_paths(filename); 15 | 16 | /* 17 | // verify plist 18 | node * pnode = plist->front; 19 | int counter = 0; 20 | printf("name_list size: %d;\n",plist->size); 21 | while(pnode->next) 22 | { 23 | pnode = pnode->next; 24 | printf("name_list NO. %d: %s; \n",counter, (char *)pnode->val); 25 | counter++; 26 | } 27 | */ 28 | 29 | 30 | char **labels = (char **)list_to_array(plist); //??? 31 | free_list(plist); 32 | return labels; 33 | } 34 | 35 | // read each line from a file, return a list 36 | list *get_paths(char *filename) 37 | { 38 | char *line; 39 | FILE *file = fopen(filename, "r"); 40 | if(!file) 41 | { 42 | file_error(filename); 43 | } 44 | // make a new list 45 | list *lines = make_list(); 46 | // store every line (classes) into the list 47 | while((line=fgetl(file))) 48 | { 49 | list_insert(lines, line); 50 | } 51 | fclose(file); 52 | return lines; 53 | } 54 | 55 | -------------------------------------------------------------------------------- /option_list.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Option list header file 3 | //======================================================================== 4 | // @brief: function prototype & type definition 5 | 6 | #ifndef SRC_OPTION_LIST_H_ 7 | #define SRC_OPTION_LIST_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "utilities.h" 14 | #include "list.h" 15 | 16 | // key+value+number 17 | typedef struct 18 | { 19 | char *key; 20 | char *val; 21 | int used; 22 | } kvp; 23 | 24 | // function prototype 25 | // read cfg data, and build a list 26 | list *read_data_cfg(char *filename); 27 | // change "=" to "\n", and insert it into a list option 28 | // val stores the address of string of value 29 | int read_option(char *s, list *options); 30 | // insert value(*val) into list option 31 | void option_insert(list *l, char *key, char *val); 32 | // find specific key in list l 33 | char *option_find(list *l, char *key); 34 | // find specific strings 35 | char *option_find_str(list *l, char *key, char *def); 36 | // find specific ints 37 | int option_find_int_quiet(list *l, char *key, int def); 38 | int option_find_int(list *l, char *key, int def); 39 | // find specific flaots 40 | float option_find_float(list *l, char *key, float def); 41 | float option_find_float_quiet(list *l, char *key, float def); 42 | // find unused items 43 | void option_unused(list *l); 44 | 45 | #endif /* SRC_OPTION_LIST_H_ */ 46 | -------------------------------------------------------------------------------- /region_layer.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Region layer header file 3 | //======================================================================== 4 | // @brief: function prototype & activate type definition 5 | 6 | #ifndef SRC_REGION_LAYER_H_ 7 | #define SRC_REGION_LAYER_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "layer.h" 15 | #include "network.h" 16 | #include "box.h" 17 | #include "utilities.h" 18 | #include "blas.h" 19 | #include "activations.h" 20 | #include "region_layer.h" 21 | #include "inits.h" 22 | 23 | // make region layer 24 | layer make_region_layer(int batch, int h, int w, int n, int classes, int coords); 25 | // get bounding boxes 26 | void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh); 27 | // get bounding box (single) 28 | box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h); 29 | // region layer top function 30 | void forward_region_layer(const layer l, network_state state); 31 | // extra region classes 32 | void delta_region_class(float *output, float *delta, int index, int class_s, int classes, tree *hier, float scale, float *avg_cat); 33 | // extra region boxes 34 | float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale); 35 | 36 | #endif /* SRC_REGION_LAYER_H_ */ 37 | -------------------------------------------------------------------------------- /blas.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Blas header file 3 | //======================================================================== 4 | // @brief: function prototype definition 5 | 6 | #ifndef SRC_BLAS_H_ 7 | #define SRC_BLAS_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | // multiply some values in *X with ALPHA 17 | void scal_cpu(int N, float ALPHA, float *X, int INCX); 18 | // assign some values in *X with ALPHA 19 | void fill_cpu(int N, float ALPHA, float *X, int INCX); 20 | // mean calculation 21 | void mean_cpu(float *x, int batch, int filters, int spatical, float *mean); 22 | // variance calculation 23 | void variance_cpu(float *x, float *mean, int batch, int filters, int spatical,float *variance); 24 | // multiply some values in *X with ALPHA 25 | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); 26 | // array copy 27 | void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); 28 | // normalization with mean and variance 29 | void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); 30 | // scale an array 31 | void scale_cpu(int N, float ALPHA, float *X, int INCX); 32 | // flatten layer 33 | void flatten(float *x, int size, int layers, int batch, int forward); 34 | // softmax layer 35 | void softmax(float *input, int n, float temp, float *output); 36 | 37 | #endif /* SRC_BLAS_H_ */ 38 | -------------------------------------------------------------------------------- /box.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Box header file 3 | //======================================================================== 4 | // @brief: function prototype & special type definition 5 | 6 | #ifndef SRC_BOX_H_ 7 | #define SRC_BOX_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | // bounding box 14 | typedef struct box 15 | { 16 | float x; 17 | float y; 18 | float w; 19 | float h; 20 | } box; 21 | 22 | // distance of bounding boxes 23 | typedef struct dbox 24 | { 25 | float dx; 26 | float dy; 27 | float dw; 28 | float dh; 29 | } dbox; 30 | 31 | // box for sort 32 | typedef struct sortable_box 33 | { 34 | int index; 35 | int classes; 36 | float **probs; 37 | }sortable_box; 38 | 39 | // sort boxes 40 | void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh); 41 | // compare function for qsort 42 | int nms_comparator(const void *pa, const void *pb); 43 | // intersection/union 44 | float box_iou(box a, box b); 45 | // overlap area 46 | float box_intersection(box a, box b); 47 | // overlap length (width, height, etc.) 48 | // x1, x2 midpoint of the boxes 49 | float overlap(float x1, float w1, float x2, float w2); 50 | // union area = total - intersection 51 | float box_union(box a, box b); 52 | // select boxes contains a confidence larger than the threshhold 53 | void do_nms_obj(box *boxes, float **probs, int total, int classes, float thresh); 54 | // transfer float type to stuct box 55 | box float_to_box(float *f); 56 | 57 | #endif /* SRC_BOX_H_ */ 58 | -------------------------------------------------------------------------------- /utilities.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Utilities header file 3 | //======================================================================== 4 | // @brief: function prototype definition 5 | 6 | #ifndef SRC_UTILITIES_H_ 7 | #define SRC_UTILITIES_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "list.h" 19 | 20 | #define SECRET_NUM -1234 21 | #define TWO_PI 6.2831853071795864769252866 22 | 23 | // read files 24 | int *read_map(char *filename); 25 | // report file open error 26 | void file_error(char *s); 27 | // remove space, tab, and enter in a string 28 | void strip(char *s); 29 | // get one line from file 30 | char *fgetl(FILE *fp); 31 | // report malloc error 32 | void malloc_error(); 33 | // report specific error 34 | void error(const char *s); 35 | // free 2D array (array of pointer) 36 | void free_ptrs(void **ptrs, int n); 37 | // find the maximum value in an array, return its index 38 | int max_index(float *a, int n); 39 | // return a random number in the given range(min, max) 40 | float rand_uniform(float min, float max); 41 | // print function 42 | void print_statistics(float *a, int n); 43 | // mean value of array a 44 | float mean_array(float *a, int n); 45 | // sum of array a 46 | float sum_array(float *a, int n); 47 | // variance of array a 48 | float variance_array(float *a, int n); 49 | // mean squared error of array a 50 | float mse_array(float *a, int n); 51 | // difference of two squares 52 | float mag_array(float *a, int n); 53 | 54 | #endif /* SRC_UTILITIES_H_ */ 55 | -------------------------------------------------------------------------------- /convolutional_layer.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Convolutional layer header file 3 | //======================================================================== 4 | // @brief: function prototype definition 5 | 6 | #ifndef SRC_CONVOLUTIONAL_LAYER_H_ 7 | #define SRC_CONVOLUTIONAL_LAYER_H_ 8 | 9 | #include 10 | #include 11 | #include "sds_lib.h" 12 | 13 | #include "image.h" 14 | #include "activations.h" 15 | #include "layer.h" 16 | #include "network.h" 17 | #include "utilities.h" 18 | #include "batchnorm_layer.h" 19 | #include "im2col.h" 20 | #include "col2im.h" 21 | #include "blas.h" 22 | #include "gemm.h" 23 | #include "inits.h" 24 | #include "timer.h" 25 | 26 | // redefine struct layer 27 | typedef layer convolutional_layer; 28 | 29 | // build and configure convolutional layer 30 | convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride,\ 31 | int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam); 32 | // calculate workspace size for memory allocation 33 | size_t get_workspace_size(layer l); 34 | // convolutional layer top function 35 | void forward_convolutional_layer(const convolutional_layer layer, network_state state); 36 | // calculate output height 37 | int convolutional_out_height(convolutional_layer l); 38 | // calculate output_weight 39 | int convolutional_out_width(convolutional_layer l); 40 | // add bias to output values 41 | void add_bias(float *output, float *biases, int batch, int n, int size); 42 | // scale bias 43 | void scale_bias(float *output, float *scales, int batch, int n, int size); 44 | // swap values 45 | void swap_binary(convolutional_layer *l); 46 | 47 | #endif /* SRC_CONVOLUTIONAL_LAYER_H_ */ 48 | -------------------------------------------------------------------------------- /list.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // List 3 | //======================================================================== 4 | // @brief: linked list for reading parameters 5 | 6 | #include "list.h" 7 | 8 | // make an empty list, return a pointer 9 | list *make_list() 10 | { 11 | list *l = (list *)malloc(sizeof(list)); 12 | l->size = 0; 13 | l->front = 0; 14 | l->back = 0; 15 | 16 | return l; 17 | } 18 | 19 | // insert a new node into list *l with "value":*val 20 | void list_insert(list *l, void *val) 21 | { 22 | node *new_node = (node *)malloc(sizeof(node)); 23 | new_node->val = val; 24 | new_node->next = 0; 25 | // add new node to l->back 26 | if(!l->back) 27 | { // empty list 28 | l->front = new_node; 29 | new_node->prev = 0; 30 | } 31 | else 32 | { 33 | l->back->next = new_node; 34 | new_node->prev = l->back; 35 | } 36 | l->back = new_node; 37 | l->size++; //// 38 | } 39 | 40 | // convert a list to 2D array (***array of pointer***) 41 | void **list_to_array(list *l) 42 | { 43 | void **res = (void **)calloc(l->size, sizeof(void *)); 44 | int counter = 0; 45 | node *n = l->front; // first node in list l 46 | // convert the list 47 | while(n) 48 | { 49 | res[counter++] = n->val; // 50 | n = n->next; 51 | } 52 | return res; 53 | } 54 | 55 | // free memory allocated for the list 56 | void free_list(list *l) 57 | { 58 | free_node(l->front); // first node 59 | free(l); 60 | } 61 | 62 | // free node 63 | void free_node(node *n) 64 | { 65 | node *next; 66 | // free all nodes 67 | while(n) 68 | { 69 | next = n->next; 70 | free(n); 71 | n = next; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /gemm.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Gemm header file 3 | //======================================================================== 4 | // @brief: function prototype & macro definition 5 | 6 | #ifndef SRC_GEMM_H_ 7 | #define SRC_GEMM_H_ 8 | 9 | #include 10 | #include 11 | 12 | #include "typedefs.h" 13 | 14 | // data size 15 | #define SIZE_BATCH 16 16 | #define MAX_A 3*3*1024*SIZE_BATCH 17 | //#define MAX_A 1024*425 18 | #define MAX_B 210*210*16 19 | #define MAX_C 416*416*16 20 | // finter size 21 | #define SIZE_FILTER 3*3 22 | #define SIZE_FILTER_EXTRA 1*1 23 | #define MAX_FILTER_DEPTH 1024 24 | // line buffer size 25 | #define NUM_LINE_BUFFER 3 26 | #define SIZE_LINE_BUFFER 15*1024 27 | #define NUM_LINE_BUFFER_EXTRA 1 28 | // window buffer size 29 | #define NUM_WINDOW_BUFFER 3*3 30 | #define SIZE_WINDOW_BUFFER 1024 31 | #define NUM_WINDOW_BUFFER_EXTRA 1 32 | // systolic kernel size 33 | #define SystolicKernelSize 13 //greatest number the zc706 FPGA can hold: 13 34 | // data access pattern 35 | #pragma SDS data mem_attribute(A:PHYSICAL_CONTIGUOUS) 36 | #pragma SDS data mem_attribute(B:PHYSICAL_CONTIGUOUS) 37 | #pragma SDS data mem_attribute(C:PHYSICAL_CONTIGUOUS) 38 | #pragma SDS data access_pattern(A:SEQUENTIAL, B:SEQUENTIAL, C:SEQUENTIAL) 39 | #pragma SDS data copy(A[0:size_filter*SIZE_BATCH], B[0:(width+2*pad)*(height+2*pad)*channels], C[0:size_channel*SIZE_BATCH]) 40 | // gemm with filter size 3x3 41 | void gemm2(float A[MAX_A], float B[MAX_B], float C[MAX_C],int num_filter, int size_channel,int size_filter,\ 42 | int channels, int height, int width, int ksize, int pad); 43 | // extra gemm with filter size 1x1 44 | void gemm_extra2(float A[MAX_A], float B[MAX_B], float C[MAX_C],int size_channel,int size_filter,int ksize, 45 | INPUT_32 weights[SIZE_BATCH][SIZE_FILTER][MAX_FILTER_DEPTH],OUTPUT_64 output[SIZE_BATCH]); 46 | 47 | #endif /* SRC_GEMM_H_ */ 48 | -------------------------------------------------------------------------------- /parser.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Parser header file 3 | //======================================================================== 4 | // @brief: function prototype & type definition 5 | 6 | #ifndef SRC_PARSER_H_ 7 | #define SRC_PARSER_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "sds_lib.h" 14 | 15 | #include "network.h" 16 | #include "activations.h" 17 | #include "batchnorm_layer.h" 18 | #include "blas.h" 19 | #include "connected_layer.h" 20 | #include "convolutional_layer.h" 21 | #include "list.h" 22 | #include "maxpool_layer.h" 23 | #include "option_list.h" 24 | #include "parser.h" 25 | #include "region_layer.h" 26 | #include "utilities.h" 27 | #include "inits.h" 28 | 29 | // parameters 30 | typedef struct size_params{ 31 | int batch; 32 | int inputs; 33 | int h; 34 | int w; 35 | int c; 36 | int index; 37 | int time_steps; 38 | network net; 39 | } size_params; 40 | 41 | typedef struct section{ 42 | char *type; 43 | list *options; 44 | } section; 45 | 46 | // parser differnet layers 47 | maxpool_layer parse_maxpool(list *options, size_params params); 48 | void transpose_matrix(float *a, int rows, int cols); 49 | layer parse_region(list *options, size_params params); 50 | convolutional_layer parse_convolutional(list *options, size_params params); 51 | LAYER_TYPE string_to_layer_type(char * type); 52 | learning_rate_policy get_policy(char *s); 53 | void parse_net_options(list *options, network *net); 54 | void free_section(section *s); 55 | network parse_network_cfg(char *filename); 56 | // read data from file 57 | list *read_cfg(char *filename); 58 | // load weights for different layers 59 | void load_convolutional_weights(layer l, FILE *fp); 60 | void load_batchnorm_weights(layer l, FILE *fp); 61 | void load_connected_weights(layer l, FILE *fp, int transpose); 62 | void load_weights_upto(network *net, char *filename, int cutoff); 63 | // load weights top function 64 | void load_weights(network *net, char *filename); 65 | 66 | #endif /* SRC_PARSER_H_ */ 67 | -------------------------------------------------------------------------------- /softmax_layer.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Softmax layer 3 | //======================================================================== 4 | // @brief: softmax layer 5 | 6 | #include "softmax_layer.h" 7 | 8 | // update softmax tree 9 | void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output) 10 | { 11 | // 12 | for(int b = 0; b < batch; ++b) 13 | { 14 | int count = 0; 15 | for(int i = 0; i < hierarchy->groups; i++) 16 | { 17 | int group_size = hierarchy->group_size[i]; 18 | softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count); 19 | count += group_size; 20 | } 21 | } 22 | } 23 | 24 | // make softmax layer 25 | softmax_layer make_softmax_layer(int batch, int inputs, int groups) 26 | { 27 | assert(inputs%groups == 0); 28 | fprintf(stderr, "softmax %4d\n", inputs); 29 | 30 | softmax_layer l; 31 | init_layer(l); 32 | 33 | l.type = SOFTMAX; 34 | l.batch = batch; 35 | l.groups = groups; 36 | l.inputs = inputs; 37 | l.outputs = inputs; 38 | l.output = (float *)calloc(inputs*batch, sizeof(float)); 39 | l.delta = (float *)calloc(inputs*batch, sizeof(float)); 40 | 41 | l.forward = forward_softmax_layer; 42 | l.backward = backward_softmax_layer; 43 | 44 | return l; 45 | } 46 | 47 | // softmx layer top function 48 | void forward_softmax_layer(const softmax_layer l, network_state state) 49 | { 50 | int inputs = l.inputs / l.groups; 51 | int batch = l.batch * l.groups; 52 | if(l.softmax_tree) 53 | { 54 | softmax_tree(state.input, batch, inputs, l.temperature, l.softmax_tree, l.output); 55 | } 56 | else 57 | { 58 | for(int b = 0; b < batch; b++) 59 | { 60 | softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs); 61 | } 62 | } 63 | } 64 | 65 | // backward softmax function 66 | void backward_softmax_layer(const softmax_layer l, network_state state) 67 | { 68 | for(int i = 0; i < l.inputs*l.batch; i++) 69 | { 70 | state.delta[i] += l.delta[i]; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /im2col.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Im2col 3 | //======================================================================== 4 | // @brief: pre-processing image data 5 | 6 | #include "gemm.h" 7 | #include "im2col.h" 8 | 9 | // image to column : filters%batch == 0 10 | void im2col(float *data_im,int channels, int height, int width, int ksize, int stride, int pad, float* data_col) 11 | { 12 | int c,h,w; 13 | int height_col = height + 2*pad; 14 | int width_col = width + 2*pad; 15 | float temp; 16 | int step; 17 | for(c = 0; c < channels; c++) 18 | { 19 | for(h = 0; h < height_col; h++) 20 | { 21 | for(w = 0; w < width_col; w++) 22 | { 23 | 24 | //for those width mod SystolicKernelSize != 0 and comes to last few points in each column 25 | if( ( w - ( w - 2 ) % SystolicKernelSize + SystolicKernelSize ) > width_col ) 26 | step = ( width_col - 2 ) % SystolicKernelSize; 27 | else 28 | step = SystolicKernelSize; 29 | //pad 30 | if((w == 0) || (h == 0) || (w == width_col-1) || (h == height_col-1)) 31 | temp = 0; 32 | //read data 33 | else 34 | temp = data_im[c*width*height+(h-1)*width+(w-1)]; 35 | 36 | //first two channel are directly read into buffer, thus it is transferred directly 37 | if( w == 0 || w == 1 ) 38 | { 39 | data_col[h * width_col * channels + w * channels + c] = temp; 40 | } 41 | //deal with data other than first two channels: pls refer to report 42 | else 43 | data_col[h * width_col * channels + ( w - ( w - 2 ) % SystolicKernelSize ) * channels + step * c + ( w - 2 ) % SystolicKernelSize ] = temp; 44 | } 45 | } 46 | } 47 | } 48 | 49 | // image to column : filters%batch != 0 50 | void im2col_extra(float *data_im,int channels, int height, int width, int ksize, int stride, int pad, float* data_col) 51 | { 52 | int c,h,w; 53 | float temp; 54 | for(w = 0; w < width; w++) 55 | { 56 | for(h = 0; h < height; h++) 57 | { 58 | for(c = 0; c < channels; c++) 59 | { 60 | int index_col = (w+h*width)*channels+c; 61 | int index_im = c*width*height+h*width+w; 62 | data_col[index_col] = data_im[index_im]; 63 | } 64 | } 65 | } 66 | } 67 | 68 | -------------------------------------------------------------------------------- /data.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Data header file 3 | //======================================================================== 4 | // @brief: function prototype & struct type defination 5 | 6 | #ifndef SRC_DATA_H_ 7 | #define SRC_DATA_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "typedef_tree.h" 15 | #include "tree.h" 16 | #include "matrix.h" 17 | #include "list.h" 18 | #include "image.h" 19 | #include "utilities.h" 20 | 21 | // data struct 22 | typedef struct data 23 | { 24 | int w; 25 | int h; 26 | matrix X; 27 | matrix Y; 28 | int shallow; 29 | int *num_boxes; 30 | box **boxes; 31 | } data; 32 | 33 | // data type 34 | typedef enum 35 | { 36 | CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA,\ 37 | IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA,\ 38 | OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA 39 | } data_type; 40 | 41 | // loading struct 42 | typedef struct load_args 43 | { 44 | int threads; 45 | char **paths; 46 | char *path; 47 | int n; 48 | int m; 49 | char **labels; 50 | int h; 51 | int w; 52 | int out_w; 53 | int out_h; 54 | int nh; 55 | int nw; 56 | int num_boxes; 57 | int min; 58 | int max; 59 | int size; 60 | int classes; 61 | int background; 62 | int scale; 63 | float jitter; 64 | float angle; 65 | float aspect; 66 | float saturation; 67 | float exposure; 68 | float hue; 69 | data *d; 70 | image *im; 71 | image *resized; 72 | data_type type; 73 | tree *hierarchy; 74 | } load_args; 75 | 76 | // loading box labels 77 | typedef struct box_label 78 | { 79 | int id; 80 | float x; 81 | float y; 82 | float w; 83 | float h; 84 | float left; 85 | float right; 86 | float top; 87 | float bottom; 88 | } box_label; 89 | 90 | // get 80 labels(classes) and store them into 2D array 91 | char **get_labels(char *filename); 92 | // read each line from a file, return a list 93 | list *get_paths(char *filename); 94 | 95 | 96 | // static inline function 97 | // the compiler simply copy codes when it is invoked 98 | static inline float distance_from_edge (int x, int max) 99 | { 100 | int dx = (max/2) - x; 101 | if (dx < 0) 102 | { 103 | dx = -dx; 104 | } 105 | dx = (max/2) + 1 -dx; 106 | dx *= 2; 107 | float dis = (float)dx/(float)max; 108 | if(dis > 1) 109 | { 110 | dis = 1; 111 | } 112 | return dis; 113 | } 114 | 115 | #endif /* SRC_DATA_H_ */ 116 | -------------------------------------------------------------------------------- /network.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Network header file 3 | //======================================================================== 4 | // @brief: function prototype & type definition 5 | 6 | #ifndef SRC_NETWORK_H_ 7 | #define SRC_NETWORK_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "image.h" 14 | #include "data.h" 15 | #include "utilities.h" 16 | #include "blas.h" 17 | #include "tree.h" 18 | 19 | #include "layer.h" 20 | #include "connected_layer.h" 21 | #include "convolutional_layer.h" 22 | #include "region_layer.h" 23 | #include "batchnorm_layer.h" 24 | #include "maxpool_layer.h" 25 | #include "softmax_layer.h" 26 | 27 | // learning rate policy 28 | typedef enum 29 | { 30 | CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM 31 | } learning_rate_policy; 32 | 33 | // struct network 34 | typedef struct network 35 | { 36 | float *workspace; 37 | int n; 38 | int batch; 39 | int *seen; 40 | float epoch; 41 | int subdivisions; 42 | float momentum; 43 | float decay; 44 | layer *layers; // layers 45 | int outputs; // output sizes 46 | float *output; // output values 47 | learning_rate_policy policy; 48 | // 49 | float learning_rate; 50 | float gamma; 51 | float scale; 52 | float power; 53 | int time_steps; 54 | int step; 55 | int max_batches; 56 | float *scales; 57 | int *steps; 58 | int num_steps; 59 | int burn_in; 60 | // 61 | int adam; 62 | float B1; 63 | float B2; 64 | float eps; 65 | // 66 | int inputs; 67 | int h; 68 | int w; 69 | int c; 70 | int max_crop; 71 | int min_crop; 72 | float angle; 73 | float aspect; 74 | float exposure; 75 | float saturation; 76 | float hue; 77 | // 78 | int gpu_index; 79 | tree *hierarchy; 80 | } network; 81 | 82 | // network state 83 | typedef struct network_state 84 | { 85 | float *truth; 86 | float *input; 87 | float *delta; 88 | float *workspace; 89 | int train; 90 | int index; 91 | network net; 92 | } network_state; 93 | 94 | // make a new && empty network with n layers 95 | network make_network(int n); 96 | // calculate the size of network 97 | int get_network_output_size(network net); 98 | // get network netowrk 99 | float *get_network_output(network net); 100 | // set batch mode 101 | void set_batch_network(network *net, int b); 102 | // top prediction function 103 | float *network_predict(network net, float *input); 104 | // go through all network layers ************************ 105 | void forward_network(network net, network_state state); 106 | 107 | #endif /* SRC_NETWORK_H_ */ 108 | -------------------------------------------------------------------------------- /network.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Network 3 | //======================================================================== 4 | // @brief: network layer 5 | 6 | #include "network.h" 7 | 8 | // make a new && empty network with n layers 9 | network make_network(int n) 10 | { 11 | network net = {0}; 12 | net.n = n; 13 | net.layers = (layer *)calloc(net.n, sizeof(layer)); 14 | net.seen = (int *)calloc(1, sizeof(int)); // what's net.seen? 1 integer 15 | 16 | return net; 17 | } 18 | 19 | // get output size from the layer with type COST 20 | int get_network_output_size(network net) 21 | { 22 | int i; 23 | for (i = net.n - 1; i > 0; i--) 24 | { 25 | if (net.layers[i].type != COST) 26 | { 27 | break; 28 | } 29 | } 30 | return net.layers[i].outputs; 31 | } 32 | 33 | // get output from the layer with type COST 34 | float *get_network_output(network net) 35 | { 36 | int i; 37 | for (i = net.n - 1; i > 0; i--) 38 | { 39 | if (net.layers[i].type != COST) 40 | { 41 | break; 42 | } 43 | } 44 | return net.layers[i].output; 45 | } 46 | 47 | // set batch size for each layer in the network 48 | void set_batch_network(network *net, int b) 49 | { 50 | net->batch = b; 51 | for (int i = 0; i < net->n; i++) 52 | { 53 | net->layers[i].batch = b; 54 | } 55 | } 56 | 57 | // top forward function, return final output ??? 58 | float *network_predict(network net, float *input) 59 | { 60 | printf("network_predict.\n"); 61 | network_state state; 62 | state.net = net; 63 | state.index = 0; 64 | state.input = input; 65 | state.truth = 0; 66 | state.train = 0; 67 | state.delta = 0; 68 | // 69 | forward_network(net, state); 70 | float *out = get_network_output(net); 71 | 72 | return out; 73 | } 74 | 75 | // go through all network layers ************************ 76 | void forward_network(network net, network_state state) 77 | { 78 | state.workspace = net.workspace; 79 | for (int i = 0; i < net.n; i++) 80 | { 81 | //printf("predicting: layer NO. %d.\n",i); 82 | //Timer timer("the whole layer"); 83 | //timer.start(); 84 | state.index = i; 85 | layer l = net.layers[i]; 86 | // delta = 0 87 | if (l.delta) 88 | { 89 | //Timer timer2("scale_cpu"); 90 | //timer2.start(); 91 | //printf("l.delta\n"); 92 | scal_cpu(l.outputs * l.batch, 0, l.delta, 1); 93 | //printf("ch1\n"); 94 | //timer2.stop(); 95 | } 96 | //printf("ch2\n"); 97 | l.forward(l, state); 98 | state.input = l.output; 99 | //timer.stop(); 100 | } 101 | } 102 | 103 | -------------------------------------------------------------------------------- /batchnorm_layer.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Batchnorm 3 | //======================================================================== 4 | // @brief: batchnorm layer 5 | 6 | #include "batchnorm_layer.h" 7 | 8 | // batchnorm layer top function 9 | void forward_batchnorm_layer(layer l, network_state state) 10 | { 11 | normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w); 12 | 13 | scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w); 14 | } 15 | 16 | // scale calculation for backward propagation 17 | void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) 18 | { 19 | for(int f = 0; f < n; ++f) 20 | { 21 | float sum = 0; 22 | for(int b = 0; b < batch; ++b) 23 | { 24 | for(int i = 0; i < size; ++i) 25 | { 26 | int index = i + size*(f + n*b); 27 | sum += delta[index] * x_norm[index]; 28 | } 29 | } 30 | scale_updates[f] += sum; 31 | } 32 | } 33 | 34 | // mean calculation 35 | void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) 36 | { 37 | // 38 | for(int i = 0; i < filters; ++i) 39 | { 40 | mean_delta[i] = 0; 41 | for (int j = 0; j < batch; ++j) 42 | { 43 | for (int k = 0; k < spatial; ++k) 44 | { 45 | int index = j*filters*spatial + i*spatial + k; 46 | mean_delta[i] += delta[index]; 47 | } 48 | } 49 | mean_delta[i] *= (-1./sqrt(variance[i] + .00001f)); 50 | } 51 | } 52 | 53 | // variance calculation 54 | void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) 55 | { 56 | for(int i = 0; i < filters; ++i) 57 | { 58 | variance_delta[i] = 0; 59 | for(int j = 0; j < batch; ++j) 60 | { 61 | for(int k = 0; k < spatial; ++k) 62 | { 63 | int index = j*filters*spatial + i*spatial + k; 64 | variance_delta[i] += delta[index]*(x[index] - mean[i]); 65 | } 66 | } 67 | variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.)); 68 | } 69 | } 70 | 71 | // barchnorm with delta 72 | void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) 73 | { 74 | for(int j = 0; j < batch; ++j) 75 | { 76 | for(int f = 0; f < filters; ++f) 77 | { 78 | for(int k = 0; k < spatial; ++k) 79 | { 80 | int index = j*filters*spatial + f*spatial + k; 81 | delta[index] = delta[index] * 1./(sqrt(variance[f]) + .00001f) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); 82 | } 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /timer.h: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------- 2 | // Timer.h 3 | //--------------------------------------------------------- 4 | #ifndef __TIMER_H__ 5 | #define __TIMER_H__ 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define TIMER_ON 12 | 13 | //--------------------------------------------------------- 14 | // Timer is an object which helps profile programs using 15 | // the clock() function. 16 | // - By default, a timer is stopped when you instantiate it 17 | // and must be started manually 18 | // - Passing True to the constructor starts the timer when 19 | // it is constructed 20 | // - When the timer is destructed it prints stats to stdout 21 | //--------------------------------------------------------- 22 | class Timer { 23 | 24 | #ifdef TIMER_ON 25 | 26 | char binName[50]; 27 | unsigned nCalls; 28 | timeval ts_start; 29 | float totalTime; 30 | 31 | public: 32 | //------------------------------------------------------------------ 33 | // constructor 34 | //------------------------------------------------------------------ 35 | Timer (const char* Name="", bool On=false) { 36 | if (On) { 37 | // record the start time 38 | gettimeofday(&ts_start, NULL); 39 | nCalls = 1; 40 | } 41 | else { 42 | nCalls = 0; 43 | } 44 | totalTime = 0; 45 | strcpy(binName, Name); 46 | } 47 | 48 | //------------------------------------------------------------------ 49 | // destructor 50 | //------------------------------------------------------------------ 51 | ~Timer () { 52 | // on being destroyed, print the average and total time 53 | if (nCalls > 0) { 54 | printf ("%-20s: ", binName); 55 | printf ("%6d calls; ", nCalls); 56 | printf ("%7.3f msecs total time\n", 1000*totalTime); 57 | //printf ("%7.4f msecs average time;\n", 1000*totalTime/nCalls); 58 | } 59 | } 60 | 61 | //------------------------------------------------------------------ 62 | // start timer 63 | //------------------------------------------------------------------ 64 | void start() { 65 | // record start time 66 | gettimeofday(&ts_start, NULL); 67 | nCalls++; 68 | } 69 | 70 | //------------------------------------------------------------------ 71 | // stop timer 72 | //------------------------------------------------------------------ 73 | void stop() { 74 | // get current time, add elapsed time to totalTime 75 | timeval ts_curr; 76 | gettimeofday(&ts_curr, NULL); 77 | totalTime += float(ts_curr.tv_sec - ts_start.tv_sec) + 78 | float(ts_curr.tv_usec)*1e-6 - float(ts_start.tv_usec)*1e-6; 79 | } 80 | 81 | #else 82 | 83 | //-------------------------------------------------------------------- 84 | // all methods do nothing if TIMER_ON is not set 85 | //-------------------------------------------------------------------- 86 | public: 87 | Timer (const char* Name, bool On=true) {} 88 | void start() {} 89 | void stop() {} 90 | 91 | #endif 92 | }; 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /maxpool_layer.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Maxpooling 3 | //======================================================================== 4 | // @brief: maxpooling layer 5 | 6 | #include "maxpool_layer.h" 7 | 8 | // make maxpooling layer 9 | maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding) 10 | { 11 | maxpool_layer l; 12 | init_layer(l); 13 | 14 | l.type = MAXPOOL; 15 | l.batch = batch; 16 | l.h = h; 17 | l.w = w; 18 | l.c = c; 19 | l.pad = padding; 20 | l.out_w = (w + 2*padding)/stride; 21 | l.out_h = (h + 2*padding)/stride; 22 | l.out_c = c; 23 | l.outputs = l.out_h * l.out_w * l.out_c; 24 | l.inputs = h*w*c; 25 | l.size = size; 26 | l.stride = stride; 27 | int output_size = l.out_h * l.out_w * l.out_c * batch; 28 | //printf("\noutput_size: %d;\n",output_size); 29 | l.indexes = (int *)calloc(output_size, sizeof(int)); 30 | l.output = (float *)calloc(output_size, sizeof(float)); 31 | l.delta = (float *)calloc(output_size, sizeof(float)); 32 | l.forward = forward_maxpool_layer; 33 | //l.backward = backward_maxpool_layer; 34 | fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); 35 | 36 | return l; 37 | } 38 | 39 | // maxpooling top function 40 | void forward_maxpool_layer(const maxpool_layer l, network_state state) 41 | { 42 | int w_offset = -l.pad; 43 | int h_offset = -l.pad; 44 | 45 | int h = l.out_h; 46 | int w = l.out_w; 47 | int c = l.c; 48 | // 49 | for (int b = 0; b < l.batch; b++) 50 | { 51 | for (int k = 0; k < c; k++) 52 | { 53 | for (int i = 0; i < h; i++) 54 | { 55 | for (int j = 0; j < w; j++) 56 | { 57 | int out_index = j + w*(i + h*(k + c*b)); 58 | float max = -FLT_MAX; 59 | int max_i = -1; 60 | for (int n = 0; n < l.size; n++) 61 | { 62 | for (int m = 0; m < l.size; m++) 63 | { 64 | int cur_h = h_offset + i*l.stride + n; 65 | int cur_w = w_offset + j*l.stride + m; 66 | int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); 67 | int valid = (cur_h >= 0 && cur_h < l.h && cur_w >= 0 && cur_w < l.w); 68 | float val = (valid != 0) ? state.input[index] : -FLT_MAX; 69 | max_i = (val > max) ? index : max_i; 70 | max = (val > max) ? val : max; 71 | } 72 | } 73 | l.output[out_index] = max; 74 | l.indexes[out_index] = max_i; 75 | //printf("l.output[%d]:%f;\n",out_index,l.output[out_index]); 76 | } 77 | } 78 | } 79 | } 80 | /* 81 | // 82 | for (int x = 900; x < 1000; x++) 83 | { 84 | printf("state.input[%d]:%.12f; l.output[%d]:%.12f;\n",x,state.input[x],x,l.output[x]); 85 | } 86 | */ 87 | } 88 | 89 | -------------------------------------------------------------------------------- /image.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Image header file 3 | //======================================================================== 4 | // @brief: function prototype & struct type definition 5 | 6 | #ifndef SRC_IMAGE_H_ 7 | #define SRC_IMAGE_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "box.h" 16 | #include "utilities.h" 17 | #include "blas.h" 18 | 19 | #define LABEL_SIZE 8 20 | #define LABEL_TYPE 128 21 | 22 | // height, weight, channel and data 23 | typedef struct image 24 | { 25 | int h; 26 | int w; 27 | int c; 28 | float *data; 29 | } image; 30 | 31 | //====================================================================================== 32 | // Read && resize images 33 | //====================================================================================== 34 | // return 8*128*image 35 | // load labels 8(different size), 32~126 (different type) 36 | // store information&value of labels: w,h,c,*data 37 | image **load_alphabet(); 38 | // pass value 39 | image load_image_color(char *filename, int w, int h); 40 | // load image top function 41 | image load_image(char *filename, int w, int h, int c); 42 | // return im.data: w(width); h(height); z(depth,channel) 43 | image load_image_stb(char *filename, int channels); 44 | // make image top function 45 | image make_image(int w, int h, int c); 46 | // make an empty image 47 | image make_empty_image(int w, int h, int c); 48 | // resize the given image (w*h) 49 | image resize_image(image im, int w, int h); 50 | // pick up pixel in m.data: x - width, y - height, c - channel 51 | float get_pixel(image m, int x, int y, int c); 52 | // fetch extra pixels 53 | float get_pixel_extend(image m, int x, int y, int c); 54 | // check the validity of data && store data into image 55 | void set_pixel(image m, int x, int y, int c, float val); 56 | // add value to pixels 57 | void add_pixel(image m, int x, int y, int c, float val); 58 | 59 | //====================================================================================== 60 | // Draw detections & save etc. 61 | //====================================================================================== 62 | // draw detecting results 63 | void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **labels, int classes); 64 | // get label 65 | image get_label(image **characters, char *string, int size); 66 | // splite image 67 | image tile_images(image a, image b, int dx); 68 | // border/wrap up image 69 | image border_image(image a, int border); 70 | // copy image 71 | image copy_image(image p); 72 | // embed image (image data transmission) 73 | void embed_image(image source, image dest, int dx, int dy); 74 | // merge images 75 | void composite_image(image source, image dest, int dx, int dy); 76 | // get width of boxes 77 | void draw_box_width(image a, int x1, int y1, int x2, int y2,int w, float r, float g, float b); 78 | // draw one box 79 | void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); 80 | // draw labels 81 | void draw_label(image a, int r, int c, image label, const float *rgb); 82 | // get image color 83 | float get_color(int c, int x, int max); 84 | // display image 85 | void show_image(image p, const char *name); 86 | // save image top function 87 | void save_image(image p, const char *name); 88 | // rearrange the output image 89 | void save_image_png(image im, const char *name); 90 | // free allocated memory 91 | void free_image(image p); 92 | 93 | #endif /* SRC_IMAGE_H_ */ 94 | -------------------------------------------------------------------------------- /sysarr.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | using namespace std; 4 | 5 | const int N = 20; 6 | const int M = 5; 7 | 8 | void printArr(int A[N][N]) { 9 | for (int i = 0; i < N; i++) { 10 | for (int j = 0; j < N; j++) 11 | cout << setw(3) << A[i][j] << " "; 12 | cout << endl; 13 | } 14 | cout << endl; 15 | } 16 | /* 17 | void top2( int A[N][N], int B[N][N], int C[N][N] ){ 18 | 19 | #pragma HLS array_partition variable=A dim=0 20 | #pragma HLS array_partition variable=B dim=0 21 | #pragma HLS array_partition variable=C dim=0 22 | 23 | for( int i = 0; i < N; i++ ){ 24 | for( int j = 0; j < N; j++ ){ 25 | for( int k = 0; k < N; k++ ){ 26 | C[i][j] += A[i][k] * B[k][j]; 27 | } 28 | } 29 | } 30 | } 31 | */ 32 | 33 | void top(int A[N][N], int B[N][N], int C[N][N]) { 34 | 35 | int inA[M][M]; 36 | int inB[M][M]; 37 | 38 | #pragma HLS array_partition variable=inA dim=0 39 | #pragma HLS array_partition variable=inB dim=0 40 | #pragma HLS array_partition variable=A dim=1 41 | #pragma HLS array_partition variable=B dim=2 42 | #pragma HLS array_partition variable=C dim=0 43 | 44 | // initialization 45 | for (int i = 0; i < M; i++) { 46 | #pragma HLS pipeline 47 | for (int j = 0; j < M; j++) { 48 | inA[i][j] = 0; 49 | inB[i][j] = 0; 50 | } 51 | } 52 | 53 | for( int ii = 0; ii < N/M; ii++ ){ 54 | for( int jj = 0; jj < N/M; jj++ ){ 55 | 56 | for (int r = 0; r < N + 2 * M - 2; r++) { 57 | #pragma HLS pipeline 58 | // update data (i.e., reads data from previous PE) 59 | for (int i = 0; i < M; i++) 60 | for (int j = M - 1; j >= 1; j--) 61 | inA[i][j] = inA[i][j-1]; 62 | 63 | for (int i = M - 1; i >= 1; i--) 64 | for (int j = 0; j < M; j++) 65 | inB[i][j] = inB[i-1][j]; 66 | 67 | // read new data from inputs 68 | // not ok here! 69 | for (int i = 0; i < M; i++) { 70 | if (r >= i && r < i+N) 71 | inA[i][0] = A[i + ii * M][r-i]; 72 | else 73 | inA[i][0] = 0; 74 | } 75 | 76 | for (int j = 0; j < M; j++) { 77 | if (r >= j && r < j+N) 78 | inB[0][j] = B[r-j][j + jj * M]; 79 | else 80 | inB[0][j] = 0; 81 | } 82 | 83 | 84 | // PE 85 | for (int i = 0; i < M; i++) 86 | for (int j = 0; j < M; j++) 87 | C[i + ii * M][j + jj * M] += inA[i][j] * inB[i][j]; 88 | } 89 | 90 | } 91 | } 92 | 93 | } 94 | 95 | 96 | 97 | int main(void) { 98 | 99 | int A[N][N]; 100 | int B[N][N]; 101 | int C[N][N]; 102 | int O[N][N]; 103 | 104 | for (int i = 0; i < N; i++) { 105 | for (int j = 0; j < N; j++) { 106 | A[i][j] = i + j; 107 | B[i][j] = i - j; 108 | C[i][j] = 0; 109 | O[i][j] = 0; 110 | } 111 | } 112 | 113 | top(A, B, C); 114 | 115 | for (int i = 0; i < N; i++) 116 | for (int j = 0; j < N; j++) 117 | for (int r = 0; r < N; r++) 118 | O[i][j] += A[i][r] * B[r][j]; 119 | 120 | for (int i = 0; i < N; i++) { 121 | for (int j = 0; j < N; j++) { 122 | if (O[i][j] != C[i][j]) { 123 | cout << "Wrong value at (" << j << ", " << i << "): " << O[i][j] << " != " << C[i][j] << endl; 124 | return 1; 125 | } 126 | } 127 | } 128 | cout << "Success!!" << endl; 129 | 130 | return 0; 131 | } 132 | -------------------------------------------------------------------------------- /activations.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Activation 3 | //======================================================================== 4 | // @brief: activation layer 5 | 6 | #include "activations.h" 7 | 8 | // get activation type 9 | ACTIVATION get_activation(char *s) 10 | { 11 | if (strcmp(s, "logistic")==0) return LOGISTIC; 12 | if (strcmp(s, "loggy")==0) return LOGGY; 13 | if (strcmp(s, "relu")==0) return RELU; 14 | if (strcmp(s, "elu")==0) return ELU; 15 | if (strcmp(s, "relie")==0) return RELIE; 16 | if (strcmp(s, "plse")==0) return PLSE; 17 | if (strcmp(s, "hardtan")==0) return HARDTAN; 18 | if (strcmp(s, "lhtan")==0) return LHTAN; 19 | if (strcmp(s, "linear")==0) return LINEAR; 20 | if (strcmp(s, "ramp")==0) return RAMP; 21 | if (strcmp(s, "leaky")==0) return LEAKY; 22 | if (strcmp(s, "tanh")==0) return TANH; 23 | if (strcmp(s, "stair")==0) return STAIR; 24 | fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); 25 | 26 | return RELU; 27 | } 28 | 29 | // select activation type 30 | float activate(float x, ACTIVATION a) 31 | { 32 | switch(a) 33 | { 34 | case LINEAR: 35 | return linear_activate(x); 36 | case LOGISTIC: 37 | return logistic_activate(x); 38 | case LOGGY: 39 | return loggy_activate(x); 40 | case RELU: 41 | return relu_activate(x); 42 | case ELU: 43 | return elu_activate(x); 44 | case RELIE: 45 | return relie_activate(x); 46 | case RAMP: 47 | return ramp_activate(x); 48 | case LEAKY: 49 | return leaky_activate(x); 50 | case TANH: 51 | return tanh_activate(x); 52 | case PLSE: 53 | return plse_activate(x); 54 | case STAIR: 55 | return stair_activate(x); 56 | case HARDTAN: 57 | return hardtan_activate(x); 58 | case LHTAN: 59 | return lhtan_activate(x); 60 | } 61 | return 0; 62 | } 63 | 64 | // activate all layers 65 | void activate_array(float *x, const int n, const ACTIVATION a) 66 | { 67 | for (int i = 0; i < n; i++) 68 | { 69 | x[i] = activate(x[i], a); 70 | } 71 | } 72 | 73 | // select gradient type 74 | float gradient(float x, ACTIVATION a) 75 | { 76 | switch(a) 77 | { 78 | case LINEAR: 79 | return linear_gradient(x); 80 | case LOGISTIC: 81 | return logistic_gradient(x); 82 | case LOGGY: 83 | return loggy_gradient(x); 84 | case RELU: 85 | return relu_gradient(x); 86 | case ELU: 87 | return elu_gradient(x); 88 | case RELIE: 89 | return relie_gradient(x); 90 | case RAMP: 91 | return ramp_gradient(x); 92 | case LEAKY: 93 | return leaky_gradient(x); 94 | case TANH: 95 | return tanh_gradient(x); 96 | case PLSE: 97 | return plse_gradient(x); 98 | case STAIR: 99 | return stair_gradient(x); 100 | case HARDTAN: 101 | return hardtan_gradient(x); 102 | case LHTAN: 103 | return lhtan_gradient(x); 104 | } 105 | return 0; 106 | } 107 | 108 | // activate all layers 109 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) 110 | { 111 | for (int i = 0; i < n; i++) 112 | { 113 | delta[i] *= gradient(x[i], a); 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /box.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Box 3 | //======================================================================== 4 | // @brief: sort boxes according to the confidence 5 | 6 | #ifndef SRC_BOX_CPP_ 7 | #define SRC_BOX_CPP_ 8 | 9 | #include "box.h" 10 | 11 | // sort boxes 12 | void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh) 13 | { 14 | sortable_box *s = (sortable_box *)calloc(total, sizeof(sortable_box)); 15 | for (int i = 0; i < total; i++) 16 | { 17 | s[i].index = i; 18 | s[i].classes = 0; 19 | s[i].probs = probs; 20 | } 21 | for (int k = 0; k < classes; k++) 22 | { 23 | for (int i = 0; i < total; i++) 24 | { 25 | s[i].classes = k; 26 | } 27 | qsort(s, total, sizeof(sortable_box), nms_comparator); 28 | for (int i = 0; i < total; i++) 29 | { 30 | if (probs[s[i].index][k] == 0) 31 | { 32 | continue; 33 | } 34 | box a = boxes[s[i].index]; 35 | for (int j = i+1; j < total; j++) 36 | { 37 | box b = boxes[s[j].index]; 38 | if (box_iou(a, b) > thresh) 39 | { 40 | probs[s[j].index][k] = 0; 41 | } 42 | } 43 | } 44 | } 45 | free(s); 46 | } 47 | 48 | // compare function for qsort 49 | int nms_comparator(const void *pa, const void *pb) 50 | { 51 | sortable_box a = *(sortable_box *)pa; 52 | sortable_box b = *(sortable_box *)pb; 53 | float diff = a.probs[a.index][b.classes] - b.probs[b.index][b.classes]; 54 | if (diff < 0) return 1; 55 | else if (diff > 0) return -1; 56 | return 0; 57 | } 58 | 59 | // 60 | float box_iou(box a, box b) 61 | { 62 | return box_intersection(a, b)/box_union(a,b); 63 | } 64 | 65 | // overlap area 66 | float box_intersection(box a, box b) 67 | { 68 | float w = overlap(a.x, a.w, b.x, b.w); 69 | float h = overlap(a.y, a.h, b.y, b.h); 70 | if (w < 0 || h < 0) 71 | { 72 | return 0; 73 | } 74 | float area = w*h; 75 | return area; 76 | } 77 | 78 | // overlap length (width, height, etc.) 79 | // x1, x2 midpoint of the boxes 80 | float overlap(float x1, float w1, float x2, float w2) 81 | { 82 | float l1 = x1 - w1/2; 83 | float l2 = x2 - w2/2; 84 | float left = l1 > l2 ? l1 : l2; 85 | float r1 = x1 + w1/2; 86 | float r2 = x2 + w2/2; 87 | float right = r1 < r2 ? r1 : r2; 88 | 89 | return right - left; 90 | } 91 | 92 | // union area = total - intersection 93 | float box_union(box a, box b) 94 | { 95 | float i = box_intersection(a, b); 96 | float u = a.w*a.h + b.w*b.h - i; 97 | 98 | return u; 99 | } 100 | 101 | // select boxes contains a confidence larger than the threshhold 102 | void do_nms_obj(box *boxes, float **probs, int total, int classes, float thresh) 103 | { 104 | sortable_box *s = (sortable_box *)calloc(total, sizeof(sortable_box)); 105 | for (int i = 0; i < total; i++) 106 | { 107 | s[i].index = i; 108 | s[i].classes = classes; 109 | s[i].probs = probs; 110 | } 111 | qsort(s, total, sizeof(sortable_box), nms_comparator); 112 | for (int i = 0; i < total; i++) 113 | { 114 | if (probs[s[i].index][classes] == 0) 115 | { 116 | continue; 117 | } 118 | box a = boxes[s[i].index]; 119 | for (int j = i+1; j < total; j++) 120 | { 121 | box b = boxes[s[j].index]; 122 | if (box_iou(a, b) > thresh) 123 | { 124 | for (int k = 0; k < classes+1; k++) 125 | { 126 | probs[s[j].index][k] = 0; 127 | } 128 | } 129 | } 130 | } 131 | free(s); 132 | } 133 | 134 | // store value into box 135 | box float_to_box (float *f) 136 | { 137 | box b; 138 | b.x = f[0]; 139 | b.y = f[1]; 140 | b.w = f[2]; 141 | b.h = f[3]; 142 | 143 | return b; 144 | } 145 | 146 | #endif /* SRC_BOX_CPP_ */ 147 | -------------------------------------------------------------------------------- /inits.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Inits 3 | //======================================================================== 4 | // @brief: initilization of struct 5 | 6 | #include "inits.h" 7 | 8 | // init layer ***pass by reference 9 | void init_layer(layer &l) 10 | { 11 | /* 12 | l.type = 0; 13 | l.activation = 0; 14 | l.cost_type = 0; 15 | */ 16 | // 17 | l.forward = 0; 18 | l.backward = 0; 19 | l.update = 0; 20 | l.forward_gpu = 0; 21 | l.backward_gpu = 0; 22 | l.update_gpu = 0; 23 | 24 | // 25 | l.batch_normalize = 0; 26 | l.shorcut = 0; 27 | l.batch = 0; 28 | l.forced = 0; 29 | l.flipped = 0; 30 | l.inputs = 0; 31 | l.outputs = 0; 32 | l.truths = 0; 33 | l.h = 0; 34 | l.w = 0; 35 | l.c = 0; 36 | l.out_h = 0; 37 | l.out_w = 0; 38 | l. out_c = 0; 39 | l. n = 0; 40 | l. max_boxes = 0; 41 | l. groups = 0; 42 | l. size = 0; 43 | l. side = 0; 44 | l. stride = 0; 45 | l. reverse = 0; 46 | l. pad = 0; 47 | l. sqrt = 0; 48 | l. flip = 0; 49 | l. index = 0; 50 | l. binary = 0; 51 | l. xnor = 0; 52 | l. steps = 0; 53 | l. hidden = 0; 54 | l. dot = 0; 55 | l. angle = 0; 56 | l. jitter = 0; 57 | l. saturation = 0; 58 | l. exposure = 0; 59 | l. shift = 0; 60 | l. ratio = 0; 61 | l. softmax = 0; 62 | l. classes = 0; 63 | l. coords = 0; 64 | l. background = 0; 65 | l. rescore = 0; 66 | l. objectness = 0; 67 | l. does_cost = 0; 68 | l. joint = 0; 69 | l. noadjust = 0; 70 | l. reorg = 0; 71 | l. log = 0; 72 | // repeat in network ??? 73 | l. adam = 0; 74 | l. B1 = 0; 75 | l. B2 = 0; 76 | l. eps = 0; 77 | l. t = 0; 78 | // 79 | l. alpha = 0; 80 | l. belta = 0; 81 | l. kappa = 0; 82 | // 83 | l. coord_scale = 0; 84 | l. object_scale = 0; 85 | l. noobject_scale = 0; 86 | l. class_scale = 0; 87 | l. bias_match = 0; 88 | l. random = 0; 89 | l. thresh = 0; 90 | l. classfix = 0; 91 | l. absolute = 0; 92 | // 93 | l. dontload = 0; 94 | l. dontloadscales = 0; 95 | // 96 | l. temperature = 0; 97 | l. probability = 0; 98 | l. scale = 0; 99 | // 100 | l.cweights = 0; 101 | l.indexes = 0; 102 | l.input_layers = 0; 103 | l.input_sizes = 0; 104 | l. map = 0; 105 | l.rand = 0; 106 | l.cost = 0; 107 | l.state = 0; 108 | l.prev_state = 0; 109 | l.forgot_state = 0; 110 | l.forgot_delta = 0; 111 | l.state_delta = 0; 112 | // 113 | l.concat = 0; 114 | l.concat_delta = 0; 115 | // 116 | l.binary_weights = 0; 117 | // 118 | l.biases = 0; 119 | l.bias_updates = 0; 120 | // 121 | l.scales = 0; 122 | l.scale_updates = 0; 123 | // 124 | l.weights = 0; 125 | l.weight_updates = 0; 126 | // 127 | l.col_image = 0; 128 | l.delta = 0; 129 | l.output = 0; 130 | l.squared = 0; 131 | l.norms = 0; 132 | // 133 | l.spatial_mean = 0; 134 | l.mean = 0; 135 | l.variance = 0; 136 | // 137 | l.mean_delta = 0; 138 | l.variance_delta = 0; 139 | // 140 | l.rolling_mean = 0; 141 | l.rolling_variance = 0; 142 | // 143 | l.x = 0; 144 | l. x_norm = 0; 145 | l.m = 0; 146 | l.v = 0; 147 | // 148 | l.z_cpu = 0; 149 | l.r_cpu = 0; 150 | l.h_cpu = 0; 151 | // 152 | l.binary_input = 0; 153 | // 154 | l.input_layer = 0; 155 | l.self_layer = 0; 156 | l.output_layer = 0; 157 | // 158 | l.input_gate_layer = 0; 159 | l.state_gate_layer = 0; 160 | l.input_save_layer = 0; 161 | l.state_save_layer = 0; 162 | l.input_state_layer = 0; 163 | l.state_state_layer = 0; 164 | // 165 | l.input_z_layer = 0; 166 | l.state_z_layer = 0; 167 | // 168 | l.input_r_layer = 0; 169 | l.state_r_layer = 0; 170 | // 171 | l.input_h_layer = 0; 172 | l.state_h_layer = 0; 173 | // 174 | l.softmax_tree = 0; 175 | // 176 | l.workspace_size = 0; 177 | } 178 | 179 | 180 | -------------------------------------------------------------------------------- /activations.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Activation header file 3 | //======================================================================== 4 | // @brief: function prototype & activate type definition 5 | 6 | #ifndef SRC_ACTIVATIONS_H_ 7 | #define SRC_ACTIVATIONS_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | // activation type 15 | typedef enum 16 | { 17 | LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN 18 | } ACTIVATION; 19 | 20 | // get activation type 21 | ACTIVATION get_activation(char *s); 22 | // activation 23 | float activate(float x, ACTIVATION a); 24 | // activation in batch mode 25 | void activate_array(float *x, const int n, const ACTIVATION a); 26 | // add gradient 27 | float gradient(float x, ACTIVATION a); 28 | // add gradient in batch mode 29 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta); 30 | 31 | //activation helper function 32 | static inline float logistic_activate(float x) 33 | { 34 | return 1.0/(1.0 + exp(-x)); 35 | } 36 | static inline float logistic_gradient(float x) 37 | { 38 | return (1-x)*x; 39 | } 40 | static inline float stair_activate(float x) 41 | { 42 | int n = floor(x); 43 | if (n%2 == 0) 44 | { 45 | return floor(x/2.0); 46 | } 47 | else 48 | { 49 | return (x - n) + floor(x/2.0); 50 | } 51 | } 52 | static inline float hardtan_activate(float x) 53 | { 54 | if (x < -1) 55 | { 56 | return -1; 57 | } 58 | if (x > 1) 59 | { 60 | return 1; 61 | } 62 | return x; 63 | } 64 | static inline float linear_activate(float x) 65 | { 66 | return x; 67 | } 68 | static inline float loggy_activate(float x) 69 | { 70 | return 2.0/(1.0 + exp(-x)) - 1; 71 | } 72 | static inline float relu_activate(float x) 73 | { 74 | return x*(x>0); 75 | } 76 | static inline float elu_activate(float x) 77 | { 78 | return (x >= 0)*x + (x < 0)*(exp(x)-1); 79 | } 80 | static inline float relie_activate(float x) 81 | { 82 | return (x>0) ? x : 0.01*x; 83 | } 84 | static inline float ramp_activate(float x) 85 | { 86 | return x*(x>0)+0.1*x; 87 | } 88 | static inline float leaky_activate(float x) 89 | { 90 | return (x>0) ? x : 0.1*x; 91 | } 92 | static inline float tanh_activate(float x) 93 | { 94 | return (exp(2*x)-1)/(exp(2*x)+1); 95 | } 96 | static inline float plse_activate(float x) 97 | { 98 | if(x < -4) 99 | { 100 | return 0.01 * (x + 4); 101 | } 102 | if(x > 4) 103 | { 104 | return 0.01 * (x - 4) + 1; 105 | } 106 | return 0.125*x + .5; 107 | } 108 | static inline float lhtan_activate(float x) 109 | { 110 | if(x < 0) 111 | { 112 | return 0.001*x; 113 | } 114 | if(x > 1) 115 | { 116 | return 0.001*(x-1) + 1; 117 | } 118 | return x; 119 | } 120 | static inline float lhtan_gradient(float x) 121 | { 122 | if(x > 0 && x < 1) 123 | { 124 | return 1; 125 | } 126 | return 0.001; 127 | } 128 | static inline float hardtan_gradient(float x) 129 | { 130 | if (x > -1 && x < 1) 131 | { 132 | return 1; 133 | } 134 | return 0; 135 | } 136 | static inline float linear_gradient(float x) 137 | { 138 | return 1; 139 | } 140 | static inline float loggy_gradient(float x) 141 | { 142 | float y = (x+1.0)/2.0; 143 | return 2*(1-y)*y; 144 | } 145 | static inline float stair_gradient(float x) 146 | { 147 | if (floor(x) == x) 148 | { 149 | return 0; 150 | } 151 | return 1; 152 | } 153 | static inline float relu_gradient(float x) 154 | { 155 | return (x>0); 156 | } 157 | static inline float elu_gradient(float x) 158 | { 159 | return (x >= 0) + (x < 0)*(x + 1); 160 | } 161 | static inline float relie_gradient(float x) 162 | { 163 | return (x>0) ? 1 : 0.01; 164 | } 165 | static inline float ramp_gradient(float x) 166 | { 167 | return (x>0)+0.1; 168 | } 169 | static inline float leaky_gradient(float x) 170 | { 171 | return (x>0) ? 1 : 0.1; 172 | } 173 | static inline float tanh_gradient(float x) 174 | { 175 | return 1-x*x; 176 | } 177 | static inline float plse_gradient(float x) 178 | { 179 | return (x < 0 || x > 1) ? 0.01 : 0.125; 180 | } 181 | 182 | #endif /* SRC_ACTIVATIONS_H_ */ 183 | -------------------------------------------------------------------------------- /tree.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Tree 3 | //======================================================================== 4 | // @brief: update tree of probabilities 5 | 6 | #include "tree.h" 7 | 8 | // update prediction tree 9 | int hierarchy_top_prediction(float *predictions, tree *hier, float thresh) 10 | { 11 | float p = 1; 12 | int group = 0; 13 | while(1) 14 | { 15 | float max = 0; 16 | int max_i = 0; 17 | 18 | for(int i = 0; i < hier->group_size[group]; i++) 19 | { 20 | int index = i + hier->group_offset[group]; 21 | float val = predictions[i + hier->group_offset[group]]; 22 | if(val > max) 23 | { 24 | max_i = index; 25 | max = val; 26 | } 27 | } 28 | if(p*max > thresh) 29 | { 30 | p = p*max; 31 | group = hier->child[max_i]; 32 | if(hier->child[max_i] < 0) 33 | { 34 | return max_i; 35 | } 36 | } 37 | else 38 | { 39 | return hier->parent[hier->group_offset[group]]; 40 | } 41 | } 42 | return 0; 43 | } 44 | 45 | // build tree hierarchy 46 | void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves) 47 | { 48 | for (int j = 0; j < n; j++) 49 | { 50 | int parent = hier->parent[j]; 51 | if(parent >= 0) 52 | { 53 | predictions[j] *= predictions[parent]; 54 | } 55 | } 56 | if (only_leaves) 57 | { 58 | for (int j = 0; j < n; j++) 59 | { 60 | if (!hier->leaf[j]) 61 | { 62 | predictions[j] = 0; 63 | } 64 | } 65 | } 66 | } 67 | 68 | // get probabilities 69 | float get_hierarchy_probability(float *x, tree *hier, int c) 70 | { 71 | float p = 1; 72 | while (c >= 0) 73 | { 74 | p *= x[c]; 75 | c = hier->parent[c]; 76 | } 77 | return p; 78 | } 79 | 80 | // read values 81 | tree *read_tree(char *filename) 82 | { 83 | tree t = {0}; 84 | FILE *file = fopen(filename, "r"); 85 | char *line; 86 | int last_parent = -1; 87 | int group_size = 0; 88 | int groups = 0; 89 | int n = 0; 90 | // 91 | while ((line = fgetl(file)) != 0) 92 | { 93 | char *id = (char *)calloc(256, sizeof(char)); 94 | int parent = -1; 95 | // read dat afrom string line 96 | sscanf(line, "%s %d", id, &parent); 97 | t.parent = (int *)realloc(t.parent, (n+1)*sizeof(int)); 98 | t.parent[n] = parent; 99 | 100 | t.child = (int *)realloc(t.child, (n+1)*sizeof(int)); 101 | t.child[n] = -1; 102 | 103 | t.name = (char **)realloc(t.name, (n+1)*sizeof(char *)); 104 | t.name[n] = id; 105 | 106 | if(parent != last_parent) 107 | { 108 | groups++; 109 | t.group_offset = (int *)realloc(t.group_offset, groups * sizeof(int)); 110 | t.group_offset[groups - 1] = n - group_size; 111 | t.group_size = (int *)realloc(t.group_size, groups * sizeof(int)); 112 | t.group_size[groups - 1] = group_size; 113 | group_size = 0; 114 | last_parent = parent; 115 | } 116 | t.group = (int *)realloc(t.group, (n+1)*sizeof(int)); 117 | t.group[n] = groups; 118 | if (parent >= 0) 119 | { 120 | t.child[parent] = groups; 121 | } 122 | n++; 123 | group_size++; 124 | } 125 | groups++; 126 | // 127 | t.group_offset = (int *)realloc (t.group_offset, groups * sizeof(int)); 128 | t.group_offset[groups - 1] = n - group_size; 129 | t.group_size = (int *)realloc(t.group_size, groups * sizeof(int)); 130 | t.group_size[groups - 1] = group_size; 131 | t.n = n; 132 | t.groups = groups; 133 | t. leaf = (int *)calloc(n, sizeof(int)); 134 | // 135 | for (int i = 0; i < n; i++) 136 | { 137 | t.leaf[i] = 1; 138 | } 139 | for (int i = 0; i < n; i++) 140 | { 141 | if (t.parent[i] >= 0) 142 | { 143 | t.leaf[t.parent[i]] = 0; 144 | } 145 | } 146 | // 147 | fclose(file); 148 | tree *tree_ptr = (tree *)calloc(1, sizeof(tree)); 149 | *tree_ptr = t; 150 | return tree_ptr; 151 | } 152 | -------------------------------------------------------------------------------- /blas.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Blas 3 | //======================================================================== 4 | // @brief: helper function for barchnorm layer 5 | 6 | #include "blas.h" 7 | 8 | // multiply some values in *X with ALPHA 9 | void scal_cpu(int N, float ALPHA, float *X, int INCX) 10 | { 11 | for (int i = 0; i < N; i++) 12 | { 13 | X[i] = 0; 14 | } 15 | } 16 | 17 | // assign some values in *X with ALPHA 18 | void fill_cpu(int N, float ALPHA, float *X, int INCX) 19 | { 20 | for (int i = 0; i < N; i++) 21 | { 22 | X[i] = 0; 23 | } 24 | } 25 | 26 | // calculation about *mean 27 | void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) 28 | { 29 | float scale = 1.0/(batch * spatial); 30 | // 31 | for (int i = 0; i < filters; i++) 32 | { 33 | mean[i] = 0; 34 | for (int j = 0; j < batch; j++) 35 | { 36 | for (int k = 0; k < spatial; k++) 37 | { 38 | int index = j*filters*spatial + i*spatial + k; 39 | mean[i] += x[index]; 40 | } 41 | } 42 | mean[i] *= scale; 43 | } 44 | } 45 | 46 | // calculation about *variance 47 | void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) 48 | { 49 | float scale = 1.0/(batch * spatial - 1); 50 | // 51 | for (int i = 0; i < filters; i++) 52 | { 53 | variance[i] = 0; 54 | for (int j = 0; j < batch; j++) 55 | { 56 | for (int k = 0; k < spatial; k++) 57 | { 58 | int index = j*filters*spatial + i*spatial + k; 59 | variance[i] += pow((x[index] - mean[i]), 2); 60 | } 61 | } 62 | variance[i] *= scale; 63 | } 64 | } 65 | 66 | // multiply some values in *X with ALPHA 67 | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) 68 | { 69 | for (int i = 0; i < N; i++) 70 | { 71 | Y[i*INCY] += ALPHA*X[i*INCX]; 72 | } 73 | } 74 | 75 | // array copy 76 | void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) 77 | { 78 | for (int i = 0; i < N; i++) 79 | { 80 | Y[i*INCY] = X[i*INCX]; 81 | } 82 | } 83 | 84 | // normalization with mean and variance 85 | void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) 86 | { 87 | for (int j = 0; j < batch; j++) 88 | { 89 | for (int k = 0; k < filters; k++) 90 | { 91 | float p = sqrt(variance[k])+0.000001f; 92 | for (int i = 0; i < spatial; i++) 93 | { 94 | int index = j*filters*spatial + k*spatial + i; 95 | x[index] = (x[index] - mean[k])/p; 96 | //x[index] *= scales[k]; 97 | //x[index] += bias[k]; 98 | } 99 | } 100 | } 101 | } 102 | 103 | // scale an array 104 | void scale_cpu(int N, float ALPHA, float *X, int INCX) 105 | { 106 | for (int i = 0; i < N; i++) 107 | { 108 | X[i*INCX] *= ALPHA; 109 | } 110 | } 111 | 112 | // flatten layer 113 | void flatten(float *x, int size, int layers, int batch, int forward) 114 | { 115 | float *swap = (float *)calloc(size*layers*batch, sizeof(float)); 116 | // 117 | for(int b = 0; b < batch; ++b) 118 | { 119 | for(int c = 0; c < layers; ++c) 120 | { 121 | for(int i = 0; i < size; ++i) 122 | { 123 | int i1 = b*layers*size + c*size + i; 124 | int i2 = b*layers*size + i*layers + c; 125 | if (forward) 126 | { 127 | swap[i2] = x[i1]; 128 | } 129 | else 130 | { 131 | swap[i1] = x[i2]; 132 | } 133 | } 134 | } 135 | } 136 | memcpy(x, swap, size*layers*batch*sizeof(float)); 137 | free(swap); 138 | } 139 | 140 | // softmax layer 141 | void softmax(float *input, int n, float temp, float *output) 142 | { 143 | float sum = 0; 144 | float largest = -FLT_MAX; 145 | for(int i = 0; i < n; i++) 146 | { 147 | if(input[i] > largest) 148 | { 149 | largest = input[i]; 150 | } 151 | } 152 | for(int i = 0; i < n; i++) 153 | { 154 | float e = exp(input[i]/temp - largest/temp); 155 | sum += e; 156 | output[i] = e; 157 | } 158 | for(int i = 0; i < n; i++) 159 | { 160 | output[i] /= sum; 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /option_list.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Option list 3 | //======================================================================== 4 | // @brief: read and compare parameters 5 | 6 | #include "option_list.h" 7 | 8 | // read cfg data 9 | list *read_data_cfg(char *filename) 10 | { 11 | FILE *file = fopen(filename, "r"); 12 | if (file == 0) 13 | { 14 | file_error(filename); 15 | } 16 | char *line; 17 | list *options = make_list(); 18 | int nu = 0; 19 | // get each line 20 | while((line=fgetl(file)) != 0) 21 | { 22 | nu++; 23 | //printf("nu:%d; line: %s;\n",nu,line); 24 | strip(line); 25 | switch(line[0]) 26 | { 27 | case '\0': 28 | case '#': 29 | case ';': 30 | { 31 | free(line); 32 | break; 33 | } 34 | default: 35 | { 36 | if (!read_option(line, options)) 37 | { 38 | fprintf(stderr,"Config file error line %d, could parse: %s\n",nu,line); 39 | free(line); 40 | } 41 | break; 42 | } 43 | } 44 | } 45 | fclose(file); 46 | return options; 47 | } 48 | 49 | // change "=" to "\n", and insert it into a list option 50 | // val stores the address of string of value 51 | int read_option(char *s, list *options) 52 | { 53 | size_t i; 54 | size_t len = strlen(s); 55 | char *val=0; 56 | // split the string s within "=" 57 | for (i = 0; i < len; i++) 58 | { 59 | if (s[i] == '=') 60 | { 61 | s[i] = '\0'; 62 | val = s+i+1; 63 | break; 64 | } 65 | } 66 | // 67 | if(i == len-1) 68 | { // no value for this key, insert failed: return 0 69 | return 0; 70 | } 71 | char *key = s; 72 | option_insert(options, key, val); 73 | // successfully insert key&value into option: return 1 74 | return 1; 75 | } 76 | 77 | // insert value(*val) into list option 78 | void option_insert(list *l, char *key, char *val) 79 | { 80 | kvp *p = (kvp *)malloc(sizeof(kvp)); 81 | p->key = key; 82 | p->val = val; 83 | p->used = 0; 84 | list_insert(l, p); 85 | } 86 | 87 | // check specific strings (keys) 88 | char *option_find_str(list *l, char *key, char *def) 89 | { 90 | char *v = option_find(l, key); 91 | if (v) 92 | { // 93 | return v; 94 | } 95 | if (def) 96 | { // use default cfg 97 | fprintf(stderr, "%s: Using default '%s' \n", key, def); 98 | } 99 | return def; 100 | } 101 | 102 | //traverse the list l 103 | char *option_find (list *l, char *key) 104 | { 105 | node *n = l->front; 106 | // traverse the list from the first node l->front 107 | while(n) 108 | { 109 | kvp *p = (kvp *)n->val; 110 | if (strcmp(p->key, key) == 0) 111 | { 112 | p->used = 1; 113 | return p->val; 114 | } 115 | n = n->next; 116 | } 117 | // no match key found, return 0 118 | return 0; 119 | } 120 | 121 | // ??? 122 | void option_unused (list *l) 123 | { 124 | node *n = l->front; 125 | // traverse the list from the first node l->front 126 | while(n) 127 | { 128 | kvp *p = (kvp *)n->val; 129 | if(!p->used) 130 | { 131 | fprintf(stderr, "Unused field: '%s' = '%s'\n", p->key, p->val); 132 | } 133 | n = n->next; 134 | } 135 | } 136 | 137 | // find specific ints 138 | int option_find_int(list *l, char *key, int def) 139 | { 140 | char *v = option_find(l, key); 141 | if (v) 142 | { 143 | return atoi(v); 144 | } 145 | if (def) 146 | { 147 | fprintf(stderr, "%s: Using default '%d'\n", key, def); 148 | } 149 | return def; 150 | } 151 | 152 | // find specific ints 153 | int option_find_int_quiet(list *l, char *key, int def) 154 | { 155 | char *v = option_find(l, key); 156 | if (v) 157 | { 158 | return atoi(v); 159 | } 160 | return def; 161 | } 162 | 163 | // find specific floats 164 | float option_find_float(list *l, char *key, float def) 165 | { 166 | char *v = option_find(l, key); 167 | if (v) 168 | { 169 | return atof(v); 170 | } 171 | fprintf(stderr, "%s: Using default: '%lf'\n", key, def); 172 | return def; 173 | } 174 | 175 | // find specific floats 176 | float option_find_float_quiet(list *l, char *key, float def) 177 | { 178 | char *v = option_find(l, key); 179 | if (v) 180 | { 181 | return atof(v); 182 | } 183 | return def; 184 | } 185 | -------------------------------------------------------------------------------- /layer.h: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Layer header file 3 | //======================================================================== 4 | // @brief: function prototype & type definition 5 | 6 | #ifndef SRC_LAYER_H_ 7 | #define SRC_LAYER_H_ 8 | 9 | #include "activations.h" 10 | #include "stddef.h" 11 | #include "tree.h" 12 | 13 | // layer type 14 | typedef enum { 15 | CONVOLUTIONAL, 16 | DECONVOLUTIONAL, 17 | CONNECTED, 18 | MAXPOOL, 19 | SOFTMAX, 20 | DETECTION, 21 | DROPOUT, 22 | CROP, 23 | ROUTE, 24 | COST, 25 | NORMALIZATION, 26 | AVGPOOL, 27 | LOCAL, 28 | SHORTCUT, 29 | ACTIVE, 30 | RNN, 31 | GRU, 32 | CRNN, 33 | BATCHNORM, 34 | NETWORK, 35 | XNOR, 36 | REGION, 37 | REORG, 38 | BLANK 39 | } LAYER_TYPE; 40 | 41 | // ??? 42 | typedef enum{ 43 | SSE, MASKED, SMOOTH 44 | } COST_TYPE; 45 | 46 | typedef struct layer 47 | { 48 | LAYER_TYPE type; 49 | ACTIVATION activation; 50 | COST_TYPE cost_type; 51 | // 52 | void (*forward) (struct layer, struct network_state); 53 | void (*backward) (struct layer, struct network_state); 54 | void (*update) (struct layer, int, float, float, float); 55 | void (*forward_gpu) (struct layer, struct network_state); 56 | void (*backward_gpu) (struct layer, struct network_state); 57 | void (*update_gpu) (struct layer, int, float, float, float); 58 | // 59 | int batch_normalize; 60 | int shorcut; 61 | int batch; 62 | int forced; 63 | int flipped; 64 | int inputs; 65 | int outputs; // size of output 66 | int truths; 67 | int h; 68 | int w; 69 | int c; 70 | int out_h; 71 | int out_w; 72 | int out_c; 73 | int n; 74 | int max_boxes; 75 | int groups; 76 | int size; 77 | int side; 78 | int stride; 79 | int reverse; 80 | int pad; 81 | int sqrt; 82 | int flip; 83 | int index; 84 | int binary; 85 | int xnor; 86 | int steps; 87 | int hidden; 88 | float dot; 89 | float angle; 90 | float jitter; 91 | float saturation; 92 | float exposure; 93 | float shift; 94 | float ratio; 95 | int softmax; 96 | int classes; 97 | int coords; 98 | int background; 99 | int rescore; 100 | int objectness; 101 | int does_cost; 102 | int joint; 103 | int noadjust; 104 | int reorg; 105 | int log; 106 | // repeat in network ??? 107 | int adam; 108 | float B1; 109 | float B2; 110 | float eps; 111 | int t; 112 | // 113 | float alpha; 114 | float belta; 115 | float kappa; 116 | // 117 | float coord_scale; 118 | float object_scale; 119 | float noobject_scale; 120 | float class_scale; 121 | int bias_match; 122 | int random; 123 | float thresh; 124 | int classfix; 125 | int absolute; 126 | // 127 | int dontload; 128 | int dontloadscales; 129 | // 130 | float temperature; 131 | float probability; 132 | float scale; 133 | // 134 | char * cweights; 135 | int * indexes; 136 | int * input_layers; 137 | int * input_sizes; 138 | int * map; 139 | float * rand; 140 | float * cost; 141 | float * state; 142 | float * prev_state; 143 | float * forgot_state; 144 | float * forgot_delta; 145 | float * state_delta; 146 | // 147 | float * concat; 148 | float * concat_delta; 149 | // 150 | float * binary_weights; 151 | // 152 | float * biases; 153 | float * bias_updates; 154 | // 155 | float * scales; 156 | float * scale_updates; 157 | // 158 | float * weights; 159 | float * weight_updates; 160 | // 161 | float * col_image; 162 | float * delta; 163 | float * output; //output values 164 | float * squared; 165 | float * norms; 166 | // 167 | float * spatial_mean; 168 | float * mean; 169 | float * variance; 170 | // 171 | float * mean_delta; 172 | float * variance_delta; 173 | // 174 | float * rolling_mean; 175 | float * rolling_variance; 176 | // 177 | float * x; 178 | float * x_norm; 179 | float * m; 180 | float * v; 181 | // 182 | float * z_cpu; 183 | float * r_cpu; 184 | float * h_cpu; 185 | // 186 | float *binary_input; 187 | // 188 | struct layer * input_layer; 189 | struct layer * self_layer; 190 | struct layer * output_layer; 191 | // 192 | struct layer * input_gate_layer; 193 | struct layer * state_gate_layer; 194 | struct layer * input_save_layer; 195 | struct layer * state_save_layer; 196 | struct layer * input_state_layer; 197 | struct layer * state_state_layer; 198 | // 199 | struct layer * input_z_layer; 200 | struct layer * state_z_layer; 201 | // 202 | struct layer * input_r_layer; 203 | struct layer * state_r_layer; 204 | // 205 | struct layer * input_h_layer; 206 | struct layer * state_h_layer; 207 | // 208 | tree * softmax_tree; 209 | // 210 | size_t workspace_size; 211 | } layer; 212 | 213 | // free struct layer 214 | void free_layer(layer l); 215 | 216 | #endif /* SRC_LAYER_H_ */ 217 | -------------------------------------------------------------------------------- /utilities.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Utilities 3 | //======================================================================== 4 | // @brief: helper functions 5 | 6 | #include "utilities.h" 7 | 8 | // read file 9 | int *read_map(char *filename) 10 | { 11 | int n = 0; 12 | int *map = 0; 13 | char *str; 14 | FILE *file = fopen("filename", "r"); 15 | if(!file) 16 | { // open error 17 | file_error(filename); 18 | } 19 | while ((str=fgetl(file))) 20 | { 21 | n++; 22 | map = (int *)realloc(map, n*sizeof(int)); 23 | map[n-1] = atoi(str); 24 | } 25 | return map; 26 | } 27 | 28 | // report open file error 29 | void file_error(char *s) 30 | { 31 | fprintf(stderr, "Couldn't open file: %s\n", s); 32 | exit(0); // exit(0): normal exit 33 | } 34 | 35 | // remove space, tab, and enter from string 36 | void strip(char *s) 37 | { 38 | size_t len = strlen(s); 39 | size_t offset = 0; 40 | 41 | for (size_t i = 0; i < len; i++) 42 | { 43 | char c = s[i]; 44 | if (c == ' ' || c == '\t' || c == '\n') 45 | { 46 | offset++; 47 | } 48 | else 49 | { 50 | s[i-offset] = c; 51 | } 52 | } 53 | s[len-offset] = '\0'; 54 | } 55 | 56 | // read one line from file 57 | char *fgetl(FILE *file) 58 | { 59 | if (feof(file)) 60 | { // check end of file indicator 61 | return 0; 62 | } 63 | size_t size = 512; 64 | char *line = (char *)malloc(size * sizeof(char)); 65 | // read one line from file 66 | if (!fgets(line, size, file)) 67 | { // blank line 68 | free(line); 69 | return 0; 70 | } 71 | 72 | size_t curr = strlen(line); 73 | // verify the size of input line 74 | while (line[curr-1] != '\n' && !feof(file)) 75 | { // last char in line is '\0', size of line is 511 76 | if (curr == size - 1) 77 | { 78 | size *= 2; 79 | line = (char *)realloc(line, size * sizeof(char)); 80 | if(!line) 81 | { 82 | printf("Required size:%ld\n", size); 83 | malloc_error(); 84 | } 85 | } 86 | // read extra chars 87 | size_t readsize = size - curr; 88 | // check whether it overflow the maximum size 89 | // INT_MAX: 32767 (2^15-1) or greater* 90 | if (readsize > INT_MAX) 91 | { 92 | readsize = INT_MAX - 1; 93 | } 94 | // continue to read this line 95 | fgets(&line[curr], readsize, file); 96 | // update the current length read from file 97 | curr = strlen(line); 98 | } 99 | if (line[curr - 1] == '\n') 100 | { // final line in char - '\0' 101 | line[curr - 1] = '\0'; 102 | } 103 | 104 | return line; 105 | } 106 | 107 | // report malloc error 108 | void malloc_error() 109 | { 110 | fprintf(stderr, "Malloc error\n"); 111 | exit(-1); // exit(other numbers): abnormal exit 112 | } 113 | 114 | // report specific error 115 | void error(const char *s) 116 | { 117 | printf("%s\n",s); 118 | assert(0); 119 | exit(-1); 120 | } 121 | 122 | // free array of pointer 123 | void free_ptrs(void **ptrs, int n) 124 | { 125 | for (int i = 0; i < n; i++) 126 | { 127 | free(ptrs[i]); 128 | } 129 | free(ptrs); 130 | } 131 | 132 | // find the maximum value in an array, return its index 133 | int max_index(float *a, int n) 134 | { 135 | if(n <= 0) 136 | { 137 | return -1; 138 | } 139 | int max_index = 0; 140 | float temp = a[0]; 141 | // traverse the array 142 | for (int i = 1; i < n; i++) 143 | { 144 | if (a[i] > temp) 145 | { 146 | temp = a[i]; 147 | max_index = i; 148 | } 149 | } 150 | return max_index; 151 | } 152 | 153 | //return a random number in the given range(min, max) 154 | float rand_uniform(float min, float max) 155 | { 156 | if(max < min) 157 | { 158 | float temp = min; 159 | min = max; 160 | max = temp; 161 | } 162 | return ((float)rand()/RAND_MAX * (max-min)) + min; 163 | } 164 | 165 | // print function 166 | void print_statistics(float *a, int n) 167 | { 168 | float m = mean_array(a, n); 169 | float v = variance_array(a, n); 170 | printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), m, v); 171 | } 172 | 173 | // mean value of array a 174 | float mean_array(float *a, int n) 175 | { 176 | return sum_array(a, n)/n; 177 | } 178 | 179 | // sum of array a 180 | float sum_array(float *a, int n) 181 | { 182 | float sum = 0; 183 | for (int i = 0; i < n; i++) 184 | { 185 | sum += a[i]; 186 | } 187 | return sum; 188 | } 189 | 190 | // variance of array a 191 | float variance_array(float *a, int n) 192 | { 193 | float sum = 0; 194 | float mean = mean_array(a, n); 195 | for (int i = 0; i < n; i++) 196 | { 197 | sum += (a[i] - mean)*(a[i] - mean); 198 | } 199 | return sum/n; 200 | } 201 | 202 | // mean squared error of array a 203 | float mse_array(float *a, int n) 204 | { 205 | float sum = 0; 206 | for (int i = 0; i < n; i++) 207 | { 208 | sum += a[i] * a[i]; 209 | } 210 | return sqrt(sum/n); 211 | } 212 | 213 | // difference of two squares 214 | float mag_array(float *a, int n) 215 | { 216 | float sum = 0; 217 | for (int i = 0; i < n; i++) 218 | { 219 | sum += a[i] * a[i]; 220 | } 221 | return sqrt(sum); 222 | } 223 | -------------------------------------------------------------------------------- /yolo_detector.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // yolo_detector header file 3 | //======================================================================== 4 | // @brief: Application for detecting objects 5 | 6 | #include "yolo_detector.h" 7 | 8 | /* 9 | // transfer value 10 | // argv[0] - datacfg : cfg/coco.data 11 | // argv[1] - cfgfile : cfg/tiny-yolo.cfg 12 | // argv[2] - weightfile : tiny-yolo.weights 13 | // argv[3] - filename : data/dog.jpg 14 | // argv[4] - thresh : 0.24 15 | // argv[5] - hier_thresh : 0.5 16 | */ 17 | 18 | void detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh) 19 | { 20 | // load datacfg 21 | printf("datacfg:%s\n",datacfg); 22 | list *options = read_data_cfg(datacfg); 23 | 24 | /* 25 | // print options 26 | node *pnode = options->front; 27 | kvp *dis; 28 | int counter = 0; 29 | printf("Option size:%d\n",options->size); 30 | while (pnode) 31 | { 32 | dis = (kvp *)pnode->val; 33 | printf("NO. %d: (1)key: %s; (2)value: %s;\n",counter,(char*)dis->key,(char*)dis->val); 34 | counter++; 35 | pnode = pnode->next; 36 | } 37 | */ 38 | 39 | char *name_list = option_find_str(options, "names", "data/names.list"); 40 | //printf("name_list:%s\n",name_list); 41 | 42 | // name_list: data/coco.names 43 | char **names = get_labels(name_list); 44 | 45 | /* 46 | int size_names = sizeof(names); 47 | for (int i = 0; i < 80; i++) 48 | { 49 | printf("names NO. %d: %s;\n",i,names[i]); 50 | } 51 | */ 52 | // read labels 53 | image **alphabet = load_alphabet(); 54 | 55 | // load cfgfile 56 | network net = parse_network_cfg(cfgfile); 57 | 58 | // load weitht file 59 | if (weightfile) 60 | { 61 | load_weights(&net,weightfile); 62 | } 63 | 64 | // setup net.batch = 1 65 | set_batch_network(&net, 1); 66 | char buffer[255]; 67 | char *input = buffer; 68 | float nms = 0.4; 69 | 70 | // start timer 71 | Timer timer("yolo_detector"); 72 | 73 | while (1) 74 | { 75 | // copy image name 76 | if (filename) 77 | { 78 | strncpy (input,filename,256); 79 | } 80 | else 81 | { 82 | printf("Please enter image path: "); 83 | fflush(stdout); 84 | input = fgets(input, 256, stdin); 85 | if (!input) 86 | { 87 | return; 88 | } 89 | strtok(input,"\n"); 90 | } 91 | // 92 | image im = load_image_color(input,0,0); 93 | image sized = resize_image(im, net.w, net.h); 94 | //printf("sized.h:%d; sized.w:%d; sized.c:%d;\n",sized.h,sized.w,sized.c); 95 | /* 96 | // print input image 97 | for (int m = 0; m < 100; m++) 98 | { 99 | printf("im.data[%d]:%.12f;\n",m,im.data[m]); 100 | } 101 | */ 102 | // region layer 103 | layer l = net.layers[net.n-1]; 104 | // 105 | box *boxes = (box *)calloc(l.w * l.h * l.n, sizeof(box)); 106 | float **probs = (float **)calloc(l.w * l.h * l.n, sizeof(float *)); 107 | for (int i = 0; i < l.w*l.h*l.n; i++) 108 | { 109 | probs[i] = (float *)calloc(l.classes+1, sizeof(float)); // ??? 110 | } 111 | 112 | // 113 | float *X = sized.data; 114 | 115 | Timer timer("Total time"); 116 | // start prediction 117 | printf("Start prediction...\n"); 118 | timer.start(); 119 | network_predict(net,X); 120 | timer.stop(); 121 | printf("Prediction finishes!\n"); 122 | 123 | // draw region boxes 124 | printf("Getting region boxes...\n"); 125 | get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0, hier_thresh); 126 | /* 127 | // verify **probs 128 | float sum = 0.0; 129 | printf("max_i:%d; max_j:%d;\n",l.w * l.h * l.n,l.classes+1); 130 | for (int i = 0; i < l.w * l.h * l.n; i++) 131 | { 132 | for (int j = 0; j < l.classes+1; j++) 133 | { 134 | if (probs[i][j] != 0) 135 | { 136 | printf("probs[%d][%d]:%.12f;\n",i,j,probs[i][j]); 137 | } 138 | sum += probs[i][j]; 139 | } 140 | } 141 | printf("sum:%.12f;\n",sum); 142 | */ 143 | // 144 | if (l.softmax_tree && nms) 145 | { 146 | //printf("Enter 111111\n"); 147 | do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms); 148 | } 149 | else if (nms) 150 | { 151 | //printf("Enter 222222\n"); 152 | do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms); 153 | } 154 | /* 155 | // verify **probs 156 | float sum = 0.0; 157 | printf("max_i:%d; max_j:%d;\n",l.w * l.h * l.n,l.classes+1); 158 | for (int i = 0; i < l.w * l.h * l.n; i++) 159 | { 160 | for (int j = 0; j < l.classes+1; j++) 161 | { 162 | if (probs[i][j] != 0) 163 | { 164 | printf("probs[%d][%d]:%.12f;\n",i,j,probs[i][j]); 165 | } 166 | sum += probs[i][j]; 167 | } 168 | } 169 | printf("sum:%.12f;\n",sum); 170 | */ 171 | 172 | 173 | printf("Start draw predictions...\n"); 174 | draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes); 175 | printf("Save & print images...\n"); 176 | save_image(im, "predictions"); 177 | show_image(im, "predictions"); 178 | //printf("ch0\n"); 179 | 180 | // free memory 181 | free_image(im); 182 | free_image(sized); 183 | //printf("ch1\n"); 184 | free_ptrs((void **)probs, l.w*l.h*l.n); 185 | //printf("ch2\n"); 186 | free(boxes); 187 | //printf("ch3\n"); 188 | // where did we modify the value filename to jump out of the while loop????? 189 | if (filename) 190 | { 191 | break; 192 | } 193 | } 194 | printf("Exit program.\n"); 195 | } 196 | 197 | 198 | -------------------------------------------------------------------------------- /connected_layer.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Connected layer 3 | //======================================================================== 4 | // @brief: connected layer 5 | 6 | #include "connected_layer.h" 7 | 8 | // make connected layer 9 | connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize) 10 | { 11 | connected_layer l; 12 | init_layer(l); 13 | // 14 | l.type = CONNECTED; 15 | l.inputs = inputs; 16 | l.outputs = outputs; 17 | l.batch = batch; 18 | l.batch_normalize = batch_normalize; 19 | // 20 | l.h = 1; 21 | l.w = 1; 22 | l.c = inputs; 23 | l.out_h = 1; 24 | l.out_w = 1; 25 | l.out_c = outputs; 26 | // 27 | l.output = (float *)calloc(batch*outputs, sizeof(float)); 28 | l.delta = (float *)calloc(batch*outputs, sizeof(float)); 29 | // 30 | l.weight_updates = (float *)calloc(inputs*outputs, sizeof(float)); 31 | l.bias_updates = (float *)calloc(inputs*outputs, sizeof(float)); 32 | // 33 | l.weights = (float *)calloc(outputs*inputs, sizeof(float)); 34 | l.biases = (float *)calloc(outputs, sizeof(float)); 35 | // function pointers 36 | l.forward = forward_connected_layer; 37 | l.backward = backward_connected_layer; 38 | l.update = update_connected_layer; 39 | // 40 | float scale = sqrt(2.0/inputs); 41 | for (int i = 0; i < outputs*inputs; i++) 42 | { 43 | l.weights[i] = scale * rand_uniform(-1,1); 44 | } 45 | // 46 | for (int i = 0; i < outputs; i++) 47 | { 48 | l.biases[i] = 0; 49 | } 50 | // 51 | if (batch_normalize) 52 | { 53 | l.scales = (float *)calloc(outputs, sizeof(float)); 54 | l.scale_updates = (float *)calloc(outputs, sizeof(float)); 55 | // 56 | for (int i = 0; i < outputs; i++) 57 | { 58 | l.scales[i] = 1; 59 | } 60 | // 61 | l.mean = (float *)calloc(outputs, sizeof(float)); 62 | l.mean_delta = (float *)calloc(outputs, sizeof(float)); 63 | l.variance = (float *)calloc(outputs, sizeof(float)); 64 | l.variance_delta = (float *)calloc(outputs, sizeof(float)); 65 | // 66 | l.rolling_mean = (float *)calloc(outputs, sizeof(float)); 67 | l.rolling_variance = (float *)calloc(outputs, sizeof(float)); 68 | // 69 | l.x = (float *)calloc(outputs, sizeof(float)); 70 | l.x_norm = (float *)calloc(outputs, sizeof(float)); 71 | } 72 | // 73 | l.activation = activation; 74 | fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs); 75 | return l; 76 | } 77 | 78 | // 79 | void forward_connected_layer(connected_layer l, network_state state) 80 | { 81 | // empty the l.output array 82 | fill_cpu(l.outputs*l.batch, 0, l.output, 1); 83 | int m = l.batch; 84 | int k = l.inputs; 85 | int n = l.outputs; 86 | float *a = state.input; 87 | float *b = l.weights; 88 | float *c = l.output; 89 | // 90 | //gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); 91 | // 92 | if(l.batch_normalize) 93 | { 94 | if(state.train) 95 | { 96 | mean_cpu(l.output, l.batch, l.outputs, 1, l.mean); 97 | variance_cpu(l.output, l.mean, l.batch, l.outputs, 1, l.variance); 98 | // 99 | scal_cpu(l.outputs, .95, l.rolling_mean, 1); 100 | axpy_cpu(l.outputs, .05, l.mean, 1, l.rolling_mean, 1); 101 | scal_cpu(l.outputs, .95, l.rolling_variance, 1); 102 | axpy_cpu(l.outputs, .05, l.variance, 1, l.rolling_variance, 1); 103 | 104 | copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1); 105 | normalize_cpu(l.output, l.mean, l.variance, l.batch, l.outputs, 1); 106 | copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1); 107 | } 108 | else 109 | { 110 | normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.outputs, 1); 111 | } 112 | scale_bias(l.output, l.scales, l.batch, l.outputs, 1); 113 | } 114 | } 115 | 116 | // 117 | void backward_connected_layer(connected_layer l, network_state state) 118 | { 119 | // 120 | gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); 121 | // 122 | for (int i = 0; i < l.batch; i++) 123 | { 124 | axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1); 125 | } 126 | // 127 | if(l.batch_normalize) 128 | { 129 | backward_scale_cpu(l.x_norm, l.delta, l.batch, l.outputs, 1, l.scale_updates); 130 | 131 | scale_bias(l.delta, l.scales, l.batch, l.outputs, 1); 132 | 133 | mean_delta_cpu(l.delta, l.variance, l.batch, l.outputs, 1, l.mean_delta); 134 | variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.outputs, 1, l.variance_delta); 135 | normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.outputs, 1, l.delta); 136 | } 137 | // 138 | int m = l.outputs; 139 | int k = l.batch; 140 | int n = l.inputs; 141 | float *a = l.delta; 142 | float *b = state.input; 143 | float *c = l.weight_updates; 144 | // 145 | m = l.batch; 146 | k = l.outputs; 147 | n = l.inputs; 148 | // 149 | a = l.delta; 150 | b = l.weights; 151 | c = state.delta; 152 | // 153 | if(c) 154 | { 155 | //gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); 156 | } 157 | } 158 | 159 | // 160 | void update_connected_layer(connected_layer l, int batch, float learning_rate, float momentum, float decay) 161 | { 162 | axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); 163 | scal_cpu(l.outputs, momentum, l.bias_updates, 1); 164 | 165 | if(l.batch_normalize) 166 | { 167 | axpy_cpu(l.outputs, learning_rate/batch, l.scale_updates, 1, l.scales, 1); 168 | scal_cpu(l.outputs, momentum, l.scale_updates, 1); 169 | } 170 | 171 | axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1); 172 | axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 1, l.weights, 1); 173 | scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1); 174 | } 175 | 176 | // 177 | void denormalize_connected_layer(layer l) 178 | { 179 | // 180 | for (int i = 0; i < l.outputs; i++) 181 | { 182 | float scale = l.scales[i]/sqrt(l.rolling_variance[i] + 0.000001); 183 | for (int j = 0; j < l.inputs; i++) 184 | { 185 | l.weights[i*l.inputs + j] *= scale; 186 | } 187 | l.biases[i] -= l.rolling_mean[i] * scale; 188 | l.scales[i] = 1; 189 | l.rolling_mean[i] = 0; 190 | l.rolling_variance[i] = 1; 191 | } 192 | } 193 | 194 | // 195 | void statistics_connected_layer(layer l) 196 | { 197 | if(l.batch_normalize) 198 | { 199 | printf("Scales "); 200 | print_statistics(l.scales, l.outputs); //??? 201 | /* 202 | printf("Rolling Mean "); 203 | print_statistics(l.rolling_mean, l.outputs); 204 | printf("Rolling Variance "); 205 | print_statistics(l.rolling_variance, l.outputs); 206 | */ 207 | } 208 | printf("Biases "); 209 | print_statistics(l.biases, l.outputs); 210 | printf("Weights "); 211 | print_statistics(l.weights, l.outputs); 212 | } 213 | -------------------------------------------------------------------------------- /convolutional_layer.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Convolutional layer 3 | //======================================================================== 4 | // @brief: convolutional layer 5 | 6 | #include "convolutional_layer.h" 7 | #include 8 | 9 | convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride,\ 10 | int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam) 11 | { 12 | convolutional_layer l; 13 | init_layer(l); 14 | l.type = CONVOLUTIONAL; 15 | 16 | l.h = h; 17 | l.w = w; 18 | l.c = c; 19 | l.n = n; 20 | l.binary = binary; 21 | l.xnor = xnor; 22 | l.batch = batch; 23 | l.stride = stride; 24 | l.size = size; 25 | l.pad = padding; 26 | l.batch_normalize = batch_normalize; 27 | 28 | //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 29 | l.weights = (float *)calloc(c*n*size*size, sizeof(float)); 30 | //l.weights = (float *)sds_alloc(c*n*size*size * sizeof(float)); 31 | l.weight_updates = (float *)calloc(c*n*size*size, sizeof(float)); 32 | 33 | l.biases = (float *)calloc(n, sizeof(float)); 34 | l.bias_updates = (float *)calloc(n, sizeof(float)); 35 | 36 | // float scale = 1./sqrt(size*size*c); 37 | float scale = sqrt(2./(size*size*c)); 38 | for (int i = 0; i < c*n*size*size; i++) 39 | { 40 | l.weights[i] = scale*rand_uniform(-1, 1); 41 | } 42 | int out_h = convolutional_out_height(l); 43 | int out_w = convolutional_out_width(l); 44 | l.out_h = out_h; 45 | l.out_w = out_w; 46 | l.out_c = n; 47 | l.outputs = l.out_h * l.out_w * l.out_c; 48 | l.inputs = l.w * l.h * l.c; 49 | 50 | //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 51 | l.output = (float *)calloc(l.batch*l.outputs, sizeof(float)); 52 | //l.output = (float *)sds_alloc(l.batch*l.outputs * sizeof(float)); 53 | l.delta = (float *)calloc(l.batch*l.outputs, sizeof(float)); 54 | 55 | l.forward = forward_convolutional_layer; 56 | //l.backward = backward_convolutional_layer; 57 | //l.update = update_convolutional_layer; 58 | /*if(binary){ 59 | l.binary_weights = calloc(c*n*size*size, sizeof(float)); 60 | l.cweights = calloc(c*n*size*size, sizeof(char)); 61 | l.scales = calloc(n, sizeof(float)); 62 | } 63 | if(xnor){ 64 | l.binary_weights = calloc(c*n*size*size, sizeof(float)); 65 | l.binary_input = calloc(l.inputs*l.batch, sizeof(float)); 66 | }*/ 67 | 68 | if(batch_normalize) 69 | { 70 | l.scales = (float *)calloc(n, sizeof(float)); 71 | l.scale_updates = (float *)calloc(n, sizeof(float)); 72 | for(int i = 0; i < n; i++) 73 | { 74 | l.scales[i] = 1; 75 | } 76 | 77 | l.mean = (float *)calloc(n, sizeof(float)); 78 | l.variance = (float *)calloc(n, sizeof(float)); 79 | 80 | l.mean_delta = (float *)calloc(n, sizeof(float)); 81 | l.variance_delta = (float *)calloc(n, sizeof(float)); 82 | 83 | l.rolling_mean = (float *)calloc(n, sizeof(float)); 84 | l.rolling_variance = (float *)calloc(n, sizeof(float)); 85 | l.x = (float *)calloc(l.batch*l.outputs, sizeof(float)); 86 | l.x_norm = (float *)calloc(l.batch*l.outputs, sizeof(float)); 87 | } 88 | if(adam) 89 | { 90 | l.adam = 1; 91 | l.m = (float *)calloc(c*n*size*size, sizeof(float)); 92 | l.v = (float *)calloc(c*n*size*size, sizeof(float)); 93 | } 94 | 95 | l.workspace_size = get_workspace_size(l); 96 | l.activation = activation; 97 | 98 | fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); 99 | 100 | return l; 101 | } 102 | 103 | // get the size of output image 104 | size_t get_workspace_size(layer l) 105 | { 106 | return (size_t)l.out_h*l.out_w*l.size*l.size*l.c*sizeof(float); 107 | } 108 | 109 | // forward to convolutional layer 110 | void forward_convolutional_layer(convolutional_layer l, network_state state) 111 | { 112 | //printf("ch3\n"); 113 | int out_h = convolutional_out_height(l); 114 | int out_w = convolutional_out_width(l); 115 | // init l.output = 0 116 | //Timer timer2("fill_cpu"); 117 | //timer2.start(); 118 | fill_cpu(l.outputs * l.batch, 0, l.output, 1); 119 | //timer2.stop(); 120 | //printf("ch4\n"); 121 | // 122 | //Timer timer9("part left"); 123 | //timer9.start(); 124 | int m = l.n; 125 | int k = l.size * l.size * l.c; 126 | int n = out_h * out_w ; 127 | int n2 = (out_h + 2) * (out_w + 2); 128 | // 129 | float *a = l.weights; 130 | float *b = state.workspace; 131 | float *c = l.output; 132 | 133 | //Storing iamge 134 | b = (float *) sds_alloc ( l.c * n2 * sizeof(float) ); 135 | //Storing convolution results 136 | float *c_col = (float *) sds_alloc ((m*n+n*(SIZE_BATCH-m%SIZE_BATCH)) * sizeof(float)); 137 | //Storing weights 138 | float a_buf[3*3*1024*16]; 139 | 140 | int count; 141 | int batch; 142 | //Change input image format 143 | if(m%SIZE_BATCH == 0) 144 | { 145 | count = m/SIZE_BATCH; 146 | batch = SIZE_BATCH; 147 | im2col(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b); 148 | } 149 | else 150 | { 151 | //for last layer: no special format 152 | count = m/SIZE_BATCH+1; 153 | batch = SIZE_BATCH; 154 | im2col_extra(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b); 155 | } 156 | 157 | int aiCount = batch*k; 158 | struct timeval t0, t1; 159 | static double Duration = 0; 160 | double DurationTemp = Duration; 161 | 162 | for (int x = 0; x < count; x++) 163 | { 164 | //Give the layer being calculated 165 | if( x == 0 ){ 166 | printf("Layer info: channel = %d, x = %d, m = %d, n = %d\n", l.c, x, m, n); 167 | fflush(stdout); 168 | } 169 | //Copying weights 170 | if( m % SIZE_BATCH != 0 && x == count - 1 ) 171 | aiCount = ( m % SIZE_BATCH ) * k; 172 | for( int ai = 0; ai < aiCount; ai++ ){ 173 | a_buf[ai] = a[ai + x*batch*k]; 174 | } 175 | 176 | gettimeofday(&t0, 0); 177 | gemm2( a_buf,b,c_col,m,n,k,l.c,l.h,l.w,l.size, l.pad); 178 | gettimeofday(&t1, 0); 179 | Duration += (t1.tv_sec-t0.tv_sec)*1000000 + t1.tv_usec-t0.tv_usec; 180 | 181 | c_col += batch * n; 182 | } 183 | 184 | std::cout << "Duration for channel = " << l.c << " : " << ( Duration - DurationTemp ) / 1000 << " msec" << std::endl; 185 | if( m == 425 ) 186 | std::cout << "Duration in all: " << Duration / 1000 << " msec" << std::endl; 187 | 188 | //Transfer the output data format back 189 | if(m%SIZE_BATCH ==0) 190 | { 191 | c_col -= m*n; 192 | col2img(c_col,c,m,n,count,SIZE_BATCH); 193 | } 194 | else 195 | { 196 | c_col -= count * n * SIZE_BATCH; 197 | col2img_extra(c_col,c,m,n,count,SIZE_BATCH); 198 | } 199 | sds_free(b); 200 | sds_free(c_col); 201 | 202 | if (l.batch_normalize) 203 | { 204 | forward_batchnorm_layer(l, state); 205 | } 206 | add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w); 207 | activate_array(l.output, m*n*l.batch, l.activation); 208 | 209 | } 210 | 211 | // calculate output height 212 | int convolutional_out_height(convolutional_layer l) 213 | { 214 | return (l.h + 2*l.pad - l.size)/l.stride + 1; 215 | } 216 | 217 | // calculate output_weight 218 | int convolutional_out_width(convolutional_layer l) 219 | { 220 | return (l.w + 2*l.pad - l.size)/l.stride + 1; 221 | } 222 | 223 | // add bias to output values 224 | void add_bias(float *output, float *biases, int batch, int n, int size) 225 | { 226 | // 227 | for (int b = 0; b < batch; b++) 228 | { 229 | for (int i = 0; i < n; i++) 230 | { 231 | for (int j = 0; j < size; j++) 232 | { 233 | output[(b*n+i)*size + j] += biases[i]; 234 | } 235 | } 236 | } 237 | } 238 | 239 | // scale bias 240 | void scale_bias(float *output, float *scales, int batch, int n, int size) 241 | { 242 | // 243 | for(int b = 0; b < batch; b++) 244 | { 245 | for(int i = 0; i < n; i++) 246 | { 247 | for(int j = 0; j < size; j++) 248 | { 249 | output[(b*n + i)*size + j] *= scales[i]; 250 | } 251 | } 252 | } 253 | } 254 | 255 | // scale bias 256 | void swap_binary(convolutional_layer *l) 257 | { 258 | float *swap = l->weights; 259 | l->weights = l->binary_weights; 260 | l->binary_weights = swap; 261 | } 262 | -------------------------------------------------------------------------------- /gemm2.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Gemm header file 3 | //======================================================================== 4 | // @brief: top hardware function - convolutional computation 5 | 6 | #include "gemm.h" 7 | 8 | /* 9 | //first convolutional layer 10 | M: l.n - number of filters & number of output channels e.g. 16 num_filter 11 | N: out_h * out_w - size of input&output channel e.g. 416x416 SIZE_CHANNEL 12 | K: l.size * l.size * l.c - size of one input filter e.g. 3x3x3 SIZE_FILTER 13 | A : weights/filters 14 | lda: l.size * l.size * l.c - size of one input filter e.g. 3x3x3 15 | B : input images 16 | ldb: out_h * out_w - size of input&output channel e.g. 416x416 17 | C : output images 18 | ldc: out_h * out_w - size of input&output channel e.g. 416x416 19 | */ 20 | void gemm2(float A[MAX_A], float B[MAX_B], float C[MAX_C],int num_filter, int size_channel,int size_filter,\ 21 | int channels, int height, int width, int ksize, int pad) 22 | { 23 | 24 | INPUT_32 weights[SIZE_BATCH][SIZE_FILTER][MAX_FILTER_DEPTH]; 25 | //store image of 3 separate column 26 | INPUT_32 line_buffer[NUM_LINE_BUFFER][SIZE_LINE_BUFFER]; 27 | //store image required by filter while computing 28 | INPUT_32 window_buffer[NUM_WINDOW_BUFFER][SIZE_WINDOW_BUFFER]; 29 | //read image from window_buffer to achieve parallel computing 30 | INPUT_32 ParallelWindow[SystolicKernelSize][NUM_WINDOW_BUFFER]; 31 | //store output image other than last layer 32 | OUTPUT_64 output[SIZE_BATCH][SystolicKernelSize]; 33 | //store image of last layer 34 | OUTPUT_64 output2[SIZE_BATCH]; 35 | //counter 36 | int index_lb; 37 | 38 | 39 | #pragma HLS array_partition variable=weights complete dim=1 40 | #pragma HLS array_partition variable=weights complete dim=2 41 | #pragma HLS array_partition variable=line_buffer complete dim=1 42 | #pragma HLS array_partition variable=window_buffer complete dim=1 43 | #pragma HLS array_partition variable=output complete 44 | #pragma HLS array_partition variable=output2 complete 45 | #pragma HLS array_partition variable=ParallelWindow complete dim=1 46 | 47 | //systolic data 48 | INPUT_32 inA[SIZE_BATCH][SystolicKernelSize]; 49 | INPUT_32 inB[SIZE_BATCH][SystolicKernelSize]; 50 | #pragma HLS array_partition variable=inA complete dim=0 51 | #pragma HLS array_partition variable=inB complete dim=0 52 | 53 | 54 | 55 | 56 | if (ksize == NUM_WINDOW_BUFFER_EXTRA) 57 | { 58 | //different computing core for last layer 59 | gemm_extra2(A,B,C,size_channel,size_filter,ksize,weights,output2); 60 | } 61 | else 62 | { 63 | // copy loop: store filters/weights in local BRAM 64 | 65 | Copy_weights: 66 | for (int i = 0; i < SIZE_BATCH; i++) 67 | { 68 | for (int k = 0; k < channels; k++) 69 | { 70 | for (int j = 0; j < ksize*ksize; j++) 71 | { 72 | #pragma HLS PIPELINE II=1 73 | int index_j = (j % ksize) * ksize + j / ksize; 74 | weights[i][index_j][k] = A[i*ksize*ksize*channels+k*ksize*ksize+j]; 75 | } 76 | } 77 | } 78 | 79 | Copy_image1://pads of first column 80 | for (int w = 0; w < (width+2*pad); w++) 81 | { 82 | for (int c = 0; c < channels; c++) 83 | { 84 | #pragma HLS PIPELINE II=1 85 | int index = w * channels + c; 86 | line_buffer[2][index] = B[index]; 87 | } 88 | } 89 | 90 | Copy_image2://pads for fisrt channel of second column 91 | for (int c = 0; c < channels; c++) 92 | { 93 | #pragma HLS PIPELINE II=1 94 | line_buffer[1][c] = line_buffer[2][c]; 95 | line_buffer[2][c] = B[(width+2*pad)*channels+c]; 96 | } 97 | 98 | //initialize counters 99 | int CountStep = 1; 100 | int step = 0; 101 | 102 | // start calculation 103 | Cal_h: 104 | for (int h = 0; h < (height+2*pad-1); h++) 105 | { 106 | Cal_w: 107 | for ( int w = 0; w < (width+2*pad); ) 108 | { 109 | // last iteration - after last pixel of map - do nothing 110 | if ((h == height+2*pad-2) && (w == width+2*pad-1)) 111 | { 112 | break; 113 | } 114 | 115 | // second column, first & last row -- read only, prepare image data 116 | int flag = (h != 0) && (w != 0) && (w != width+2*pad-1); 117 | 118 | // init output array 119 | Init_output: 120 | if (flag) 121 | { 122 | for (int i = 0; i < SIZE_BATCH; i++) 123 | { 124 | #pragma HLS unroll 125 | for (int j = 0; j < SystolicKernelSize; j++) 126 | { 127 | #pragma HLS unroll 128 | output[i][j] = 0; 129 | } 130 | } 131 | } 132 | // data_fetch & computation 133 | Cal_c: 134 | //all systolic array are busy then 135 | if( flag == 1 || ( h == 0 && w != 0 && w != width + 2 * pad - 1 ) ) 136 | CountStep = SystolicKernelSize; 137 | 138 | for (int c = 0; c < channels; c++) 139 | { 140 | #pragma HLS DEPENDENCE variable=line_buffer inter false 141 | #pragma HLS DEPENDENCE variable=index_lb inter false 142 | 143 | for( step = 0; step < CountStep; step++ ){ 144 | //when it comes to last row of each col 145 | if( w + 1 + step == width + 2 * pad && c == 0 ) 146 | { 147 | //width mod SystolicKernelSize == 0 or finished computing this column 148 | if( step == 0 ) 149 | CountStep = 1; 150 | //width mod SystolicKernelSize != 0 and finished computing this column 151 | else 152 | { 153 | CountStep = step; 154 | break; 155 | } 156 | } 157 | 158 | // update window buffer 159 | ParallelWindow[step][0] = ( window_buffer[0][c] = window_buffer[3][c] ); 160 | ParallelWindow[step][1] = ( window_buffer[1][c] = window_buffer[4][c] ); 161 | ParallelWindow[step][2] = ( window_buffer[2][c] = window_buffer[5][c] ); 162 | ParallelWindow[step][3] = ( window_buffer[3][c] = window_buffer[6][c] ); 163 | ParallelWindow[step][4] = ( window_buffer[4][c] = window_buffer[7][c] ); 164 | ParallelWindow[step][5] = ( window_buffer[5][c] = window_buffer[8][c] ); 165 | // update line buffer 166 | int fetch_w; 167 | if( w == 0 || w + 1 + step == width + 2 * pad ) 168 | fetch_w = ( w + 1 ) % ( width + 2 * pad ); 169 | else 170 | fetch_w = ( w + 1 - ( w - 1 ) % SystolicKernelSize ) % ( width + 2 * pad ); 171 | int fetch_h = h + 1 + ( w + 1 ) / ( width + 2 * pad ); 172 | // 173 | index_lb = fetch_w * channels + c * CountStep + step; //column 174 | int index_input = fetch_h * (width+2*pad) * channels + fetch_w * channels + c * CountStep + step; 175 | //read new image data, combine data read before and generate ParallelWindow required by filter 176 | ParallelWindow[step][6] = ( window_buffer[6][c] = (line_buffer[0][index_lb] = line_buffer[1][index_lb]) ); 177 | ParallelWindow[step][7] = ( window_buffer[7][c] = (line_buffer[1][index_lb] = line_buffer[2][index_lb]) ); 178 | ParallelWindow[step][8] = ( window_buffer[8][c] = (line_buffer[2][index_lb] = B[index_input]) ); 179 | 180 | 181 | } 182 | 183 | 184 | 185 | // multiplication 16 x SystolicKernelSize using systolic core 186 | if (flag) 187 | { 188 | //init data buffer of systolic core 189 | for( int j = 0; j < SIZE_BATCH; j++ ){ 190 | #pragma HLS pipeline 191 | for( int i = 0; i < SystolicKernelSize; i++ ){ 192 | inA[j][i]= 0; 193 | inB[j][i] = 0; 194 | } 195 | } 196 | 197 | //Iteration cycles determined by both array 198 | for( int r = 0; r < SIZE_BATCH + SIZE_FILTER + step - 2; r++ ){ 199 | #pragma HLS pipeline 200 | 201 | for (int i = 0; i < SIZE_BATCH; i++) 202 | for (int j = SystolicKernelSize - 1; j >= 1; j--) 203 | inA[i][j] = inA[i][j-1]; 204 | 205 | for (int i = SIZE_BATCH - 1; i >= 1; i--) 206 | for (int j = 0; j < SystolicKernelSize; j++) 207 | inB[i][j] = inB[i-1][j]; 208 | 209 | 210 | for( int i = 0; i < SIZE_BATCH; i++ ) 211 | if( r >= i && r < i + SIZE_FILTER ) 212 | inA[i][0] = weights[i][r-i][c]; 213 | else 214 | inA[i][0] = 0; 215 | 216 | for (int j = 0; j < SystolicKernelSize; j++) 217 | if( r >= j && r < j + SIZE_FILTER ) 218 | inB[0][j] = ParallelWindow[j][r-j]; 219 | else 220 | inB[0][j] = 0; 221 | 222 | //PE 223 | for( int i = 0; i < SIZE_BATCH; i++ ) 224 | for( int j = 0; j < SystolicKernelSize; j++ ) 225 | output[i][j] += inA[i][j] * inB[i][j]; 226 | 227 | } 228 | 229 | } 230 | } 231 | // output results 232 | if (flag) 233 | { 234 | for( int OutChannel = 0; OutChannel < step; OutChannel++ ){ 235 | 236 | int index_c = ( h - 1 ) * width + w - 1 + OutChannel; 237 | Output: 238 | for (int i = 0; i < SIZE_BATCH; i++) 239 | { 240 | #pragma HLS DEPENDENCE variable=output inter false 241 | #pragma HLS PIPELINE II=1 242 | // output final result 243 | 244 | C[index_c*SIZE_BATCH+i] = output[i][OutChannel]; 245 | 246 | } 247 | } 248 | } 249 | 250 | if( w + 1 + step == width + 2 * pad ) 251 | CountStep = 1; 252 | w += step; 253 | } 254 | } 255 | } 256 | 257 | } 258 | 259 | // extra gemm with filter size 1x1 260 | void gemm_extra2(float A[MAX_A], float B[MAX_B], float C[MAX_C],int size_channel,int size_filter,int ksize, 261 | INPUT_32 weights[SIZE_BATCH][SIZE_FILTER][MAX_FILTER_DEPTH],OUTPUT_64 output[SIZE_BATCH]) 262 | { 263 | // copy loop: store weights/filters in local BRAM 264 | Copy_weights_E: 265 | for (int i = 0; i < SIZE_BATCH; i++) 266 | { 267 | for (int k = 0; k < size_filter; k++) //1x1x425 268 | { 269 | for (int j = 0; j < ksize*ksize; j++) //1x1 270 | { 271 | #pragma HLS PIPELINE II=1 272 | weights[i][j][k] = A[i*ksize*ksize*size_filter+k*ksize*ksize+j]; 273 | } 274 | } 275 | } 276 | // start calculation 277 | Cal_t_E: 278 | for (int i = 0; i < size_channel; i++) 279 | { 280 | // init output 281 | Init_E: 282 | for (int k = 0; k < SIZE_BATCH; k++) 283 | { 284 | #pragma HLS unroll 285 | output[k] = 0; 286 | } 287 | // start calculation 1024 mul+add 288 | Cal_L1_E: 289 | for (int j = 0; j < size_filter; j++) 290 | { 291 | #pragma HLS PIPELINE II=1 292 | INPUT_32 input = B[i*size_filter+j]; 293 | for (int k = 0; k < SIZE_BATCH; k++) 294 | { 295 | Cal_L2_E: 296 | output[k] += input * weights[k][0][j]; 297 | } 298 | } 299 | // output results 300 | Output_E: 301 | for (int j = 0; j < SIZE_BATCH; j++) 302 | { 303 | #pragma HLS PIPELINE II=1 304 | C[i*SIZE_BATCH+j] = output[j]; 305 | } 306 | } 307 | } 308 | 309 | 310 | -------------------------------------------------------------------------------- /region_layer.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Region layer 3 | //======================================================================== 4 | // @brief: get predictions (final bounding boxes) 5 | 6 | #include "region_layer.h" 7 | 8 | // make region layer 9 | layer make_region_layer(int batch, int w, int h, int n, int classes, int coords) 10 | { 11 | layer l; 12 | init_layer(l); 13 | 14 | l.type = REGION; 15 | 16 | l.n = n; 17 | l.batch = batch; 18 | l.h = h; 19 | l.w = w; 20 | l.classes = classes; 21 | l.coords = coords; 22 | l.cost = (float *)calloc(1, sizeof(float)); 23 | l.biases = (float *)calloc(n*2, sizeof(float)); 24 | l.bias_updates = (float *)calloc(n*2, sizeof(float)); 25 | l.outputs = h*w*n*(classes + coords + 1); 26 | l.inputs = l.outputs; 27 | l.truths = 30*(5); 28 | l.delta = (float *)calloc(batch*l.outputs, sizeof(float)); 29 | l.output = (float *)calloc(batch*l.outputs, sizeof(float)); 30 | // 31 | for(int i = 0; i < n*2; i++){ 32 | l.biases[i] = .5; 33 | } 34 | 35 | l.forward = forward_region_layer; 36 | //l.backward = backward_region_layer; 37 | fprintf(stderr, "detection\n"); 38 | srand(0); 39 | 40 | return l; 41 | } 42 | 43 | // get bounding boxes 44 | // l 1 1 0.24 probs boxes 0 0 0.5 45 | void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh) 46 | { 47 | // 48 | float *predictions = l.output; 49 | /* 50 | // print output 51 | printf("l.outputs:%d;\n",l.outputs); 52 | for (int i = 0; i < 100; i++) 53 | { 54 | printf("predictions[%d]:%.12f;\n",i,predictions[i]); 55 | } 56 | */ 57 | 58 | //cover l.w * l.h grids l.n = 5 5boxes l.classes = 20 59 | for (int i = 0; i < l.w*l.h; i++){ 60 | int row = i / l.w; 61 | int col = i % l.w; 62 | for(int n = 0; n < l.n; n++){ 63 | int index = i*l.n + n; 64 | int p_index = index * (l.classes + 5) + 4; 65 | float scale = predictions[p_index]; 66 | int box_index = index * (l.classes + 5); 67 | boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h); 68 | boxes[index].x *= w; 69 | boxes[index].y *= h; 70 | boxes[index].w *= w; 71 | boxes[index].h *= h; 72 | 73 | int class_index = index * (l.classes + 5) + 5; 74 | if(l.softmax_tree) 75 | { 76 | hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0); 77 | if(map) 78 | { 79 | for(int j = 0; j < 200; j++) 80 | { 81 | float prob = scale*predictions[class_index+map[j]]; 82 | probs[index][j] = (prob > thresh) ? prob : 0; 83 | } 84 | } 85 | else 86 | { 87 | int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh); 88 | probs[index][j] = (scale > thresh) ? scale : 0; 89 | probs[index][l.classes] = scale; 90 | } 91 | } 92 | else 93 | { 94 | for(int j = 0; j < l.classes; ++j){ 95 | float prob = scale*predictions[class_index+j]; 96 | probs[index][j] = (prob > thresh) ? prob : 0; 97 | } 98 | } 99 | if(only_objectness) 100 | { 101 | probs[index][0] = scale; 102 | } 103 | } 104 | } 105 | } 106 | 107 | // get bounding box (single) 108 | box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h) 109 | { 110 | box b; 111 | b.x = (i + logistic_activate(x[index + 0])) / w; 112 | b.y = (j + logistic_activate(x[index + 1])) / h; 113 | b.w = exp(x[index + 2]) * biases[2*n] / w; 114 | b.h = exp(x[index + 3]) * biases[2*n+1] / h; 115 | 116 | return b; 117 | } 118 | 119 | // region layer top function 120 | void forward_region_layer(const layer l, network_state state) 121 | { 122 | int i,j,b,t,n; 123 | int size = l.coords + l.classes + 1; 124 | memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float)); 125 | /* 126 | // input of region layer 127 | for (int x = 0; x < 100; x++) 128 | { 129 | printf("l.output[%d]:%.12f;\n",x,l.output[x]); 130 | } 131 | */ 132 | 133 | 134 | #ifndef GPU 135 | flatten(l.output, l.w*l.h, size*l.n, l.batch, 1); 136 | #endif 137 | for (b = 0; b < l.batch; ++b){ 138 | for(i = 0; i < l.h*l.w*l.n; ++i){ 139 | int index = size*i + b*l.outputs; 140 | l.output[index + 4] = logistic_activate(l.output[index + 4]); 141 | } 142 | } 143 | 144 | 145 | #ifndef GPU 146 | if (l.softmax_tree){ 147 | for (b = 0; b < l.batch; ++b){ 148 | for(i = 0; i < l.h*l.w*l.n; ++i){ 149 | int index = size*i + b*l.outputs; 150 | softmax_tree(l.output + index + 5, 1, 0, 1, l.softmax_tree, l.output + index + 5); 151 | } 152 | } 153 | } else if (l.softmax){ 154 | for (b = 0; b < l.batch; ++b){ 155 | for(i = 0; i < l.h*l.w*l.n; ++i){ 156 | int index = size*i + b*l.outputs; 157 | softmax(l.output + index + 5, l.classes, 1, l.output + index + 5); 158 | } 159 | } 160 | } 161 | /* 162 | // output of region layer 163 | for (int x = 0; x < 100; x++) 164 | { 165 | printf("l.output[%d]:%.12f;\n",x,l.output[x]); 166 | } 167 | */ 168 | 169 | #endif 170 | if(!state.train) 171 | { 172 | //printf("return here???\n"); 173 | return; 174 | } 175 | memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); 176 | float avg_iou = 0; 177 | float recall = 0; 178 | float avg_cat = 0; 179 | float avg_obj = 0; 180 | float avg_anyobj = 0; 181 | int count = 0; 182 | int class_count = 0; 183 | *(l.cost) = 0; 184 | for (b = 0; b < l.batch; ++b) { 185 | if(l.softmax_tree){ 186 | int onlyclass = 0; 187 | for(t = 0; t < 30; ++t){ 188 | box truth = float_to_box(state.truth + t*5 + b*l.truths); 189 | if(!truth.x) break; 190 | int class_s = state.truth[t*5 + b*l.truths + 4]; 191 | float maxp = 0; 192 | int maxi = 0; 193 | if(truth.x > 100000 && truth.y > 100000){ 194 | for(n = 0; n < l.n*l.w*l.h; ++n){ 195 | int index = size*n + b*l.outputs + 5; 196 | float scale = l.output[index-1]; 197 | l.delta[index - 1] = l.noobject_scale * ((0 - l.output[index - 1]) * logistic_gradient(l.output[index - 1])); 198 | float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class_s); 199 | if(p > maxp){ 200 | maxp = p; 201 | maxi = n; 202 | } 203 | } 204 | int index = size*maxi + b*l.outputs + 5; 205 | delta_region_class(l.output, l.delta, index, class_s, l.classes, l.softmax_tree, l.class_scale, &avg_cat); 206 | if(l.output[index - 1] < .3) l.delta[index - 1] = l.object_scale * ((.3 - l.output[index - 1]) * logistic_gradient(l.output[index - 1])); 207 | else l.delta[index - 1] = 0; 208 | ++class_count; 209 | onlyclass = 1; 210 | break; 211 | } 212 | } 213 | if(onlyclass) continue; 214 | } 215 | for (j = 0; j < l.h; ++j) { 216 | for (i = 0; i < l.w; ++i) { 217 | for (n = 0; n < l.n; ++n) { 218 | int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs; 219 | box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h); 220 | float best_iou = 0; 221 | for(t = 0; t < 30; ++t){ 222 | box truth = float_to_box(state.truth + t*5 + b*l.truths); 223 | if(!truth.x) break; 224 | float iou = box_iou(pred, truth); 225 | if (iou > best_iou) { 226 | best_iou = iou; 227 | } 228 | } 229 | avg_anyobj += l.output[index + 4]; 230 | l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4])); 231 | if (best_iou > l.thresh) { 232 | l.delta[index + 4] = 0; 233 | } 234 | 235 | if(*(state.net.seen) < 12800){ 236 | box truth = {0}; 237 | truth.x = (i + .5)/l.w; 238 | truth.y = (j + .5)/l.h; 239 | truth.w = l.biases[2*n]/l.w; 240 | truth.h = l.biases[2*n+1]/l.h; 241 | delta_region_box(truth, l.output, l.biases, n, index, i, j, l.w, l.h, l.delta, .01); 242 | } 243 | } 244 | } 245 | } 246 | for(t = 0; t < 30; ++t){ 247 | box truth = float_to_box(state.truth + t*5 + b*l.truths); 248 | 249 | if(!truth.x) break; 250 | float best_iou = 0; 251 | int best_index = 0; 252 | int best_n = 0; 253 | i = (truth.x * l.w); 254 | j = (truth.y * l.h); 255 | //printf("%d %f %d %f\n", i, truth.x*l.w, j, truth.y*l.h); 256 | box truth_shift = truth; 257 | truth_shift.x = 0; 258 | truth_shift.y = 0; 259 | //printf("index %d %d\n",i, j); 260 | for(n = 0; n < l.n; ++n){ 261 | int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs; 262 | box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h); 263 | if(l.bias_match){ 264 | pred.w = l.biases[2*n]/l.w; 265 | pred.h = l.biases[2*n+1]/l.h; 266 | } 267 | //printf("pred: (%f, %f) %f x %f\n", pred.x, pred.y, pred.w, pred.h); 268 | pred.x = 0; 269 | pred.y = 0; 270 | float iou = box_iou(pred, truth_shift); 271 | if (iou > best_iou){ 272 | best_index = index; 273 | best_iou = iou; 274 | best_n = n; 275 | } 276 | } 277 | //printf("%d %f (%f, %f) %f x %f\n", best_n, best_iou, truth.x, truth.y, truth.w, truth.h); 278 | 279 | float iou = delta_region_box(truth, l.output, l.biases, best_n, best_index, i, j, l.w, l.h, l.delta, l.coord_scale); 280 | if(iou > .5) recall += 1; 281 | avg_iou += iou; 282 | 283 | //l.delta[best_index + 4] = iou - l.output[best_index + 4]; 284 | avg_obj += l.output[best_index + 4]; 285 | l.delta[best_index + 4] = l.object_scale * (1 - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]); 286 | if (l.rescore) { 287 | l.delta[best_index + 4] = l.object_scale * (iou - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]); 288 | } 289 | 290 | 291 | int class_s = state.truth[t*5 + b*l.truths + 4]; 292 | if (l.map) class_s = l.map[class_s]; 293 | delta_region_class(l.output, l.delta, best_index + 5, class_s, l.classes, l.softmax_tree, l.class_scale, &avg_cat); 294 | ++count; 295 | ++class_count; 296 | } 297 | } 298 | //printf("\n"); 299 | #ifndef GPU 300 | flatten(l.delta, l.w*l.h, size*l.n, l.batch, 0); 301 | #endif 302 | *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); 303 | printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count); 304 | } 305 | 306 | // extra region classes 307 | void delta_region_class(float *output, float *delta, int index, int class_s, int classes, tree *hier, float scale, float *avg_cat) 308 | { 309 | if(hier) 310 | { 311 | float pred = 1; 312 | while(class_s >= 0) 313 | { 314 | pred *= output[index + class_s]; 315 | int g = hier->group[class_s]; 316 | int offset = hier->group_offset[g]; 317 | for(int i = 0; i < hier->group_size[g]; i++) 318 | { 319 | delta[index + offset + i] = scale * (0 - output[index + offset + i]); 320 | } 321 | delta[index + class_s] = scale * (1 - output[index + class_s]); 322 | 323 | class_s = hier->parent[class_s]; 324 | } 325 | *avg_cat += pred; 326 | } 327 | else 328 | { 329 | for(int n = 0; n < classes; n++) 330 | { 331 | delta[index + n] = scale * (((n == class_s)?1 : 0) - output[index + n]); 332 | if(n == class_s) 333 | { 334 | *avg_cat += output[index + n]; 335 | } 336 | } 337 | } 338 | } 339 | 340 | // extra region boxes 341 | float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale) 342 | { 343 | box pred = get_region_box(x, biases, n, index, i, j, w, h); 344 | float iou = box_iou(pred, truth); 345 | 346 | float tx = (truth.x*w - i); 347 | float ty = (truth.y*h - j); 348 | float tw = log(truth.w*w / biases[2*n]); 349 | float th = log(truth.h*h / biases[2*n + 1]); 350 | 351 | delta[index + 0] = scale * (tx - logistic_activate(x[index + 0])) * logistic_gradient(logistic_activate(x[index + 0])); 352 | delta[index + 1] = scale * (ty - logistic_activate(x[index + 1])) * logistic_gradient(logistic_activate(x[index + 1])); 353 | delta[index + 2] = scale * (tw - x[index + 2]); 354 | delta[index + 3] = scale * (th - x[index + 3]); 355 | 356 | return iou; 357 | } 358 | -------------------------------------------------------------------------------- /image.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Image 3 | //======================================================================== 4 | // @brief: loading image data 5 | 6 | #include "image.h" 7 | 8 | #define STB_IMAGE_IMPLEMENTATION 9 | #include "stb_image_read.h" 10 | #define STB_IMAGE_WRITE_IMPLEMENTATION 11 | #include "stb_image_write.h" 12 | 13 | int windows = 0; 14 | float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} }; 15 | 16 | // return 8*128*image 17 | // load labels 8(different size), 32~126 (different type) 18 | // store information&value of labels: w,h,c,*data 19 | image **load_alphabet() 20 | { 21 | const int nsize = LABEL_SIZE; 22 | image **alphabets = (image **)calloc(nsize, sizeof(image *)); 23 | // 24 | for (int j = 0; j < nsize; j++) 25 | { 26 | alphabets[j] = (image *)calloc(LABEL_TYPE, sizeof(image)); 27 | for (int i = 32; i < LABEL_TYPE - 1; i++) 28 | { 29 | char buffer[256]; 30 | sprintf(buffer, "data/labels/%d_%d.png", i, j); 31 | // buffer: filename of labels 32 | alphabets[j][i] = load_image_color(buffer, 0, 0); 33 | } 34 | } 35 | return alphabets; 36 | } 37 | 38 | // pass value? 39 | image load_image_color(char *filename, int w, int h) 40 | { 41 | return load_image(filename, w, h, 3); 42 | } 43 | 44 | // load image top function 45 | image load_image(char *filename, int w, int h, int c) //003 46 | { 47 | // load image value BUG 48 | image out = load_image_stb(filename, c); 49 | 50 | //printf("out.h: %d; out.w: %d; out.c: %d;\n",out.h,out.w,out.c); 51 | // when resize??? 52 | if((h && w) && (h != out.h || w != out.w)) 53 | { 54 | image resized = resize_image(out, w, h); 55 | free_image(out); 56 | out = resized; 57 | } 58 | return out; 59 | } 60 | 61 | // return im.data: w(width); h(height); z(depth,channel) 62 | image load_image_stb(char *filename, int channels) // filename, 3 63 | { 64 | int w, h, c; 65 | // standard image load function 66 | // stbi_load output: z(depth, channel); w(width); h(height) 67 | //printf("filename: %s; channels: %d;\n",filename,channels); 68 | unsigned char *data = stbi_load(filename, &w, &h, &c, channels); 69 | //printf("out.h: %d; out.w: %d; out.c: %d;\n",h,w,c); 70 | 71 | if(channels) 72 | { 73 | c = channels; 74 | } 75 | // make new image 76 | image im = make_image(w, h, c); 77 | for (int k = 0; k < c; k++) 78 | { 79 | for (int j = 0; j < h; j++) 80 | { 81 | for(int i = 0; i < w; i++) 82 | { 83 | int index_dst = i + w*j + w*h*k; 84 | int index_src = k + c*i + c*w*j; 85 | im.data[index_dst] = (float)data[index_src]/255.0; 86 | } 87 | } 88 | } 89 | free(data); 90 | return im; 91 | } 92 | 93 | // make_image top function 94 | image make_image(int w, int h, int c) 95 | { 96 | image out = make_empty_image(w, h, c); 97 | out.data = (float *)calloc(h*w*c, sizeof(float)); 98 | return out; 99 | } 100 | 101 | // make empty image (data pointer: 0) 102 | image make_empty_image(int w, int h, int c) 103 | { 104 | image out; 105 | out.data = 0; 106 | out.h = h; 107 | out.w = w; 108 | out.c = c; 109 | 110 | return out; 111 | } 112 | 113 | // resize the given image (w*h) 114 | image resize_image(image im, int w, int h) 115 | { 116 | image resized = make_image(w, h, im.c); 117 | image part = make_image(w, im.h, im.c); 118 | 119 | float w_scale = (float)(im.w - 1)/(w - 1); 120 | float h_scale = (float)(im.h - 1)/(h - 1); 121 | // stage 1: resize image within given width (column) 122 | for (int k = 0; k < im.c; k++) 123 | { 124 | for (int r = 0; r < im.h; r++) // row 125 | { 126 | for(int c = 0; c < w; c++) // column 127 | { 128 | float val = 0; 129 | // last column || only one column 130 | if (c == w-1 || im.w == 1) 131 | { // simply fetch the original final column 132 | val = get_pixel(im, im.w - 1, r, k); 133 | } 134 | else 135 | { 136 | float sx = c*w_scale; 137 | int ix = (int)sx; 138 | float dx = sx - ix; 139 | // weighted sum for other columns 140 | val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k); 141 | } 142 | // store val into image part 143 | set_pixel(part, c, r, k, val); 144 | } 145 | } 146 | } 147 | // stage 2: resize image within given height (row) 148 | for (int k = 0; k < im.c; k++) 149 | { 150 | for(int r = 0; r < h; r++) 151 | { 152 | float sy = r*h_scale; 153 | int iy = (int)sy; 154 | float dy = sy - iy; 155 | // 156 | for (int c = 0; c < w; c++) 157 | { 158 | float val = (1 - dy) * get_pixel(part, c, iy, k); 159 | // store val into image resized 160 | set_pixel(resized, c, r, k, val); 161 | } 162 | // the last row || only one row 163 | if (r == h-1 || im.h == 1) 164 | { 165 | continue; 166 | } 167 | // 168 | for (int c = 0; c < w; c++) 169 | { 170 | float val = dy * get_pixel(part, c, iy+1, k); 171 | add_pixel(resized, c, r, k, val); 172 | } 173 | } 174 | } 175 | free_image(part); 176 | return resized; 177 | } 178 | 179 | // pick up pixel in m.data: x - width, y - height, c - channel 180 | float get_pixel(image m, int x, int y, int c) 181 | { 182 | // x < m.w && y < m.h && c < m.c == 0: assert 183 | assert(x < m.w && y < m.h && c < m.c); 184 | return m.data[c*m.h*m.w + y*m.w + x]; 185 | } 186 | 187 | // fetch extra pixels 188 | float get_pixel_extend(image m, int x, int y, int c) 189 | { 190 | if(x < 0) 191 | { 192 | x = 0; 193 | } 194 | if(x >= m.w) 195 | { 196 | x = m.w-1; 197 | } 198 | if(y < 0) 199 | { 200 | y = 0; 201 | } 202 | if(y >= m.h) 203 | { 204 | y = m.h-1; 205 | } 206 | if(c < 0 || c >= m.c) 207 | { 208 | return 0; 209 | } 210 | return get_pixel(m, x, y, c); 211 | } 212 | 213 | // check the validity of data && store data into image 214 | void set_pixel(image m, int x, int y, int c, float val) 215 | { 216 | if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) 217 | { 218 | return; 219 | } 220 | // x < m.w && y < m.h && c < m.c == 0: assert 221 | assert(x < m.w && y < m.h && c < m.c); 222 | m.data[c*m.h*m.w + y*m.w + x] = val; 223 | } 224 | 225 | // add value to pixels 226 | void add_pixel(image m, int x, int y, int c, float val) 227 | { 228 | // x < m.w && y < m.h && c < m.c == 0: assert 229 | assert(x < m.w && y < m.h && c < m.c); 230 | m.data[c*m.h*m.w + y*m.w + x] += val; 231 | } 232 | 233 | // draw detecting results 234 | void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes) 235 | { 236 | //printf("probs[176][2]:%.12f; probs[176][7]:%.12f;\n",probs[176][2],probs[176][7]); 237 | for (int i = 0; i < num; i++) 238 | { 239 | //printf("ch0;\n"); 240 | int class_max = max_index(probs[i], classes); // max_index??? 241 | float prob = probs[i][class_max]; 242 | /* 243 | if(probs[i][class_max] != 0) 244 | { 245 | printf("i:%d; class_max:%d; prob:%.12f; \n",i,class_max,prob); 246 | } 247 | */ 248 | if (prob > thresh) 249 | { 250 | //printf("ch1;\n"); 251 | int width = im.h * 0.012; 252 | /* ?????????????????????????????????????? 253 | if(0) 254 | { 255 | width = pow(prob, 1.0/2.0)*10 + 1; 256 | alphabet = 0; 257 | } 258 | */ 259 | printf("%s: %.0f%%\n", names[class_max], prob*100); 260 | int offset = class_max * 123457 % classes; 261 | float red = get_color(2, offset, classes); 262 | float green = get_color(1, offset, classes); 263 | float blue = get_color(0, offset, classes); 264 | float rgb[3]; 265 | // 266 | rgb[0] = red; 267 | rgb[1] = green; 268 | rgb[2] = blue; 269 | box b = boxes[i]; 270 | // 271 | int left = (b.x-b.w/2.0)*im.w; 272 | int right = (b.x+b.w/2.0)*im.w; 273 | int top = (b.y-b.h/2.0)*im.h; 274 | int bot = (b.y+b.h/2.0)*im.h; 275 | // 276 | if(left < 0) left = 0; 277 | if(right > im.w-1) right = im.w-1; 278 | if(top < 0) top = 0; 279 | if(bot > im.h-1) bot = im.h-1; 280 | //printf("ch2;\n"); 281 | // 282 | draw_box_width(im, left, top, right, bot, width, red, green, blue); 283 | //printf("ch3;\n"); 284 | if(alphabet) 285 | { 286 | image label = get_label(alphabet, names[class_max], (im.h*0.03)/10); 287 | draw_label(im, top + width, left, label, rgb); 288 | } 289 | } 290 | //printf("ch4;\n"); 291 | } 292 | } 293 | 294 | // get label 295 | image get_label(image **characters, char *string, int size) 296 | { 297 | if (size > 7) 298 | { 299 | size = 7; 300 | } 301 | image label = make_empty_image(0, 0, 0); 302 | // 303 | while(*string) 304 | { 305 | image l = characters[size][(int)*string]; 306 | image n = tile_images(label, l, -size - 1 + (size+1)/2); 307 | free_image(label); 308 | label = n; 309 | string++; 310 | } 311 | image b = border_image(label, label.h*.25); 312 | free_image(label); 313 | 314 | return b; 315 | } 316 | 317 | // splite image 318 | image tile_images(image a, image b, int dx) 319 | { 320 | if(a.w == 0) 321 | { 322 | return copy_image(b); 323 | } 324 | image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? a.c : b.c); 325 | fill_cpu(c.w*c.h*c.c, 1, c.data, 1); 326 | embed_image(a, c, 0, 0); 327 | composite_image(b, c, a.w + dx, 0); 328 | 329 | return c; 330 | } 331 | 332 | // border/wrap up image 333 | image border_image(image a, int border) 334 | { 335 | image b = make_image(a.w + 2*border, a.h + 2*border, a.c); 336 | // 337 | 338 | for(int k = 0; k < b.c; ++k) 339 | { 340 | for(int y = 0; y < b.h; ++y) 341 | { 342 | for(int x = 0; x < b.w; ++x) 343 | { 344 | float val = get_pixel_extend(a, x - border, y - border, k); 345 | if(x - border < 0 || x - border >= a.w || y - border < 0 || y - border >= a.h) 346 | { 347 | val = 1; 348 | } 349 | set_pixel(b, x, y, k, val); 350 | } 351 | } 352 | } 353 | 354 | return b; 355 | } 356 | 357 | // copy image 358 | image copy_image(image p) 359 | { 360 | image copy = p; 361 | copy.data = (float *)calloc(p.h*p.w*p.c, sizeof(float)); 362 | memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float)); 363 | 364 | return copy; 365 | } 366 | 367 | // embed image (image data transmission) 368 | void embed_image(image source, image dest, int dx, int dy) 369 | { 370 | for(int k = 0; k < source.c; k++) 371 | { 372 | 373 | for(int y = 0; y < source.h; y++) 374 | { 375 | for(int x = 0; x < source.w; x++) 376 | { 377 | float val = get_pixel(source, x,y,k); 378 | set_pixel(dest, dx+x, dy+y, k, val); 379 | } 380 | } 381 | } 382 | } 383 | 384 | // merge images 385 | void composite_image(image source, image dest, int dx, int dy) 386 | { 387 | for (int k = 0; k < source.c; k++) 388 | { 389 | for (int y = 0; y < source.h; y++) 390 | { 391 | for (int x = 0; x < source.w; x++) 392 | { 393 | float val = get_pixel(source, x, y, k); 394 | float val2 = get_pixel_extend(dest, dx+x, dy+y, k); 395 | set_pixel(dest, dx+x, dy+y, k, val * val2); 396 | } 397 | } 398 | } 399 | } 400 | 401 | // get width of boxes 402 | void draw_box_width(image a, int x1, int y1, int x2, int y2,int w, float r, float g, float b) 403 | { 404 | for (int i = 0; i < w; i++) 405 | { 406 | draw_box(a, x1+i, y1+i, x2-i, y2-i, r, g, b); 407 | } 408 | } 409 | 410 | // draw one box 411 | void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b) 412 | { 413 | // normalize_image(a) 414 | // ensure the boxed in the picture 415 | if(x1 < 0) x1 = 0; 416 | if(x1 >= a.w) x1 = a.w-1; 417 | if(x2 < 0) x2 = 0; 418 | if(x2 >= a.w) x2 = a.w-1; 419 | 420 | if(y1 < 0) y1 = 0; 421 | if(y1 >= a.h) y1 = a.h-1; 422 | if(y2 < 0) y2 = 0; 423 | if(y2 >= a.h) y2 = a.h-1; 424 | // draw boxes: rgb 425 | for (int i = x1; i <= x2; i++) 426 | { // two horizontal lines 427 | a.data[i + y1*a.w + 0*a.w*a.h] = r; 428 | a.data[i + y2*a.w + 0*a.w*a.h] = r; 429 | a.data[i + y1*a.w + 1*a.w*a.h] = g; 430 | a.data[i + y2*a.w + 1*a.w*a.h] = g; 431 | a.data[i + y1*a.w + 2*a.w*a.h] = b; 432 | a.data[i + y2*a.w + 2*a.w*a.h] = b; 433 | } 434 | for (int i = y1; i <= y2; i++) 435 | { // two vertical lines 436 | a.data[x1 + i*a.w + 0*a.w*a.h] = r; 437 | a.data[x2 + i*a.w + 0*a.w*a.h] = r; 438 | a.data[x1 + i*a.w + 1*a.w*a.h] = g; 439 | a.data[x2 + i*a.w + 1*a.w*a.h] = g; 440 | a.data[x1 + i*a.w + 2*a.w*a.h] = b; 441 | a.data[x2 + i*a.w + 2*a.w*a.h] = b; 442 | } 443 | } 444 | 445 | // draw labels 446 | void draw_label(image a, int r, int c, image label, const float *rgb) 447 | { 448 | int w = label.w; 449 | int h = label.h; 450 | if(r - h >= 0) 451 | { 452 | r = r - h; 453 | } 454 | // replace corresponding pixels for labels 455 | for (int j = 0; j < h && j + r < a.h; j++) 456 | { 457 | for (int i = 0; i < w && i + c < a.w; i++) 458 | { 459 | for (int k = 0; k < label.c; k++) 460 | { 461 | float val = get_pixel(label, i, j, k); 462 | set_pixel(a, i+c, j+r, k, rgb[k] * val); 463 | } 464 | } 465 | } 466 | } 467 | 468 | // get image color 469 | float get_color(int c, int x, int max) 470 | { 471 | float ratio = ((float)x/max)*5; 472 | int i = floor(ratio); 473 | int j = ceil(ratio); 474 | ratio -= i; 475 | float r = (1-ratio) * colors[i][c] + ratio*colors[j][c]; 476 | 477 | return r; 478 | } 479 | 480 | // display image 481 | void show_image(image p, const char *name) 482 | { 483 | fprintf(stderr,"Not compiled with OpenCV, saving to %s.png instead.\n", name); 484 | save_image(p, name); 485 | } 486 | 487 | // save image top function 488 | void save_image(image im, const char *name) 489 | { 490 | save_image_png(im, name); 491 | } 492 | 493 | // data: one pixel(three channels); im.data: all pixels for one channel, next channel, etc. 494 | void save_image_png(image im, const char *name) 495 | { 496 | char buffer[256]; 497 | // save picture name into buffer 498 | sprintf(buffer, "%s.png", name); 499 | unsigned char *data = (unsigned char *)calloc(im.w*im.h*im.c, sizeof(char)); 500 | // 501 | //printf("ch0;\n"); 502 | for(int k = 0; k < im.c; k++) 503 | { 504 | for(int i = 0; i < im.w*im.h; i++) 505 | { 506 | data[i*im.c + k] = (unsigned char) (255 * im.data[i + k*im.w*im.h]); 507 | } 508 | } 509 | //printf("ch1;\n"); 510 | int success = stbi_write_png(buffer, im.w, im.h, im.c, data, im.w*im.c); 511 | //printf("ch2;\n"); 512 | free(data); 513 | if(!success) fprintf(stderr, "Failed to write image %s\n", buffer); 514 | } 515 | 516 | // free allocated memory 517 | void free_image(image m) 518 | { 519 | if(m.data) 520 | { 521 | free(m.data); 522 | } 523 | } 524 | -------------------------------------------------------------------------------- /parser.cpp: -------------------------------------------------------------------------------- 1 | //======================================================================== 2 | // Parser 3 | //======================================================================== 4 | // @brief: parse and store configs 5 | 6 | #ifndef SRC_PARSER_CPP_ 7 | #define SRC_PARSER_CPP_ 8 | 9 | #include "parser.h" 10 | 11 | // parse amxpooling layer 12 | maxpool_layer parse_maxpool(list *options, size_params params) 13 | { 14 | int stride = option_find_int(options, "stride",1); 15 | int size = option_find_int(options, "size",stride); 16 | int padding = option_find_int_quiet(options, "padding", (size-1)/2); 17 | //printf("\nstride: %d; size: %d; padding: %d;\n",stride,size,padding); 18 | 19 | int batch,h,w,c; 20 | h = params.h; 21 | w = params.w; 22 | c = params.c; 23 | batch=params.batch; 24 | //printf("\nh: %d; w: %d; c: %d; batch: %d;\n",h,w,c,batch); 25 | if(!(h && w && c)) 26 | { 27 | error("Layer before maxpool layer must output image."); 28 | } 29 | 30 | maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding); 31 | return layer; 32 | } 33 | 34 | // copy matrix 35 | void transpose_matrix(float *a, int rows, int cols) 36 | { 37 | float *transpose = (float *)calloc(rows*cols, sizeof(float)); 38 | // 39 | for(int x = 0; x < rows; x++) 40 | { 41 | for(int y = 0; y < cols; y++) 42 | { 43 | transpose[y*rows + x] = a[x*cols + y]; 44 | } 45 | } 46 | memcpy(a, transpose, rows*cols*sizeof(float)); 47 | free(transpose); 48 | } 49 | 50 | // parse region layers 51 | layer parse_region(list *options, size_params params) 52 | { 53 | int coords = option_find_int(options, "coords", 4); 54 | int classes = option_find_int(options, "classes", 20); 55 | int num = option_find_int(options, "num", 1); 56 | 57 | layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords); 58 | assert(l.outputs == params.inputs); 59 | 60 | l.log = option_find_int_quiet(options, "log", 0); 61 | l.sqrt = option_find_int_quiet(options, "sqrt", 0); 62 | 63 | l.softmax = option_find_int(options, "softmax", 0); 64 | l.max_boxes = option_find_int_quiet(options, "max",30); 65 | l.jitter = option_find_float(options, "jitter", .2); 66 | l.rescore = option_find_int_quiet(options, "rescore",0); 67 | 68 | l.thresh = option_find_float(options, "thresh", .5); 69 | l.classfix = option_find_int_quiet(options, "classfix", 0); 70 | l.absolute = option_find_int_quiet(options, "absolute", 0); 71 | l.random = option_find_int_quiet(options, "random", 0); 72 | 73 | l.coord_scale = option_find_float(options, "coord_scale", 1); 74 | l.object_scale = option_find_float(options, "object_scale", 1); 75 | l.noobject_scale = option_find_float(options, "noobject_scale", 1); 76 | l.class_scale = option_find_float(options, "class_scale", 1); 77 | l.bias_match = option_find_int_quiet(options, "bias_match",0); 78 | 79 | char *tree_file = option_find_str(options, "tree", 0); 80 | if (tree_file) 81 | { 82 | l.softmax_tree = read_tree(tree_file); 83 | } 84 | char *map_file = option_find_str(options, "map", 0); 85 | if (map_file) 86 | { 87 | l.map = read_map(map_file); 88 | } 89 | 90 | char *a = option_find_str(options, "anchors", 0); 91 | if(a) 92 | { 93 | int len = strlen(a); 94 | int n = 1; 95 | // 96 | for(int i = 0; i < len; i++){ 97 | if (a[i] == ',') 98 | { 99 | n++; 100 | } 101 | } 102 | for(int i = 0; i < n; i++){ 103 | float bias = atof(a); 104 | l.biases[i] = bias; 105 | a = strchr(a, ',')+1; 106 | } 107 | } 108 | return l; 109 | } 110 | 111 | // parse convolutional layer 112 | convolutional_layer parse_convolutional(list *options, size_params params) 113 | { 114 | int n = option_find_int(options, "filters",1); 115 | int size = option_find_int(options, "size",1); 116 | int stride = option_find_int(options, "stride",1); 117 | int pad = option_find_int_quiet(options, "pad",0); 118 | int padding = option_find_int_quiet(options, "padding",0); 119 | if(pad) padding = size/2; 120 | 121 | char *activation_s = option_find_str(options, "activation", "logistic"); 122 | ACTIVATION activation = get_activation(activation_s); 123 | 124 | int batch,h,w,c; 125 | h = params.h; 126 | w = params.w; 127 | c = params.c; 128 | batch=params.batch; 129 | if(!(h && w && c)) 130 | { 131 | error("Layer before convolutional layer must output image."); 132 | } 133 | int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); 134 | int binary = option_find_int_quiet(options, "binary", 0); 135 | int xnor = option_find_int_quiet(options, "xnor", 0); 136 | 137 | convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,padding,activation, batch_normalize, binary, xnor, params.net.adam); 138 | layer.flipped = option_find_int_quiet(options, "flipped", 0); 139 | layer.dot = option_find_float_quiet(options, "dot", 0); 140 | if(params.net.adam) 141 | { 142 | layer.B1 = params.net.B1; 143 | layer.B2 = params.net.B2; 144 | layer.eps = params.net.eps; 145 | } 146 | 147 | return layer; 148 | } 149 | 150 | // get string type 151 | LAYER_TYPE string_to_layer_type(char * type) 152 | { 153 | 154 | if (strcmp(type, "[shortcut]")==0) return SHORTCUT; 155 | if (strcmp(type, "[crop]")==0) return CROP; 156 | if (strcmp(type, "[cost]")==0) return COST; 157 | if (strcmp(type, "[detection]")==0) return DETECTION; 158 | if (strcmp(type, "[region]")==0) return REGION; 159 | if (strcmp(type, "[local]")==0) return LOCAL; 160 | if (strcmp(type, "[conv]")==0 161 | || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; 162 | if (strcmp(type, "[activation]")==0) return ACTIVE; 163 | if (strcmp(type, "[net]")==0 164 | || strcmp(type, "[network]")==0) return NETWORK; 165 | if (strcmp(type, "[crnn]")==0) return CRNN; 166 | if (strcmp(type, "[gru]")==0) return GRU; 167 | if (strcmp(type, "[rnn]")==0) return RNN; 168 | if (strcmp(type, "[conn]")==0 169 | || strcmp(type, "[connected]")==0) return CONNECTED; 170 | if (strcmp(type, "[max]")==0 171 | || strcmp(type, "[maxpool]")==0) return MAXPOOL; 172 | if (strcmp(type, "[reorg]")==0) return REORG; 173 | if (strcmp(type, "[avg]")==0 174 | || strcmp(type, "[avgpool]")==0) return AVGPOOL; 175 | if (strcmp(type, "[dropout]")==0) return DROPOUT; 176 | if (strcmp(type, "[lrn]")==0 177 | || strcmp(type, "[normalization]")==0) return NORMALIZATION; 178 | if (strcmp(type, "[batchnorm]")==0) return BATCHNORM; 179 | if (strcmp(type, "[soft]")==0 180 | || strcmp(type, "[softmax]")==0) return SOFTMAX; 181 | if (strcmp(type, "[route]")==0) return ROUTE; 182 | return BLANK; 183 | } 184 | 185 | // get larning rate policy 186 | learning_rate_policy get_policy(char *s) 187 | { 188 | if (strcmp(s, "random")==0) return RANDOM; 189 | if (strcmp(s, "poly")==0) return POLY; 190 | if (strcmp(s, "constant")==0) return CONSTANT; 191 | if (strcmp(s, "step")==0) return STEP; 192 | if (strcmp(s, "exp")==0) return EXP; 193 | if (strcmp(s, "sigmoid")==0) return SIG; 194 | if (strcmp(s, "steps")==0) return STEPS; 195 | fprintf(stderr, "Couldn't find policy %s, going with constant\n", s); 196 | 197 | return CONSTANT; 198 | } 199 | 200 | // config parser 201 | void parse_net_options(list *options, network *net) 202 | { 203 | net->batch = option_find_int(options, "batch",1); 204 | net->learning_rate = option_find_float(options, "learning_rate", .001); 205 | net->momentum = option_find_float(options, "momentum", .9); 206 | net->decay = option_find_float(options, "decay", .0001); 207 | int subdivs = option_find_int(options, "subdivisions",1); 208 | net->time_steps = option_find_int_quiet(options, "time_steps",1); 209 | net->batch /= subdivs; 210 | net->batch *= net->time_steps; 211 | net->subdivisions = subdivs; 212 | 213 | net->adam = option_find_int_quiet(options, "adam", 0); 214 | if(net->adam) 215 | { 216 | net->B1 = option_find_float(options, "B1", .9); 217 | net->B2 = option_find_float(options, "B2", .999); 218 | net->eps = option_find_float(options, "eps", .000001); 219 | } 220 | 221 | net->h = option_find_int_quiet(options, "height",0); 222 | net->w = option_find_int_quiet(options, "width",0); 223 | net->c = option_find_int_quiet(options, "channels",0); 224 | net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c); 225 | net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2); 226 | net->min_crop = option_find_int_quiet(options, "min_crop",net->w); 227 | 228 | net->angle = option_find_float_quiet(options, "angle", 0); 229 | net->aspect = option_find_float_quiet(options, "aspect", 1); 230 | net->saturation = option_find_float_quiet(options, "saturation", 1); 231 | net->exposure = option_find_float_quiet(options, "exposure", 1); 232 | net->hue = option_find_float_quiet(options, "hue", 0); 233 | 234 | if(!net->inputs && !(net->h && net->w && net->c)) 235 | { 236 | error("No input parameters supplied"); 237 | } 238 | 239 | char *policy_s = option_find_str(options, "policy", "constant"); 240 | net->policy = get_policy(policy_s); 241 | net->burn_in = option_find_int_quiet(options, "burn_in", 0); 242 | if(net->policy == STEP) 243 | { 244 | net->step = option_find_int(options, "step", 1); 245 | net->scale = option_find_float(options, "scale", 1); 246 | } 247 | else if (net->policy == STEPS) 248 | { 249 | char *l = option_find(options, "steps"); 250 | char *p = option_find(options, "scales"); 251 | if(!l || !p) 252 | { 253 | error("STEPS policy must have steps and scales in cfg file"); 254 | } 255 | 256 | int len = strlen(l); 257 | int n = 1; 258 | // 259 | for(int i = 0; i < len; i++) 260 | { 261 | if (l[i] == ',') 262 | { 263 | n++; 264 | } 265 | } 266 | int *steps = (int *) calloc(n, sizeof(int)); 267 | float *scales = (float *)calloc(n, sizeof(float)); 268 | for(int i = 0; i < n; i++) 269 | { 270 | int step = atoi(l); 271 | float scale = atof(p); 272 | l = strchr(l, ',')+1; 273 | p = strchr(p, ',')+1; 274 | steps[i] = step; 275 | scales[i] = scale; 276 | } 277 | net->scales = scales; 278 | net->steps = steps; 279 | net->num_steps = n; 280 | } 281 | else if (net->policy == EXP) 282 | { 283 | net->gamma = option_find_float(options, "gamma", 1); 284 | } 285 | else if (net->policy == SIG) 286 | { 287 | net->gamma = option_find_float(options, "gamma", 1); 288 | net->step = option_find_int(options, "step", 1); 289 | } 290 | else if (net->policy == POLY || net->policy == RANDOM) 291 | { 292 | net->power = option_find_float(options, "power", 1); 293 | } 294 | net->max_batches = option_find_int(options, "max_batches", 0); 295 | } 296 | 297 | // free section 298 | void free_section(section *s) 299 | { 300 | free(s->type); 301 | node *n = s->options->front; 302 | while(n){ 303 | kvp *pair = (kvp *)n->val; 304 | free(pair->key); 305 | free(pair); 306 | node *next = n->next; 307 | free(n); 308 | n = next; 309 | } 310 | free(s->options); 311 | free(s); 312 | } 313 | 314 | // tiny-yolo.cfg 315 | network parse_network_cfg(char *filename) 316 | { 317 | // read cfg lines into a list 318 | // list: size, *front(start 'node'), *back(end 'node') 319 | // node: val(*'section'), *next, *prev 320 | // section: *type, *option('list') 321 | // list: size, *front(start 'node'), *back(end 'node') 322 | // node: val(*'kvp'), *next, *prev 323 | // kvp: *key, *val, used?(init 0 - unused) 324 | list *sections = read_cfg(filename); 325 | node *n = sections->front; 326 | if(!n) 327 | { 328 | error("Config file has no sections"); 329 | } 330 | // network within net(general setup) + 16 layers(9 conv + 6 maxpool + 1 region) in tiny-yolo 331 | network net = make_network(sections->size - 1); 332 | size_params params; // why define this? 333 | 334 | // traverse the sections in the top list 335 | section *s = (section *)n->val; 336 | list *options = s->options; 337 | //if(!is_network(s)) error("First section must be [net] or [network]"); 338 | parse_net_options(options, &net); 339 | 340 | params.h = net.h; 341 | params.w = net.w; 342 | params.c = net.c; 343 | params.inputs = net.inputs; 344 | params.batch = net.batch; 345 | params.time_steps = net.time_steps; 346 | params.net = net; 347 | 348 | size_t workspace_size = 0; 349 | n = n->next; 350 | int count = 0; 351 | free_section(s); 352 | fprintf(stderr, "layer filters size input output\n"); 353 | while(n) 354 | { 355 | params.index = count; 356 | fprintf(stderr, "%5d ", count); 357 | s = (section *)n->val; 358 | options = s->options; 359 | 360 | layer l; 361 | init_layer(l); 362 | 363 | //printf("\n(1).workspace_size:%d\n;",l.workspace_size); 364 | 365 | LAYER_TYPE lt = string_to_layer_type(s->type); 366 | if(lt == CONVOLUTIONAL) 367 | { 368 | l = parse_convolutional(options, params); 369 | } 370 | else if(lt == MAXPOOL) 371 | { 372 | l = parse_maxpool(options, params); 373 | } 374 | else if(lt == REGION) 375 | { 376 | l = parse_region(options, params); 377 | } 378 | 379 | //printf("\n(2).workspace_size:%d\n;",l.workspace_size); 380 | /*}else if(lt == LOCAL){ 381 | l = parse_local(options, params); 382 | }else if(lt == ACTIVE){ 383 | l = parse_activation(options, params); 384 | }else if(lt == RNN){ 385 | l = parse_rnn(options, params); 386 | }else if(lt == GRU){ 387 | l = parse_gru(options, params); 388 | }else if(lt == CRNN){ 389 | l = parse_crnn(options, params); 390 | }else if(lt == CONNECTED){ 391 | l = parse_connected(options, params); 392 | }else if(lt == CROP){ 393 | l = parse_crop(options, params); 394 | }else if(lt == COST){ 395 | l = parse_cost(options, params); 396 | }else if(lt == REGION){ 397 | l = parse_region(options, params); 398 | }else if(lt == DETECTION){ 399 | l = parse_detection(options, params); 400 | }else if(lt == SOFTMAX){ 401 | l = parse_softmax(options, params); 402 | net.hierarchy = l.softmax_tree; 403 | }else if(lt == NORMALIZATION){ 404 | l = parse_normalization(options, params); 405 | }else if(lt == BATCHNORM){ 406 | l = parse_batchnorm(options, params); 407 | }else if(lt == MAXPOOL){ 408 | l = parse_maxpool(options, params); 409 | }else if(lt == REORG){ 410 | l = parse_reorg(options, params); 411 | }else if(lt == AVGPOOL){ 412 | l = parse_avgpool(options, params); 413 | }else if(lt == ROUTE){ 414 | l = parse_route(options, params, net); 415 | }else if(lt == SHORTCUT){ 416 | l = parse_shortcut(options, params, net); 417 | }else if(lt == DROPOUT){ 418 | l = parse_dropout(options, params); 419 | l.output = net.layers[count-1].output; 420 | l.delta = net.layers[count-1].delta; 421 | }else{ 422 | fprintf(stderr, "Type not recognized: %s\n", s->type); 423 | }*/ 424 | l.dontload = option_find_int_quiet(options, "dontload", 0); 425 | l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); 426 | // check unused kvps 427 | option_unused(options); 428 | net.layers[count] = l; 429 | //printf("n:%d; l.workspace_size: %d;\n",count,l.workspace_size); 430 | if (l.workspace_size > workspace_size) 431 | { 432 | workspace_size = l.workspace_size; 433 | } 434 | free_section(s); 435 | n = n->next; 436 | count++; 437 | if(n) 438 | { 439 | params.h = l.out_h; 440 | params.w = l.out_w; 441 | params.c = l.out_c; 442 | params.inputs = l.outputs; 443 | } 444 | } 445 | free_list(sections); 446 | net.outputs = get_network_output_size(net); // output size 447 | //printf("net.outputs:%d;\n",net.outputs); 448 | net.output = get_network_output(net); // output value??? 449 | if(workspace_size) 450 | { 451 | //printf("workspace_size:%ld;\n", workspace_size); 452 | //?????????? 453 | //net.workspace = (float *)calloc(1, workspace_size); 454 | net.workspace = (float *)calloc(workspace_size,sizeof(float)); 455 | //net.workspace = (float *)sds_alloc(workspace_size * sizeof(float)); 456 | } 457 | return net; 458 | } 459 | 460 | // read configs 461 | list *read_cfg(char *filename) 462 | { 463 | FILE *file = fopen(filename, "r"); 464 | //if(file == 0) file_error(filename); 465 | char *line; 466 | int nu = 0; 467 | list *sections = make_list(); 468 | section *current = 0; 469 | while((line=fgetl(file)) != 0) 470 | { 471 | nu++; 472 | strip(line); 473 | switch(line[0]) 474 | { 475 | case '[': 476 | current = (section *)malloc(sizeof(section)); 477 | list_insert(sections, current); 478 | current->options = make_list(); 479 | current->type = line; 480 | break; 481 | case '\0': 482 | case '#': 483 | case ';': 484 | free(line); 485 | break; 486 | default: 487 | if(!read_option(line, current->options)){ 488 | fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); 489 | free(line); 490 | } 491 | break; 492 | } 493 | } 494 | fclose(file); 495 | return sections; 496 | } 497 | 498 | // laod weights 499 | void load_convolutional_weights(layer l, FILE *fp) 500 | { 501 | if(l.binary) 502 | { 503 | //load_convolutional_weights_binary(l, fp); 504 | //return; 505 | } 506 | int num = l.n*l.c*l.size*l.size; 507 | fread(l.biases, sizeof(float), l.n, fp); 508 | //printf("num:%d; l.n:%d;\n",num,l.n); 509 | if (l.batch_normalize && (!l.dontloadscales)) 510 | { 511 | fread(l.scales, sizeof(float), l.n, fp); 512 | fread(l.rolling_mean, sizeof(float), l.n, fp); 513 | fread(l.rolling_variance, sizeof(float), l.n, fp); 514 | //printf("enter here1\n"); 515 | if(0) 516 | { 517 | // 518 | for(int i = 0; i < l.n; i++) 519 | { 520 | printf("%g, ", l.rolling_mean[i]); 521 | } 522 | printf("\n"); 523 | for(int i = 0; i < l.n; i++) 524 | { 525 | printf("%g, ", l.rolling_variance[i]); 526 | } 527 | printf("\n"); 528 | } 529 | if(0) 530 | { 531 | fill_cpu(l.n, 0, l.rolling_mean, 1); 532 | fill_cpu(l.n, 0, l.rolling_variance, 1); 533 | } 534 | } 535 | fread(l.weights, sizeof(float), num, fp); 536 | //l.adam = 0; 537 | if(l.adam) 538 | { 539 | fread(l.m, sizeof(float), num, fp); 540 | fread(l.v, sizeof(float), num, fp); 541 | //printf("enter here2\n"); 542 | } 543 | //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1); 544 | if (l.flipped) 545 | { 546 | transpose_matrix(l.weights, l.c*l.size*l.size, l.n); 547 | //printf("enter here3\n"); 548 | } 549 | /* 550 | // print weights 551 | for(int j = 300; j < 400; j++) 552 | { 553 | printf("l.weights[%d]:%.12f;\n",j,l.weights[j]); 554 | } 555 | */ 556 | //if (l.binary) binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.weights); 557 | } 558 | 559 | // batchnorm weights 560 | void load_batchnorm_weights(layer l, FILE *fp) 561 | { 562 | fread(l.scales, sizeof(float), l.c, fp); 563 | fread(l.rolling_mean, sizeof(float), l.c, fp); 564 | fread(l.rolling_variance, sizeof(float), l.c, fp); 565 | 566 | } 567 | 568 | // connected weights 569 | void load_connected_weights(layer l, FILE *fp, int transpose) 570 | { 571 | fread(l.biases, sizeof(float), l.outputs, fp); 572 | fread(l.weights, sizeof(float), l.outputs*l.inputs, fp); 573 | if(transpose) 574 | { 575 | transpose_matrix(l.weights, l.inputs, l.outputs); 576 | } 577 | //printf("Biases: %f mean %f variance\n", mean_array(l.biases, l.outputs), variance_array(l.biases, l.outputs)); 578 | //printf("Weights: %f mean %f variance\n", mean_array(l.weights, l.outputs*l.inputs), variance_array(l.weights, l.outputs*l.inputs)); 579 | if (l.batch_normalize && (!l.dontloadscales)) 580 | { 581 | fread(l.scales, sizeof(float), l.outputs, fp); 582 | fread(l.rolling_mean, sizeof(float), l.outputs, fp); 583 | fread(l.rolling_variance, sizeof(float), l.outputs, fp); 584 | //printf("Scales: %f mean %f variance\n", mean_array(l.scales, l.outputs), variance_array(l.scales, l.outputs)); 585 | //printf("rolling_mean: %f mean %f variance\n", mean_array(l.rolling_mean, l.outputs), variance_array(l.rolling_mean, l.outputs)); 586 | //printf("rolling_variance: %f mean %f variance\n", mean_array(l.rolling_variance, l.outputs), variance_array(l.rolling_variance, l.outputs)); 587 | } 588 | 589 | } 590 | 591 | // load weights top function 592 | void load_weights_upto(network *net, char *filename, int cutoff) 593 | { 594 | fprintf(stderr, "Loading weights from %s...", filename); 595 | fflush(stdout); 596 | FILE *fp = fopen(filename, "rb"); 597 | //if(!fp) file_error(filename); 598 | 599 | int major; 600 | int minor; 601 | int revision; 602 | fread(&major, sizeof(int), 1, fp); 603 | fread(&minor, sizeof(int), 1, fp); 604 | fread(&revision, sizeof(int), 1, fp); 605 | fread(net->seen, sizeof(int), 1, fp); 606 | int transpose = (major > 1000) || (minor > 1000); 607 | // 608 | for(int i = 0; i < net->n && i < cutoff; i++){ 609 | layer l = net->layers[i]; 610 | if (l.dontload) continue; 611 | if(l.type == CONVOLUTIONAL) 612 | { 613 | //printf("layer %d: CONVOLUTIONAl;\n",i); 614 | load_convolutional_weights(l, fp); 615 | } 616 | if(l.type == CONNECTED) 617 | { 618 | load_connected_weights(l, fp, transpose); 619 | } 620 | if(l.type == BATCHNORM) 621 | { 622 | load_batchnorm_weights(l, fp); 623 | } 624 | if(l.type == CRNN) 625 | { 626 | load_convolutional_weights(*(l.input_layer), fp); 627 | load_convolutional_weights(*(l.self_layer), fp); 628 | load_convolutional_weights(*(l.output_layer), fp); 629 | } 630 | if(l.type == RNN) 631 | { 632 | load_connected_weights(*(l.input_layer), fp, transpose); 633 | load_connected_weights(*(l.self_layer), fp, transpose); 634 | load_connected_weights(*(l.output_layer), fp, transpose); 635 | } 636 | if(l.type == GRU) 637 | { 638 | load_connected_weights(*(l.input_z_layer), fp, transpose); 639 | load_connected_weights(*(l.input_r_layer), fp, transpose); 640 | load_connected_weights(*(l.input_h_layer), fp, transpose); 641 | load_connected_weights(*(l.state_z_layer), fp, transpose); 642 | load_connected_weights(*(l.state_r_layer), fp, transpose); 643 | load_connected_weights(*(l.state_h_layer), fp, transpose); 644 | } 645 | if(l.type == LOCAL) 646 | { 647 | int locations = l.out_w*l.out_h; 648 | int size = l.size*l.size*l.c*l.n*locations; 649 | fread(l.biases, sizeof(float), l.outputs, fp); 650 | fread(l.weights, sizeof(float), size, fp); 651 | } 652 | } 653 | fprintf(stderr, "Done!\n"); 654 | fclose(fp); 655 | } 656 | 657 | // load weights top function 658 | void load_weights(network *net, char *filename) 659 | { 660 | load_weights_upto(net, filename, net->n); 661 | } 662 | 663 | #endif /* SRC_PARSER_CPP_ */ 664 | -------------------------------------------------------------------------------- /stb_image_write.h: -------------------------------------------------------------------------------- 1 | /* stb_image_write - v0.98 - public domain - http://nothings.org/stb/stb_image_write.h 2 | writes out PNG/BMP/TGA images to C stdio - Sean Barrett 2010 3 | no warranty implied; use at your own risk 4 | 5 | 6 | Before #including, 7 | 8 | #define STB_IMAGE_WRITE_IMPLEMENTATION 9 | 10 | in the file that you want to have the implementation. 11 | 12 | Will probably not work correctly with strict-aliasing optimizations. 13 | 14 | ABOUT: 15 | 16 | This header file is a library for writing images to C stdio. It could be 17 | adapted to write to memory or a general streaming interface; let me know. 18 | 19 | The PNG output is not optimal; it is 20-50% larger than the file 20 | written by a decent optimizing implementation. This library is designed 21 | for source code compactness and simplicitly, not optimal image file size 22 | or run-time performance. 23 | 24 | BUILDING: 25 | 26 | You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. 27 | You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace 28 | malloc,realloc,free. 29 | You can define STBIW_MEMMOVE() to replace memmove() 30 | 31 | USAGE: 32 | 33 | There are four functions, one for each image file format: 34 | 35 | int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); 36 | int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); 37 | int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); 38 | int stbi_write_hdr(char const *filename, int w, int h, int comp, const void *data); 39 | 40 | Each function returns 0 on failure and non-0 on success. 41 | 42 | The functions create an image file defined by the parameters. The image 43 | is a rectangle of pixels stored from left-to-right, top-to-bottom. 44 | Each pixel contains 'comp' channels of data stored interleaved with 8-bits 45 | per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is 46 | monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. 47 | The *data pointer points to the first byte of the top-left-most pixel. 48 | For PNG, "stride_in_bytes" is the distance in bytes from the first byte of 49 | a row of pixels to the first byte of the next row of pixels. 50 | 51 | PNG creates output files with the same number of components as the input. 52 | The BMP format expands Y to RGB in the file format and does not 53 | output alpha. 54 | 55 | PNG supports writing rectangles of data even when the bytes storing rows of 56 | data are not consecutive in memory (e.g. sub-rectangles of a larger image), 57 | by supplying the stride between the beginning of adjacent rows. The other 58 | formats do not. (Thus you cannot write a native-format BMP through the BMP 59 | writer, both because it is in BGR order and because it may have padding 60 | at the end of the line.) 61 | 62 | HDR expects linear float data. Since the format is always 32-bit rgb(e) 63 | data, alpha (if provided) is discarded, and for monochrome data it is 64 | replicated across all three channels. 65 | 66 | CREDITS: 67 | 68 | PNG/BMP/TGA 69 | Sean Barrett 70 | HDR 71 | Baldur Karlsson 72 | TGA monochrome: 73 | Jean-Sebastien Guay 74 | misc enhancements: 75 | Tim Kelsey 76 | bugfixes: 77 | github:Chribba 78 | */ 79 | 80 | #ifndef INCLUDE_STB_IMAGE_WRITE_H 81 | #define INCLUDE_STB_IMAGE_WRITE_H 82 | 83 | #ifdef __cplusplus 84 | extern "C" { 85 | #endif 86 | 87 | extern int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); 88 | extern int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); 89 | extern int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); 90 | extern int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); 91 | 92 | #ifdef __cplusplus 93 | } 94 | #endif 95 | 96 | #endif//INCLUDE_STB_IMAGE_WRITE_H 97 | 98 | #ifdef STB_IMAGE_WRITE_IMPLEMENTATION 99 | 100 | #include 101 | #include 102 | #include 103 | #include 104 | #include 105 | 106 | #if defined(STBIW_MALLOC) && defined(STBIW_FREE) && defined(STBIW_REALLOC) 107 | // ok 108 | #elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) 109 | // ok 110 | #else 111 | #error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC." 112 | #endif 113 | 114 | #ifndef STBIW_MALLOC 115 | #define STBIW_MALLOC(sz) malloc(sz) 116 | #define STBIW_REALLOC(p,sz) realloc(p,sz) 117 | #define STBIW_FREE(p) free(p) 118 | #endif 119 | #ifndef STBIW_MEMMOVE 120 | #define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) 121 | #endif 122 | 123 | 124 | #ifndef STBIW_ASSERT 125 | #include 126 | #define STBIW_ASSERT(x) assert(x) 127 | #endif 128 | 129 | typedef unsigned int stbiw_uint32; 130 | typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1]; 131 | 132 | static void writefv(FILE *f, const char *fmt, va_list v) 133 | { 134 | while (*fmt) { 135 | switch (*fmt++) { 136 | case ' ': break; 137 | case '1': { unsigned char x = (unsigned char) va_arg(v, int); fputc(x,f); break; } 138 | case '2': { int x = va_arg(v,int); unsigned char b[2]; 139 | b[0] = (unsigned char) x; b[1] = (unsigned char) (x>>8); 140 | fwrite(b,2,1,f); break; } 141 | case '4': { stbiw_uint32 x = va_arg(v,int); unsigned char b[4]; 142 | b[0]=(unsigned char)x; b[1]=(unsigned char)(x>>8); 143 | b[2]=(unsigned char)(x>>16); b[3]=(unsigned char)(x>>24); 144 | fwrite(b,4,1,f); break; } 145 | default: 146 | STBIW_ASSERT(0); 147 | return; 148 | } 149 | } 150 | } 151 | 152 | static void write3(FILE *f, unsigned char a, unsigned char b, unsigned char c) 153 | { 154 | unsigned char arr[3]; 155 | arr[0] = a, arr[1] = b, arr[2] = c; 156 | fwrite(arr, 3, 1, f); 157 | } 158 | 159 | static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) 160 | { 161 | unsigned char bg[3] = { 255, 0, 255}, px[3]; 162 | stbiw_uint32 zero = 0; 163 | int i,j,k, j_end; 164 | 165 | if (y <= 0) 166 | return; 167 | 168 | if (vdir < 0) 169 | j_end = -1, j = y-1; 170 | else 171 | j_end = y, j = 0; 172 | 173 | for (; j != j_end; j += vdir) { 174 | for (i=0; i < x; ++i) { 175 | unsigned char *d = (unsigned char *) data + (j*x+i)*comp; 176 | if (write_alpha < 0) 177 | fwrite(&d[comp-1], 1, 1, f); 178 | switch (comp) { 179 | case 1: fwrite(d, 1, 1, f); 180 | break; 181 | case 2: if (expand_mono) 182 | write3(f, d[0],d[0],d[0]); // monochrome bmp 183 | else 184 | fwrite(d, 1, 1, f); // monochrome TGA 185 | break; 186 | case 4: 187 | if (!write_alpha) { 188 | // composite against pink background 189 | for (k=0; k < 3; ++k) 190 | px[k] = bg[k] + ((d[k] - bg[k]) * d[3])/255; 191 | write3(f, px[1-rgb_dir],px[1],px[1+rgb_dir]); 192 | break; 193 | } 194 | /* FALLTHROUGH */ 195 | case 3: 196 | write3(f, d[1-rgb_dir],d[1],d[1+rgb_dir]); 197 | break; 198 | } 199 | if (write_alpha > 0) 200 | fwrite(&d[comp-1], 1, 1, f); 201 | } 202 | fwrite(&zero,scanline_pad,1,f); 203 | } 204 | } 205 | 206 | static int outfile(char const *filename, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) 207 | { 208 | FILE *f; 209 | if (y < 0 || x < 0) return 0; 210 | f = fopen(filename, "wb"); 211 | if (f) { 212 | va_list v; 213 | va_start(v, fmt); 214 | writefv(f, fmt, v); 215 | va_end(v); 216 | write_pixels(f,rgb_dir,vdir,x,y,comp,data,alpha,pad,expand_mono); 217 | fclose(f); 218 | } 219 | return f != NULL; 220 | } 221 | 222 | int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) 223 | { 224 | int pad = (-x*3) & 3; 225 | return outfile(filename,-1,-1,x,y,comp,1,(void *) data,0,pad, 226 | "11 4 22 4" "4 44 22 444444", 227 | 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header 228 | 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header 229 | } 230 | 231 | int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data) 232 | { 233 | int has_alpha = (comp == 2 || comp == 4); 234 | int colorbytes = has_alpha ? comp-1 : comp; 235 | int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 236 | return outfile(filename, -1,-1, x, y, comp, 0, (void *) data, has_alpha, 0, 237 | "111 221 2222 11", 0,0,format, 0,0,0, 0,0,x,y, (colorbytes+has_alpha)*8, has_alpha*8); 238 | } 239 | 240 | // ************************************************************************************************* 241 | // Radiance RGBE HDR writer 242 | // by Baldur Karlsson 243 | #define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) 244 | 245 | void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) 246 | { 247 | int exponent; 248 | float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); 249 | 250 | if (maxcomp < 1e-32) { 251 | rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; 252 | } else { 253 | float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; 254 | 255 | rgbe[0] = (unsigned char)(linear[0] * normalize); 256 | rgbe[1] = (unsigned char)(linear[1] * normalize); 257 | rgbe[2] = (unsigned char)(linear[2] * normalize); 258 | rgbe[3] = (unsigned char)(exponent + 128); 259 | } 260 | } 261 | 262 | void stbiw__write_run_data(FILE *f, int length, unsigned char databyte) 263 | { 264 | unsigned char lengthbyte = (unsigned char) (length+128); 265 | STBIW_ASSERT(length+128 <= 255); 266 | fwrite(&lengthbyte, 1, 1, f); 267 | fwrite(&databyte, 1, 1, f); 268 | } 269 | 270 | void stbiw__write_dump_data(FILE *f, int length, unsigned char *data) 271 | { 272 | unsigned char lengthbyte = (unsigned char )(length & 0xff); 273 | STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code 274 | fwrite(&lengthbyte, 1, 1, f); 275 | fwrite(data, length, 1, f); 276 | } 277 | 278 | void stbiw__write_hdr_scanline(FILE *f, int width, int comp, unsigned char *scratch, const float *scanline) 279 | { 280 | unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; 281 | unsigned char rgbe[4]; 282 | float linear[3] = {0}; 283 | int x; 284 | 285 | scanlineheader[2] = (width&0xff00)>>8; 286 | scanlineheader[3] = (width&0x00ff); 287 | 288 | /* skip RLE for images too small or large */ 289 | if (width < 8 || width >= 32768) { 290 | for (x=0; x < width; x++) { 291 | switch (comp) { 292 | case 4: /* fallthrough */ 293 | case 3: linear[2] = scanline[x*comp + 2]; 294 | linear[1] = scanline[x*comp + 1]; 295 | linear[0] = scanline[x*comp + 0]; 296 | break; 297 | case 2: /* fallthrough */ 298 | case 1: linear[0] = linear[1] = linear[2] = scanline[x*comp + 0]; 299 | break; 300 | } 301 | stbiw__linear_to_rgbe(rgbe, linear); 302 | fwrite(rgbe, 4, 1, f); 303 | } 304 | } else { 305 | int c,r; 306 | /* encode into scratch buffer */ 307 | for (x=0; x < width; x++) { 308 | switch(comp) { 309 | case 4: /* fallthrough */ 310 | case 3: linear[2] = scanline[x*comp + 2]; 311 | linear[1] = scanline[x*comp + 1]; 312 | linear[0] = scanline[x*comp + 0]; 313 | break; 314 | case 2: /* fallthrough */ 315 | case 1: linear[0] = linear[1] = linear[2] = scanline[x*comp + 0]; 316 | break; 317 | } 318 | stbiw__linear_to_rgbe(rgbe, linear); 319 | scratch[x + width*0] = rgbe[0]; 320 | scratch[x + width*1] = rgbe[1]; 321 | scratch[x + width*2] = rgbe[2]; 322 | scratch[x + width*3] = rgbe[3]; 323 | } 324 | 325 | fwrite(scanlineheader, 4, 1, f); 326 | 327 | /* RLE each component separately */ 328 | for (c=0; c < 4; c++) { 329 | unsigned char *comp = &scratch[width*c]; 330 | 331 | x = 0; 332 | while (x < width) { 333 | // find first run 334 | r = x; 335 | while (r+2 < width) { 336 | if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) 337 | break; 338 | ++r; 339 | } 340 | if (r+2 >= width) 341 | r = width; 342 | // dump up to first run 343 | while (x < r) { 344 | int len = r-x; 345 | if (len > 128) len = 128; 346 | stbiw__write_dump_data(f, len, &comp[x]); 347 | x += len; 348 | } 349 | // if there's a run, output it 350 | if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd 351 | // find next byte after run 352 | while (r < width && comp[r] == comp[x]) 353 | ++r; 354 | // output run up to r 355 | while (x < r) { 356 | int len = r-x; 357 | if (len > 127) len = 127; 358 | stbiw__write_run_data(f, len, comp[x]); 359 | x += len; 360 | } 361 | } 362 | } 363 | } 364 | } 365 | } 366 | 367 | int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) 368 | { 369 | int i; 370 | FILE *f; 371 | if (y <= 0 || x <= 0 || data == NULL) return 0; 372 | f = fopen(filename, "wb"); 373 | if (f) { 374 | /* Each component is stored separately. Allocate scratch space for full output scanline. */ 375 | unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); 376 | fprintf(f, "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n" ); 377 | fprintf(f, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n" , y, x); 378 | for(i=0; i < y; i++) 379 | stbiw__write_hdr_scanline(f, x, comp, scratch, data + comp*i*x); 380 | STBIW_FREE(scratch); 381 | fclose(f); 382 | } 383 | return f != NULL; 384 | } 385 | 386 | ///////////////////////////////////////////////////////// 387 | // PNG 388 | 389 | // stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() 390 | #define stbiw__sbraw(a) ((int *) (a) - 2) 391 | #define stbiw__sbm(a) stbiw__sbraw(a)[0] 392 | #define stbiw__sbn(a) stbiw__sbraw(a)[1] 393 | 394 | #define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) 395 | #define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) 396 | #define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) 397 | 398 | #define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) 399 | #define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0) 400 | #define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) 401 | 402 | static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) 403 | { 404 | int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; 405 | void *p = STBIW_REALLOC(*arr ? stbiw__sbraw(*arr) : 0, itemsize * m + sizeof(int)*2); 406 | STBIW_ASSERT(p); 407 | if (p) { 408 | if (!*arr) ((int *) p)[1] = 0; 409 | *arr = (void *) ((int *) p + 2); 410 | stbiw__sbm(*arr) = m; 411 | } 412 | return *arr; 413 | } 414 | 415 | static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) 416 | { 417 | while (*bitcount >= 8) { 418 | stbiw__sbpush(data, (unsigned char) *bitbuffer); 419 | *bitbuffer >>= 8; 420 | *bitcount -= 8; 421 | } 422 | return data; 423 | } 424 | 425 | static int stbiw__zlib_bitrev(int code, int codebits) 426 | { 427 | int res=0; 428 | while (codebits--) { 429 | res = (res << 1) | (code & 1); 430 | code >>= 1; 431 | } 432 | return res; 433 | } 434 | 435 | static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) 436 | { 437 | int i; 438 | for (i=0; i < limit && i < 258; ++i) 439 | if (a[i] != b[i]) break; 440 | return i; 441 | } 442 | 443 | static unsigned int stbiw__zhash(unsigned char *data) 444 | { 445 | stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); 446 | hash ^= hash << 3; 447 | hash += hash >> 5; 448 | hash ^= hash << 4; 449 | hash += hash >> 17; 450 | hash ^= hash << 25; 451 | hash += hash >> 6; 452 | return hash; 453 | } 454 | 455 | #define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) 456 | #define stbiw__zlib_add(code,codebits) \ 457 | (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) 458 | #define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) 459 | // default huffman tables 460 | #define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) 461 | #define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) 462 | #define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) 463 | #define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) 464 | #define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) 465 | #define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) 466 | 467 | #define stbiw__ZHASH 16384 468 | 469 | unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) 470 | { 471 | static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; 472 | static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; 473 | static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; 474 | static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; 475 | unsigned int bitbuf=0; 476 | int i,j, bitcount=0; 477 | unsigned char *out = NULL; 478 | unsigned char **hash_table[stbiw__ZHASH]; // 64KB on the stack! 479 | if (quality < 5) quality = 5; 480 | 481 | stbiw__sbpush(out, 0x78); // DEFLATE 32K window 482 | stbiw__sbpush(out, 0x5e); // FLEVEL = 1 483 | stbiw__zlib_add(1,1); // BFINAL = 1 484 | stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman 485 | 486 | for (i=0; i < stbiw__ZHASH; ++i) 487 | hash_table[i] = NULL; 488 | 489 | i=0; 490 | while (i < data_len-3) { 491 | // hash next 3 bytes of data to be compressed 492 | int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; 493 | unsigned char *bestloc = 0; 494 | unsigned char **hlist = hash_table[h]; 495 | int n = stbiw__sbcount(hlist); 496 | for (j=0; j < n; ++j) { 497 | if (hlist[j]-data > i-32768) { // if entry lies within window 498 | int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); 499 | if (d >= best) best=d,bestloc=hlist[j]; 500 | } 501 | } 502 | // when hash table entry is too long, delete half the entries 503 | if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { 504 | STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); 505 | stbiw__sbn(hash_table[h]) = quality; 506 | } 507 | stbiw__sbpush(hash_table[h],data+i); 508 | 509 | if (bestloc) { 510 | // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal 511 | h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); 512 | hlist = hash_table[h]; 513 | n = stbiw__sbcount(hlist); 514 | for (j=0; j < n; ++j) { 515 | if (hlist[j]-data > i-32767) { 516 | int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); 517 | if (e > best) { // if next match is better, bail on current match 518 | bestloc = NULL; 519 | break; 520 | } 521 | } 522 | } 523 | } 524 | 525 | if (bestloc) { 526 | int d = (int) (data+i - bestloc); // distance back 527 | STBIW_ASSERT(d <= 32767 && best <= 258); 528 | for (j=0; best > lengthc[j+1]-1; ++j); 529 | stbiw__zlib_huff(j+257); 530 | if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); 531 | for (j=0; d > distc[j+1]-1; ++j); 532 | stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); 533 | if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); 534 | i += best; 535 | } else { 536 | stbiw__zlib_huffb(data[i]); 537 | ++i; 538 | } 539 | } 540 | // write out final bytes 541 | for (;i < data_len; ++i) 542 | stbiw__zlib_huffb(data[i]); 543 | stbiw__zlib_huff(256); // end of block 544 | // pad with 0 bits to byte boundary 545 | while (bitcount) 546 | stbiw__zlib_add(0,1); 547 | 548 | for (i=0; i < stbiw__ZHASH; ++i) 549 | (void) stbiw__sbfree(hash_table[i]); 550 | 551 | { 552 | // compute adler32 on input 553 | unsigned int i=0, s1=1, s2=0, blocklen = data_len % 5552; 554 | int j=0; 555 | while (j < data_len) { 556 | for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1; 557 | s1 %= 65521, s2 %= 65521; 558 | j += blocklen; 559 | blocklen = 5552; 560 | } 561 | stbiw__sbpush(out, (unsigned char) (s2 >> 8)); 562 | stbiw__sbpush(out, (unsigned char) s2); 563 | stbiw__sbpush(out, (unsigned char) (s1 >> 8)); 564 | stbiw__sbpush(out, (unsigned char) s1); 565 | } 566 | *out_len = stbiw__sbn(out); 567 | // make returned pointer freeable 568 | STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); 569 | return (unsigned char *) stbiw__sbraw(out); 570 | } 571 | 572 | unsigned int stbiw__crc32(unsigned char *buffer, int len) 573 | { 574 | static unsigned int crc_table[256]; 575 | unsigned int crc = ~0u; 576 | int i,j; 577 | if (crc_table[1] == 0) 578 | for(i=0; i < 256; i++) 579 | for (crc_table[i]=i, j=0; j < 8; ++j) 580 | crc_table[i] = (crc_table[i] >> 1) ^ (crc_table[i] & 1 ? 0xedb88320 : 0); 581 | for (i=0; i < len; ++i) 582 | crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; 583 | return ~crc; 584 | } 585 | 586 | #define stbiw__wpng4(o,a,b,c,d) ((o)[0]=(unsigned char)(a),(o)[1]=(unsigned char)(b),(o)[2]=(unsigned char)(c),(o)[3]=(unsigned char)(d),(o)+=4) 587 | #define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); 588 | #define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) 589 | 590 | static void stbiw__wpcrc(unsigned char **data, int len) 591 | { 592 | unsigned int crc = stbiw__crc32(*data - len - 4, len+4); 593 | stbiw__wp32(*data, crc); 594 | } 595 | 596 | static unsigned char stbiw__paeth(int a, int b, int c) 597 | { 598 | int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); 599 | if (pa <= pb && pa <= pc) return (unsigned char) a; 600 | if (pb <= pc) return (unsigned char) b; 601 | return (unsigned char) c; 602 | } 603 | 604 | unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) 605 | { 606 | int ctype[5] = { -1, 0, 4, 2, 6 }; 607 | unsigned char sig[8] = { 137,80,78,71,13,10,26,10 }; 608 | unsigned char *out,*o, *filt, *zlib; 609 | signed char *line_buffer; 610 | int i,j,k,p,zlen; 611 | 612 | if (stride_bytes == 0) 613 | stride_bytes = x * n; 614 | 615 | filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; 616 | line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } 617 | for (j=0; j < y; ++j) { 618 | static int mapping[] = { 0,1,2,3,4 }; 619 | static int firstmap[] = { 0,1,0,5,6 }; 620 | int *mymap = j ? mapping : firstmap; 621 | int best = 0, bestval = 0x7fffffff; 622 | for (p=0; p < 2; ++p) { 623 | for (k= p?best:0; k < 5; ++k) { 624 | int type = mymap[k],est=0; 625 | unsigned char *z = pixels + stride_bytes*j; 626 | for (i=0; i < n; ++i) 627 | switch (type) { 628 | case 0: line_buffer[i] = z[i]; break; 629 | case 1: line_buffer[i] = z[i]; break; 630 | case 2: line_buffer[i] = z[i] - z[i-stride_bytes]; break; 631 | case 3: line_buffer[i] = z[i] - (z[i-stride_bytes]>>1); break; 632 | case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-stride_bytes],0)); break; 633 | case 5: line_buffer[i] = z[i]; break; 634 | case 6: line_buffer[i] = z[i]; break; 635 | } 636 | for (i=n; i < x*n; ++i) { 637 | switch (type) { 638 | case 0: line_buffer[i] = z[i]; break; 639 | case 1: line_buffer[i] = z[i] - z[i-n]; break; 640 | case 2: line_buffer[i] = z[i] - z[i-stride_bytes]; break; 641 | case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-stride_bytes])>>1); break; 642 | case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-stride_bytes], z[i-stride_bytes-n]); break; 643 | case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break; 644 | case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; 645 | } 646 | } 647 | if (p) break; 648 | for (i=0; i < x*n; ++i) 649 | est += abs((signed char) line_buffer[i]); 650 | if (est < bestval) { bestval = est; best = k; } 651 | } 652 | } 653 | // when we get here, best contains the filter type, and line_buffer contains the data 654 | filt[j*(x*n+1)] = (unsigned char) best; 655 | STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); 656 | } 657 | STBIW_FREE(line_buffer); 658 | zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, 8); // increase 8 to get smaller but use more memory 659 | STBIW_FREE(filt); 660 | if (!zlib) return 0; 661 | 662 | // each tag requires 12 bytes of overhead 663 | out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); 664 | if (!out) return 0; 665 | *out_len = 8 + 12+13 + 12+zlen + 12; 666 | 667 | o=out; 668 | STBIW_MEMMOVE(o,sig,8); o+= 8; 669 | stbiw__wp32(o, 13); // header length 670 | stbiw__wptag(o, "IHDR"); 671 | stbiw__wp32(o, x); 672 | stbiw__wp32(o, y); 673 | *o++ = 8; 674 | *o++ = (unsigned char) ctype[n]; 675 | *o++ = 0; 676 | *o++ = 0; 677 | *o++ = 0; 678 | stbiw__wpcrc(&o,13); 679 | 680 | stbiw__wp32(o, zlen); 681 | stbiw__wptag(o, "IDAT"); 682 | STBIW_MEMMOVE(o, zlib, zlen); 683 | o += zlen; 684 | STBIW_FREE(zlib); 685 | stbiw__wpcrc(&o, zlen); 686 | 687 | stbiw__wp32(o,0); 688 | stbiw__wptag(o, "IEND"); 689 | stbiw__wpcrc(&o,0); 690 | 691 | STBIW_ASSERT(o == out + *out_len); 692 | 693 | return out; 694 | } 695 | 696 | int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) 697 | { 698 | FILE *f; 699 | int len; 700 | unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); 701 | if (!png) return 0; 702 | f = fopen(filename, "wb"); 703 | if (!f) { STBIW_FREE(png); return 0; } 704 | fwrite(png, 1, len, f); 705 | fclose(f); 706 | STBIW_FREE(png); 707 | return 1; 708 | } 709 | #endif // STB_IMAGE_WRITE_IMPLEMENTATION 710 | 711 | /* Revision history 712 | 0.98 (2015-04-08) 713 | added STBIW_MALLOC, STBIW_ASSERT etc 714 | 0.97 (2015-01-18) 715 | fixed HDR asserts, rewrote HDR rle logic 716 | 0.96 (2015-01-17) 717 | add HDR output 718 | fix monochrome BMP 719 | 0.95 (2014-08-17) 720 | add monochrome TGA output 721 | 0.94 (2014-05-31) 722 | rename private functions to avoid conflicts with stb_image.h 723 | 0.93 (2014-05-27) 724 | warning fixes 725 | 0.92 (2010-08-01) 726 | casts to unsigned char to fix warnings 727 | 0.91 (2010-07-17) 728 | first public release 729 | 0.90 first internal release 730 | */ 731 | --------------------------------------------------------------------------------